From a197c5d4f2d8bd94a9056afb1784a8dfc11e947c Mon Sep 17 00:00:00 2001 From: Veeupup Date: Mon, 2 May 2022 20:52:41 +0800 Subject: [PATCH 01/15] datatype serialization with format_settings Signed-off-by: Veeupup --- .../src/types/serializations/array.rs | 22 +++++++++--- .../src/types/serializations/boolean.rs | 22 +++++++++--- .../src/types/serializations/date.rs | 18 +++++++--- .../src/types/serializations/mod.rs | 22 +++++++++--- .../src/types/serializations/null.rs | 16 +++++++-- .../src/types/serializations/nullable.rs | 26 ++++++++++---- .../src/types/serializations/number.rs | 21 ++++++++--- .../src/types/serializations/string.rs | 20 +++++++++-- .../src/types/serializations/struct_.rs | 35 +++++++++++-------- .../src/types/serializations/timestamp.rs | 18 ++++++++-- .../src/types/serializations/variant.rs | 20 +++++++++-- .../tests/it/types/serializations.rs | 8 +++-- common/io/src/format_settings.rs | 2 ++ 13 files changed, 192 insertions(+), 58 deletions(-) diff --git a/common/datavalues/src/types/serializations/array.rs b/common/datavalues/src/types/serializations/array.rs index 417985d510cf1..d6043072eb32f 100644 --- a/common/datavalues/src/types/serializations/array.rs +++ b/common/datavalues/src/types/serializations/array.rs @@ -12,8 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::Arc; + use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use serde_json::Value; use crate::prelude::*; @@ -25,7 +28,7 @@ pub struct ArraySerializer { } impl TypeSerializer for ArraySerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, format: Arc) -> Result { if let DataValue::Array(vals) = value { let mut res = String::new(); res.push('['); @@ -37,7 +40,7 @@ impl TypeSerializer for ArraySerializer { } first = false; - let s = self.inner.serialize_value(val)?; + let s = self.inner.serialize_value(val, format.clone())?; if quoted { res.push_str(&format!("'{}'", s)); } else { @@ -51,24 +54,33 @@ impl TypeSerializer for ArraySerializer { } } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + format: Arc, + ) -> Result> { let column: &ArrayColumn = Series::check_get(column)?; let mut result = Vec::with_capacity(column.len()); for i in 0..column.len() { let val = column.get(i); - let s = self.serialize_value(&val)?; + let s = self.serialize_value(&val, format.clone())?; result.push(s); } Ok(result) } - fn serialize_json(&self, _column: &ColumnRef) -> Result> { + fn serialize_json( + &self, + _column: &ColumnRef, + _format: Arc, + ) -> Result> { todo!() } fn serialize_clickhouse_format( &self, _column: &ColumnRef, + _format: Arc, ) -> Result { todo!() } diff --git a/common/datavalues/src/types/serializations/boolean.rs b/common/datavalues/src/types/serializations/boolean.rs index df4feae432927..846f78f245d64 100644 --- a/common/datavalues/src/types/serializations/boolean.rs +++ b/common/datavalues/src/types/serializations/boolean.rs @@ -12,9 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::sync::Arc; + use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use opensrv_clickhouse::types::column::ArcColumnWrapper; use opensrv_clickhouse::types::column::ColumnFrom; use serde_json::Value; @@ -28,7 +31,7 @@ const TRUE_STR: &str = "1"; const FALSE_STR: &str = "0"; impl TypeSerializer for BooleanSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, _format: Arc) -> Result { if let DataValue::Boolean(x) = value { if *x { Ok(TRUE_STR.to_owned()) @@ -40,7 +43,11 @@ impl TypeSerializer for BooleanSerializer { } } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let array: &BooleanColumn = Series::check_get(column)?; let result: Vec = array @@ -56,7 +63,11 @@ impl TypeSerializer for BooleanSerializer { Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let array: &BooleanColumn = Series::check_get(column)?; let result: Vec = array .iter() @@ -68,6 +79,7 @@ impl TypeSerializer for BooleanSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: Arc, ) -> Result { let col: &BooleanColumn = Series::check_get(column)?; let values: Vec = col.iter().map(|c| c as u8).collect(); @@ -78,13 +90,15 @@ impl TypeSerializer for BooleanSerializer { &self, column: &ColumnRef, _valids: Option<&Bitmap>, + format: Arc, ) -> Result> { - self.serialize_json(column) + self.serialize_json(column, format) } fn serialize_json_object_suppress_error( &self, column: &ColumnRef, + _format: Arc, ) -> Result>> { let column: &BooleanColumn = Series::check_get(column)?; let result: Vec> = column diff --git a/common/datavalues/src/types/serializations/date.rs b/common/datavalues/src/types/serializations/date.rs index b926cd679c6f3..be2f4fd8377a9 100644 --- a/common/datavalues/src/types/serializations/date.rs +++ b/common/datavalues/src/types/serializations/date.rs @@ -11,15 +11,16 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
- use std::marker::PhantomData; use std::ops::AddAssign; +use std::sync::Arc; use chrono::Date; use chrono::Duration; use chrono::NaiveDate; use chrono_tz::Tz; use common_exception::*; +use common_io::prelude::FormatSettings; use num::cast::AsPrimitive; use opensrv_clickhouse::types::column::ArcColumnWrapper; use opensrv_clickhouse::types::column::ColumnFrom; @@ -43,14 +44,18 @@ impl> Default for DateSerializer { const DATE_FMT: &str = "%Y-%m-%d"; impl> TypeSerializer for DateSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, _format: Arc) -> Result { let mut date = NaiveDate::from_ymd(1970, 1, 1); let d = Duration::days(value.as_i64()?); date.add_assign(d); Ok(date.format(DATE_FMT).to_string()) } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column @@ -65,7 +70,11 @@ impl> TypeSerializer for DateSerializer { Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let array: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = array .iter() @@ -83,6 +92,7 @@ impl> TypeSerializer for DateSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: Arc, ) -> Result { let array: &PrimitiveColumn = Series::check_get(column)?; let tz: Tz = "UTC".parse().unwrap(); diff --git a/common/datavalues/src/types/serializations/mod.rs b/common/datavalues/src/types/serializations/mod.rs index aa52b456d8408..1bf813c57dfdb 100644 --- a/common/datavalues/src/types/serializations/mod.rs +++ b/common/datavalues/src/types/serializations/mod.rs @@ -12,9 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::sync::Arc; + use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use enum_dispatch::enum_dispatch; use opensrv_clickhouse::types::column::ArcColumnData; use serde_json::Value; @@ -44,15 +47,25 @@ pub use variant::*; #[enum_dispatch] pub trait TypeSerializer: Send + Sync { - fn serialize_value(&self, value: &DataValue) -> Result; - fn serialize_json(&self, column: &ColumnRef) -> Result>; - fn serialize_column(&self, column: &ColumnRef) -> Result>; - fn serialize_clickhouse_format(&self, column: &ColumnRef) -> Result; + fn serialize_value(&self, value: &DataValue, format: Arc) -> Result; + fn serialize_json(&self, column: &ColumnRef, format: Arc) + -> Result>; + fn serialize_column( + &self, + column: &ColumnRef, + format: Arc, + ) -> Result>; + fn serialize_clickhouse_format( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result; fn serialize_json_object( &self, _column: &ColumnRef, _valids: Option<&Bitmap>, + _format: Arc, ) -> Result> { Err(ErrorCode::BadDataValueType( "Error parsing JSON: unsupported data type", @@ -62,6 +75,7 @@ pub trait TypeSerializer: Send + Sync { fn serialize_json_object_suppress_error( &self, _column: &ColumnRef, + _format: Arc, ) -> Result>> { Err(ErrorCode::BadDataValueType( "Error parsing JSON: unsupported data type", diff --git a/common/datavalues/src/types/serializations/null.rs b/common/datavalues/src/types/serializations/null.rs index c096b04ad5775..351516467c31e 100644 --- a/common/datavalues/src/types/serializations/null.rs +++ b/common/datavalues/src/types/serializations/null.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use common_exception::Result; +use common_io::prelude::FormatSettings; use opensrv_clickhouse::types::column::ArcColumnWrapper; use opensrv_clickhouse::types::column::ColumnFrom; use opensrv_clickhouse::types::column::NullableColumnData; @@ -30,16 +31,24 @@ pub struct NullSerializer {} const NULL_STR: &str = "NULL"; impl TypeSerializer for NullSerializer { - fn serialize_value(&self, _value: &DataValue) -> Result { + fn serialize_value(&self, _value: &DataValue, _format: Arc) -> Result { Ok(NULL_STR.to_owned()) } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let result: Vec = vec![NULL_STR.to_owned(); column.len()]; Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let null = Value::Null; let result: Vec = vec![null; column.len()]; Ok(result) @@ -48,6 +57,7 @@ impl TypeSerializer for NullSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: Arc, ) -> Result { let nulls = vec![1u8; column.len()]; let inner = Vec::column_from::(vec![1u8; column.len()]); diff --git a/common/datavalues/src/types/serializations/nullable.rs b/common/datavalues/src/types/serializations/nullable.rs index bb7c0bdd5ea47..3545b7808cfa8 100644 --- a/common/datavalues/src/types/serializations/nullable.rs +++ b/common/datavalues/src/types/serializations/nullable.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use common_exception::Result; +use common_io::prelude::FormatSettings; use opensrv_clickhouse::types::column::NullableColumnData; use serde_json::Value; @@ -32,18 +33,22 @@ pub struct NullableSerializer { } impl TypeSerializer for NullableSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn 
serialize_value(&self, value: &DataValue, format: Arc) -> Result { if value.is_null() { Ok("NULL".to_owned()) } else { - self.inner.serialize_value(value) + self.inner.serialize_value(value, format) } } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + format: Arc, + ) -> Result> { let column: &NullableColumn = Series::check_get(column)?; let rows = column.len(); - let mut res = self.inner.serialize_column(column.inner())?; + let mut res = self.inner.serialize_column(column.inner(), format)?; (0..rows).for_each(|row| { if column.null_at(row) { @@ -53,10 +58,14 @@ impl TypeSerializer for NullableSerializer { Ok(res) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json( + &self, + column: &ColumnRef, + format: Arc, + ) -> Result> { let column: &NullableColumn = Series::check_get(column)?; let rows = column.len(); - let mut res = self.inner.serialize_json(column.inner())?; + let mut res = self.inner.serialize_json(column.inner(), format)?; (0..rows).for_each(|row| { if column.null_at(row) { @@ -69,9 +78,12 @@ impl TypeSerializer for NullableSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + format: Arc, ) -> Result { let column: &NullableColumn = Series::check_get(column)?; - let inner = self.inner.serialize_clickhouse_format(column.inner())?; + let inner = self + .inner + .serialize_clickhouse_format(column.inner(), format)?; let nulls = column.ensure_validity().iter().map(|v| !v as u8).collect(); let data = NullableColumnData { nulls, inner }; diff --git a/common/datavalues/src/types/serializations/number.rs b/common/datavalues/src/types/serializations/number.rs index de37347f62d2c..e0a0a2bfa566c 100644 --- a/common/datavalues/src/types/serializations/number.rs +++ b/common/datavalues/src/types/serializations/number.rs @@ -13,9 +13,11 @@ // limitations under the License. 
use std::marker::PhantomData; +use std::sync::Arc; use common_arrow::arrow::bitmap::Bitmap; use common_exception::Result; +use common_io::prelude::FormatSettings; use common_io::prelude::Marshal; use common_io::prelude::Unmarshal; use opensrv_clickhouse::types::column::ArcColumnWrapper; @@ -49,17 +51,25 @@ where T: PrimitiveType + opensrv_clickhouse::io::Marshal + opensrv_clickhouse::io::Unmarshal { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, _format: Arc) -> Result { Ok(format!("{:?}", value)) } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column.iter().map(|x| format!("{}", x)).collect(); Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column .iter() @@ -71,6 +81,7 @@ where T: PrimitiveType fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: Arc, ) -> Result { let col: &PrimitiveColumn = Series::check_get(column)?; let values: Vec = col.iter().map(|c| c.to_owned()).collect(); @@ -81,13 +92,15 @@ where T: PrimitiveType &self, column: &ColumnRef, _valids: Option<&Bitmap>, + format: Arc, ) -> Result> { - self.serialize_json(column) + self.serialize_json(column, format) } fn serialize_json_object_suppress_error( &self, column: &ColumnRef, + _format: Arc, ) -> Result>> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec> = column diff --git a/common/datavalues/src/types/serializations/string.rs b/common/datavalues/src/types/serializations/string.rs index d16019d8a0d1b..e81102d1e100f 100644 --- a/common/datavalues/src/types/serializations/string.rs +++ b/common/datavalues/src/types/serializations/string.rs @@ -12,9 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::sync::Arc; + use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use opensrv_clickhouse::types::column::ArcColumnWrapper; use opensrv_clickhouse::types::column::ColumnFrom; use serde_json::Value; @@ -25,7 +28,7 @@ use crate::prelude::*; pub struct StringSerializer {} impl TypeSerializer for StringSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, _format: Arc) -> Result { if let DataValue::String(x) = value { Ok(String::from_utf8_lossy(x).to_string()) } else { @@ -33,7 +36,11 @@ impl TypeSerializer for StringSerializer { } } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let column: &StringColumn = Series::check_get(column)?; let result: Vec = column .iter() @@ -42,7 +49,11 @@ impl TypeSerializer for StringSerializer { Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let column: &StringColumn = Series::check_get(column)?; let result: Vec = column .iter() @@ -54,6 +65,7 @@ impl TypeSerializer for StringSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: Arc, ) -> Result { let column: &StringColumn = Series::check_get(column)?; let values: Vec<&[u8]> = column.iter().collect(); @@ -64,6 +76,7 @@ impl TypeSerializer for StringSerializer { &self, column: &ColumnRef, valids: Option<&Bitmap>, + _format: Arc, ) -> Result> { let column: &StringColumn = Series::check_get(column)?; let mut result: Vec = Vec::new(); @@ -98,6 +111,7 @@ impl TypeSerializer for StringSerializer { fn serialize_json_object_suppress_error( &self, column: &ColumnRef, + _format: Arc, ) -> Result>> { let column: &StringColumn = Series::check_get(column)?; let result: Vec> = column diff --git a/common/datavalues/src/types/serializations/struct_.rs b/common/datavalues/src/types/serializations/struct_.rs index fa71936801ceb..e30cc449a2a93 100644 --- a/common/datavalues/src/types/serializations/struct_.rs +++ b/common/datavalues/src/types/serializations/struct_.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use itertools::izip; use opensrv_clickhouse::types::column::ArcColumnData; use opensrv_clickhouse::types::column::TupleColumnData; @@ -31,7 +32,7 @@ pub struct StructSerializer { } impl TypeSerializer for StructSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, format: Arc) -> Result { if let DataValue::Struct(vals) = value { let mut res = String::new(); res.push('('); @@ -43,7 +44,7 @@ impl TypeSerializer for StructSerializer { } first = false; - let s = inner.serialize_value(val)?; + let s = inner.serialize_value(val, format.clone())?; if typ.data_type_id().is_quoted() { res.push_str(&format!("'{}'", s)); } else { @@ -57,36 +58,40 @@ impl TypeSerializer for StructSerializer { } } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + format: Arc, + ) -> Result> { let column: &StructColumn = Series::check_get(column)?; let mut result = Vec::with_capacity(column.len()); for i in 0..column.len() { let val = column.get(i); - let s = self.serialize_value(&val)?; + let s = self.serialize_value(&val, 
format.clone())?; result.push(s); } Ok(result) } - fn serialize_json(&self, _column: &ColumnRef) -> Result> { - // let column: &StructColumn = Series::check_get(column)?; - // let inner_columns = column.values(); - // let result = self - // .inners - // .iter() - // .zip(inner_columns.iter()) - // .map(|(inner, col)| inner.serialize_json(col)) - // .collect::>>>()?; + fn serialize_json( + &self, + _column: &ColumnRef, + _format: Arc, + ) -> Result> { todo!() } - fn serialize_clickhouse_format(&self, column: &ColumnRef) -> Result { + fn serialize_clickhouse_format( + &self, + column: &ColumnRef, + format: Arc, + ) -> Result { let column: &StructColumn = Series::check_get(column)?; let result = self .inners .iter() .zip(column.values().iter()) - .map(|(inner, col)| inner.serialize_clickhouse_format(col)) + .map(|(inner, col)| inner.serialize_clickhouse_format(col, format.clone())) .collect::>>()?; let data = TupleColumnData { inner: result }; diff --git a/common/datavalues/src/types/serializations/timestamp.rs b/common/datavalues/src/types/serializations/timestamp.rs index 2b132d00f3f2c..9a0c2a052a5c5 100644 --- a/common/datavalues/src/types/serializations/timestamp.rs +++ b/common/datavalues/src/types/serializations/timestamp.rs @@ -12,9 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::Arc; + use chrono::DateTime; use chrono_tz::Tz; use common_exception::*; +use common_io::prelude::FormatSettings; use opensrv_clickhouse::types::column::ArcColumnWrapper; use opensrv_clickhouse::types::column::ColumnFrom; use serde_json::Value; @@ -42,13 +45,17 @@ impl TimestampSerializer { const TIME_FMT: &str = "%Y-%m-%d %H:%M:%S"; impl TypeSerializer for TimestampSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, _format: Arc) -> Result { let value = DFTryFrom::try_from(value.clone())?; let dt = self.to_timestamp(&value); Ok(dt.format(TIME_FMT).to_string()) } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column .iter() @@ -60,7 +67,11 @@ impl TypeSerializer for TimestampSerializer { Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let array: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = array .iter() @@ -75,6 +86,7 @@ impl TypeSerializer for TimestampSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: Arc, ) -> Result { let array: &PrimitiveColumn = Series::check_get(column)?; let values: Vec> = array.iter().map(|v| self.to_timestamp(v)).collect(); diff --git a/common/datavalues/src/types/serializations/variant.rs b/common/datavalues/src/types/serializations/variant.rs index aaa4be3ada9ec..bc900f7320439 100644 --- a/common/datavalues/src/types/serializations/variant.rs +++ b/common/datavalues/src/types/serializations/variant.rs @@ -12,9 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::sync::Arc; + use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use opensrv_clickhouse::types::column::ArcColumnWrapper; use opensrv_clickhouse::types::column::ColumnFrom; use serde_json; @@ -26,7 +29,7 @@ use crate::prelude::*; pub struct VariantSerializer {} impl TypeSerializer for VariantSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, _format: Arc) -> Result { if let DataValue::Variant(v) = value { Ok(v.to_string()) } else { @@ -34,13 +37,21 @@ impl TypeSerializer for VariantSerializer { } } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let column: &VariantColumn = Series::check_get(column)?; let result: Vec = column.iter().map(|v| v.to_string()).collect(); Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json( + &self, + column: &ColumnRef, + _format: Arc, + ) -> Result> { let column: &VariantColumn = Series::check_get(column)?; let result: Vec = column.iter().map(|v| v.as_ref().to_owned()).collect(); Ok(result) @@ -49,6 +60,7 @@ impl TypeSerializer for VariantSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: Arc, ) -> Result { let column: &VariantColumn = Series::check_get(column)?; let values: Vec = column.iter().map(|v| v.to_string()).collect(); @@ -60,6 +72,7 @@ impl TypeSerializer for VariantSerializer { &self, column: &ColumnRef, valids: Option<&Bitmap>, + _format: Arc, ) -> Result> { let column: &VariantColumn = Series::check_get(column)?; let mut result: Vec = Vec::new(); @@ -89,6 +102,7 @@ impl TypeSerializer for VariantSerializer { fn serialize_json_object_suppress_error( &self, column: &ColumnRef, + _format: Arc, ) -> Result>> { let column: &VariantColumn = Series::check_get(column)?; let result: Vec> = column diff --git a/common/datavalues/tests/it/types/serializations.rs b/common/datavalues/tests/it/types/serializations.rs index 23d8c04102e5c..bc0d54db7d771 100644 --- a/common/datavalues/tests/it/types/serializations.rs +++ b/common/datavalues/tests/it/types/serializations.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use common_datavalues::prelude::*; use common_exception::Result; +use common_io::prelude::FormatSettings; use pretty_assertions::assert_eq; use serde_json::json; @@ -144,12 +145,13 @@ fn test_serializers() -> Result<()> { }, ]; + let format = Arc::new(FormatSettings::default()); for test in tests { let serializer = test.data_type.create_serializer(); - let val_res = serializer.serialize_value(&test.value)?; + let val_res = serializer.serialize_value(&test.value, format.clone())?; assert_eq!(&val_res, test.val_str, "case: {:#?}", test.name); - let col_res = serializer.serialize_column(&test.column)?; + let col_res = serializer.serialize_column(&test.column, format.clone())?; assert_eq!(col_res, test.col_str, "case: {:#?}", test.name); } @@ -175,7 +177,7 @@ fn test_serializers() -> Result<()> { DataValue::Boolean(true), DataValue::UInt64(18869), ]); - let result = serializer.serialize_value(&value)?; + let result = serializer.serialize_value(&value, format.clone())?; let expect = "(1.2, 'hello', 1, '2021-08-30')"; assert_eq!(&result, expect); } diff --git a/common/io/src/format_settings.rs b/common/io/src/format_settings.rs index 39d04a86d77d9..583651d9d9ae4 100644 --- a/common/io/src/format_settings.rs +++ 
b/common/io/src/format_settings.rs @@ -26,6 +26,7 @@ pub struct FormatSettings { pub empty_as_default: bool, pub skip_header: bool, pub compression: Compression, + pub timezone: Vec, } impl Default for FormatSettings { @@ -36,6 +37,7 @@ impl Default for FormatSettings { empty_as_default: false, skip_header: false, compression: Compression::None, + timezone: vec![b'U', b'T', b'C'], } } } From db78d7af3c3e219010a0d0959965a09c2a5e354f Mon Sep 17 00:00:00 2001 From: Veeupup Date: Mon, 2 May 2022 21:42:26 +0800 Subject: [PATCH 02/15] query format with tz Signed-off-by: Veeupup --- .../src/types/serializations/array.rs | 14 +++++++------- .../src/types/serializations/boolean.rs | 14 +++++++------- .../src/types/serializations/date.rs | 10 +++++----- .../src/types/serializations/mod.rs | 14 +++++++------- .../src/types/serializations/null.rs | 8 ++++---- .../src/types/serializations/nullable.rs | 8 ++++---- .../src/types/serializations/number.rs | 14 +++++++------- .../src/types/serializations/string.rs | 14 +++++++------- .../src/types/serializations/struct_.rs | 14 +++++++------- .../src/types/serializations/timestamp.rs | 10 +++++----- .../src/types/serializations/variant.rs | 14 +++++++------- .../tests/it/types/serializations.rs | 8 ++++---- .../src/scalars/expressions/cast_with_type.rs | 5 ++++- .../json_extract_path_text.rs | 5 ++++- .../scalars/semi_structureds/parse_json.rs | 11 ++++++++--- .../servers/clickhouse/interactive_worker.rs | 4 +++- .../clickhouse/writers/query_writer.rs | 19 ++++++++++--------- query/src/servers/http/clickhouse_handler.rs | 4 ++-- query/src/servers/http/formats/tsv_output.rs | 5 +++-- .../servers/http/v1/http_query_handlers.rs | 9 +++++++-- query/src/servers/http/v1/json_block.rs | 5 +++-- query/src/servers/http/v1/query/http_query.rs | 9 +++++---- .../http/v1/query/result_data_manager.rs | 9 +++++---- query/src/servers/http/v1/statement.rs | 5 ++++- .../servers/mysql/mysql_interactive_worker.rs | 3 ++- .../mysql/writers/query_result_writer.rs | 15 +++++++++------ .../it/servers/http/formats/tsv_output.rs | 4 ++-- query/tests/it/servers/http/json_block.rs | 4 ++-- 28 files changed, 144 insertions(+), 114 deletions(-) diff --git a/common/datavalues/src/types/serializations/array.rs b/common/datavalues/src/types/serializations/array.rs index d6043072eb32f..e4ba93fa3be48 100644 --- a/common/datavalues/src/types/serializations/array.rs +++ b/common/datavalues/src/types/serializations/array.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::sync::Arc; + use common_exception::ErrorCode; use common_exception::Result; @@ -28,7 +28,7 @@ pub struct ArraySerializer { } impl TypeSerializer for ArraySerializer { - fn serialize_value(&self, value: &DataValue, format: Arc) -> Result { + fn serialize_value(&self, value: &DataValue, format: &FormatSettings) -> Result { if let DataValue::Array(vals) = value { let mut res = String::new(); res.push('['); @@ -40,7 +40,7 @@ impl TypeSerializer for ArraySerializer { } first = false; - let s = self.inner.serialize_value(val, format.clone())?; + let s = self.inner.serialize_value(val, format)?; if quoted { res.push_str(&format!("'{}'", s)); } else { @@ -57,13 +57,13 @@ impl TypeSerializer for ArraySerializer { fn serialize_column( &self, column: &ColumnRef, - format: Arc, + format: &FormatSettings, ) -> Result> { let column: &ArrayColumn = Series::check_get(column)?; let mut result = Vec::with_capacity(column.len()); for i in 0..column.len() { let val = column.get(i); - let s = self.serialize_value(&val, format.clone())?; + let s = self.serialize_value(&val, format)?; result.push(s); } Ok(result) @@ -72,7 +72,7 @@ impl TypeSerializer for ArraySerializer { fn serialize_json( &self, _column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { todo!() } @@ -80,7 +80,7 @@ impl TypeSerializer for ArraySerializer { fn serialize_clickhouse_format( &self, _column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result { todo!() } diff --git a/common/datavalues/src/types/serializations/boolean.rs b/common/datavalues/src/types/serializations/boolean.rs index 846f78f245d64..63af380495d03 100644 --- a/common/datavalues/src/types/serializations/boolean.rs +++ b/common/datavalues/src/types/serializations/boolean.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::sync::Arc; + use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; @@ -31,7 +31,7 @@ const TRUE_STR: &str = "1"; const FALSE_STR: &str = "0"; impl TypeSerializer for BooleanSerializer { - fn serialize_value(&self, value: &DataValue, _format: Arc) -> Result { + fn serialize_value(&self, value: &DataValue, _format: &FormatSettings) -> Result { if let DataValue::Boolean(x) = value { if *x { Ok(TRUE_STR.to_owned()) @@ -46,7 +46,7 @@ impl TypeSerializer for BooleanSerializer { fn serialize_column( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let array: &BooleanColumn = Series::check_get(column)?; @@ -66,7 +66,7 @@ impl TypeSerializer for BooleanSerializer { fn serialize_json( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let array: &BooleanColumn = Series::check_get(column)?; let result: Vec = array @@ -79,7 +79,7 @@ impl TypeSerializer for BooleanSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result { let col: &BooleanColumn = Series::check_get(column)?; let values: Vec = col.iter().map(|c| c as u8).collect(); @@ -90,7 +90,7 @@ impl TypeSerializer for BooleanSerializer { &self, column: &ColumnRef, _valids: Option<&Bitmap>, - format: Arc, + format: &FormatSettings, ) -> Result> { self.serialize_json(column, format) } @@ -98,7 +98,7 @@ impl TypeSerializer for BooleanSerializer { fn serialize_json_object_suppress_error( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result>> { let column: &BooleanColumn = Series::check_get(column)?; let result: Vec> = column diff --git a/common/datavalues/src/types/serializations/date.rs b/common/datavalues/src/types/serializations/date.rs index be2f4fd8377a9..fb08ae32d4dca 100644 --- a/common/datavalues/src/types/serializations/date.rs +++ b/common/datavalues/src/types/serializations/date.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use std::marker::PhantomData; use std::ops::AddAssign; -use std::sync::Arc; + use chrono::Date; use chrono::Duration; @@ -44,7 +44,7 @@ impl> Default for DateSerializer { const DATE_FMT: &str = "%Y-%m-%d"; impl> TypeSerializer for DateSerializer { - fn serialize_value(&self, value: &DataValue, _format: Arc) -> Result { + fn serialize_value(&self, value: &DataValue, _format: &FormatSettings) -> Result { let mut date = NaiveDate::from_ymd(1970, 1, 1); let d = Duration::days(value.as_i64()?); date.add_assign(d); @@ -54,7 +54,7 @@ impl> TypeSerializer for DateSerializer { fn serialize_column( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; @@ -73,7 +73,7 @@ impl> TypeSerializer for DateSerializer { fn serialize_json( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let array: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = array @@ -92,7 +92,7 @@ impl> TypeSerializer for DateSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result { let array: &PrimitiveColumn = Series::check_get(column)?; let tz: Tz = "UTC".parse().unwrap(); diff --git a/common/datavalues/src/types/serializations/mod.rs b/common/datavalues/src/types/serializations/mod.rs index 1bf813c57dfdb..8a06d12479b57 100644 --- a/common/datavalues/src/types/serializations/mod.rs +++ b/common/datavalues/src/types/serializations/mod.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; + use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; @@ -47,25 +47,25 @@ pub use variant::*; #[enum_dispatch] pub trait TypeSerializer: Send + Sync { - fn serialize_value(&self, value: &DataValue, format: Arc) -> Result; - fn serialize_json(&self, column: &ColumnRef, format: Arc) + fn serialize_value(&self, value: &DataValue, format: &FormatSettings) -> Result; + fn serialize_json(&self, column: &ColumnRef, format: &FormatSettings) -> Result>; fn serialize_column( &self, column: &ColumnRef, - format: Arc, + format: &FormatSettings, ) -> Result>; fn serialize_clickhouse_format( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result; fn serialize_json_object( &self, _column: &ColumnRef, _valids: Option<&Bitmap>, - _format: Arc, + _format: &FormatSettings, ) -> Result> { Err(ErrorCode::BadDataValueType( "Error parsing JSON: unsupported data type", @@ -75,7 +75,7 @@ pub trait TypeSerializer: Send + Sync { fn serialize_json_object_suppress_error( &self, _column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result>> { Err(ErrorCode::BadDataValueType( "Error parsing JSON: unsupported data type", diff --git a/common/datavalues/src/types/serializations/null.rs b/common/datavalues/src/types/serializations/null.rs index 351516467c31e..c5cbbfa34cecd 100644 --- a/common/datavalues/src/types/serializations/null.rs +++ b/common/datavalues/src/types/serializations/null.rs @@ -31,14 +31,14 @@ pub struct NullSerializer {} const NULL_STR: &str = "NULL"; impl TypeSerializer for NullSerializer { - fn serialize_value(&self, _value: &DataValue, _format: Arc) -> Result { + fn serialize_value(&self, _value: &DataValue, _format: &FormatSettings) -> Result { Ok(NULL_STR.to_owned()) } fn serialize_column( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let result: Vec = 
vec![NULL_STR.to_owned(); column.len()]; Ok(result) @@ -47,7 +47,7 @@ impl TypeSerializer for NullSerializer { fn serialize_json( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let null = Value::Null; let result: Vec = vec![null; column.len()]; @@ -57,7 +57,7 @@ impl TypeSerializer for NullSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result { let nulls = vec![1u8; column.len()]; let inner = Vec::column_from::(vec![1u8; column.len()]); diff --git a/common/datavalues/src/types/serializations/nullable.rs b/common/datavalues/src/types/serializations/nullable.rs index 3545b7808cfa8..8466e717b03c4 100644 --- a/common/datavalues/src/types/serializations/nullable.rs +++ b/common/datavalues/src/types/serializations/nullable.rs @@ -33,7 +33,7 @@ pub struct NullableSerializer { } impl TypeSerializer for NullableSerializer { - fn serialize_value(&self, value: &DataValue, format: Arc) -> Result { + fn serialize_value(&self, value: &DataValue, format: &FormatSettings) -> Result { if value.is_null() { Ok("NULL".to_owned()) } else { @@ -44,7 +44,7 @@ impl TypeSerializer for NullableSerializer { fn serialize_column( &self, column: &ColumnRef, - format: Arc, + format: &FormatSettings, ) -> Result> { let column: &NullableColumn = Series::check_get(column)?; let rows = column.len(); @@ -61,7 +61,7 @@ impl TypeSerializer for NullableSerializer { fn serialize_json( &self, column: &ColumnRef, - format: Arc, + format: &FormatSettings, ) -> Result> { let column: &NullableColumn = Series::check_get(column)?; let rows = column.len(); @@ -78,7 +78,7 @@ impl TypeSerializer for NullableSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, - format: Arc, + format: &FormatSettings, ) -> Result { let column: &NullableColumn = Series::check_get(column)?; let inner = self diff --git a/common/datavalues/src/types/serializations/number.rs b/common/datavalues/src/types/serializations/number.rs index e0a0a2bfa566c..767e3c864cfc2 100644 --- a/common/datavalues/src/types/serializations/number.rs +++ b/common/datavalues/src/types/serializations/number.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use std::marker::PhantomData; -use std::sync::Arc; + use common_arrow::arrow::bitmap::Bitmap; use common_exception::Result; @@ -51,14 +51,14 @@ where T: PrimitiveType + opensrv_clickhouse::io::Marshal + opensrv_clickhouse::io::Unmarshal { - fn serialize_value(&self, value: &DataValue, _format: Arc) -> Result { + fn serialize_value(&self, value: &DataValue, _format: &FormatSettings) -> Result { Ok(format!("{:?}", value)) } fn serialize_column( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column.iter().map(|x| format!("{}", x)).collect(); @@ -68,7 +68,7 @@ where T: PrimitiveType fn serialize_json( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column @@ -81,7 +81,7 @@ where T: PrimitiveType fn serialize_clickhouse_format( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result { let col: &PrimitiveColumn = Series::check_get(column)?; let values: Vec = col.iter().map(|c| c.to_owned()).collect(); @@ -92,7 +92,7 @@ where T: PrimitiveType &self, column: &ColumnRef, _valids: Option<&Bitmap>, - format: Arc, + format: &FormatSettings, ) -> Result> { self.serialize_json(column, format) } @@ -100,7 +100,7 @@ where T: PrimitiveType fn serialize_json_object_suppress_error( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result>> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec> = column diff --git a/common/datavalues/src/types/serializations/string.rs b/common/datavalues/src/types/serializations/string.rs index e81102d1e100f..b63bc591c3776 100644 --- a/common/datavalues/src/types/serializations/string.rs +++ b/common/datavalues/src/types/serializations/string.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::sync::Arc; + use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; @@ -28,7 +28,7 @@ use crate::prelude::*; pub struct StringSerializer {} impl TypeSerializer for StringSerializer { - fn serialize_value(&self, value: &DataValue, _format: Arc) -> Result { + fn serialize_value(&self, value: &DataValue, _format: &FormatSettings) -> Result { if let DataValue::String(x) = value { Ok(String::from_utf8_lossy(x).to_string()) } else { @@ -39,7 +39,7 @@ impl TypeSerializer for StringSerializer { fn serialize_column( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let column: &StringColumn = Series::check_get(column)?; let result: Vec = column @@ -52,7 +52,7 @@ impl TypeSerializer for StringSerializer { fn serialize_json( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let column: &StringColumn = Series::check_get(column)?; let result: Vec = column @@ -65,7 +65,7 @@ impl TypeSerializer for StringSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result { let column: &StringColumn = Series::check_get(column)?; let values: Vec<&[u8]> = column.iter().collect(); @@ -76,7 +76,7 @@ impl TypeSerializer for StringSerializer { &self, column: &ColumnRef, valids: Option<&Bitmap>, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let column: &StringColumn = Series::check_get(column)?; let mut result: Vec = Vec::new(); @@ -111,7 +111,7 @@ impl TypeSerializer for StringSerializer { fn serialize_json_object_suppress_error( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result>> { let column: &StringColumn = Series::check_get(column)?; let result: Vec> = column diff --git a/common/datavalues/src/types/serializations/struct_.rs b/common/datavalues/src/types/serializations/struct_.rs index e30cc449a2a93..bd0c3c88f2dd1 100644 --- a/common/datavalues/src/types/serializations/struct_.rs +++ b/common/datavalues/src/types/serializations/struct_.rs @@ -32,7 +32,7 @@ pub struct StructSerializer { } impl TypeSerializer for StructSerializer { - fn serialize_value(&self, value: &DataValue, format: Arc) -> Result { + fn serialize_value(&self, value: &DataValue, format: &FormatSettings) -> Result { if let DataValue::Struct(vals) = value { let mut res = String::new(); res.push('('); @@ -44,7 +44,7 @@ impl TypeSerializer for StructSerializer { } first = false; - let s = inner.serialize_value(val, format.clone())?; + let s = inner.serialize_value(val, format)?; if typ.data_type_id().is_quoted() { res.push_str(&format!("'{}'", s)); } else { @@ -61,13 +61,13 @@ impl TypeSerializer for StructSerializer { fn serialize_column( &self, column: &ColumnRef, - format: Arc, + format: &FormatSettings, ) -> Result> { let column: &StructColumn = Series::check_get(column)?; let mut result = Vec::with_capacity(column.len()); for i in 0..column.len() { let val = column.get(i); - let s = self.serialize_value(&val, format.clone())?; + let s = self.serialize_value(&val, format)?; result.push(s); } Ok(result) @@ -76,7 +76,7 @@ impl TypeSerializer for StructSerializer { fn serialize_json( &self, _column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { todo!() } @@ -84,14 +84,14 @@ impl TypeSerializer for StructSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, - format: Arc, + format: &FormatSettings, ) -> Result { let column: &StructColumn = Series::check_get(column)?; let result = self .inners 
.iter() .zip(column.values().iter()) - .map(|(inner, col)| inner.serialize_clickhouse_format(col, format.clone())) + .map(|(inner, col)| inner.serialize_clickhouse_format(col, format)) .collect::>>()?; let data = TupleColumnData { inner: result }; diff --git a/common/datavalues/src/types/serializations/timestamp.rs b/common/datavalues/src/types/serializations/timestamp.rs index 9a0c2a052a5c5..3ed7c28b380d4 100644 --- a/common/datavalues/src/types/serializations/timestamp.rs +++ b/common/datavalues/src/types/serializations/timestamp.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; + use chrono::DateTime; use chrono_tz::Tz; @@ -45,7 +45,7 @@ impl TimestampSerializer { const TIME_FMT: &str = "%Y-%m-%d %H:%M:%S"; impl TypeSerializer for TimestampSerializer { - fn serialize_value(&self, value: &DataValue, _format: Arc) -> Result { + fn serialize_value(&self, value: &DataValue, _format: &FormatSettings) -> Result { let value = DFTryFrom::try_from(value.clone())?; let dt = self.to_timestamp(&value); Ok(dt.format(TIME_FMT).to_string()) @@ -54,7 +54,7 @@ impl TypeSerializer for TimestampSerializer { fn serialize_column( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column @@ -70,7 +70,7 @@ impl TypeSerializer for TimestampSerializer { fn serialize_json( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let array: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = array @@ -86,7 +86,7 @@ impl TypeSerializer for TimestampSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result { let array: &PrimitiveColumn = Series::check_get(column)?; let values: Vec> = array.iter().map(|v| self.to_timestamp(v)).collect(); diff --git a/common/datavalues/src/types/serializations/variant.rs b/common/datavalues/src/types/serializations/variant.rs index bc900f7320439..0e568a807d52d 100644 --- a/common/datavalues/src/types/serializations/variant.rs +++ b/common/datavalues/src/types/serializations/variant.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::sync::Arc; + use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; @@ -29,7 +29,7 @@ use crate::prelude::*; pub struct VariantSerializer {} impl TypeSerializer for VariantSerializer { - fn serialize_value(&self, value: &DataValue, _format: Arc) -> Result { + fn serialize_value(&self, value: &DataValue, _format: &FormatSettings) -> Result { if let DataValue::Variant(v) = value { Ok(v.to_string()) } else { @@ -40,7 +40,7 @@ impl TypeSerializer for VariantSerializer { fn serialize_column( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let column: &VariantColumn = Series::check_get(column)?; let result: Vec = column.iter().map(|v| v.to_string()).collect(); @@ -50,7 +50,7 @@ impl TypeSerializer for VariantSerializer { fn serialize_json( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let column: &VariantColumn = Series::check_get(column)?; let result: Vec = column.iter().map(|v| v.as_ref().to_owned()).collect(); @@ -60,7 +60,7 @@ impl TypeSerializer for VariantSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result { let column: &VariantColumn = Series::check_get(column)?; let values: Vec = column.iter().map(|v| v.to_string()).collect(); @@ -72,7 +72,7 @@ impl TypeSerializer for VariantSerializer { &self, column: &ColumnRef, valids: Option<&Bitmap>, - _format: Arc, + _format: &FormatSettings, ) -> Result> { let column: &VariantColumn = Series::check_get(column)?; let mut result: Vec = Vec::new(); @@ -102,7 +102,7 @@ impl TypeSerializer for VariantSerializer { fn serialize_json_object_suppress_error( &self, column: &ColumnRef, - _format: Arc, + _format: &FormatSettings, ) -> Result>> { let column: &VariantColumn = Series::check_get(column)?; let result: Vec> = column diff --git a/common/datavalues/tests/it/types/serializations.rs b/common/datavalues/tests/it/types/serializations.rs index bc0d54db7d771..0610bfe9b643e 100644 --- a/common/datavalues/tests/it/types/serializations.rs +++ b/common/datavalues/tests/it/types/serializations.rs @@ -145,13 +145,13 @@ fn test_serializers() -> Result<()> { }, ]; - let format = Arc::new(FormatSettings::default()); + let format = FormatSettings::default(); for test in tests { let serializer = test.data_type.create_serializer(); - let val_res = serializer.serialize_value(&test.value, format.clone())?; + let val_res = serializer.serialize_value(&test.value, &format)?; assert_eq!(&val_res, test.val_str, "case: {:#?}", test.name); - let col_res = serializer.serialize_column(&test.column, format.clone())?; + let col_res = serializer.serialize_column(&test.column, &format)?; assert_eq!(col_res, test.col_str, "case: {:#?}", test.name); } @@ -177,7 +177,7 @@ fn test_serializers() -> Result<()> { DataValue::Boolean(true), DataValue::UInt64(18869), ]); - let result = serializer.serialize_value(&value, format.clone())?; + let result = serializer.serialize_value(&value, &format)?; let expect = "(1.2, 'hello', 1, '2021-08-30')"; assert_eq!(&result, expect); } diff --git a/common/functions/src/scalars/expressions/cast_with_type.rs b/common/functions/src/scalars/expressions/cast_with_type.rs index 8815390f592b9..0fd0c2cd3885f 100644 --- a/common/functions/src/scalars/expressions/cast_with_type.rs +++ b/common/functions/src/scalars/expressions/cast_with_type.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use common_io::prelude::FormatSettings; use common_arrow::arrow::array::ArrayRef; use 
common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::compute::cast; @@ -201,7 +202,9 @@ pub fn cast_to_variant( let mut builder = ColumnBuilder::::with_capacity(size); if from_type.data_type_id().is_numeric() || from_type.data_type_id() == TypeID::Boolean { let serializer = from_type.create_serializer(); - match serializer.serialize_json_object(&column, None) { + // TODO(veeupup): check if we can use default format_settings + let format = FormatSettings::default(); + match serializer.serialize_json_object(&column, None, &format) { Ok(values) => { for v in values { builder.append(&VariantValue::from(v)); diff --git a/common/functions/src/scalars/semi_structureds/json_extract_path_text.rs b/common/functions/src/scalars/semi_structureds/json_extract_path_text.rs index 8c446c1918e9c..47bc6e55ceca6 100644 --- a/common/functions/src/scalars/semi_structureds/json_extract_path_text.rs +++ b/common/functions/src/scalars/semi_structureds/json_extract_path_text.rs @@ -14,6 +14,7 @@ use std::fmt; +use common_io::prelude::FormatSettings; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; @@ -83,7 +84,9 @@ impl Function for JsonExtractPathTextFunction { let mut builder = ColumnBuilder::::with_capacity(input_rows); let serializer = data_type.create_serializer(); - match serializer.serialize_json_object(columns[0].column(), None) { + // TODO(veeupup): check if we can use default format_settings + let format = FormatSettings::default(); + match serializer.serialize_json_object(columns[0].column(), None, &format) { Ok(values) => { for v in values { builder.append(&VariantValue::from(v)); diff --git a/common/functions/src/scalars/semi_structureds/parse_json.rs b/common/functions/src/scalars/semi_structureds/parse_json.rs index 3c27a701435b5..207ccd929822e 100644 --- a/common/functions/src/scalars/semi_structureds/parse_json.rs +++ b/common/functions/src/scalars/semi_structureds/parse_json.rs @@ -14,6 +14,7 @@ use std::fmt; +use common_io::prelude::FormatSettings; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; @@ -91,6 +92,8 @@ impl Function for ParseJsonFunctionImpl::with_capacity(input_rows); @@ -100,7 +103,7 @@ impl Function for ParseJsonFunctionImpl { for v in values { match v { @@ -132,7 +135,7 @@ impl Function for ParseJsonFunctionImpl { for (i, v) in values.iter().enumerate() { if let Some(valids) = valids { @@ -162,7 +165,9 @@ impl Function for ParseJsonFunctionImpl { for v in values { builder.append(&VariantValue::from(v)); diff --git a/query/src/servers/clickhouse/interactive_worker.rs b/query/src/servers/clickhouse/interactive_worker.rs index 4d2d2a98a70cc..68b381b49bb6d 100644 --- a/query/src/servers/clickhouse/interactive_worker.rs +++ b/query/src/servers/clickhouse/interactive_worker.rs @@ -50,7 +50,9 @@ impl ClickHouseSession for InteractiveWorker { let session = self.session.clone(); let get_query_result = InteractiveWorkerBase::do_query(ctx, session); - if let Err(cause) = query_writer.write(get_query_result.await).await { + let query_ctx = self.session.get_shared_query_context().await.map_err(|err| to_clickhouse_err(err))?; + let format = query_ctx.get_format_settings().map_err(|err| to_clickhouse_err(err))?; + if let Err(cause) = query_writer.write(get_query_result.await, &format).await { let new_error = cause.add_message(&ctx.state.query); return Err(to_clickhouse_err(new_error)); } diff --git a/query/src/servers/clickhouse/writers/query_writer.rs 
b/query/src/servers/clickhouse/writers/query_writer.rs index d01c2fb74e652..b2364e8b5366e 100644 --- a/query/src/servers/clickhouse/writers/query_writer.rs +++ b/query/src/servers/clickhouse/writers/query_writer.rs @@ -14,6 +14,7 @@ use std::borrow::Cow; +use common_io::prelude::FormatSettings; use common_base::ProgressValues; use common_datablocks::DataBlock; use common_datavalues::prelude::*; @@ -46,11 +47,11 @@ impl<'a> QueryWriter<'a> { } } - pub async fn write(&mut self, receiver: Result>) -> Result<()> { + pub async fn write(&mut self, receiver: Result>, format: &FormatSettings) -> Result<()> { match receiver { Err(error) => self.write_error(error).await, Ok(receiver) => { - let write_data = self.write_data(receiver); + let write_data = self.write_data(receiver, format); write_data.await } } @@ -85,8 +86,8 @@ impl<'a> QueryWriter<'a> { } } - async fn write_block(&mut self, block: DataBlock) -> Result<()> { - let block = to_clickhouse_block(block)?; + async fn write_block(&mut self, block: DataBlock, format: &FormatSettings) -> Result<()> { + let block = to_clickhouse_block(block, format)?; match self.conn.write_block(&block).await { Ok(_) => Ok(()), @@ -94,7 +95,7 @@ impl<'a> QueryWriter<'a> { } } - async fn write_data(&mut self, mut receiver: Receiver) -> Result<()> { + async fn write_data(&mut self, mut receiver: Receiver, format: &FormatSettings) -> Result<()> { loop { match receiver.next().await { None => { @@ -106,13 +107,13 @@ impl<'a> QueryWriter<'a> { return Ok(()); } Some(BlockItem::Block(Ok(block))) => { - self.write_block(block).await?; + self.write_block(block, format).await?; } Some(BlockItem::InsertSample(block)) => { let schema = block.schema(); let header = DataBlock::empty_with_schema(schema.clone()); - self.write_block(header).await?; + self.write_block(header, format).await?; } } } @@ -132,7 +133,7 @@ pub fn from_clickhouse_err(res: opensrv_clickhouse::errors::Error) -> ErrorCode ErrorCode::LogicalError(format!("clickhouse-srv expception: {:?}", res)) } -pub fn to_clickhouse_block(block: DataBlock) -> Result { +pub fn to_clickhouse_block(block: DataBlock, format: &FormatSettings) -> Result { let mut result = Block::new(); if block.num_columns() == 0 { return Ok(result); @@ -145,7 +146,7 @@ pub fn to_clickhouse_block(block: DataBlock) -> Result { let serializer = field.data_type().create_serializer(); result.append_column(column::new_column( name, - serializer.serialize_clickhouse_format(&column.convert_full_column())?, + serializer.serialize_clickhouse_format(&column.convert_full_column(), format)?, )); } Ok(result) diff --git a/query/src/servers/http/clickhouse_handler.rs b/query/src/servers/http/clickhouse_handler.rs index e37dc0ae2d8dd..a6061addc874a 100644 --- a/query/src/servers/http/clickhouse_handler.rs +++ b/query/src/servers/http/clickhouse_handler.rs @@ -88,12 +88,12 @@ async fn execute( interpreter.execute(input_stream).await? }; let mut data_stream = ctx.try_create_abortable(data_stream)?; - + let format = ctx.get_format_settings()?; let stream = stream! 
{ while let Some(block) = data_stream.next().await { match block{ Ok(block) => { - yield(block_to_tsv(&block)) + yield(block_to_tsv(&block, &format)) }, Err(err) => yield(Err(err)), }; diff --git a/query/src/servers/http/formats/tsv_output.rs b/query/src/servers/http/formats/tsv_output.rs index 5d85fedab8356..452248487c5dc 100644 --- a/query/src/servers/http/formats/tsv_output.rs +++ b/query/src/servers/http/formats/tsv_output.rs @@ -17,11 +17,12 @@ use common_datavalues::DataType; use common_datavalues::TypeSerializer; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; const FIELD_DELIMITER: u8 = b'\t'; const ROW_DELIMITER: u8 = b'\n'; -pub fn block_to_tsv(block: &DataBlock) -> Result> { +pub fn block_to_tsv(block: &DataBlock, format: &FormatSettings) -> Result> { let rows_size = block.column(0).len(); let columns_size = block.num_columns(); @@ -33,7 +34,7 @@ pub fn block_to_tsv(block: &DataBlock) -> Result> { let data_type = field.data_type(); let serializer = data_type.create_serializer(); // todo(youngsofun): escape - col_table.push(serializer.serialize_column(&column).map_err(|e| { + col_table.push(serializer.serialize_column(&column, format).map_err(|e| { ErrorCode::UnexpectedError(format!( "fail to serialize filed {}, error = {}", field.name(), diff --git a/query/src/servers/http/v1/http_query_handlers.rs b/query/src/servers/http/v1/http_query_handlers.rs index 473452f2f8d54..15d0ac20ef16e 100644 --- a/query/src/servers/http/v1/http_query_handlers.rs +++ b/query/src/servers/http/v1/http_query_handlers.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_io::prelude::FormatSettings; use common_base::ProgressValues; use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; @@ -175,9 +176,11 @@ async fn query_page_handler( let http_query_manager = ctx.session_mgr.get_http_query_manager(); match http_query_manager.get_query(&query_id).await { Some(query) => { + // TODO(veeupup): get query_ctx here to get format_settings + let format = FormatSettings::default(); query.clear_expire_time().await; let resp = query - .get_response_page(page_no) + .get_response_page(page_no, &format) .await .map_err(|err| poem::Error::from_string(err.message(), StatusCode::NOT_FOUND))?; query.update_expire_time().await; @@ -199,10 +202,12 @@ pub(crate) async fn query_handler( .try_create_query(&query_id, ctx, req) .await; + // TODO(veeupup): get query_ctx's format_settings here + let format = FormatSettings::default(); match query { Ok(query) => { let resp = query - .get_response_page(0) + .get_response_page(0, &format) .await .map_err(|err| poem::Error::from_string(err.message(), StatusCode::NOT_FOUND))?; query.update_expire_time().await; diff --git a/query/src/servers/http/v1/json_block.rs b/query/src/servers/http/v1/json_block.rs index 8336b01bd8404..4c1b41ca7c5b2 100644 --- a/query/src/servers/http/v1/json_block.rs +++ b/query/src/servers/http/v1/json_block.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use common_io::prelude::FormatSettings; use common_datablocks::DataBlock; use common_datavalues::DataSchema; use common_datavalues::DataSchemaRef; @@ -39,7 +40,7 @@ impl JsonBlock { } } - pub fn new(block: &DataBlock) -> Result { + pub fn new(block: &DataBlock, format: &FormatSettings) -> Result { let mut col_table = Vec::new(); let columns_size = block.columns().len(); for col_index in 0..columns_size { @@ -48,7 +49,7 @@ impl JsonBlock { let field = 
block.schema().field(col_index); let data_type = field.data_type(); let serializer = data_type.create_serializer(); - col_table.push(serializer.serialize_json(&column).map_err(|e| { + col_table.push(serializer.serialize_json(&column, format).map_err(|e| { ErrorCode::UnexpectedError(format!( "fail to serialize filed {}, error = {}", field.name(), diff --git a/query/src/servers/http/v1/query/http_query.rs b/query/src/servers/http/v1/query/http_query.rs index d2e83700b5fd2..95f0cb96d34a9 100644 --- a/query/src/servers/http/v1/query/http_query.rs +++ b/query/src/servers/http/v1/query/http_query.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use std::time::Duration; use std::time::Instant; +use common_io::prelude::FormatSettings; use common_base::tokio::sync::mpsc; use common_base::tokio::sync::Mutex as TokioMutex; use common_base::tokio::sync::RwLock; @@ -175,9 +176,9 @@ impl HttpQuery { self.request.pagination.wait_time_secs == 0 } - pub async fn get_response_page(&self, page_no: usize) -> Result { + pub async fn get_response_page(&self, page_no: usize, format: &FormatSettings) -> Result { Ok(HttpQueryResponseInternal { - data: Some(self.get_page(page_no).await?), + data: Some(self.get_page(page_no, format).await?), session_id: self.session_id.clone(), state: self.get_state().await, }) @@ -202,10 +203,10 @@ impl HttpQuery { } } - async fn get_page(&self, page_no: usize) -> Result { + async fn get_page(&self, page_no: usize, format: &FormatSettings) -> Result { let mut data = self.data.lock().await; let page = data - .get_a_page(page_no, &self.request.pagination.get_wait_type()) + .get_a_page(page_no, &self.request.pagination.get_wait_type(), format) .await?; let response = ResponseData { page, diff --git a/query/src/servers/http/v1/query/result_data_manager.rs b/query/src/servers/http/v1/query/result_data_manager.rs index 10c58921bdf81..bebe82ee66e16 100644 --- a/query/src/servers/http/v1/query/result_data_manager.rs +++ b/query/src/servers/http/v1/query/result_data_manager.rs @@ -14,6 +14,7 @@ use std::time::Instant; +use common_io::prelude::FormatSettings; use common_base::tokio; use common_base::tokio::sync::mpsc; use common_base::tokio::sync::mpsc::error::TryRecvError; @@ -71,10 +72,10 @@ impl ResultDataManager { } } - pub async fn get_a_page(&mut self, page_no: usize, tp: &Wait) -> Result { + pub async fn get_a_page(&mut self, page_no: usize, tp: &Wait, format: &FormatSettings) -> Result { let next_no = self.total_pages; if page_no == next_no && !self.end { - let (block, end) = self.collect_new_page(tp).await?; + let (block, end) = self.collect_new_page(tp, format).await?; let num_row = block.num_rows(); self.total_rows += num_row; let page = Page { @@ -120,7 +121,7 @@ impl ResultDataManager { } } - pub async fn collect_new_page(&mut self, tp: &Wait) -> Result<(JsonBlock, bool)> { + pub async fn collect_new_page(&mut self, tp: &Wait, format: &FormatSettings) -> Result<(JsonBlock, bool)> { let mut results: Vec = Vec::new(); let mut rows = 0; let block_rx = &mut self.block_rx; @@ -130,7 +131,7 @@ impl ResultDataManager { match ResultDataManager::receive(block_rx, tp).await { Ok(block) => { rows += block.num_rows(); - results.push(JsonBlock::new(&block)?); + results.push(JsonBlock::new(&block, format)?); // TODO(youngsofun): set it in post if needed if rows >= TARGET_ROWS_PER_PAGE { break; diff --git a/query/src/servers/http/v1/statement.rs b/query/src/servers/http/v1/statement.rs index 62276e10c9223..16862af186e57 100644 --- a/query/src/servers/http/v1/statement.rs +++ 
b/query/src/servers/http/v1/statement.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_io::prelude::FormatSettings; use hyper::StatusCode; use poem::error::Result as PoemResult; use poem::post; @@ -53,10 +54,12 @@ pub async fn statement_handler( let query = http_query_manager .try_create_query(&query_id, ctx, req) .await; + // TODO(veeupup): get query_ctx's format_settings here + let format = FormatSettings::default(); match query { Ok(query) => { let resp = query - .get_response_page(0) + .get_response_page(0, &format) .await .map_err(|err| poem::Error::from_string(err.message(), StatusCode::NOT_FOUND))?; http_query_manager.remove_query(&query_id).await; diff --git a/query/src/servers/mysql/mysql_interactive_worker.rs b/query/src/servers/mysql/mysql_interactive_worker.rs index c25c36c58f876..d4b76e5915036 100644 --- a/query/src/servers/mysql/mysql_interactive_worker.rs +++ b/query/src/servers/mysql/mysql_interactive_worker.rs @@ -179,7 +179,8 @@ impl AsyncMysqlShim for InteractiveWorker let instant = Instant::now(); let blocks = self.base.do_query(query).await; - let mut write_result = writer.write(blocks); + let format = self.session.get_shared_query_context().await?.get_format_settings()?; + let mut write_result = writer.write(blocks, &format); if let Err(cause) = write_result { let suffix = format!("(while in query {})", query); diff --git a/query/src/servers/mysql/writers/query_result_writer.rs b/query/src/servers/mysql/writers/query_result_writer.rs index d2ca5a3474f55..71a6658a18759 100644 --- a/query/src/servers/mysql/writers/query_result_writer.rs +++ b/query/src/servers/mysql/writers/query_result_writer.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. + use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::prelude::TypeID; @@ -28,6 +29,7 @@ use common_exception::Result; use common_exception::ABORT_QUERY; use common_exception::ABORT_SESSION; use common_tracing::tracing; +use common_io::prelude::FormatSettings; use opensrv_mysql::*; pub struct DFQueryResultWriter<'a, W: std::io::Write> { @@ -39,10 +41,10 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { DFQueryResultWriter::<'a, W> { inner: Some(inner) } } - pub fn write(&mut self, query_result: Result<(Vec, String)>) -> Result<()> { + pub fn write(&mut self, query_result: Result<(Vec, String)>, format: &FormatSettings) -> Result<()> { if let Some(writer) = self.inner.take() { match query_result { - Ok((blocks, extra_info)) => Self::ok(blocks, extra_info, writer)?, + Ok((blocks, extra_info)) => Self::ok(blocks, extra_info, writer, format)?, Err(error) => Self::err(&error, writer)?, } } @@ -53,6 +55,7 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { blocks: Vec, extra_info: String, dataset_writer: QueryResultWriter<'a, W>, + format: &FormatSettings ) -> Result<()> { // XXX: num_columns == 0 may is error? let default_response = OkResponse { @@ -151,19 +154,19 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { } (TypeID::Struct, DataValue::Struct(_)) => { let serializer = data_type.create_serializer(); - row_writer.write_col(serializer.serialize_value(&val)?)? + row_writer.write_col(serializer.serialize_value(&val, format)?)? } (TypeID::Variant, DataValue::Variant(_)) => { let serializer = data_type.create_serializer(); - row_writer.write_col(serializer.serialize_value(&val)?)? 
+ row_writer.write_col(serializer.serialize_value(&val, format)?)? } (TypeID::VariantArray, DataValue::Variant(_)) => { let serializer = data_type.create_serializer(); - row_writer.write_col(serializer.serialize_value(&val)?)? + row_writer.write_col(serializer.serialize_value(&val, format)?)? } (TypeID::VariantObject, DataValue::Variant(_)) => { let serializer = data_type.create_serializer(); - row_writer.write_col(serializer.serialize_value(&val)?)? + row_writer.write_col(serializer.serialize_value(&val, format)?)? } (_, DataValue::Int64(v)) => row_writer.write_col(v)?, diff --git a/query/tests/it/servers/http/formats/tsv_output.rs b/query/tests/it/servers/http/formats/tsv_output.rs index 93a8aaea5b166..105756bb1bb83 100644 --- a/query/tests/it/servers/http/formats/tsv_output.rs +++ b/query/tests/it/servers/http/formats/tsv_output.rs @@ -59,8 +59,8 @@ fn test_data_block(is_nullable: bool) -> Result<()> { } else { block }; - - let json_block = String::from_utf8(block_to_tsv(&block)?)?; + let format = FormatSettings::default(); + let json_block = String::from_utf8(block_to_tsv(&block, &format)?)?; let expect = "1\ta\t1\t1.1\t1970-01-02\n\ 2\tb\t1\t2.2\t1970-01-03\n\ 3\tc\t0\t3.3\t1970-01-04\n"; diff --git a/query/tests/it/servers/http/json_block.rs b/query/tests/it/servers/http/json_block.rs index ac4426acc107c..3d1efa4e74219 100644 --- a/query/tests/it/servers/http/json_block.rs +++ b/query/tests/it/servers/http/json_block.rs @@ -67,8 +67,8 @@ fn test_data_block(is_nullable: bool) -> Result<()> { } else { block }; - - let json_block = JsonBlock::new(&block)?; + let format = FormatSettings::default(); + let json_block = JsonBlock::new(&block, &format)?; let expect = vec![ vec![val(1), val("a"), val(true), val(1.1), val("1970-01-02")], vec![val(2), val("b"), val(true), val(2.2), val("1970-01-03")], From 47c25eb5256bb15e6da5205ffb9152d235c23b6e Mon Sep 17 00:00:00 2001 From: Veeupup Date: Thu, 5 May 2022 11:33:22 +0800 Subject: [PATCH 03/15] timestamp serialization with tz Signed-off-by: Veeupup --- common/datavalues/src/types/data_type.rs | 5 +++++ .../datavalues/src/types/serializations/timestamp.rs | 4 ++++ common/datavalues/src/types/type_timestamp.rs | 4 ++++ query/src/servers/clickhouse/writers/query_writer.rs | 12 +++++++++++- query/src/servers/http/v1/http_query_handlers.rs | 2 +- .../src/servers/mysql/writers/query_result_writer.rs | 6 +++++- query/src/sessions/query_ctx_shared.rs | 1 + 7 files changed, 31 insertions(+), 3 deletions(-) diff --git a/common/datavalues/src/types/data_type.rs b/common/datavalues/src/types/data_type.rs index f24b27235c706..29683d772491d 100644 --- a/common/datavalues/src/types/data_type.rs +++ b/common/datavalues/src/types/data_type.rs @@ -13,6 +13,7 @@ // limitations under the License. 
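Both updated tests above exercise the new signatures with FormatSettings::default(), and their expected TSV and JSON output is unchanged, so the default settings preserve the previous serialization behaviour. Stripped of the surrounding assertions, the call-site change the tests pin down is just the extra argument (sketch only, error handling elided):

    // Default settings leave the expected output in both tests untouched.
    let format = FormatSettings::default();
    let tsv = String::from_utf8(block_to_tsv(&block, &format)?)?;
    let json_block = JsonBlock::new(&block, &format)?;
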
use std::any::Any; +use chrono_tz::Tz; use std::collections::BTreeMap; use common_arrow::arrow::datatypes::DataType as ArrowType; @@ -125,6 +126,10 @@ pub trait DataType: std::fmt::Debug + Sync + Send + DynClone { fn create_mutable(&self, capacity: usize) -> Box; fn create_serializer(&self) -> TypeSerializerImpl; + /// work for timestamp serializer + fn create_serializer_with_tz(&self, _tz: Tz) -> TypeSerializerImpl { + unimplemented!() + } fn create_deserializer(&self, capacity: usize) -> TypeDeserializerImpl; } diff --git a/common/datavalues/src/types/serializations/timestamp.rs b/common/datavalues/src/types/serializations/timestamp.rs index 3ed7c28b380d4..f619e4bdbb434 100644 --- a/common/datavalues/src/types/serializations/timestamp.rs +++ b/common/datavalues/src/types/serializations/timestamp.rs @@ -37,6 +37,10 @@ impl Default for TimestampSerializer { } impl TimestampSerializer { + pub fn new_with_tz(tz: Tz) -> Self { + Self {tz} + } + pub fn to_timestamp(&self, value: &i64) -> DateTime { value.to_timestamp(&self.tz) } diff --git a/common/datavalues/src/types/type_timestamp.rs b/common/datavalues/src/types/type_timestamp.rs index 5b21d91aa409b..826016c7823bd 100644 --- a/common/datavalues/src/types/type_timestamp.rs +++ b/common/datavalues/src/types/type_timestamp.rs @@ -150,6 +150,10 @@ impl DataType for TimestampType { TimestampSerializer::default().into() } + fn create_serializer_with_tz(&self, tz: Tz) -> TypeSerializerImpl { + TimestampSerializer::new_with_tz(tz).into() + } + fn create_deserializer(&self, capacity: usize) -> TypeDeserializerImpl { let tz = "UTC".parse::().unwrap(); TimestampDeserializer { diff --git a/query/src/servers/clickhouse/writers/query_writer.rs b/query/src/servers/clickhouse/writers/query_writer.rs index b2364e8b5366e..872feb6f87faf 100644 --- a/query/src/servers/clickhouse/writers/query_writer.rs +++ b/query/src/servers/clickhouse/writers/query_writer.rs @@ -13,6 +13,7 @@ // limitations under the License. 
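A minimal usage sketch for the new hook (the timezone string is illustrative and the surrounding plumbing is elided): only the timestamp type overrides create_serializer_with_tz, every other type keeps the unimplemented!() default, so callers are expected to check the type id first, as the writer hunk below does.

    use chrono_tz::Tz;

    let tz: Tz = "Asia/Shanghai".parse().unwrap();
    // This is what TimestampType::create_serializer_with_tz(tz) wraps into a
    // TypeSerializerImpl; timestamps are now rendered in the given zone
    // instead of the previously hard-coded UTC.
    let serializer = TimestampSerializer::new_with_tz(tz);
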
use std::borrow::Cow; +use chrono_tz::Tz; use common_io::prelude::FormatSettings; use common_base::ProgressValues; @@ -143,7 +144,16 @@ pub fn to_clickhouse_block(block: DataBlock, format: &FormatSettings) -> Result< let column = block.column(column_index); let field = block.schema().field(column_index); let name = field.name(); - let serializer = field.data_type().create_serializer(); + let serializer = if field.data_type().data_type_id() == TypeID::Timestamp { + let tz = + String::from_utf8(format.timezone.clone()).map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; + let tz = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; + field.data_type().create_serializer_with_tz(tz) + }else { + field.data_type().create_serializer() + }; result.append_column(column::new_column( name, serializer.serialize_clickhouse_format(&column.convert_full_column(), format)?, diff --git a/query/src/servers/http/v1/http_query_handlers.rs b/query/src/servers/http/v1/http_query_handlers.rs index 15d0ac20ef16e..1eb6172142705 100644 --- a/query/src/servers/http/v1/http_query_handlers.rs +++ b/query/src/servers/http/v1/http_query_handlers.rs @@ -202,7 +202,7 @@ pub(crate) async fn query_handler( .try_create_query(&query_id, ctx, req) .await; - // TODO(veeupup): get query_ctx's format_settings here + // TODO(veeupup): get global query_ctx's format_settings, because we cann't set session settings now let format = FormatSettings::default(); match query { Ok(query) => { diff --git a/query/src/servers/mysql/writers/query_result_writer.rs b/query/src/servers/mysql/writers/query_result_writer.rs index 71a6658a18759..05aabf96f9b9d 100644 --- a/query/src/servers/mysql/writers/query_result_writer.rs +++ b/query/src/servers/mysql/writers/query_result_writer.rs @@ -111,7 +111,11 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { } let block = blocks[0].clone(); - let tz: Tz = "UTC".parse().unwrap(); + let tz = + String::from_utf8(format.timezone.clone()).map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; + let tz = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; match convert_schema(block.schema()) { Err(error) => Self::err(&error, dataset_writer), Ok(columns) => { diff --git a/query/src/sessions/query_ctx_shared.rs b/query/src/sessions/query_ctx_shared.rs index 3da38e24efbe7..b77f5a8b6cdda 100644 --- a/query/src/sessions/query_ctx_shared.rs +++ b/query/src/sessions/query_ctx_shared.rs @@ -263,6 +263,7 @@ impl QueryContextShared { format.field_delimiter = settings.get_field_delimiter()?; format.empty_as_default = settings.get_empty_as_default()? > 0; format.skip_header = settings.get_skip_header()? 
> 0; + format.timezone = settings.get_timezone()?; } Ok(format) } From dc132f653f3a8920a6e31b47101634954d06f962 Mon Sep 17 00:00:00 2001 From: Veeupup Date: Thu, 5 May 2022 12:37:01 +0800 Subject: [PATCH 04/15] source work with tz Signed-off-by: Veeupup --- Cargo.lock | 1 + common/datavalues/src/types/data_type.rs | 6 +++++- common/datavalues/src/types/type_nullable.rs | 16 ++++++++++++++++ common/datavalues/src/types/type_timestamp.rs | 9 +++++++++ common/streams/Cargo.toml | 1 + common/streams/src/sources/source_csv.rs | 15 ++++++++++++++- common/streams/src/sources/source_ndjson.rs | 14 ++++++++++++-- query/src/servers/http/clickhouse_handler.rs | 5 ++++- query/src/servers/http/v1/load.rs | 7 +++++-- query/src/sql/statements/value_source.rs | 15 ++++++++++++++- query/src/storages/s3/s3_stage_source.rs | 9 ++++++++- 11 files changed, 89 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 347b1ef16f500..f04ecb412d15b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1282,6 +1282,7 @@ version = "0.1.0" dependencies = [ "async-stream", "async-trait", + "chrono-tz", "common-arrow", "common-base", "common-datablocks", diff --git a/common/datavalues/src/types/data_type.rs b/common/datavalues/src/types/data_type.rs index 29683d772491d..854b330856e5d 100644 --- a/common/datavalues/src/types/data_type.rs +++ b/common/datavalues/src/types/data_type.rs @@ -126,11 +126,15 @@ pub trait DataType: std::fmt::Debug + Sync + Send + DynClone { fn create_mutable(&self, capacity: usize) -> Box; fn create_serializer(&self) -> TypeSerializerImpl; - /// work for timestamp serializer + /// work only for timestamp serializer fn create_serializer_with_tz(&self, _tz: Tz) -> TypeSerializerImpl { unimplemented!() } fn create_deserializer(&self, capacity: usize) -> TypeDeserializerImpl; + /// work only for timestamp deserializer + fn create_deserializer_with_tz(&self, _capacity: usize, _tz: Tz) -> TypeDeserializerImpl { + unimplemented!() + } } pub fn from_arrow_type(dt: &ArrowType) -> DataTypeImpl { diff --git a/common/datavalues/src/types/type_nullable.rs b/common/datavalues/src/types/type_nullable.rs index 903d682bea9b2..fb08ca8bdcdde 100644 --- a/common/datavalues/src/types/type_nullable.rs +++ b/common/datavalues/src/types/type_nullable.rs @@ -14,6 +14,7 @@ use std::collections::BTreeMap; use std::sync::Arc; +use chrono_tz::Tz; use common_arrow::arrow::datatypes::DataType as ArrowType; use common_arrow::bitmap::MutableBitmap; @@ -87,6 +88,13 @@ impl DataType for NullableType { .into() } + fn create_serializer_with_tz(&self, tz: Tz) -> TypeSerializerImpl { + NullableSerializer { + inner: Box::new(self.inner.create_serializer_with_tz(tz)), + } + .into() + } + fn create_deserializer(&self, capacity: usize) -> TypeDeserializerImpl { NullableDeserializer { inner: Box::new(self.inner.create_deserializer(capacity)), @@ -95,6 +103,14 @@ impl DataType for NullableType { .into() } + fn create_deserializer_with_tz(&self, capacity:usize, tz: Tz) ->TypeDeserializerImpl { + NullableDeserializer { + inner: Box::new(self.inner.create_deserializer_with_tz(capacity, tz)), + bitmap: MutableBitmap::with_capacity(capacity), + } + .into() + } + fn create_mutable(&self, capacity: usize) -> Box { Box::new(MutableNullableColumn::new( self.inner.create_mutable(capacity), diff --git a/common/datavalues/src/types/type_timestamp.rs b/common/datavalues/src/types/type_timestamp.rs index 826016c7823bd..504f2398ecffd 100644 --- a/common/datavalues/src/types/type_timestamp.rs +++ b/common/datavalues/src/types/type_timestamp.rs @@ 
-164,6 +164,15 @@ impl DataType for TimestampType { .into() } + fn create_deserializer_with_tz(&self, capacity: usize, tz: Tz) -> TypeDeserializerImpl { + TimestampDeserializer { + builder: MutablePrimitiveColumn::::with_capacity(capacity), + tz, + precision: self.precision, + } + .into() + } + fn create_mutable(&self, capacity: usize) -> Box { Box::new(MutablePrimitiveColumn::::with_capacity(capacity)) } diff --git a/common/streams/Cargo.toml b/common/streams/Cargo.toml index e70b83393842f..80e48ffc242d5 100644 --- a/common/streams/Cargo.toml +++ b/common/streams/Cargo.toml @@ -27,6 +27,7 @@ common-tracing = { path = "../tracing" } # Crates.io dependencies async-stream = "0.3.3" async-trait = "0.1.53" +chrono-tz = "0.6.1" csv-async = "1.2.4" futures = "0.3.21" pin-project-lite = "0.2.8" diff --git a/common/streams/src/sources/source_csv.rs b/common/streams/src/sources/source_csv.rs index d8d0eef036fa4..51c2ac553442f 100644 --- a/common/streams/src/sources/source_csv.rs +++ b/common/streams/src/sources/source_csv.rs @@ -13,10 +13,12 @@ // limitations under the License. use async_trait::async_trait; +use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::DataSchemaRef; use common_datavalues::DataType; use common_datavalues::TypeDeserializer; +use common_datavalues::TypeID; use common_exception::ErrorCode; use common_exception::Result; use common_exception::ToErrorCode; @@ -38,6 +40,7 @@ pub struct CsvSourceBuilder { size_limit: usize, field_delimiter: u8, record_delimiter: Terminator, + tz: Tz, } impl CsvSourceBuilder { @@ -60,6 +63,9 @@ impl CsvSourceBuilder { let empty_as_default = format_settings.empty_as_default; let skip_header = format_settings.skip_header; + let tz = String::from_utf8(format_settings.timezone.clone()).unwrap(); + let tz = tz.parse::().unwrap(); + CsvSourceBuilder { schema, skip_header, @@ -68,6 +74,7 @@ impl CsvSourceBuilder { empty_as_default, block_size: 10000, size_limit: usize::MAX, + tz, } } @@ -160,7 +167,13 @@ where R: AsyncRead + Unpin + Send .schema .fields() .iter() - .map(|f| f.data_type().create_deserializer(self.builder.block_size)) + .map(|f| { + if f.data_type().data_type_id() == TypeID::Timestamp { + f.data_type().create_deserializer_with_tz(self.builder.block_size, self.builder.tz.clone()) + }else { + f.data_type().create_deserializer(self.builder.block_size) + } + }) .collect::>(); let mut rows = 0; diff --git a/common/streams/src/sources/source_ndjson.rs b/common/streams/src/sources/source_ndjson.rs index 7879ae70f1056..66eaba256055e 100644 --- a/common/streams/src/sources/source_ndjson.rs +++ b/common/streams/src/sources/source_ndjson.rs @@ -15,10 +15,12 @@ use std::borrow::Cow; use async_trait::async_trait; +use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::DataSchemaRef; use common_datavalues::DataType; use common_datavalues::TypeDeserializer; +use common_datavalues::TypeID; use common_exception::ErrorCode; use common_exception::Result; use common_exception::ToErrorCode; @@ -32,14 +34,16 @@ pub struct NDJsonSourceBuilder { schema: DataSchemaRef, block_size: usize, size_limit: usize, + tz: Tz } impl NDJsonSourceBuilder { - pub fn create(schema: DataSchemaRef) -> Self { + pub fn create(schema: DataSchemaRef, tz: Tz) -> Self { NDJsonSourceBuilder { schema, block_size: 10000, size_limit: usize::MAX, + tz, } } @@ -107,7 +111,13 @@ where R: AsyncBufRead + Unpin + Send .schema .fields() .iter() - .map(|f| f.data_type().create_deserializer(self.builder.block_size)) + .map(|f| { + if 
f.data_type().data_type_id() == TypeID::Timestamp { + f.data_type().create_deserializer_with_tz(self.builder.block_size, self.builder.tz.clone()) + }else { + f.data_type().create_deserializer(self.builder.block_size) + } + }) .collect::>(); let fields = self diff --git a/query/src/servers/http/clickhouse_handler.rs b/query/src/servers/http/clickhouse_handler.rs index a6061addc874a..17ed261a2b4e6 100644 --- a/query/src/servers/http/clickhouse_handler.rs +++ b/query/src/servers/http/clickhouse_handler.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::sync::Arc; +use chrono_tz::Tz; use async_stream::stream; use common_exception::ErrorCode; @@ -224,7 +225,9 @@ pub async fn clickhouse_handler_post( } async fn build_ndjson_stream(plan: &PlanNode, body: Body) -> Result { - let builder = NDJsonSourceBuilder::create(plan.schema()); + // TODO(veeupup): HTTP with global session tz + let tz = "UTC".parse::().unwrap(); + let builder = NDJsonSourceBuilder::create(plan.schema(), tz); let cursor = futures::io::Cursor::new( body.into_vec() .await diff --git a/query/src/servers/http/v1/load.rs b/query/src/servers/http/v1/load.rs index 2bc435d30a434..a6159c3778080 100644 --- a/query/src/servers/http/v1/load.rs +++ b/query/src/servers/http/v1/load.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::sync::Arc; +use chrono_tz::Tz; use async_compat::CompatExt; use async_stream::stream; @@ -279,7 +280,8 @@ fn build_ndjson_stream( plan: &PlanNode, mut multipart: Multipart, ) -> PoemResult { - let builder = NDJsonSourceBuilder::create(plan.schema()); + let tz = "UTC".parse::().unwrap(); + let builder = NDJsonSourceBuilder::create(plan.schema(), tz); let stream = stream! { while let Ok(Some(field)) = multipart.next_field().await { let bytes = field.bytes().await.map_err_to_code(ErrorCode::BadBytes, || "Read part to field bytes error")?; @@ -383,7 +385,8 @@ async fn ndjson_source_pipe_builder( plan: &PlanNode, mut multipart: Multipart, ) -> PoemResult { - let builder = NDJsonSourceBuilder::create(plan.schema()); + let tz = "UTC".parse::().unwrap(); + let builder = NDJsonSourceBuilder::create(plan.schema(), tz); let mut source_pipe_builder = SourcePipeBuilder::create(); while let Ok(Some(field)) = multipart.next_field().await { let bytes = field diff --git a/query/src/sql/statements/value_source.rs b/query/src/sql/statements/value_source.rs index 33c3184f4f860..5881b0f75af18 100644 --- a/query/src/sql/statements/value_source.rs +++ b/query/src/sql/statements/value_source.rs @@ -15,6 +15,7 @@ use std::ops::Not; use std::sync::Arc; +use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::prelude::*; @@ -55,11 +56,23 @@ impl ValueSource { } pub async fn read<'a>(&self, reader: &mut CpBufferReader<'a>) -> Result { + let format = self.ctx.get_format_settings()?; + let tz = + String::from_utf8(format.timezone.clone()).map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; + let tz = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; let mut desers = self .schema .fields() .iter() - .map(|f| f.data_type().create_deserializer(1024)) + .map(|f| { + if f.data_type().data_type_id() == TypeID::Timestamp { + f.data_type().create_deserializer_with_tz(1024, tz) + }else { + f.data_type().create_deserializer(1024) + } + }) .collect::>(); let col_size = desers.len(); diff --git a/query/src/storages/s3/s3_stage_source.rs b/query/src/storages/s3/s3_stage_source.rs index 868c788300433..031d3fe3962ad 100644 --- 
a/query/src/storages/s3/s3_stage_source.rs +++ b/query/src/storages/s3/s3_stage_source.rs @@ -15,6 +15,7 @@ use std::collections::VecDeque; use std::future::Future; use std::sync::Arc; +use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::DataSchemaRef; @@ -122,7 +123,13 @@ impl StageSource { stage_info: &UserStageInfo, reader: BytesReader, ) -> Result> { - let mut builder = NDJsonSourceBuilder::create(schema); + let format = ctx.get_format_settings()?; + let tz = + String::from_utf8(format.timezone.clone()).map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; + let tz = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; + let mut builder = NDJsonSourceBuilder::create(schema, tz); let size_limit = stage_info.copy_options.size_limit; // Size limit. From 6bbfe8bdf92bf7a597342e8fbe82a7046ffec0bf Mon Sep 17 00:00:00 2001 From: Veeupup Date: Thu, 5 May 2022 16:13:02 +0800 Subject: [PATCH 05/15] date number function work with tz Signed-off-by: Veeupup --- Cargo.lock | 1 + common/datavalues/src/types/data_type.rs | 2 +- .../src/types/serializations/array.rs | 14 +--- .../src/types/serializations/boolean.rs | 8 +- .../src/types/serializations/date.rs | 7 +- .../src/types/serializations/mod.rs | 11 +-- .../src/types/serializations/null.rs | 6 +- .../src/types/serializations/nullable.rs | 12 +-- .../src/types/serializations/number.rs | 7 +- .../src/types/serializations/string.rs | 8 +- .../src/types/serializations/struct_.rs | 12 +-- .../src/types/serializations/timestamp.rs | 10 +-- .../src/types/serializations/variant.rs | 8 +- common/datavalues/src/types/type_nullable.rs | 4 +- common/functions/Cargo.toml | 1 + .../src/scalars/dates/interval_function.rs | 8 +- .../src/scalars/dates/number_function.rs | 74 ++++++++++--------- .../src/scalars/dates/round_function.rs | 15 +++- .../src/scalars/expressions/cast_with_type.rs | 2 +- .../functions/src/scalars/expressions/ctx.rs | 7 +- .../json_extract_path_text.rs | 2 +- .../scalars/semi_structureds/parse_json.rs | 2 +- common/streams/src/sources/source_csv.rs | 7 +- common/streams/src/sources/source_ndjson.rs | 9 ++- .../servers/clickhouse/interactive_worker.rs | 10 ++- .../clickhouse/writers/query_writer.rs | 22 ++++-- query/src/servers/http/clickhouse_handler.rs | 2 +- .../servers/http/v1/http_query_handlers.rs | 2 +- query/src/servers/http/v1/json_block.rs | 2 +- query/src/servers/http/v1/load.rs | 2 +- query/src/servers/http/v1/query/http_query.rs | 8 +- .../http/v1/query/result_data_manager.rs | 15 +++- query/src/servers/http/v1/statement.rs | 4 +- .../servers/mysql/mysql_interactive_worker.rs | 6 +- .../mysql/writers/query_result_writer.rs | 27 ++++--- query/src/sql/statements/value_source.rs | 8 +- query/src/storages/s3/s3_stage_source.rs | 6 +- 37 files changed, 175 insertions(+), 176 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f04ecb412d15b..10e0e941dca9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -975,6 +975,7 @@ dependencies = [ "bstr", "bumpalo", "bytes 1.1.0", + "chrono-tz", "common-arrow", "common-datablocks", "common-datavalues", diff --git a/common/datavalues/src/types/data_type.rs b/common/datavalues/src/types/data_type.rs index 854b330856e5d..0c3a1d888da5a 100644 --- a/common/datavalues/src/types/data_type.rs +++ b/common/datavalues/src/types/data_type.rs @@ -13,9 +13,9 @@ // limitations under the License. 
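The two hunks above repeat the same two-step conversion that already appears in query_writer.rs and query_result_writer.rs: decode the UTF-8 timezone bytes carried by FormatSettings, then parse them with chrono-tz. A small helper along these lines (the function name and its placement are hypothetical, not part of these patches) would keep the error mapping in one place:

    use chrono_tz::Tz;
    use common_exception::ErrorCode;
    use common_exception::Result;
    use common_io::prelude::FormatSettings;

    // Hypothetical helper: FormatSettings stores the timezone as raw bytes,
    // so decode and parse it into a chrono_tz::Tz once.
    fn parse_timezone(format: &FormatSettings) -> Result<Tz> {
        let tz = String::from_utf8(format.timezone.clone())
            .map_err(|_| ErrorCode::LogicalError("timezone must be set"))?;
        tz.parse::<Tz>().map_err(|_| {
            ErrorCode::InvalidTimezone("Timezone has been checked and should be valid")
        })
    }
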
use std::any::Any; -use chrono_tz::Tz; use std::collections::BTreeMap; +use chrono_tz::Tz; use common_arrow::arrow::datatypes::DataType as ArrowType; use common_arrow::arrow::datatypes::Field as ArrowField; use common_exception::Result; diff --git a/common/datavalues/src/types/serializations/array.rs b/common/datavalues/src/types/serializations/array.rs index e4ba93fa3be48..816008dc36b2a 100644 --- a/common/datavalues/src/types/serializations/array.rs +++ b/common/datavalues/src/types/serializations/array.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. - - use common_exception::ErrorCode; use common_exception::Result; use common_io::prelude::FormatSettings; @@ -54,11 +52,7 @@ impl TypeSerializer for ArraySerializer { } } - fn serialize_column( - &self, - column: &ColumnRef, - format: &FormatSettings, - ) -> Result> { + fn serialize_column(&self, column: &ColumnRef, format: &FormatSettings) -> Result> { let column: &ArrayColumn = Series::check_get(column)?; let mut result = Vec::with_capacity(column.len()); for i in 0..column.len() { @@ -69,11 +63,7 @@ impl TypeSerializer for ArraySerializer { Ok(result) } - fn serialize_json( - &self, - _column: &ColumnRef, - _format: &FormatSettings, - ) -> Result> { + fn serialize_json(&self, _column: &ColumnRef, _format: &FormatSettings) -> Result> { todo!() } diff --git a/common/datavalues/src/types/serializations/boolean.rs b/common/datavalues/src/types/serializations/boolean.rs index 63af380495d03..1ca1a43052638 100644 --- a/common/datavalues/src/types/serializations/boolean.rs +++ b/common/datavalues/src/types/serializations/boolean.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. - - use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; use common_exception::Result; @@ -63,11 +61,7 @@ impl TypeSerializer for BooleanSerializer { Ok(result) } - fn serialize_json( - &self, - column: &ColumnRef, - _format: &FormatSettings, - ) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let array: &BooleanColumn = Series::check_get(column)?; let result: Vec = array .iter() diff --git a/common/datavalues/src/types/serializations/date.rs b/common/datavalues/src/types/serializations/date.rs index fb08ae32d4dca..7441977e066e5 100644 --- a/common/datavalues/src/types/serializations/date.rs +++ b/common/datavalues/src/types/serializations/date.rs @@ -14,7 +14,6 @@ use std::marker::PhantomData; use std::ops::AddAssign; - use chrono::Date; use chrono::Duration; use chrono::NaiveDate; @@ -70,11 +69,7 @@ impl> TypeSerializer for DateSerializer { Ok(result) } - fn serialize_json( - &self, - column: &ColumnRef, - _format: &FormatSettings, - ) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let array: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = array .iter() diff --git a/common/datavalues/src/types/serializations/mod.rs b/common/datavalues/src/types/serializations/mod.rs index 8a06d12479b57..c1ab933c7b5ac 100644 --- a/common/datavalues/src/types/serializations/mod.rs +++ b/common/datavalues/src/types/serializations/mod.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
- - use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; use common_exception::Result; @@ -48,13 +46,8 @@ pub use variant::*; #[enum_dispatch] pub trait TypeSerializer: Send + Sync { fn serialize_value(&self, value: &DataValue, format: &FormatSettings) -> Result; - fn serialize_json(&self, column: &ColumnRef, format: &FormatSettings) - -> Result>; - fn serialize_column( - &self, - column: &ColumnRef, - format: &FormatSettings, - ) -> Result>; + fn serialize_json(&self, column: &ColumnRef, format: &FormatSettings) -> Result>; + fn serialize_column(&self, column: &ColumnRef, format: &FormatSettings) -> Result>; fn serialize_clickhouse_format( &self, column: &ColumnRef, diff --git a/common/datavalues/src/types/serializations/null.rs b/common/datavalues/src/types/serializations/null.rs index c5cbbfa34cecd..e2097a1e92c1f 100644 --- a/common/datavalues/src/types/serializations/null.rs +++ b/common/datavalues/src/types/serializations/null.rs @@ -44,11 +44,7 @@ impl TypeSerializer for NullSerializer { Ok(result) } - fn serialize_json( - &self, - column: &ColumnRef, - _format: &FormatSettings, - ) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let null = Value::Null; let result: Vec = vec![null; column.len()]; Ok(result) diff --git a/common/datavalues/src/types/serializations/nullable.rs b/common/datavalues/src/types/serializations/nullable.rs index 8466e717b03c4..b09ade25bf42d 100644 --- a/common/datavalues/src/types/serializations/nullable.rs +++ b/common/datavalues/src/types/serializations/nullable.rs @@ -41,11 +41,7 @@ impl TypeSerializer for NullableSerializer { } } - fn serialize_column( - &self, - column: &ColumnRef, - format: &FormatSettings, - ) -> Result> { + fn serialize_column(&self, column: &ColumnRef, format: &FormatSettings) -> Result> { let column: &NullableColumn = Series::check_get(column)?; let rows = column.len(); let mut res = self.inner.serialize_column(column.inner(), format)?; @@ -58,11 +54,7 @@ impl TypeSerializer for NullableSerializer { Ok(res) } - fn serialize_json( - &self, - column: &ColumnRef, - format: &FormatSettings, - ) -> Result> { + fn serialize_json(&self, column: &ColumnRef, format: &FormatSettings) -> Result> { let column: &NullableColumn = Series::check_get(column)?; let rows = column.len(); let mut res = self.inner.serialize_json(column.inner(), format)?; diff --git a/common/datavalues/src/types/serializations/number.rs b/common/datavalues/src/types/serializations/number.rs index 767e3c864cfc2..15f37053bc672 100644 --- a/common/datavalues/src/types/serializations/number.rs +++ b/common/datavalues/src/types/serializations/number.rs @@ -14,7 +14,6 @@ use std::marker::PhantomData; - use common_arrow::arrow::bitmap::Bitmap; use common_exception::Result; use common_io::prelude::FormatSettings; @@ -65,11 +64,7 @@ where T: PrimitiveType Ok(result) } - fn serialize_json( - &self, - column: &ColumnRef, - _format: &FormatSettings, - ) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column .iter() diff --git a/common/datavalues/src/types/serializations/string.rs b/common/datavalues/src/types/serializations/string.rs index b63bc591c3776..092e2958b1693 100644 --- a/common/datavalues/src/types/serializations/string.rs +++ b/common/datavalues/src/types/serializations/string.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // 
limitations under the License. - - use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; use common_exception::Result; @@ -49,11 +47,7 @@ impl TypeSerializer for StringSerializer { Ok(result) } - fn serialize_json( - &self, - column: &ColumnRef, - _format: &FormatSettings, - ) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let column: &StringColumn = Series::check_get(column)?; let result: Vec = column .iter() diff --git a/common/datavalues/src/types/serializations/struct_.rs b/common/datavalues/src/types/serializations/struct_.rs index bd0c3c88f2dd1..e4b12bbed38ff 100644 --- a/common/datavalues/src/types/serializations/struct_.rs +++ b/common/datavalues/src/types/serializations/struct_.rs @@ -58,11 +58,7 @@ impl TypeSerializer for StructSerializer { } } - fn serialize_column( - &self, - column: &ColumnRef, - format: &FormatSettings, - ) -> Result> { + fn serialize_column(&self, column: &ColumnRef, format: &FormatSettings) -> Result> { let column: &StructColumn = Series::check_get(column)?; let mut result = Vec::with_capacity(column.len()); for i in 0..column.len() { @@ -73,11 +69,7 @@ impl TypeSerializer for StructSerializer { Ok(result) } - fn serialize_json( - &self, - _column: &ColumnRef, - _format: &FormatSettings, - ) -> Result> { + fn serialize_json(&self, _column: &ColumnRef, _format: &FormatSettings) -> Result> { todo!() } diff --git a/common/datavalues/src/types/serializations/timestamp.rs b/common/datavalues/src/types/serializations/timestamp.rs index f619e4bdbb434..8b60445e9c710 100644 --- a/common/datavalues/src/types/serializations/timestamp.rs +++ b/common/datavalues/src/types/serializations/timestamp.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. - - use chrono::DateTime; use chrono_tz::Tz; use common_exception::*; @@ -38,7 +36,7 @@ impl Default for TimestampSerializer { impl TimestampSerializer { pub fn new_with_tz(tz: Tz) -> Self { - Self {tz} + Self { tz } } pub fn to_timestamp(&self, value: &i64) -> DateTime { @@ -71,11 +69,7 @@ impl TypeSerializer for TimestampSerializer { Ok(result) } - fn serialize_json( - &self, - column: &ColumnRef, - _format: &FormatSettings, - ) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let array: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = array .iter() diff --git a/common/datavalues/src/types/serializations/variant.rs b/common/datavalues/src/types/serializations/variant.rs index 0e568a807d52d..175b8fcb492b8 100644 --- a/common/datavalues/src/types/serializations/variant.rs +++ b/common/datavalues/src/types/serializations/variant.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
- - use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; use common_exception::Result; @@ -47,11 +45,7 @@ impl TypeSerializer for VariantSerializer { Ok(result) } - fn serialize_json( - &self, - column: &ColumnRef, - _format: &FormatSettings, - ) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let column: &VariantColumn = Series::check_get(column)?; let result: Vec = column.iter().map(|v| v.as_ref().to_owned()).collect(); Ok(result) diff --git a/common/datavalues/src/types/type_nullable.rs b/common/datavalues/src/types/type_nullable.rs index fb08ca8bdcdde..d93c48fa3d958 100644 --- a/common/datavalues/src/types/type_nullable.rs +++ b/common/datavalues/src/types/type_nullable.rs @@ -14,8 +14,8 @@ use std::collections::BTreeMap; use std::sync::Arc; -use chrono_tz::Tz; +use chrono_tz::Tz; use common_arrow::arrow::datatypes::DataType as ArrowType; use common_arrow::bitmap::MutableBitmap; use common_exception::ErrorCode; @@ -103,7 +103,7 @@ impl DataType for NullableType { .into() } - fn create_deserializer_with_tz(&self, capacity:usize, tz: Tz) ->TypeDeserializerImpl { + fn create_deserializer_with_tz(&self, capacity: usize, tz: Tz) -> TypeDeserializerImpl { NullableDeserializer { inner: Box::new(self.inner.create_deserializer_with_tz(capacity, tz)), bitmap: MutableBitmap::with_capacity(capacity), diff --git a/common/functions/Cargo.toml b/common/functions/Cargo.toml index cb0fee2fc441f..730c2bcd34c48 100644 --- a/common/functions/Cargo.toml +++ b/common/functions/Cargo.toml @@ -24,6 +24,7 @@ blake3 = "1.3.1" bstr = "0.2.17" bumpalo = "3.9.1" bytes = "1.1.0" +chrono-tz = "0.6.1" crc32fast = "1.3.2" dyn-clone = "1.0.5" hex = "0.4.3" diff --git a/common/functions/src/scalars/dates/interval_function.rs b/common/functions/src/scalars/dates/interval_function.rs index 72bc4244a646e..a27d60431f3fe 100644 --- a/common/functions/src/scalars/dates/interval_function.rs +++ b/common/functions/src/scalars/dates/interval_function.rs @@ -15,6 +15,7 @@ use std::fmt; use std::marker::PhantomData; use std::sync::Arc; +use chrono_tz::Tz; use common_datavalues::chrono::Datelike; use common_datavalues::chrono::Duration; @@ -141,12 +142,15 @@ where fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, _input_rows: usize, ) -> Result { // Todo(zhyass): define the ctx out of the eval. - let mut ctx = EvalContext::new(self.factor, self.precision, None); + let tz = func_ctx.tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; + let mut ctx = EvalContext::new(self.factor, self.precision, None, tz); let col = scalar_binary_op( columns[0].column(), columns[1].column(), diff --git a/common/functions/src/scalars/dates/number_function.rs b/common/functions/src/scalars/dates/number_function.rs index 8b6e6ff2e3af3..16640773ec39e 100644 --- a/common/functions/src/scalars/dates/number_function.rs +++ b/common/functions/src/scalars/dates/number_function.rs @@ -15,6 +15,7 @@ use std::fmt; use std::marker::PhantomData; +use chrono_tz::Tz; use common_datavalues::chrono::DateTime; use common_datavalues::chrono::Datelike; use common_datavalues::chrono::TimeZone; @@ -46,7 +47,7 @@ pub struct NumberFunction { pub trait NumberOperator { const IS_DETERMINISTIC: bool; - fn to_number(_value: DateTime) -> R; + fn to_number(_value: DateTime, tz: &Tz) -> R; // Used to check the monotonicity of the function. 
// For example, ToDayOfYear is monotonous only when the time range is the same year. @@ -68,7 +69,7 @@ pub struct ToYYYYMM; impl NumberOperator for ToYYYYMM { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u32 { + fn to_number(value: DateTime, tz: &Tz) -> u32 { value.year() as u32 * 100 + value.month() } } @@ -79,7 +80,7 @@ pub struct ToYYYYMMDD; impl NumberOperator for ToYYYYMMDD { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u32 { + fn to_number(value: DateTime, tz: &Tz) -> u32 { value.year() as u32 * 10000 + value.month() * 100 + value.day() } } @@ -90,7 +91,7 @@ pub struct ToYYYYMMDDhhmmss; impl NumberOperator for ToYYYYMMDDhhmmss { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u64 { + fn to_number(value: DateTime, tz: &Tz) -> u64 { value.year() as u64 * 10000000000 + value.month() as u64 * 100000000 + value.day() as u64 * 1000000 @@ -106,9 +107,9 @@ pub struct ToStartOfYear; impl NumberOperator for ToStartOfYear { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> i32 { - let end: DateTime = Utc.ymd(value.year(), 1, 1).and_hms(0, 0, 0); - get_day(end) as i32 + fn to_number(value: DateTime, tz: &Tz) -> i32 { + let end = tz.ymd(value.year(), 1, 1).and_hms(0, 0, 0); + get_day(end, tz) as i32 } fn return_type() -> Option { @@ -122,14 +123,14 @@ pub struct ToStartOfISOYear; impl NumberOperator for ToStartOfISOYear { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> i32 { + fn to_number(value: DateTime, tz: &Tz) -> i32 { let week_day = value.weekday().num_days_from_monday(); let iso_week = value.iso_week(); let iso_week_num = iso_week.week(); let sub_days = (iso_week_num - 1) * 7 + week_day; let result = value.timestamp_millis() - sub_days as i64 * 24 * 3600 * 1000; - let end: DateTime = Utc.timestamp_millis(result); - get_day(end) as i32 + let end = tz.timestamp_millis(result); + get_day(end, tz) as i32 } fn return_type() -> Option { @@ -143,10 +144,10 @@ pub struct ToStartOfQuarter; impl NumberOperator for ToStartOfQuarter { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> i32 { + fn to_number(value: DateTime, tz: &Tz) -> i32 { let new_month = value.month0() / 3 * 3 + 1; - let date = Utc.ymd(value.year(), new_month, 1).and_hms(0, 0, 0); - get_day(date) as i32 + let date = tz.ymd(value.year(), new_month, 1).and_hms(0, 0, 0); + get_day(date, tz) as i32 } fn return_type() -> Option { @@ -160,9 +161,9 @@ pub struct ToStartOfMonth; impl NumberOperator for ToStartOfMonth { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> i32 { - let date = Utc.ymd(value.year(), value.month(), 1).and_hms(0, 0, 0); - get_day(date) as i32 + fn to_number(value: DateTime, tz: &Tz) -> i32 { + let date = tz.ymd(value.year(), value.month(), 1).and_hms(0, 0, 0); + get_day(date, tz) as i32 } fn return_type() -> Option { @@ -176,7 +177,7 @@ pub struct ToMonth; impl NumberOperator for ToMonth { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u8 { + fn to_number(value: DateTime, tz: &Tz) -> u8 { value.month() as u8 } @@ -193,7 +194,7 @@ pub struct ToDayOfYear; impl NumberOperator for ToDayOfYear { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u16 { + fn to_number(value: DateTime, tz: &Tz) -> u16 { value.ordinal() as u16 } @@ -210,7 +211,7 @@ pub struct ToDayOfMonth; impl NumberOperator for ToDayOfMonth { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u8 { + fn to_number(value: DateTime, 
tz: &Tz) -> u8 { value.day() as u8 } @@ -227,7 +228,7 @@ pub struct ToDayOfWeek; impl NumberOperator for ToDayOfWeek { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u8 { + fn to_number(value: DateTime, tz: &Tz) -> u8 { value.weekday().number_from_monday() as u8 } @@ -243,7 +244,7 @@ pub struct ToHour; impl NumberOperator for ToHour { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u8 { + fn to_number(value: DateTime, tz: &Tz) -> u8 { value.hour() as u8 } @@ -260,7 +261,7 @@ pub struct ToMinute; impl NumberOperator for ToMinute { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u8 { + fn to_number(value: DateTime, tz: &Tz) -> u8 { value.minute() as u8 } @@ -279,7 +280,7 @@ pub struct ToSecond; impl NumberOperator for ToSecond { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u8 { + fn to_number(value: DateTime, tz: &Tz) -> u8 { value.second() as u8 } @@ -298,9 +299,9 @@ pub struct ToMonday; impl NumberOperator for ToMonday { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u16 { + fn to_number(value: DateTime, tz: &Tz) -> u16 { let weekday = value.weekday(); - (get_day(value) as u32 - weekday.num_days_from_monday()) as u16 + (get_day(value, tz) as u32 - weekday.num_days_from_monday()) as u16 } } @@ -310,7 +311,7 @@ pub struct ToYear; impl NumberOperator for ToYear { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u16 { + fn to_number(value: DateTime, tz: &Tz) -> u16 { value.year() as u16 } } @@ -360,7 +361,7 @@ where fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &common_datavalues::ColumnsWithField, _input_rows: usize, ) -> Result { @@ -368,9 +369,10 @@ where let number_array = match type_id { TypeID::Date => { + let tz = "UTC".parse::().unwrap(); let func = |v: i32, _ctx: &mut EvalContext| { - let date_time = Utc.timestamp(v as i64 * 24 * 3600, 0_u32); - T::to_number(date_time) + let date_time = tz.timestamp(v as i64 * 24 * 3600, 0_u32); + T::to_number(date_time, &tz) }; let col = scalar_unary_op::( columns[0].column(), @@ -380,9 +382,15 @@ where Ok(col.arc()) } TypeID::Timestamp => { + // round_func need to calcute it with origin timezone + // such as in UTC: 2022-03-31 22:00 and in +8:00 time is 2022-04-01 6:00 + // then the result of to the month of should be 2022-04-01 6:00 rather than 2022-03-01 22:00 + let tz = func_ctx.tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; let func = |v: i64, _ctx: &mut EvalContext| { - let date_time = Utc.timestamp(v / 1_000_000, 0_u32); - T::to_number(date_time) + let date_time = tz.timestamp(v / 1_000_000, 0_u32); + T::to_number(date_time, &tz) }; let col = scalar_unary_op::( columns[0].column(), @@ -442,8 +450,8 @@ impl fmt::Display for NumberFunction { } } -fn get_day(date: DateTime) -> i64 { - let start: DateTime = Utc.ymd(1970, 1, 1).and_hms(0, 0, 0); +fn get_day(date: DateTime, tz: &Tz) -> i64 { + let start = tz.ymd(1970, 1, 1).and_hms(0, 0, 0); let duration = date.signed_duration_since(start); duration.num_days() } diff --git a/common/functions/src/scalars/dates/round_function.rs b/common/functions/src/scalars/dates/round_function.rs index 591349171c83c..a9e5c8248a2b4 100644 --- a/common/functions/src/scalars/dates/round_function.rs +++ b/common/functions/src/scalars/dates/round_function.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the 
License. +use chrono_tz::Tz; +use common_datavalues::chrono::TimeZone; use std::fmt; use common_datavalues::prelude::*; @@ -56,7 +58,7 @@ impl RoundFunction { // Consider about the timezones/offsets // Currently: assuming timezone offset is a multiple of round. #[inline] - fn execute(&self, time: i64) -> i64 { + fn execute(&self, time: i64, tz: &Tz) -> i64 { let round = self.round as i64; time / MICROSECONDS / round * round * MICROSECONDS } @@ -73,13 +75,18 @@ impl Function for RoundFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &common_datavalues::ColumnsWithField, _input_rows: usize, ) -> Result { - let func = |val: i64, _ctx: &mut EvalContext| self.execute(val); + let func = |val: i64, ctx: &mut EvalContext| self.execute(val, &ctx.tz); + let mut eval_context = EvalContext::default(); + let tz = func_ctx.tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; + eval_context.tz = tz; let col = - scalar_unary_op::(columns[0].column(), func, &mut EvalContext::default())?; + scalar_unary_op::(columns[0].column(), func, &mut eval_context)?; for micros in col.iter() { let _ = check_timestamp(*micros)?; } diff --git a/common/functions/src/scalars/expressions/cast_with_type.rs b/common/functions/src/scalars/expressions/cast_with_type.rs index 0fd0c2cd3885f..39b766c8f71b7 100644 --- a/common/functions/src/scalars/expressions/cast_with_type.rs +++ b/common/functions/src/scalars/expressions/cast_with_type.rs @@ -14,7 +14,6 @@ use std::sync::Arc; -use common_io::prelude::FormatSettings; use common_arrow::arrow::array::ArrayRef; use common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::compute::cast; @@ -23,6 +22,7 @@ use common_arrow::bitmap::MutableBitmap; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use super::cast_from_datetimes::cast_from_date; use super::cast_from_string::cast_from_string; diff --git a/common/functions/src/scalars/expressions/ctx.rs b/common/functions/src/scalars/expressions/ctx.rs index b3029f8f826dd..94fc56674bf3c 100644 --- a/common/functions/src/scalars/expressions/ctx.rs +++ b/common/functions/src/scalars/expressions/ctx.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
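The comment added in number_function.rs above (the 2022-03-31 22:00 UTC case) is why the date operators above, and EvalContext in the hunk that follows, now carry a timezone. A standalone check of that behaviour with chrono and chrono-tz, using illustrative values only (not part of the patch):

    use chrono::Datelike;
    use chrono::TimeZone;
    use chrono_tz::Tz;

    let utc: Tz = "UTC".parse().unwrap();
    let cst: Tz = "Asia/Shanghai".parse().unwrap();
    // 2022-03-31 22:00:00 UTC is 2022-04-01 06:00:00 at +08:00, so month-level
    // functions must resolve the calendar fields in the session timezone.
    let secs = utc.ymd(2022, 3, 31).and_hms(22, 0, 0).timestamp();
    assert_eq!(utc.timestamp(secs, 0).month(), 3);
    assert_eq!(cst.timestamp(secs, 0).month(), 4);
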
+use chrono_tz::Tz; use common_exception::ErrorCode; #[derive(Debug, Clone)] @@ -19,24 +20,28 @@ pub struct EvalContext { pub factor: i64, pub precision: usize, pub error: Option, + pub tz: Tz, } impl Default for EvalContext { fn default() -> Self { + let tz = "UTC".parse::().unwrap(); Self { factor: 1, precision: 0, error: None, + tz, } } } impl EvalContext { - pub fn new(factor: i64, precision: usize, error: Option) -> Self { + pub fn new(factor: i64, precision: usize, error: Option, tz: Tz) -> Self { Self { factor, precision, error, + tz } } diff --git a/common/functions/src/scalars/semi_structureds/json_extract_path_text.rs b/common/functions/src/scalars/semi_structureds/json_extract_path_text.rs index 47bc6e55ceca6..e0e58dbde8697 100644 --- a/common/functions/src/scalars/semi_structureds/json_extract_path_text.rs +++ b/common/functions/src/scalars/semi_structureds/json_extract_path_text.rs @@ -14,10 +14,10 @@ use std::fmt; -use common_io::prelude::FormatSettings; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use crate::scalars::semi_structureds::get::extract_value_by_path; use crate::scalars::semi_structureds::get::parse_path_keys; diff --git a/common/functions/src/scalars/semi_structureds/parse_json.rs b/common/functions/src/scalars/semi_structureds/parse_json.rs index 207ccd929822e..85da7d5992982 100644 --- a/common/functions/src/scalars/semi_structureds/parse_json.rs +++ b/common/functions/src/scalars/semi_structureds/parse_json.rs @@ -14,10 +14,10 @@ use std::fmt; -use common_io::prelude::FormatSettings; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use crate::scalars::Function; use crate::scalars::FunctionContext; diff --git a/common/streams/src/sources/source_csv.rs b/common/streams/src/sources/source_csv.rs index 51c2ac553442f..7410eeaf9fb70 100644 --- a/common/streams/src/sources/source_csv.rs +++ b/common/streams/src/sources/source_csv.rs @@ -169,8 +169,11 @@ where R: AsyncRead + Unpin + Send .iter() .map(|f| { if f.data_type().data_type_id() == TypeID::Timestamp { - f.data_type().create_deserializer_with_tz(self.builder.block_size, self.builder.tz.clone()) - }else { + f.data_type().create_deserializer_with_tz( + self.builder.block_size, + self.builder.tz.clone(), + ) + } else { f.data_type().create_deserializer(self.builder.block_size) } }) diff --git a/common/streams/src/sources/source_ndjson.rs b/common/streams/src/sources/source_ndjson.rs index 66eaba256055e..80bfeec1e0968 100644 --- a/common/streams/src/sources/source_ndjson.rs +++ b/common/streams/src/sources/source_ndjson.rs @@ -34,7 +34,7 @@ pub struct NDJsonSourceBuilder { schema: DataSchemaRef, block_size: usize, size_limit: usize, - tz: Tz + tz: Tz, } impl NDJsonSourceBuilder { @@ -113,8 +113,11 @@ where R: AsyncBufRead + Unpin + Send .iter() .map(|f| { if f.data_type().data_type_id() == TypeID::Timestamp { - f.data_type().create_deserializer_with_tz(self.builder.block_size, self.builder.tz.clone()) - }else { + f.data_type().create_deserializer_with_tz( + self.builder.block_size, + self.builder.tz.clone(), + ) + } else { f.data_type().create_deserializer(self.builder.block_size) } }) diff --git a/query/src/servers/clickhouse/interactive_worker.rs b/query/src/servers/clickhouse/interactive_worker.rs index 68b381b49bb6d..f965c7cce701c 100644 --- a/query/src/servers/clickhouse/interactive_worker.rs +++ 
b/query/src/servers/clickhouse/interactive_worker.rs @@ -50,8 +50,14 @@ impl ClickHouseSession for InteractiveWorker { let session = self.session.clone(); let get_query_result = InteractiveWorkerBase::do_query(ctx, session); - let query_ctx = self.session.get_shared_query_context().await.map_err(|err| to_clickhouse_err(err))?; - let format = query_ctx.get_format_settings().map_err(|err| to_clickhouse_err(err))?; + let query_ctx = self + .session + .get_shared_query_context() + .await + .map_err(|err| to_clickhouse_err(err))?; + let format = query_ctx + .get_format_settings() + .map_err(|err| to_clickhouse_err(err))?; if let Err(cause) = query_writer.write(get_query_result.await, &format).await { let new_error = cause.add_message(&ctx.state.query); return Err(to_clickhouse_err(new_error)); diff --git a/query/src/servers/clickhouse/writers/query_writer.rs b/query/src/servers/clickhouse/writers/query_writer.rs index 872feb6f87faf..1b297be8d6de1 100644 --- a/query/src/servers/clickhouse/writers/query_writer.rs +++ b/query/src/servers/clickhouse/writers/query_writer.rs @@ -13,14 +13,14 @@ // limitations under the License. use std::borrow::Cow; -use chrono_tz::Tz; -use common_io::prelude::FormatSettings; +use chrono_tz::Tz; use common_base::ProgressValues; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use common_tracing::tracing; use futures::channel::mpsc::Receiver; use futures::StreamExt; @@ -48,7 +48,11 @@ impl<'a> QueryWriter<'a> { } } - pub async fn write(&mut self, receiver: Result>, format: &FormatSettings) -> Result<()> { + pub async fn write( + &mut self, + receiver: Result>, + format: &FormatSettings, + ) -> Result<()> { match receiver { Err(error) => self.write_error(error).await, Ok(receiver) => { @@ -96,7 +100,11 @@ impl<'a> QueryWriter<'a> { } } - async fn write_data(&mut self, mut receiver: Receiver, format: &FormatSettings) -> Result<()> { + async fn write_data( + &mut self, + mut receiver: Receiver, + format: &FormatSettings, + ) -> Result<()> { loop { match receiver.next().await { None => { @@ -145,13 +153,13 @@ pub fn to_clickhouse_block(block: DataBlock, format: &FormatSettings) -> Result< let field = block.schema().field(column_index); let name = field.name(); let serializer = if field.data_type().data_type_id() == TypeID::Timestamp { - let tz = - String::from_utf8(format.timezone.clone()).map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; + let tz = String::from_utf8(format.timezone.clone()) + .map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; let tz = tz.parse::().map_err(|_| { ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") })?; field.data_type().create_serializer_with_tz(tz) - }else { + } else { field.data_type().create_serializer() }; result.append_column(column::new_column( diff --git a/query/src/servers/http/clickhouse_handler.rs b/query/src/servers/http/clickhouse_handler.rs index 17ed261a2b4e6..33fc04933e932 100644 --- a/query/src/servers/http/clickhouse_handler.rs +++ b/query/src/servers/http/clickhouse_handler.rs @@ -13,9 +13,9 @@ // limitations under the License. 
use std::sync::Arc; -use chrono_tz::Tz; use async_stream::stream; +use chrono_tz::Tz; use common_exception::ErrorCode; use common_exception::Result; use common_exception::ToErrorCode; diff --git a/query/src/servers/http/v1/http_query_handlers.rs b/query/src/servers/http/v1/http_query_handlers.rs index 1eb6172142705..a3ce737f6f4f0 100644 --- a/query/src/servers/http/v1/http_query_handlers.rs +++ b/query/src/servers/http/v1/http_query_handlers.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_io::prelude::FormatSettings; use common_base::ProgressValues; use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; +use common_io::prelude::FormatSettings; use common_tracing::tracing; use poem::error::Error as PoemError; use poem::error::Result as PoemResult; diff --git a/query/src/servers/http/v1/json_block.rs b/query/src/servers/http/v1/json_block.rs index 4c1b41ca7c5b2..c4cf94cd41de3 100644 --- a/query/src/servers/http/v1/json_block.rs +++ b/query/src/servers/http/v1/json_block.rs @@ -14,7 +14,6 @@ use std::sync::Arc; -use common_io::prelude::FormatSettings; use common_datablocks::DataBlock; use common_datavalues::DataSchema; use common_datavalues::DataSchemaRef; @@ -22,6 +21,7 @@ use common_datavalues::DataType; use common_datavalues::TypeSerializer; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use serde_json::Value as JsonValue; #[derive(Debug, Clone)] diff --git a/query/src/servers/http/v1/load.rs b/query/src/servers/http/v1/load.rs index a6159c3778080..578885db2d1b3 100644 --- a/query/src/servers/http/v1/load.rs +++ b/query/src/servers/http/v1/load.rs @@ -13,10 +13,10 @@ // limitations under the License. 
use std::sync::Arc; -use chrono_tz::Tz; use async_compat::CompatExt; use async_stream::stream; +use chrono_tz::Tz; use common_base::ProgressValues; use common_exception::ErrorCode; use common_exception::ToErrorCode; diff --git a/query/src/servers/http/v1/query/http_query.rs b/query/src/servers/http/v1/query/http_query.rs index 95f0cb96d34a9..da2c64d3cc354 100644 --- a/query/src/servers/http/v1/query/http_query.rs +++ b/query/src/servers/http/v1/query/http_query.rs @@ -17,13 +17,13 @@ use std::sync::Arc; use std::time::Duration; use std::time::Instant; -use common_io::prelude::FormatSettings; use common_base::tokio::sync::mpsc; use common_base::tokio::sync::Mutex as TokioMutex; use common_base::tokio::sync::RwLock; use common_base::ProgressValues; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use serde::Deserialize; use super::HttpQueryContext; @@ -176,7 +176,11 @@ impl HttpQuery { self.request.pagination.wait_time_secs == 0 } - pub async fn get_response_page(&self, page_no: usize, format: &FormatSettings) -> Result { + pub async fn get_response_page( + &self, + page_no: usize, + format: &FormatSettings, + ) -> Result { Ok(HttpQueryResponseInternal { data: Some(self.get_page(page_no, format).await?), session_id: self.session_id.clone(), diff --git a/query/src/servers/http/v1/query/result_data_manager.rs b/query/src/servers/http/v1/query/result_data_manager.rs index bebe82ee66e16..010a53203ff21 100644 --- a/query/src/servers/http/v1/query/result_data_manager.rs +++ b/query/src/servers/http/v1/query/result_data_manager.rs @@ -14,13 +14,13 @@ use std::time::Instant; -use common_io::prelude::FormatSettings; use common_base::tokio; use common_base::tokio::sync::mpsc; use common_base::tokio::sync::mpsc::error::TryRecvError; use common_datablocks::DataBlock; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use common_tracing::tracing; use crate::servers::http::v1::JsonBlock; @@ -72,7 +72,12 @@ impl ResultDataManager { } } - pub async fn get_a_page(&mut self, page_no: usize, tp: &Wait, format: &FormatSettings) -> Result { + pub async fn get_a_page( + &mut self, + page_no: usize, + tp: &Wait, + format: &FormatSettings, + ) -> Result { let next_no = self.total_pages; if page_no == next_no && !self.end { let (block, end) = self.collect_new_page(tp, format).await?; @@ -121,7 +126,11 @@ impl ResultDataManager { } } - pub async fn collect_new_page(&mut self, tp: &Wait, format: &FormatSettings) -> Result<(JsonBlock, bool)> { + pub async fn collect_new_page( + &mut self, + tp: &Wait, + format: &FormatSettings, + ) -> Result<(JsonBlock, bool)> { let mut results: Vec = Vec::new(); let mut rows = 0; let block_rx = &mut self.block_rx; diff --git a/query/src/servers/http/v1/statement.rs b/query/src/servers/http/v1/statement.rs index 16862af186e57..81a8d2fe170d7 100644 --- a/query/src/servers/http/v1/statement.rs +++ b/query/src/servers/http/v1/statement.rs @@ -54,8 +54,8 @@ pub async fn statement_handler( let query = http_query_manager .try_create_query(&query_id, ctx, req) .await; - // TODO(veeupup): get query_ctx's format_settings here - let format = FormatSettings::default(); + // TODO(veeupup): get query_ctx's format_settings here + let format = FormatSettings::default(); match query { Ok(query) => { let resp = query diff --git a/query/src/servers/mysql/mysql_interactive_worker.rs b/query/src/servers/mysql/mysql_interactive_worker.rs index d4b76e5915036..4ce4af49c760a 100644 --- 
a/query/src/servers/mysql/mysql_interactive_worker.rs +++ b/query/src/servers/mysql/mysql_interactive_worker.rs @@ -179,7 +179,11 @@ impl AsyncMysqlShim for InteractiveWorker let instant = Instant::now(); let blocks = self.base.do_query(query).await; - let format = self.session.get_shared_query_context().await?.get_format_settings()?; + let format = self + .session + .get_shared_query_context() + .await? + .get_format_settings()?; let mut write_result = writer.write(blocks, &format); if let Err(cause) = write_result { diff --git a/query/src/servers/mysql/writers/query_result_writer.rs b/query/src/servers/mysql/writers/query_result_writer.rs index 05aabf96f9b9d..7920fc82c28de 100644 --- a/query/src/servers/mysql/writers/query_result_writer.rs +++ b/query/src/servers/mysql/writers/query_result_writer.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. - use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::prelude::TypeID; @@ -28,8 +27,8 @@ use common_exception::ErrorCode; use common_exception::Result; use common_exception::ABORT_QUERY; use common_exception::ABORT_SESSION; -use common_tracing::tracing; use common_io::prelude::FormatSettings; +use common_tracing::tracing; use opensrv_mysql::*; pub struct DFQueryResultWriter<'a, W: std::io::Write> { @@ -41,7 +40,11 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { DFQueryResultWriter::<'a, W> { inner: Some(inner) } } - pub fn write(&mut self, query_result: Result<(Vec, String)>, format: &FormatSettings) -> Result<()> { + pub fn write( + &mut self, + query_result: Result<(Vec, String)>, + format: &FormatSettings, + ) -> Result<()> { if let Some(writer) = self.inner.take() { match query_result { Ok((blocks, extra_info)) => Self::ok(blocks, extra_info, writer, format)?, @@ -55,7 +58,7 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { blocks: Vec, extra_info: String, dataset_writer: QueryResultWriter<'a, W>, - format: &FormatSettings + format: &FormatSettings, ) -> Result<()> { // XXX: num_columns == 0 may is error? let default_response = OkResponse { @@ -111,8 +114,8 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { } let block = blocks[0].clone(); - let tz = - String::from_utf8(format.timezone.clone()).map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; + let tz = String::from_utf8(format.timezone.clone()) + .map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; let tz = tz.parse::().map_err(|_| { ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") })?; @@ -158,19 +161,23 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { } (TypeID::Struct, DataValue::Struct(_)) => { let serializer = data_type.create_serializer(); - row_writer.write_col(serializer.serialize_value(&val, format)?)? + row_writer + .write_col(serializer.serialize_value(&val, format)?)? } (TypeID::Variant, DataValue::Variant(_)) => { let serializer = data_type.create_serializer(); - row_writer.write_col(serializer.serialize_value(&val, format)?)? + row_writer + .write_col(serializer.serialize_value(&val, format)?)? } (TypeID::VariantArray, DataValue::Variant(_)) => { let serializer = data_type.create_serializer(); - row_writer.write_col(serializer.serialize_value(&val, format)?)? + row_writer + .write_col(serializer.serialize_value(&val, format)?)? 
} (TypeID::VariantObject, DataValue::Variant(_)) => { let serializer = data_type.create_serializer(); - row_writer.write_col(serializer.serialize_value(&val, format)?)? + row_writer + .write_col(serializer.serialize_value(&val, format)?)? } (_, DataValue::Int64(v)) => row_writer.write_col(v)?, diff --git a/query/src/sql/statements/value_source.rs b/query/src/sql/statements/value_source.rs index 5881b0f75af18..b7b03b6c4bb68 100644 --- a/query/src/sql/statements/value_source.rs +++ b/query/src/sql/statements/value_source.rs @@ -15,8 +15,8 @@ use std::ops::Not; use std::sync::Arc; -use chrono_tz::Tz; +use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::BacktraceGuard; @@ -57,8 +57,8 @@ impl ValueSource { pub async fn read<'a>(&self, reader: &mut CpBufferReader<'a>) -> Result { let format = self.ctx.get_format_settings()?; - let tz = - String::from_utf8(format.timezone.clone()).map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; + let tz = String::from_utf8(format.timezone.clone()) + .map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; let tz = tz.parse::().map_err(|_| { ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") })?; @@ -69,7 +69,7 @@ impl ValueSource { .map(|f| { if f.data_type().data_type_id() == TypeID::Timestamp { f.data_type().create_deserializer_with_tz(1024, tz) - }else { + } else { f.data_type().create_deserializer(1024) } }) diff --git a/query/src/storages/s3/s3_stage_source.rs b/query/src/storages/s3/s3_stage_source.rs index 031d3fe3962ad..5a18b69f09e54 100644 --- a/query/src/storages/s3/s3_stage_source.rs +++ b/query/src/storages/s3/s3_stage_source.rs @@ -15,8 +15,8 @@ use std::collections::VecDeque; use std::future::Future; use std::sync::Arc; -use chrono_tz::Tz; +use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; @@ -124,8 +124,8 @@ impl StageSource { reader: BytesReader, ) -> Result> { let format = ctx.get_format_settings()?; - let tz = - String::from_utf8(format.timezone.clone()).map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; + let tz = String::from_utf8(format.timezone.clone()) + .map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; let tz = tz.parse::().map_err(|_| { ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") })?; From d0fe3e86376624293e7b496d25d66aa9feb59b01 Mon Sep 17 00:00:00 2001 From: Veeupup Date: Thu, 5 May 2022 17:02:38 +0800 Subject: [PATCH 06/15] refactor: date round function work with tz Signed-off-by: Veeupup --- common/functions/src/scalars/dates/date.rs | 31 +++++++--- .../src/scalars/dates/interval_function.rs | 2 +- .../src/scalars/dates/number_function.rs | 11 +++- .../src/scalars/dates/round_function.rs | 59 ++++++++++++++++--- .../functions/src/scalars/expressions/ctx.rs | 2 +- 5 files changed, 83 insertions(+), 22 deletions(-) diff --git a/common/functions/src/scalars/dates/date.rs b/common/functions/src/scalars/dates/date.rs index 5e6beb9851c49..28f0db55d0e36 100644 --- a/common/functions/src/scalars/dates/date.rs +++ b/common/functions/src/scalars/dates/date.rs @@ -15,6 +15,7 @@ use super::now::NowFunction; use super::number_function::ToMondayFunction; use super::number_function::ToYearFunction; +use super::round_function::Round; use super::AddDaysFunction; use super::AddMonthsFunction; use super::AddTimesFunction; @@ -47,7 +48,7 @@ use crate::scalars::FunctionFeatures; pub struct DateFunction {} impl DateFunction 
{ - fn round_function_creator(round: u32) -> FunctionDescription { + fn round_function_creator(round: Round) -> FunctionDescription { let creator: FactoryCreator = Box::new(move |display_name, args| { RoundFunction::try_create(display_name, args, round) }); @@ -84,17 +85,29 @@ impl DateFunction { factory.register("toYear", ToYearFunction::desc()); // rounders - factory.register("toStartOfSecond", Self::round_function_creator(1)); - factory.register("toStartOfMinute", Self::round_function_creator(60)); - factory.register("toStartOfFiveMinutes", Self::round_function_creator(5 * 60)); - factory.register("toStartOfTenMinutes", Self::round_function_creator(10 * 60)); + factory.register( + "toStartOfSecond", + Self::round_function_creator(Round::Second), + ); + factory.register( + "toStartOfMinute", + Self::round_function_creator(Round::Minute), + ); + factory.register( + "toStartOfFiveMinutes", + Self::round_function_creator(Round::FiveMinutes), + ); + factory.register( + "toStartOfTenMinutes", + Self::round_function_creator(Round::TenMinutes), + ); factory.register( "toStartOfFifteenMinutes", - Self::round_function_creator(15 * 60), + Self::round_function_creator(Round::FifteenMinutes), ); - factory.register("timeSlot", Self::round_function_creator(30 * 60)); - factory.register("toStartOfHour", Self::round_function_creator(60 * 60)); - factory.register("toStartOfDay", Self::round_function_creator(60 * 60 * 24)); + factory.register("timeSlot", Self::round_function_creator(Round::TimeSlot)); + factory.register("toStartOfHour", Self::round_function_creator(Round::Hour)); + factory.register("toStartOfDay", Self::round_function_creator(Round::Day)); factory.register("toStartOfWeek", ToStartOfWeekFunction::desc()); diff --git a/common/functions/src/scalars/dates/interval_function.rs b/common/functions/src/scalars/dates/interval_function.rs index a27d60431f3fe..5d55ca8246404 100644 --- a/common/functions/src/scalars/dates/interval_function.rs +++ b/common/functions/src/scalars/dates/interval_function.rs @@ -15,8 +15,8 @@ use std::fmt; use std::marker::PhantomData; use std::sync::Arc; -use chrono_tz::Tz; +use chrono_tz::Tz; use common_datavalues::chrono::Datelike; use common_datavalues::chrono::Duration; use common_datavalues::chrono::NaiveDate; diff --git a/common/functions/src/scalars/dates/number_function.rs b/common/functions/src/scalars/dates/number_function.rs index 16640773ec39e..ec1caef47ded5 100644 --- a/common/functions/src/scalars/dates/number_function.rs +++ b/common/functions/src/scalars/dates/number_function.rs @@ -25,6 +25,7 @@ use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; +use super::round_function::Round; use crate::scalars::function_factory::FunctionDescription; use crate::scalars::scalar_unary_op; use crate::scalars::CastFunction; @@ -268,7 +269,7 @@ impl NumberOperator for ToMinute { // ToMinute is NOT a monotonic function in general, unless the time range is within the same hour. fn factor_function() -> Option> { Some( - RoundFunction::try_create("toStartOfHour", &[&TimestampType::new_impl(0)], 60 * 60) + RoundFunction::try_create("toStartOfHour", &[&TimestampType::new_impl(0)], Round::Hour) .unwrap(), ) } @@ -287,8 +288,12 @@ impl NumberOperator for ToSecond { // ToSecond is NOT a monotonic function in general, unless the time range is within the same minute. 
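As background for these rounders, a minimal, self-contained sketch of the timezone-aware truncation that the new Round variants stand for, assuming the chrono 0.4 and chrono-tz 0.6 crates; RoundFunction::execute in the round_function.rs hunk further below applies the same idea to microsecond timestamps, and the helper name here is illustrative only:

use chrono::{Datelike, TimeZone, Timelike};
use chrono_tz::Tz;

// Truncate an epoch-seconds value to the start of its minute in the given timezone.
// The other Round variants only differ in which date/time fields are kept or floored.
fn start_of_minute(secs: i64, tz: &Tz) -> i64 {
    let dt = tz.timestamp(secs, 0);
    tz.ymd(dt.year(), dt.month(), dt.day())
        .and_hms(dt.hour(), dt.minute(), 0)
        .timestamp()
}

fn main() {
    let tz: Tz = "Asia/Shanghai".parse().expect("valid timezone name");
    let secs = 1_651_485_161; // an arbitrary instant with non-zero seconds
    let truncated = start_of_minute(secs, &tz);
    assert_eq!(truncated % 60, 0);
    println!("{} -> {}", secs, truncated);
}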
fn factor_function() -> Option> { Some( - RoundFunction::try_create("toStartOfMinute", &[&TimestampType::new_impl(0)], 60) - .unwrap(), + RoundFunction::try_create( + "toStartOfMinute", + &[&TimestampType::new_impl(0)], + Round::Minute, + ) + .unwrap(), ) } } diff --git a/common/functions/src/scalars/dates/round_function.rs index a9e5c8248a2b4..0868dae76d1ad 100644 --- a/common/functions/src/scalars/dates/round_function.rs +++ b/common/functions/src/scalars/dates/round_function.rs @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -use chrono_tz::Tz; -use common_datavalues::chrono::TimeZone; use std::fmt; +use chrono_tz::Tz; +use common_datavalues::chrono::Datelike; +use common_datavalues::chrono::TimeZone; +use common_datavalues::chrono::Timelike; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; @@ -26,17 +28,29 @@ use crate::scalars::Function; use crate::scalars::FunctionContext; use crate::scalars::Monotonicity; +#[derive(Clone, Copy)] +pub enum Round { + Second, + Minute, + FiveMinutes, + TenMinutes, + FifteenMinutes, + TimeSlot, + Hour, + Day, +} + #[derive(Clone)] pub struct RoundFunction { display_name: String, - round: u32, + round: Round, } impl RoundFunction { pub fn try_create( display_name: &str, args: &[&DataTypeImpl], - round: u32, + round: Round, ) -> Result> { if args[0].data_type_id() != TypeID::Timestamp { return Err(ErrorCode::BadDataValueType(format!( @@ -59,8 +73,38 @@ impl RoundFunction { // Currently: assuming timezone offset is a multiple of round. #[inline] fn execute(&self, time: i64, tz: &Tz) -> i64 { - let round = self.round as i64; - time / MICROSECONDS / round * round * MICROSECONDS + let dt = tz.timestamp(time / MICROSECONDS, 0_u32); + match self.round { + Round::Second => dt.timestamp_micros(), + Round::Minute => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(dt.hour(), dt.minute(), 0, 0) + .timestamp_micros(), + Round::FiveMinutes => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(dt.hour(), dt.minute() / 5 * 5, 0, 0) + .timestamp_micros(), + Round::TenMinutes => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(dt.hour(), dt.minute() / 10 * 10, 0, 0) + .timestamp_micros(), + Round::FifteenMinutes => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(dt.hour(), dt.minute() / 15 * 15, 0, 0) + .timestamp_micros(), + Round::TimeSlot => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(dt.hour(), dt.minute() / 30 * 30, 0, 0) + .timestamp_micros(), + Round::Hour => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(dt.hour(), 0, 0, 0) + .timestamp_micros(), + Round::Day => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(0, 0, 0, 0) + .timestamp_micros(), + } } } @@ -85,8 +129,7 @@ impl Function for RoundFunction { ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") })?; eval_context.tz = tz; - let col = - scalar_unary_op::(columns[0].column(), func, &mut eval_context)?; + let col = scalar_unary_op::(columns[0].column(), func, &mut eval_context)?; for micros in col.iter() { let _ = check_timestamp(*micros)?; } diff --git a/common/functions/src/scalars/expressions/ctx.rs index 94fc56674bf3c..10f5cca1c98c5 100644 --- a/common/functions/src/scalars/expressions/ctx.rs +++ b/common/functions/src/scalars/expressions/ctx.rs @@ -41,7 +41,7 @@ impl EvalContext
{ factor, precision, error, - tz + tz, } } From b9ea557c4a060b4622c1287bc7af71e126e609be Mon Sep 17 00:00:00 2001 From: Veeupup Date: Thu, 5 May 2022 19:49:54 +0800 Subject: [PATCH 07/15] cast func work with tz Signed-off-by: Veeupup --- .../src/scalars/comparisons/comparison.rs | 5 +- .../functions/src/scalars/conditionals/if.rs | 56 ++++++-- .../src/scalars/conditionals/in_basic.rs | 129 +++++++++++++++--- .../functions/src/scalars/dates/week_date.rs | 26 ++-- .../functions/src/scalars/expressions/cast.rs | 4 +- .../expressions/cast_from_datetimes.rs | 19 ++- .../scalars/expressions/cast_from_string.rs | 4 +- .../src/scalars/expressions/cast_with_type.rs | 60 ++++++-- .../src/scalars/function_monotonic.rs | 6 +- .../src/scalars/hashes/city64_with_seed.rs | 10 +- .../functions/src/scalars/hashes/sha2hash.rs | 10 +- common/functions/src/scalars/logics/and.rs | 1 + common/functions/src/scalars/logics/logic.rs | 11 +- common/functions/src/scalars/logics/macros.rs | 6 +- common/functions/src/scalars/logics/not.rs | 9 +- common/functions/src/scalars/logics/or.rs | 1 + common/functions/src/scalars/logics/xor.rs | 10 +- .../functions/src/scalars/others/inet_ntoa.rs | 4 +- common/functions/src/scalars/strings/bin.rs | 12 +- .../src/scalars/strings/export_set.rs | 4 +- common/functions/src/scalars/strings/hex.rs | 12 +- common/functions/src/scalars/strings/oct.rs | 11 +- .../src/scalars/strings/regexp_instr.rs | 6 +- .../src/scalars/strings/regexp_replace.rs | 5 +- .../src/scalars/strings/regexp_substr.rs | 5 +- .../functions/src/scalars/strings/repeat.rs | 9 +- .../src/scalars/strings/substring.rs | 26 +++- .../src/scalars/strings/substring_index.rs | 26 +++- 28 files changed, 376 insertions(+), 111 deletions(-) diff --git a/common/functions/src/scalars/comparisons/comparison.rs b/common/functions/src/scalars/comparisons/comparison.rs index 9fc8173981174..fdee6f37b3c10 100644 --- a/common/functions/src/scalars/comparisons/comparison.rs +++ b/common/functions/src/scalars/comparisons/comparison.rs @@ -299,14 +299,15 @@ where F: Fn(T::Simd, T::Simd) -> u8 + Send + Sync + Clone, { fn eval(&self, l: &ColumnWithField, r: &ColumnWithField) -> Result { + let func_ctx = FunctionContext::default(); let lhs = if self.need_cast && l.data_type() != &self.least_supertype { - cast_column_field(l, l.data_type(), &self.least_supertype)? + cast_column_field(l, l.data_type(), &self.least_supertype, &func_ctx)? } else { l.column().clone() }; let rhs = if self.need_cast && r.data_type() != &self.least_supertype { - cast_column_field(r, r.data_type(), &self.least_supertype)? + cast_column_field(r, r.data_type(), &self.least_supertype, &func_ctx)? 
} else { r.column().clone() }; diff --git a/common/functions/src/scalars/conditionals/if.rs b/common/functions/src/scalars/conditionals/if.rs index 8e9ca350d9420..b76dca8d92a79 100644 --- a/common/functions/src/scalars/conditionals/if.rs +++ b/common/functions/src/scalars/conditionals/if.rs @@ -57,6 +57,7 @@ impl IfFunction { &self, cond_col: &ColumnRef, columns: &ColumnsWithField, + func_ctx: &FunctionContext, ) -> Result { debug_assert!(cond_col.is_const()); // whether nullable or not, we can use viewer to make it @@ -77,6 +78,7 @@ impl IfFunction { cond_col: &BooleanColumn, columns: &ColumnsWithField, input_rows: usize, + func_ctx: &FunctionContext, ) -> Result { debug_assert!(columns[0].column().is_const() || columns[1].column().is_const()); let (lhs_col, rhs_col, reverse) = if columns[0].column().is_const() { @@ -85,8 +87,18 @@ impl IfFunction { (&columns[1], &columns[0], true) }; - let lhs = cast_column_field(lhs_col, lhs_col.data_type(), &self.least_supertype)?; - let rhs = cast_column_field(rhs_col, rhs_col.data_type(), &self.least_supertype)?; + let lhs = cast_column_field( + lhs_col, + lhs_col.data_type(), + &self.least_supertype, + func_ctx, + )?; + let rhs = cast_column_field( + rhs_col, + rhs_col.data_type(), + &self.least_supertype, + func_ctx, + )?; let type_id = remove_nullable(&lhs.data_type()).data_type_id(); @@ -181,12 +193,23 @@ impl IfFunction { cond_col: &BooleanColumn, columns: &ColumnsWithField, input_rows: usize, + func_ctx: &FunctionContext, ) -> Result { let lhs_col = &columns[0]; let rhs_col = &columns[1]; - let lhs = cast_column_field(lhs_col, lhs_col.data_type(), &self.least_supertype)?; - let rhs = cast_column_field(rhs_col, rhs_col.data_type(), &self.least_supertype)?; + let lhs = cast_column_field( + lhs_col, + lhs_col.data_type(), + &self.least_supertype, + func_ctx, + )?; + let rhs = cast_column_field( + rhs_col, + rhs_col.data_type(), + &self.least_supertype, + func_ctx, + )?; let type_id = remove_nullable(&self.least_supertype).data_type_id(); @@ -217,12 +240,23 @@ impl IfFunction { &self, cond_col: &BooleanColumn, columns: &ColumnsWithField, + func_ctx: &FunctionContext, ) -> Result { let lhs_col = &columns[0]; let rhs_col = &columns[1]; - let lhs = cast_column_field(lhs_col, lhs_col.data_type(), &self.least_supertype)?; - let rhs = cast_column_field(rhs_col, rhs_col.data_type(), &self.least_supertype)?; + let lhs = cast_column_field( + lhs_col, + lhs_col.data_type(), + &self.least_supertype, + func_ctx, + )?; + let rhs = cast_column_field( + rhs_col, + rhs_col.data_type(), + &self.least_supertype, + func_ctx, + )?; debug_assert!(!self.least_supertype.is_nullable()); let type_id = self.least_supertype.data_type_id(); @@ -256,7 +290,7 @@ impl Function for IfFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -265,24 +299,24 @@ impl Function for IfFunction { // 1. fast path for cond nullable or const or null column if cond_col.is_const() { - return self.eval_cond_const(&cond_col, &columns[1..]); + return self.eval_cond_const(&cond_col, &columns[1..], &func_ctx); } let cond_col = Series::check_get_scalar::(&cond_col)?; // 2. handle when lhs / rhs is const if columns[1].column().is_const() || columns[2].column().is_const() { - return self.eval_const(cond_col, &columns[1..], input_rows); + return self.eval_const(cond_col, &columns[1..], input_rows, &func_ctx); } // 3. 
handle nullable column let whether_nullable = |col: &ColumnRef| col.is_nullable() || col.data_type().is_null(); if whether_nullable(columns[1].column()) || whether_nullable(columns[2].column()) { - return self.eval_nullable(cond_col, &columns[1..], input_rows); + return self.eval_nullable(cond_col, &columns[1..], input_rows, &func_ctx); } // 4. all normal type and are not nullable/const - self.eval_generic(cond_col, &columns[1..]) + self.eval_generic(cond_col, &columns[1..], &func_ctx) } } diff --git a/common/functions/src/scalars/conditionals/in_basic.rs b/common/functions/src/scalars/conditionals/in_basic.rs index 49c3f9d5d1e83..8b6664aca569b 100644 --- a/common/functions/src/scalars/conditionals/in_basic.rs +++ b/common/functions/src/scalars/conditionals/in_basic.rs @@ -59,11 +59,11 @@ impl InFunction { } macro_rules! scalar_contains { - ($T: ident, $INPUT_COL: expr, $ROWS: expr, $COLUMNS: expr, $CAST_TYPE: ident) => {{ + ($T: ident, $INPUT_COL: expr, $ROWS: expr, $COLUMNS: expr, $CAST_TYPE: ident, $FUNC_CTX: expr) => {{ let mut builder: ColumnBuilder = ColumnBuilder::with_capacity($ROWS); let mut vals_set = HashSet::with_capacity($ROWS - 1); for col in &$COLUMNS[1..] { - let col = cast_column_field(col, col.data_type(), &$CAST_TYPE)?; + let col = cast_column_field(col, col.data_type(), &$CAST_TYPE, &$FUNC_CTX)?; let col_viewer = $T::try_create_viewer(&col)?; if col_viewer.valid_at(0) { let val = col_viewer.value_at(0).to_owned_scalar(); @@ -81,11 +81,11 @@ macro_rules! scalar_contains { } macro_rules! float_contains { - ($T: ident, $INPUT_COL: expr, $ROWS: expr, $COLUMNS: expr, $CAST_TYPE: ident) => {{ + ($T: ident, $INPUT_COL: expr, $ROWS: expr, $COLUMNS: expr, $CAST_TYPE: ident, $FUNC_CTX: expr) => {{ let mut builder: ColumnBuilder = ColumnBuilder::with_capacity($ROWS); let mut vals_set = HashSet::with_capacity($ROWS - 1); for col in &$COLUMNS[1..] 
{ - let col = cast_column_field(col, col.data_type(), &$CAST_TYPE)?; + let col = cast_column_field(col, col.data_type(), &$CAST_TYPE, &$FUNC_CTX)?; let col_viewer = $T::try_create_viewer(&col)?; if col_viewer.valid_at(0) { let val = col_viewer.value_at(0); @@ -116,7 +116,7 @@ impl Function for InFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -150,50 +150,139 @@ impl Function for InFunction { } let least_super_type_id = remove_nullable(&least_super_dt).data_type_id(); - let input_col = cast_column_field(&columns[0], columns[0].data_type(), &least_super_dt)?; + let input_col = cast_column_field( + &columns[0], + columns[0].data_type(), + &least_super_dt, + &func_ctx, + )?; match least_super_type_id { TypeID::Boolean => { - scalar_contains!(bool, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + bool, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::UInt8 => { - scalar_contains!(u8, input_col, input_rows, columns, least_super_dt) + scalar_contains!(u8, input_col, input_rows, columns, least_super_dt, func_ctx) } TypeID::UInt16 => { - scalar_contains!(u16, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + u16, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::UInt32 => { - scalar_contains!(u32, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + u32, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::UInt64 => { - scalar_contains!(u64, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + u64, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Int8 => { - scalar_contains!(i8, input_col, input_rows, columns, least_super_dt) + scalar_contains!(i8, input_col, input_rows, columns, least_super_dt, func_ctx) } TypeID::Int16 => { - scalar_contains!(i16, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + i16, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Int32 => { - scalar_contains!(i32, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + i32, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Int64 => { - scalar_contains!(i64, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + i64, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::String => { - scalar_contains!(Vu8, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + Vu8, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Float32 => { - float_contains!(f32, input_col, input_rows, columns, least_super_dt) + float_contains!( + f32, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Float64 => { - float_contains!(f64, input_col, input_rows, columns, least_super_dt) + float_contains!( + f64, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Date => { - scalar_contains!(i32, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + i32, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Timestamp => { - scalar_contains!(i64, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + i64, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } _ => Result::Err(ErrorCode::BadDataValueType(format!( "{} type is not supported for IN now", 
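The week_date.rs change that follows moves get_day off Utc and onto the session timezone. A self-contained sketch of that day-from-epoch computation, assuming chrono 0.4 and chrono-tz 0.6; the function name is an illustrative stand-in:

use chrono::TimeZone;
use chrono_tz::Tz;

// Days since the Unix epoch, counted in the given timezone rather than in UTC.
// Near local midnight this differs by one day from a UTC-based count, which is
// exactly what the tz-aware get_day below accounts for.
fn days_since_epoch(secs: i64, tz: &Tz) -> i64 {
    let date = tz.timestamp(secs, 0);
    let epoch = tz.ymd(1970, 1, 1).and_hms(0, 0, 0);
    date.signed_duration_since(epoch).num_days()
}

fn main() {
    let tz: Tz = "Asia/Shanghai".parse().expect("valid timezone name");
    // 2022-05-02 20:00:00 UTC is already 2022-05-03 in UTC+8.
    let secs = 1_651_521_600;
    println!("{}", days_since_epoch(secs, &tz)); // one more than the UTC-based count
}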
diff --git a/common/functions/src/scalars/dates/week_date.rs b/common/functions/src/scalars/dates/week_date.rs index 5932e0e068411..03d678a3ae5b7 100644 --- a/common/functions/src/scalars/dates/week_date.rs +++ b/common/functions/src/scalars/dates/week_date.rs @@ -16,11 +16,11 @@ use std::fmt; use std::marker::PhantomData; use std::ops::Sub; +use chrono_tz::Tz; use common_datavalues::chrono::DateTime; use common_datavalues::chrono::Datelike; use common_datavalues::chrono::Duration; use common_datavalues::chrono::TimeZone; -use common_datavalues::chrono::Utc; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; @@ -45,7 +45,7 @@ pub trait WeekResultFunction { const IS_DETERMINISTIC: bool; fn return_type() -> DataTypeImpl; - fn to_number(_value: DateTime, mode: u64) -> R; + fn to_number(_value: DateTime, mode: u64, tz: &Tz) -> R; fn factor_function() -> Option> { None } @@ -60,7 +60,7 @@ impl WeekResultFunction for ToStartOfWeek { fn return_type() -> DataTypeImpl { DateType::new_impl() } - fn to_number(value: DateTime, week_mode: u64) -> i32 { + fn to_number(value: DateTime, week_mode: u64, tz: &Tz) -> i32 { let mut weekday = value.weekday().number_from_sunday(); if week_mode & 1 == 1 { weekday = value.weekday().number_from_monday(); @@ -68,7 +68,7 @@ impl WeekResultFunction for ToStartOfWeek { weekday -= 1; let duration = Duration::days(weekday as i64); let result = value.sub(duration); - get_day(result) + get_day(result, tz) } } @@ -124,7 +124,7 @@ where fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -147,12 +147,16 @@ where mode = week_mode; } + let tz = func_ctx.tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; + match columns[0].data_type().data_type_id() { TypeID::Date => { let col: &Int32Column = Series::check_get(columns[0].column())?; let iter = col.scalar_iter().map(|v| { - let date_time = Utc.timestamp(v as i64 * 24 * 3600, 0_u32); - T::to_number(date_time, mode) + let date_time = tz.timestamp(v as i64 * 24 * 3600, 0_u32); + T::to_number(date_time, mode, &tz) }); let col = PrimitiveColumn::::from_owned_iterator(iter).arc(); let viewer = i32::try_create_viewer(&col)?; @@ -164,8 +168,8 @@ where TypeID::Timestamp => { let col: &Int64Column = Series::check_get(columns[0].column())?; let iter = col.scalar_iter().map(|v| { - let date_time = Utc.timestamp(v / 1_000_000, 0_u32); - T::to_number(date_time, mode) + let date_time = tz.timestamp(v / 1_000_000, 0_u32); + T::to_number(date_time, mode, &tz) }); let col = PrimitiveColumn::::from_owned_iterator(iter).arc(); let viewer = i32::try_create_viewer(&col)?; @@ -222,8 +226,8 @@ impl fmt::Display for WeekFunction { } } -fn get_day(date: DateTime) -> i32 { - let start: DateTime = Utc.ymd(1970, 1, 1).and_hms(0, 0, 0); +fn get_day(date: DateTime, tz: &Tz) -> i32 { + let start = tz.ymd(1970, 1, 1).and_hms(0, 0, 0); let duration = date.signed_duration_since(start); duration.num_days() as i32 } diff --git a/common/functions/src/scalars/expressions/cast.rs b/common/functions/src/scalars/expressions/cast.rs index 4369cb7da9b89..f7cc788a2491c 100644 --- a/common/functions/src/scalars/expressions/cast.rs +++ b/common/functions/src/scalars/expressions/cast.rs @@ -82,11 +82,11 @@ impl Function for CastFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, _input_rows: usize, ) -> Result { - 
cast_column_field(&columns[0], &self.from_type, &self.target_type) + cast_column_field(&columns[0], &self.from_type, &self.target_type, &func_ctx) } } diff --git a/common/functions/src/scalars/expressions/cast_from_datetimes.rs b/common/functions/src/scalars/expressions/cast_from_datetimes.rs index 71d80ea6c96e3..739b5a1fd616c 100644 --- a/common/functions/src/scalars/expressions/cast_from_datetimes.rs +++ b/common/functions/src/scalars/expressions/cast_from_datetimes.rs @@ -14,15 +14,18 @@ use std::sync::Arc; +use chrono_tz::Tz; use common_arrow::arrow::bitmap::Bitmap; use common_datavalues::chrono::DateTime; use common_datavalues::chrono::TimeZone; use common_datavalues::chrono::Utc; use common_datavalues::prelude::*; +use common_exception::ErrorCode; use common_exception::Result; use super::cast_with_type::arrow_cast_compute; use super::cast_with_type::CastOptions; +use crate::scalars::FunctionContext; const DATE_FMT: &str = "%Y-%m-%d"; // const TIME_FMT: &str = "%Y-%m-%d %H:%M:%S"; @@ -32,6 +35,7 @@ pub fn cast_from_date( from_type: &DataTypeImpl, data_type: &DataTypeImpl, cast_options: &CastOptions, + func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { let c = Series::remove_nullable(column); let c: &Int32Column = Series::check_get(&c)?; @@ -42,7 +46,8 @@ pub fn cast_from_date( let mut builder = ColumnBuilder::::with_capacity(size); for v in c.iter() { - let s = timestamp_to_string(Utc.timestamp(*v as i64 * 24 * 3600, 0_u32), DATE_FMT); + let utc = "UTC".parse::().unwrap(); + let s = timestamp_to_string(utc.timestamp(*v as i64 * 24 * 3600, 0_u32), DATE_FMT); builder.append(s.as_bytes()); } Ok((builder.build(size), None)) @@ -54,7 +59,7 @@ pub fn cast_from_date( Ok((result, None)) } - _ => arrow_cast_compute(column, from_type, data_type, cast_options), + _ => arrow_cast_compute(column, from_type, data_type, cast_options, func_ctx), } } @@ -63,6 +68,7 @@ pub fn cast_from_timestamp( from_type: &DataTypeImpl, data_type: &DataTypeImpl, cast_options: &CastOptions, + func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { let c = Series::remove_nullable(column); let c: &Int64Column = Series::check_get(&c)?; @@ -73,9 +79,12 @@ pub fn cast_from_timestamp( match data_type.data_type_id() { TypeID::String => { let mut builder = MutableStringColumn::with_capacity(size); + let tz = func_ctx.tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; for v in c.iter() { let s = timestamp_to_string( - date_time64.utc_timestamp(*v), + tz.timestamp(*v / 1_000_000, (*v % 1_000_000 * 1_000) as u32), date_time64.format_string().as_str(), ); builder.append_value(s.as_bytes()); @@ -97,11 +106,11 @@ pub fn cast_from_timestamp( Ok((result, None)) } - _ => arrow_cast_compute(column, from_type, data_type, cast_options), + _ => arrow_cast_compute(column, from_type, data_type, cast_options, func_ctx), } } #[inline] -fn timestamp_to_string(date: DateTime, fmt: &str) -> String { +fn timestamp_to_string(date: DateTime, fmt: &str) -> String { date.format(fmt).to_string() } diff --git a/common/functions/src/scalars/expressions/cast_from_string.rs b/common/functions/src/scalars/expressions/cast_from_string.rs index 7c962bb5c5582..4e8abbc397b48 100644 --- a/common/functions/src/scalars/expressions/cast_from_string.rs +++ b/common/functions/src/scalars/expressions/cast_from_string.rs @@ -24,12 +24,14 @@ use common_exception::Result; use super::cast_with_type::arrow_cast_compute; use super::cast_with_type::new_mutable_bitmap; use 
super::cast_with_type::CastOptions; +use crate::scalars::FunctionContext; pub fn cast_from_string( column: &ColumnRef, from_type: &DataTypeImpl, data_type: &DataTypeImpl, cast_options: &CastOptions, + func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { let str_column = Series::remove_nullable(column); let str_column: &StringColumn = Series::check_get(&str_column)?; @@ -78,7 +80,7 @@ pub fn cast_from_string( Ok((builder.build(size), Some(bitmap.into()))) } TypeID::Interval => todo!(), - _ => arrow_cast_compute(column, from_type, data_type, cast_options), + _ => arrow_cast_compute(column, from_type, data_type, cast_options, func_ctx), } } diff --git a/common/functions/src/scalars/expressions/cast_with_type.rs b/common/functions/src/scalars/expressions/cast_with_type.rs index 39b766c8f71b7..f5535276abef5 100644 --- a/common/functions/src/scalars/expressions/cast_with_type.rs +++ b/common/functions/src/scalars/expressions/cast_with_type.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use chrono_tz::Tz; use common_arrow::arrow::array::ArrayRef; use common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::compute::cast; @@ -28,6 +29,7 @@ use super::cast_from_datetimes::cast_from_date; use super::cast_from_string::cast_from_string; use super::cast_from_variant::cast_from_variant; use crate::scalars::expressions::cast_from_datetimes::cast_from_timestamp; +use crate::scalars::FunctionContext; #[derive(PartialEq, Eq, Debug, Clone, Copy)] pub struct CastOptions { @@ -67,23 +69,27 @@ pub fn cast_column_field( column_with_field: &ColumnWithField, from_type: &DataTypeImpl, target_type: &DataTypeImpl, + func_ctx: &FunctionContext, ) -> Result { cast_with_type( column_with_field.column(), from_type, target_type, &DEFAULT_CAST_OPTIONS, + func_ctx, ) } // No logical type is specified // Use Default options pub fn default_column_cast(column: &ColumnRef, data_type: &DataTypeImpl) -> Result { + let func_ctx = FunctionContext::default(); cast_with_type( column, &column.data_type(), data_type, &DEFAULT_CAST_OPTIONS, + &func_ctx, ) } @@ -92,6 +98,7 @@ pub fn cast_with_type( from_type: &DataTypeImpl, target_type: &DataTypeImpl, cast_options: &CastOptions, + func_ctx: &FunctionContext, ) -> Result { // they are pyhsically the same type if &column.data_type() == target_type { @@ -115,7 +122,7 @@ pub fn cast_with_type( if column.is_const() { let col: &ConstColumn = Series::check_get(column)?; let inner = col.inner(); - let res = cast_with_type(inner, from_type, target_type, cast_options)?; + let res = cast_with_type(inner, from_type, target_type, cast_options, func_ctx)?; return Ok(ConstColumn::new(res, column.len()).arc()); } @@ -123,18 +130,37 @@ pub fn cast_with_type( let nonull_data_type = remove_nullable(target_type); let (result, valids) = match nonull_from_type.data_type_id() { - TypeID::String => { - cast_from_string(column, &nonull_from_type, &nonull_data_type, cast_options) - } - TypeID::Date => cast_from_date(column, &nonull_from_type, &nonull_data_type, cast_options), - TypeID::Timestamp => { - cast_from_timestamp(column, &nonull_from_type, &nonull_data_type, cast_options) - } + TypeID::String => cast_from_string( + column, + &nonull_from_type, + &nonull_data_type, + cast_options, + func_ctx, + ), + TypeID::Date => cast_from_date( + column, + &nonull_from_type, + &nonull_data_type, + cast_options, + func_ctx, + ), + TypeID::Timestamp => cast_from_timestamp( + column, + &nonull_from_type, + &nonull_data_type, + cast_options, + func_ctx, + ), TypeID::Variant | TypeID::VariantArray | 
TypeID::VariantObject => { cast_from_variant(column, &nonull_data_type) } - // TypeID::Interval => arrow_cast_compute(column, &nonull_data_type, cast_options), - _ => arrow_cast_compute(column, &nonull_from_type, &nonull_data_type, cast_options), + _ => arrow_cast_compute( + column, + &nonull_from_type, + &nonull_data_type, + cast_options, + func_ctx, + ), }?; // check date/timestamp bound @@ -184,6 +210,7 @@ pub fn cast_to_variant( column: &ColumnRef, from_type: &DataTypeImpl, data_type: &DataTypeImpl, + func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { let column = Series::remove_nullable(column); let size = column.len(); @@ -201,8 +228,14 @@ pub fn cast_to_variant( } let mut builder = ColumnBuilder::::with_capacity(size); if from_type.data_type_id().is_numeric() || from_type.data_type_id() == TypeID::Boolean { - let serializer = from_type.create_serializer(); - // TODO(veeupup): check if we can use default format_settings + let serializer = if from_type.data_type_id() == TypeID::Timestamp { + let tz = func_ctx.tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; + from_type.create_serializer_with_tz(tz) + } else { + from_type.create_serializer() + }; let format = FormatSettings::default(); match serializer.serialize_json_object(&column, None, &format) { Ok(values) => { @@ -227,9 +260,10 @@ pub fn arrow_cast_compute( from_type: &DataTypeImpl, data_type: &DataTypeImpl, cast_options: &CastOptions, + func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { if data_type.data_type_id().is_variant() { - return cast_to_variant(column, from_type, data_type); + return cast_to_variant(column, from_type, data_type, func_ctx); } let arrow_array = column.as_arrow_array(); diff --git a/common/functions/src/scalars/function_monotonic.rs b/common/functions/src/scalars/function_monotonic.rs index c85da52d6c084..2142fd46c1201 100644 --- a/common/functions/src/scalars/function_monotonic.rs +++ b/common/functions/src/scalars/function_monotonic.rs @@ -17,6 +17,7 @@ use common_exception::ErrorCode; use common_exception::Result; use super::cast_column_field; +use super::FunctionContext; #[derive(Clone)] pub struct Monotonicity { @@ -102,7 +103,8 @@ impl Monotonicity { }; if let (Some(max), Some(min)) = (max, min) { - let col = cast_column_field(&min, min.data_type(), &f64::to_data_type())?; + let func_ctx = FunctionContext::default(); + let col = cast_column_field(&min, min.data_type(), &f64::to_data_type(), &func_ctx)?; let min_val = col.get_f64(0)?; if min_val >= 0.0 { @@ -113,7 +115,7 @@ impl Monotonicity { return Ok(-1); } - let col = cast_column_field(&max, max.data_type(), &f64::to_data_type())?; + let col = cast_column_field(&max, max.data_type(), &f64::to_data_type(), &func_ctx)?; let max_val = col.get_f64(0)?; if max_val <= 0.0 { diff --git a/common/functions/src/scalars/hashes/city64_with_seed.rs b/common/functions/src/scalars/hashes/city64_with_seed.rs index 72a4a6397438c..584c41b110576 100644 --- a/common/functions/src/scalars/hashes/city64_with_seed.rs +++ b/common/functions/src/scalars/hashes/city64_with_seed.rs @@ -114,7 +114,7 @@ impl Function for City64WithSeedFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &common_datavalues::ColumnsWithField, _input_rows: usize, ) -> Result { @@ -137,8 +137,12 @@ impl Function for City64WithSeedFunction { }); Ok(Arc::new(result_col)) } else { - let seed_col = - cast_column_field(&columns[1], columns[1].data_type(), 
&UInt64Type::new_impl())?; + let seed_col = cast_column_field( + &columns[1], + columns[1].data_type(), + &UInt64Type::new_impl(), + &func_ctx, + )?; let seed_viewer = u64::try_create_viewer(&seed_col)?; let result_col = with_match_scalar_types_error!(physical_data_type, |$S| { diff --git a/common/functions/src/scalars/hashes/sha2hash.rs b/common/functions/src/scalars/hashes/sha2hash.rs index d40da97429ebd..ea8524f878f5a 100644 --- a/common/functions/src/scalars/hashes/sha2hash.rs +++ b/common/functions/src/scalars/hashes/sha2hash.rs @@ -71,7 +71,7 @@ impl Function for Sha2HashFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &common_datavalues::ColumnsWithField, _input_rows: usize, ) -> Result { @@ -124,8 +124,12 @@ impl Function for Sha2HashFunction { Ok(Arc::new(col)) } else { - let l = - cast_column_field(&columns[1], columns[1].data_type(), &UInt16Type::new_impl())?; + let l = cast_column_field( + &columns[1], + columns[1].data_type(), + &UInt16Type::new_impl(), + &func_ctx, + )?; let l_viewer = u16::try_create_viewer(&l)?; let mut col_builder = MutableStringColumn::with_capacity(l.len()); diff --git a/common/functions/src/scalars/logics/and.rs b/common/functions/src/scalars/logics/and.rs index bfa8f895d14e5..eef2d0027a2de 100644 --- a/common/functions/src/scalars/logics/and.rs +++ b/common/functions/src/scalars/logics/and.rs @@ -22,6 +22,7 @@ use crate::calcute; use crate::impl_logic_expression; use crate::scalars::cast_column_field; use crate::scalars::Function; +use crate::scalars::FunctionContext; use crate::scalars::FunctionDescription; use crate::scalars::FunctionFeatures; diff --git a/common/functions/src/scalars/logics/logic.rs b/common/functions/src/scalars/logics/logic.rs index c62e4de6791d5..c1e6d10bd8f7f 100644 --- a/common/functions/src/scalars/logics/logic.rs +++ b/common/functions/src/scalars/logics/logic.rs @@ -53,7 +53,12 @@ pub struct LogicFunctionImpl { } pub trait LogicExpression: Sync + Send { - fn eval(columns: &ColumnsWithField, input_rows: usize, nullable: bool) -> Result; + fn eval( + func_ctx: FunctionContext, + columns: &ColumnsWithField, + input_rows: usize, + nullable: bool, + ) -> Result; } impl LogicFunctionImpl @@ -97,11 +102,11 @@ where F: LogicExpression + Clone fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { - F::eval(columns, input_rows, self.nullable) + F::eval(func_ctx, columns, input_rows, self.nullable) } } diff --git a/common/functions/src/scalars/logics/macros.rs b/common/functions/src/scalars/logics/macros.rs index 3ada8655fd7ef..8b5b544e163f8 100644 --- a/common/functions/src/scalars/logics/macros.rs +++ b/common/functions/src/scalars/logics/macros.rs @@ -28,15 +28,15 @@ macro_rules! 
impl_logic_expression { pub struct $name; impl LogicExpression for $name { - fn eval(columns: &ColumnsWithField, input_rows: usize, nullable: bool) -> Result { + fn eval(func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, nullable: bool) -> Result { let dt = if nullable { NullableType::new_impl(BooleanType::new_impl()) } else { BooleanType::new_impl() }; - let lhs = cast_column_field(&columns[0], columns[0].data_type(), &dt)?; - let rhs = cast_column_field(&columns[1], columns[1].data_type(), &dt)?; + let lhs = cast_column_field(&columns[0], columns[0].data_type(), &dt, &func_ctx)?; + let rhs = cast_column_field(&columns[1], columns[1].data_type(), &dt, &func_ctx)?; if nullable { let lhs_viewer = bool::try_create_viewer(&lhs)?; diff --git a/common/functions/src/scalars/logics/not.rs b/common/functions/src/scalars/logics/not.rs index 283c45ff7eae2..e485e8c73df81 100644 --- a/common/functions/src/scalars/logics/not.rs +++ b/common/functions/src/scalars/logics/not.rs @@ -20,6 +20,7 @@ use super::logic::LogicFunctionImpl; use super::logic::LogicOperator; use crate::scalars::cast_column_field; use crate::scalars::Function; +use crate::scalars::FunctionContext; use crate::scalars::FunctionDescription; use crate::scalars::FunctionFeatures; @@ -27,11 +28,17 @@ use crate::scalars::FunctionFeatures; pub struct LogicNotExpression; impl LogicExpression for LogicNotExpression { - fn eval(columns: &ColumnsWithField, input_rows: usize, _nullable: bool) -> Result { + fn eval( + func_ctx: FunctionContext, + columns: &ColumnsWithField, + input_rows: usize, + _nullable: bool, + ) -> Result { let col = cast_column_field( &columns[0], columns[0].data_type(), &BooleanType::new_impl(), + &func_ctx, )?; let col_viewer = bool::try_create_viewer(&col)?; diff --git a/common/functions/src/scalars/logics/or.rs b/common/functions/src/scalars/logics/or.rs index 5518e3c436778..2a960277059ca 100644 --- a/common/functions/src/scalars/logics/or.rs +++ b/common/functions/src/scalars/logics/or.rs @@ -22,6 +22,7 @@ use crate::calcute; use crate::impl_logic_expression; use crate::scalars::cast_column_field; use crate::scalars::Function; +use crate::scalars::FunctionContext; use crate::scalars::FunctionDescription; use crate::scalars::FunctionFeatures; diff --git a/common/functions/src/scalars/logics/xor.rs b/common/functions/src/scalars/logics/xor.rs index 3c87c53e5b9cc..f2c72799a5f20 100644 --- a/common/functions/src/scalars/logics/xor.rs +++ b/common/functions/src/scalars/logics/xor.rs @@ -21,6 +21,7 @@ use super::logic::LogicOperator; use crate::calcute; use crate::scalars::cast_column_field; use crate::scalars::Function; +use crate::scalars::FunctionContext; use crate::scalars::FunctionDescription; use crate::scalars::FunctionFeatures; @@ -28,16 +29,23 @@ use crate::scalars::FunctionFeatures; pub struct LogicXorExpression; impl LogicExpression for LogicXorExpression { - fn eval(columns: &ColumnsWithField, input_rows: usize, _nullable: bool) -> Result { + fn eval( + func_ctx: FunctionContext, + columns: &ColumnsWithField, + input_rows: usize, + _nullable: bool, + ) -> Result { let lhs = cast_column_field( &columns[0], columns[0].data_type(), &BooleanType::new_impl(), + &func_ctx, )?; let rhs = cast_column_field( &columns[1], columns[1].data_type(), &BooleanType::new_impl(), + &func_ctx, )?; let lhs_viewer = bool::try_create_viewer(&lhs)?; let rhs_viewer = bool::try_create_viewer(&rhs)?; diff --git a/common/functions/src/scalars/others/inet_ntoa.rs b/common/functions/src/scalars/others/inet_ntoa.rs index 
2a3c4269491aa..9081bbe9ff539 100644 --- a/common/functions/src/scalars/others/inet_ntoa.rs +++ b/common/functions/src/scalars/others/inet_ntoa.rs @@ -70,7 +70,7 @@ impl Function for InetNtoaFunctionImpl Result { @@ -86,6 +86,7 @@ impl Function for InetNtoaFunctionImpl Function for InetNtoaFunctionImpl Result { @@ -67,6 +67,7 @@ impl Function for BinFunction { &columns[0], columns[0].data_type(), &UInt64Type::new_impl(), + &func_ctx, )?; let col = col.as_any().downcast_ref::().unwrap(); for val in col.iter() { @@ -74,8 +75,12 @@ impl Function for BinFunction { } } TypeID::Int8 | TypeID::Int16 | TypeID::Int32 | TypeID::Int64 => { - let col = - cast_column_field(&columns[0], columns[0].data_type(), &Int64Type::new_impl())?; + let col = cast_column_field( + &columns[0], + columns[0].data_type(), + &Int64Type::new_impl(), + &func_ctx, + )?; let col = col.as_any().downcast_ref::().unwrap(); for val in col.iter() { builder.append(format!("{:b}", val).as_bytes()); @@ -86,6 +91,7 @@ impl Function for BinFunction { &columns[0], columns[0].data_type(), &Float64Type::new_impl(), + &func_ctx, )?; let col = col.as_any().downcast_ref::().unwrap(); for val in col.iter() { diff --git a/common/functions/src/scalars/strings/export_set.rs b/common/functions/src/scalars/strings/export_set.rs index 4ca96eb340ce0..3233c010be416 100644 --- a/common/functions/src/scalars/strings/export_set.rs +++ b/common/functions/src/scalars/strings/export_set.rs @@ -72,7 +72,7 @@ impl Function for ExportSetFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -94,6 +94,7 @@ impl Function for ExportSetFunction { &number_bits_column.data_type(), &t, &DEFAULT_CAST_OPTIONS, + &func_ctx, )?; let bits_column = cast_with_type( @@ -101,6 +102,7 @@ impl Function for ExportSetFunction { &columns[0].column().data_type(), &t, &DEFAULT_CAST_OPTIONS, + &func_ctx, )?; if input_rows != 1 diff --git a/common/functions/src/scalars/strings/hex.rs b/common/functions/src/scalars/strings/hex.rs index 2e3abaced0b8c..91dee529619a8 100644 --- a/common/functions/src/scalars/strings/hex.rs +++ b/common/functions/src/scalars/strings/hex.rs @@ -62,7 +62,7 @@ impl Function for HexFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, _input_rows: usize, ) -> Result { @@ -72,14 +72,19 @@ impl Function for HexFunction { &columns[0], columns[0].data_type(), &UInt64Type::new_impl(), + &func_ctx, )?; let col = col.as_any().downcast_ref::().unwrap(); let iter = col.iter().map(|val| format!("{:x}", val).into_bytes()); Ok(Arc::new(StringColumn::from_owned_iterator(iter))) } TypeID::Int8 | TypeID::Int16 | TypeID::Int32 | TypeID::Int64 => { - let col = - cast_column_field(&columns[0], columns[0].data_type(), &Int64Type::new_impl())?; + let col = cast_column_field( + &columns[0], + columns[0].data_type(), + &Int64Type::new_impl(), + &func_ctx, + )?; let col = col.as_any().downcast_ref::().unwrap(); let iter = col.iter().map(|val| match val.cmp(&0) { Ordering::Less => format!("-{:x}", val.unsigned_abs()).into_bytes(), @@ -92,6 +97,7 @@ impl Function for HexFunction { &columns[0], columns[0].data_type(), &StringType::new_impl(), + &func_ctx, )?; let col = col.as_any().downcast_ref::().unwrap(); let iter = col.iter().map(|val| { diff --git a/common/functions/src/scalars/strings/oct.rs b/common/functions/src/scalars/strings/oct.rs index f3f941dab028b..ea0fd71d30f56 100644 --- 
a/common/functions/src/scalars/strings/oct.rs +++ b/common/functions/src/scalars/strings/oct.rs @@ -81,7 +81,7 @@ impl Function for OctFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -93,6 +93,7 @@ impl Function for OctFunction { &columns[0], columns[0].data_type(), &UInt64Type::new_impl(), + &func_ctx, )?; let col = col.as_any().downcast_ref::().unwrap(); for val in col.iter() { @@ -100,8 +101,12 @@ impl Function for OctFunction { } } _ => { - let col = - cast_column_field(&columns[0], columns[0].data_type(), &Int64Type::new_impl())?; + let col = cast_column_field( + &columns[0], + columns[0].data_type(), + &Int64Type::new_impl(), + &func_ctx, + )?; let col = col.as_any().downcast_ref::().unwrap(); for val in col.iter() { builder.append(val.oct_string().as_bytes()); diff --git a/common/functions/src/scalars/strings/regexp_instr.rs b/common/functions/src/scalars/strings/regexp_instr.rs index e8534a8339610..4829981ba9e82 100644 --- a/common/functions/src/scalars/strings/regexp_instr.rs +++ b/common/functions/src/scalars/strings/regexp_instr.rs @@ -80,7 +80,7 @@ impl Function for RegexpInStrFunction { // Notes: https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-instr fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -101,6 +101,7 @@ impl Function for RegexpInStrFunction { &columns[2], columns[2].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } 3 => { @@ -108,6 +109,7 @@ impl Function for RegexpInStrFunction { &columns[3], columns[3].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } 4 => { @@ -115,6 +117,7 @@ impl Function for RegexpInStrFunction { &columns[4], columns[4].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } _ => { @@ -122,6 +125,7 @@ impl Function for RegexpInStrFunction { &columns[5], columns[5].data_type(), &NullableType::new_impl(StringType::new_impl()), + &func_ctx, )? } } diff --git a/common/functions/src/scalars/strings/regexp_replace.rs b/common/functions/src/scalars/strings/regexp_replace.rs index cec90aa64d715..a85b2f87b325a 100644 --- a/common/functions/src/scalars/strings/regexp_replace.rs +++ b/common/functions/src/scalars/strings/regexp_replace.rs @@ -81,7 +81,7 @@ impl Function for RegexpReplaceFunction { // Notes: https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-replace fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -101,6 +101,7 @@ impl Function for RegexpReplaceFunction { &columns[3], columns[3].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } 4 => { @@ -108,6 +109,7 @@ impl Function for RegexpReplaceFunction { &columns[4], columns[4].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } _ => { @@ -115,6 +117,7 @@ impl Function for RegexpReplaceFunction { &columns[5], columns[5].data_type(), &NullableType::new_impl(StringType::new_impl()), + &func_ctx, )? 
} } diff --git a/common/functions/src/scalars/strings/regexp_substr.rs b/common/functions/src/scalars/strings/regexp_substr.rs index 115163be11f67..52b7ed2fb8097 100644 --- a/common/functions/src/scalars/strings/regexp_substr.rs +++ b/common/functions/src/scalars/strings/regexp_substr.rs @@ -81,7 +81,7 @@ impl Function for RegexpSubStrFunction { // Notes: https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-substr fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -101,6 +101,7 @@ impl Function for RegexpSubStrFunction { &columns[2], columns[2].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } 3 => { @@ -108,6 +109,7 @@ impl Function for RegexpSubStrFunction { &columns[3], columns[3].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } _ => { @@ -115,6 +117,7 @@ impl Function for RegexpSubStrFunction { &columns[4], columns[4].data_type(), &NullableType::new_impl(StringType::new_impl()), + &func_ctx, )? } } diff --git a/common/functions/src/scalars/strings/repeat.rs b/common/functions/src/scalars/strings/repeat.rs index 4e2d1848bc709..4c22ff8f294c0 100644 --- a/common/functions/src/scalars/strings/repeat.rs +++ b/common/functions/src/scalars/strings/repeat.rs @@ -69,13 +69,18 @@ impl Function for RepeatFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { let col1_viewer = Vu8::try_create_viewer(columns[0].column())?; - let col2 = cast_column_field(&columns[1], columns[1].data_type(), &UInt64Type::new_impl())?; + let col2 = cast_column_field( + &columns[1], + columns[1].data_type(), + &UInt64Type::new_impl(), + &func_ctx, + )?; let col2_viewer = u64::try_create_viewer(&col2)?; let mut builder = ColumnBuilder::::with_capacity(input_rows); diff --git a/common/functions/src/scalars/strings/substring.rs b/common/functions/src/scalars/strings/substring.rs index bde4b465ce782..e6a05feceb2f1 100644 --- a/common/functions/src/scalars/strings/substring.rs +++ b/common/functions/src/scalars/strings/substring.rs @@ -77,23 +77,35 @@ impl Function for SubstringFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { - let s_column = - cast_column_field(&columns[0], columns[0].data_type(), &StringType::new_impl())?; + let s_column = cast_column_field( + &columns[0], + columns[0].data_type(), + &StringType::new_impl(), + &func_ctx, + )?; let s_viewer = Vu8::try_create_viewer(&s_column)?; - let p_column = - cast_column_field(&columns[1], columns[1].data_type(), &Int64Type::new_impl())?; + let p_column = cast_column_field( + &columns[1], + columns[1].data_type(), + &Int64Type::new_impl(), + &func_ctx, + )?; let p_viewer = i64::try_create_viewer(&p_column)?; let mut builder = ColumnBuilder::::with_capacity(input_rows); if columns.len() > 2 { - let p2_column = - cast_column_field(&columns[2], columns[2].data_type(), &UInt64Type::new_impl())?; + let p2_column = cast_column_field( + &columns[2], + columns[2].data_type(), + &UInt64Type::new_impl(), + &func_ctx, + )?; let p2_viewer = u64::try_create_viewer(&p2_column)?; let iter = izip!(s_viewer, p_viewer, p2_viewer); diff --git a/common/functions/src/scalars/strings/substring_index.rs b/common/functions/src/scalars/strings/substring_index.rs index 3c68eb6571865..aa9d345fa90e1 100644 --- 
a/common/functions/src/scalars/strings/substring_index.rs +++ b/common/functions/src/scalars/strings/substring_index.rs @@ -72,20 +72,32 @@ impl Function for SubstringIndexFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { - let s_column = - cast_column_field(&columns[0], columns[0].data_type(), &StringType::new_impl())?; + let s_column = cast_column_field( + &columns[0], + columns[0].data_type(), + &StringType::new_impl(), + &func_ctx, + )?; let s_viewer = Vu8::try_create_viewer(&s_column)?; - let d_column = - cast_column_field(&columns[1], columns[1].data_type(), &StringType::new_impl())?; + let d_column = cast_column_field( + &columns[1], + columns[1].data_type(), + &StringType::new_impl(), + &func_ctx, + )?; let d_viewer = Vu8::try_create_viewer(&d_column)?; - let c_column = - cast_column_field(&columns[2], columns[2].data_type(), &Int64Type::new_impl())?; + let c_column = cast_column_field( + &columns[2], + columns[2].data_type(), + &Int64Type::new_impl(), + &func_ctx, + )?; let c_viewer = i64::try_create_viewer(&c_column)?; let iter = izip!(s_viewer, d_viewer, c_viewer); From c76c854ff9d8eeb26c5acbc419a1d75bfd9f74f3 Mon Sep 17 00:00:00 2001 From: Veeupup Date: Fri, 6 May 2022 12:09:47 +0800 Subject: [PATCH 08/15] interval function with tz Signed-off-by: Veeupup --- .../src/scalars/dates/interval_function.rs | 1 + common/functions/src/scalars/dates/macros.rs | 15 +++------------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/common/functions/src/scalars/dates/interval_function.rs b/common/functions/src/scalars/dates/interval_function.rs index 5d55ca8246404..8e41b2c26647c 100644 --- a/common/functions/src/scalars/dates/interval_function.rs +++ b/common/functions/src/scalars/dates/interval_function.rs @@ -21,6 +21,7 @@ use common_datavalues::chrono::Datelike; use common_datavalues::chrono::Duration; use common_datavalues::chrono::NaiveDate; use common_datavalues::chrono::NaiveDateTime; +use common_datavalues::chrono::TimeZone; use common_datavalues::prelude::*; use common_datavalues::with_match_primitive_types_error; use common_exception::ErrorCode; diff --git a/common/functions/src/scalars/dates/macros.rs b/common/functions/src/scalars/dates/macros.rs index 2bad39a3a0c70..a9de4bf6523de 100644 --- a/common/functions/src/scalars/dates/macros.rs +++ b/common/functions/src/scalars/dates/macros.rs @@ -39,6 +39,7 @@ macro_rules! impl_interval_year_month { #[macro_export] macro_rules! define_date_add_year_months { ($l: ident, $r: ident, $ctx: ident, $op: expr) => {{ + let tz = $ctx.tz; let factor = $ctx.factor; let epoch = NaiveDate::from_ymd(1970, 1, 1); let naive = epoch.checked_add_signed(Duration::days($l as i64)); @@ -65,21 +66,11 @@ macro_rules! define_date_add_year_months { #[macro_export] macro_rules! 
define_timestamp_add_year_months { ($l: ident, $r: ident, $ctx: ident, $op: expr) => {{ + let tz = $ctx.tz; let factor = $ctx.factor; let micros = $l; - let naive = NaiveDateTime::from_timestamp_opt( - micros / 1_000_000, - (micros % 1_000_000 * 1000) as u32, - ); - if naive.is_none() { - $ctx.set_error(ErrorCode::Overflow(format!( - "Overflow on datetime with microseconds {}", - $l - ))); - return 0; - }; + let date = tz.timestamp(micros / 1_000_000, (micros % 1_000_000 * 1000) as u32); - let date = naive.unwrap(); let new_date = $op(date.year(), date.month(), date.day(), $r.as_() * factor); new_date.map_or_else( |e| { From 53daa6d3786650492ff7193ecf2deb61777f87cc Mon Sep 17 00:00:00 2001 From: Veeupup Date: Fri, 6 May 2022 15:39:32 +0800 Subject: [PATCH 09/15] save Signed-off-by: Veeupup --- common/datavalues/src/types/data_type.rs | 10 ++-------- common/datavalues/src/types/type_nullable.rs | 15 --------------- common/datavalues/src/types/type_timestamp.rs | 13 ------------- .../src/scalars/dates/interval_function.rs | 5 +---- .../src/scalars/dates/number_function.rs | 4 +--- .../functions/src/scalars/dates/round_function.rs | 5 +---- common/functions/src/scalars/dates/week_date.rs | 4 +--- .../scalars/expressions/cast_from_datetimes.rs | 4 +--- .../src/scalars/expressions/cast_from_string.rs | 13 ++++++++----- .../src/scalars/expressions/cast_from_variant.rs | 9 ++++++--- .../src/scalars/expressions/cast_with_type.rs | 11 ++--------- common/functions/src/scalars/function.rs | 5 +++-- common/io/src/format_settings.rs | 2 +- common/streams/src/sources/source_csv.rs | 9 +-------- common/streams/src/sources/source_ndjson.rs | 9 +-------- query/src/interpreters/interpreter_insert.rs | 4 ++++ .../transforms/transform_expression_executor.rs | 4 ++++ query/src/pipelines/transforms/transform_sink.rs | 5 ++++- .../servers/clickhouse/writers/query_writer.rs | 11 +---------- query/src/sessions/query_ctx.rs | 14 ++++++++------ query/src/sql/statements/value_source.rs | 4 ---- .../02_0012_function_datetimes_tz.result | 3 +++ .../02_function/02_0012_function_datetimes_tz.sql | 7 +++++++ 23 files changed, 60 insertions(+), 110 deletions(-) create mode 100644 tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.result create mode 100644 tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql diff --git a/common/datavalues/src/types/data_type.rs b/common/datavalues/src/types/data_type.rs index 0c3a1d888da5a..536cec25e9334 100644 --- a/common/datavalues/src/types/data_type.rs +++ b/common/datavalues/src/types/data_type.rs @@ -125,16 +125,10 @@ pub trait DataType: std::fmt::Debug + Sync + Send + DynClone { } fn create_mutable(&self, capacity: usize) -> Box; + fn create_serializer(&self) -> TypeSerializerImpl; - /// work only for timestamp serializer - fn create_serializer_with_tz(&self, _tz: Tz) -> TypeSerializerImpl { - unimplemented!() - } + fn create_deserializer(&self, capacity: usize) -> TypeDeserializerImpl; - /// work only for timestamp deserializer - fn create_deserializer_with_tz(&self, _capacity: usize, _tz: Tz) -> TypeDeserializerImpl { - unimplemented!() - } } pub fn from_arrow_type(dt: &ArrowType) -> DataTypeImpl { diff --git a/common/datavalues/src/types/type_nullable.rs b/common/datavalues/src/types/type_nullable.rs index d93c48fa3d958..8e6db84604b75 100644 --- a/common/datavalues/src/types/type_nullable.rs +++ b/common/datavalues/src/types/type_nullable.rs @@ -88,13 +88,6 @@ impl DataType for NullableType { .into() } - fn create_serializer_with_tz(&self, tz: 
Tz) -> TypeSerializerImpl { - NullableSerializer { - inner: Box::new(self.inner.create_serializer_with_tz(tz)), - } - .into() - } - fn create_deserializer(&self, capacity: usize) -> TypeDeserializerImpl { NullableDeserializer { inner: Box::new(self.inner.create_deserializer(capacity)), @@ -103,14 +96,6 @@ impl DataType for NullableType { .into() } - fn create_deserializer_with_tz(&self, capacity: usize, tz: Tz) -> TypeDeserializerImpl { - NullableDeserializer { - inner: Box::new(self.inner.create_deserializer_with_tz(capacity, tz)), - bitmap: MutableBitmap::with_capacity(capacity), - } - .into() - } - fn create_mutable(&self, capacity: usize) -> Box { Box::new(MutableNullableColumn::new( self.inner.create_mutable(capacity), diff --git a/common/datavalues/src/types/type_timestamp.rs b/common/datavalues/src/types/type_timestamp.rs index 504f2398ecffd..5b21d91aa409b 100644 --- a/common/datavalues/src/types/type_timestamp.rs +++ b/common/datavalues/src/types/type_timestamp.rs @@ -150,10 +150,6 @@ impl DataType for TimestampType { TimestampSerializer::default().into() } - fn create_serializer_with_tz(&self, tz: Tz) -> TypeSerializerImpl { - TimestampSerializer::new_with_tz(tz).into() - } - fn create_deserializer(&self, capacity: usize) -> TypeDeserializerImpl { let tz = "UTC".parse::().unwrap(); TimestampDeserializer { @@ -164,15 +160,6 @@ impl DataType for TimestampType { .into() } - fn create_deserializer_with_tz(&self, capacity: usize, tz: Tz) -> TypeDeserializerImpl { - TimestampDeserializer { - builder: MutablePrimitiveColumn::::with_capacity(capacity), - tz, - precision: self.precision, - } - .into() - } - fn create_mutable(&self, capacity: usize) -> Box { Box::new(MutablePrimitiveColumn::::with_capacity(capacity)) } diff --git a/common/functions/src/scalars/dates/interval_function.rs b/common/functions/src/scalars/dates/interval_function.rs index 8e41b2c26647c..ae0e072f351dc 100644 --- a/common/functions/src/scalars/dates/interval_function.rs +++ b/common/functions/src/scalars/dates/interval_function.rs @@ -148,10 +148,7 @@ where _input_rows: usize, ) -> Result { // Todo(zhyass): define the ctx out of the eval. 
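
Around this hunk the timezone is taken straight from func_ctx instead of being re-parsed from a string on every eval, and the conversion itself follows the tz.timestamp pattern introduced in the macro change above. A small sketch of that conversion, assuming the chrono re-exports used elsewhere in this patch; the helper name micros_to_zoned is illustrative only:

    use chrono_tz::Tz;
    use common_datavalues::chrono::{DateTime, TimeZone};

    // Interpret a microsecond epoch value as a wall-clock time in the session zone,
    // mirroring the tz.timestamp(secs, nanos) call used by the interval macros.
    fn micros_to_zoned(micros: i64, tz: &Tz) -> DateTime<Tz> {
        tz.timestamp(micros / 1_000_000, (micros % 1_000_000 * 1000) as u32)
    }
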
- let tz = func_ctx.tz.parse::().map_err(|_| { - ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") - })?; - let mut ctx = EvalContext::new(self.factor, self.precision, None, tz); + let mut ctx = EvalContext::new(self.factor, self.precision, None, func_ctx.tz.clone()); let col = scalar_binary_op( columns[0].column(), columns[1].column(), diff --git a/common/functions/src/scalars/dates/number_function.rs b/common/functions/src/scalars/dates/number_function.rs index ec1caef47ded5..213cc9ff2bd4d 100644 --- a/common/functions/src/scalars/dates/number_function.rs +++ b/common/functions/src/scalars/dates/number_function.rs @@ -390,9 +390,7 @@ where // round_func need to calcute it with origin timezone // such as in UTC: 2022-03-31 22:00 and in +8:00 time is 2022-04-01 6:00 // then the result of to the month of should be 2022-04-01 6:00 rather than 2022-03-01 22:00 - let tz = func_ctx.tz.parse::().map_err(|_| { - ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") - })?; + let tz = func_ctx.tz; let func = |v: i64, _ctx: &mut EvalContext| { let date_time = tz.timestamp(v / 1_000_000, 0_u32); T::to_number(date_time, &tz) diff --git a/common/functions/src/scalars/dates/round_function.rs b/common/functions/src/scalars/dates/round_function.rs index 0868dae76d1ad..41c94c69e274d 100644 --- a/common/functions/src/scalars/dates/round_function.rs +++ b/common/functions/src/scalars/dates/round_function.rs @@ -125,10 +125,7 @@ impl Function for RoundFunction { ) -> Result { let func = |val: i64, ctx: &mut EvalContext| self.execute(val, &ctx.tz); let mut eval_context = EvalContext::default(); - let tz = func_ctx.tz.parse::().map_err(|_| { - ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") - })?; - eval_context.tz = tz; + eval_context.tz = func_ctx.tz.clone(); let col = scalar_unary_op::(columns[0].column(), func, &mut eval_context)?; for micros in col.iter() { let _ = check_timestamp(*micros)?; diff --git a/common/functions/src/scalars/dates/week_date.rs b/common/functions/src/scalars/dates/week_date.rs index 03d678a3ae5b7..4c5308839fa17 100644 --- a/common/functions/src/scalars/dates/week_date.rs +++ b/common/functions/src/scalars/dates/week_date.rs @@ -147,9 +147,7 @@ where mode = week_mode; } - let tz = func_ctx.tz.parse::().map_err(|_| { - ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") - })?; + let tz = func_ctx.tz; match columns[0].data_type().data_type_id() { TypeID::Date => { diff --git a/common/functions/src/scalars/expressions/cast_from_datetimes.rs b/common/functions/src/scalars/expressions/cast_from_datetimes.rs index 739b5a1fd616c..8a1c9702ca915 100644 --- a/common/functions/src/scalars/expressions/cast_from_datetimes.rs +++ b/common/functions/src/scalars/expressions/cast_from_datetimes.rs @@ -79,9 +79,7 @@ pub fn cast_from_timestamp( match data_type.data_type_id() { TypeID::String => { let mut builder = MutableStringColumn::with_capacity(size); - let tz = func_ctx.tz.parse::().map_err(|_| { - ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") - })?; + let tz = func_ctx.tz; for v in c.iter() { let s = timestamp_to_string( tz.timestamp(*v / 1_000_000, (*v % 1_000_000 * 1_000) as u32), diff --git a/common/functions/src/scalars/expressions/cast_from_string.rs b/common/functions/src/scalars/expressions/cast_from_string.rs index 4e8abbc397b48..96ee2a4cb513c 100644 --- a/common/functions/src/scalars/expressions/cast_from_string.rs +++ 
b/common/functions/src/scalars/expressions/cast_from_string.rs @@ -12,14 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -// use chrono_tz::Tz; +use chrono_tz::Tz; use common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::temporal_conversions::EPOCH_DAYS_FROM_CE; +use common_datavalues::chrono::DateTime; use common_datavalues::chrono::Datelike; +use common_datavalues::chrono::TimeZone; use common_datavalues::chrono::NaiveDate; use common_datavalues::chrono::NaiveDateTime; use common_datavalues::prelude::*; use common_exception::Result; +use common_exception::ErrorCode; use super::cast_with_type::arrow_cast_compute; use super::cast_with_type::new_mutable_bitmap; @@ -55,9 +58,9 @@ pub fn cast_from_string( TypeID::Timestamp => { let mut builder = ColumnBuilder::::with_capacity(size); - + let tz = func_ctx.tz; for (row, v) in str_column.iter().enumerate() { - match string_to_timestamp(v) { + match string_to_timestamp(v, &tz) { Some(d) => { builder.append(d.timestamp_micros()); } @@ -86,9 +89,9 @@ pub fn cast_from_string( // TODO support timezone #[inline] -pub fn string_to_timestamp(date_str: impl AsRef<[u8]>) -> Option { +pub fn string_to_timestamp(date_str: impl AsRef<[u8]>, tz: &Tz) -> Option> { let s = std::str::from_utf8(date_str.as_ref()).ok(); - s.and_then(|c| NaiveDateTime::parse_from_str(c, "%Y-%m-%d %H:%M:%S%.9f").ok()) + s.and_then(|c| tz.datetime_from_str(c, "%Y-%m-%d %H:%M:%S%.f").ok()) } #[inline] diff --git a/common/functions/src/scalars/expressions/cast_from_variant.rs b/common/functions/src/scalars/expressions/cast_from_variant.rs index 7da68fbe25a98..9a67ea51b7f77 100644 --- a/common/functions/src/scalars/expressions/cast_from_variant.rs +++ b/common/functions/src/scalars/expressions/cast_from_variant.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
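
The string_to_timestamp change above parses the literal in the session timezone rather than as a naive datetime, so the same wall-clock text maps to different instants under different zones. A minimal sketch of that parse, assuming the chrono re-exports shown in this file; parse_in_tz is an illustrative stand-in for the patched string_to_timestamp:

    use chrono_tz::Tz;
    use common_datavalues::chrono::{DateTime, TimeZone};

    // Parse a wall-clock literal in the given zone; the returned DateTime<Tz>
    // carries the offset, so timestamp_micros() yields the right UTC instant.
    fn parse_in_tz(s: &str, tz: &Tz) -> Option<DateTime<Tz>> {
        tz.datetime_from_str(s, "%Y-%m-%d %H:%M:%S%.f").ok()
    }

    // "2021-08-30 18:47:42" parsed in Asia/Shanghai (UTC+8) and
    // "2021-08-30 10:47:42" parsed in UTC denote the same instant,
    // which is the behaviour the new tz tests in this series rely on.
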
+use chrono_tz::Tz; use common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::temporal_conversions::EPOCH_DAYS_FROM_CE; use common_datavalues::chrono::Datelike; @@ -19,8 +20,9 @@ use common_datavalues::prelude::*; use common_datavalues::with_match_primitive_type_id; use common_exception::ErrorCode; use common_exception::Result; -use serde_json::Value as JsonValue; +use serde_json::Value as JsonValue; +use crate::scalars::FunctionContext; use super::cast_from_string::string_to_date; use super::cast_from_string::string_to_timestamp; use super::cast_with_type::new_mutable_bitmap; @@ -28,6 +30,7 @@ use super::cast_with_type::new_mutable_bitmap; pub fn cast_from_variant( column: &ColumnRef, data_type: &DataTypeImpl, + func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { let column = Series::remove_nullable(column); let json_column: &VariantColumn = if column.is_const() { @@ -134,12 +137,12 @@ pub fn cast_from_variant( TypeID::Timestamp => { // TODO(veeupup): support datetime with precision let mut builder = ColumnBuilder::::with_capacity(size); - + let tz = func_ctx.tz; for (row, value) in json_column.iter().enumerate() { match value.as_ref() { JsonValue::Null => bitmap.set(row, false), JsonValue::String(v) => { - if let Some(d) = string_to_timestamp(v) { + if let Some(d) = string_to_timestamp(v, &tz) { builder.append(d.timestamp_micros()); } else { bitmap.set(row, false); diff --git a/common/functions/src/scalars/expressions/cast_with_type.rs b/common/functions/src/scalars/expressions/cast_with_type.rs index f5535276abef5..0ae0b61e55530 100644 --- a/common/functions/src/scalars/expressions/cast_with_type.rs +++ b/common/functions/src/scalars/expressions/cast_with_type.rs @@ -152,7 +152,7 @@ pub fn cast_with_type( func_ctx, ), TypeID::Variant | TypeID::VariantArray | TypeID::VariantObject => { - cast_from_variant(column, &nonull_data_type) + cast_from_variant(column, &nonull_data_type, func_ctx) } _ => arrow_cast_compute( column, @@ -228,14 +228,7 @@ pub fn cast_to_variant( } let mut builder = ColumnBuilder::::with_capacity(size); if from_type.data_type_id().is_numeric() || from_type.data_type_id() == TypeID::Boolean { - let serializer = if from_type.data_type_id() == TypeID::Timestamp { - let tz = func_ctx.tz.parse::().map_err(|_| { - ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") - })?; - from_type.create_serializer_with_tz(tz) - } else { - from_type.create_serializer() - }; + let serializer = from_type.create_serializer(); let format = FormatSettings::default(); match serializer.serialize_json_object(&column, None, &format) { Ok(values) => { diff --git a/common/functions/src/scalars/function.rs b/common/functions/src/scalars/function.rs index 7e33f67845d85..13e2800f42dd8 100644 --- a/common/functions/src/scalars/function.rs +++ b/common/functions/src/scalars/function.rs @@ -14,6 +14,7 @@ use std::fmt; +use chrono_tz::Tz; use common_datavalues::ColumnRef; use common_datavalues::ColumnsWithField; use common_datavalues::DataTypeImpl; @@ -25,13 +26,13 @@ use super::Monotonicity; /// for now, this is only store Timezone #[derive(Clone)] pub struct FunctionContext { - pub tz: String, + pub tz: Tz, } impl Default for FunctionContext { fn default() -> Self { Self { - tz: "UTC".to_string(), + tz: "UTC".parse::().unwrap(), } } } diff --git a/common/io/src/format_settings.rs b/common/io/src/format_settings.rs index 583651d9d9ae4..67b2ab2005268 100644 --- a/common/io/src/format_settings.rs +++ b/common/io/src/format_settings.rs @@ -37,7 +37,7 @@ impl 
Default for FormatSettings { empty_as_default: false, skip_header: false, compression: Compression::None, - timezone: vec![b'U', b'T', b'C'], + timezone: "UTC".as_bytes().to_vec(), } } } diff --git a/common/streams/src/sources/source_csv.rs b/common/streams/src/sources/source_csv.rs index 7410eeaf9fb70..ae56f637cac0c 100644 --- a/common/streams/src/sources/source_csv.rs +++ b/common/streams/src/sources/source_csv.rs @@ -168,14 +168,7 @@ where R: AsyncRead + Unpin + Send .fields() .iter() .map(|f| { - if f.data_type().data_type_id() == TypeID::Timestamp { - f.data_type().create_deserializer_with_tz( - self.builder.block_size, - self.builder.tz.clone(), - ) - } else { - f.data_type().create_deserializer(self.builder.block_size) - } + f.data_type().create_deserializer(self.builder.block_size) }) .collect::>(); diff --git a/common/streams/src/sources/source_ndjson.rs b/common/streams/src/sources/source_ndjson.rs index 80bfeec1e0968..54e43bb6dc530 100644 --- a/common/streams/src/sources/source_ndjson.rs +++ b/common/streams/src/sources/source_ndjson.rs @@ -112,14 +112,7 @@ where R: AsyncBufRead + Unpin + Send .fields() .iter() .map(|f| { - if f.data_type().data_type_id() == TypeID::Timestamp { - f.data_type().create_deserializer_with_tz( - self.builder.block_size, - self.builder.tz.clone(), - ) - } else { - f.data_type().create_deserializer(self.builder.block_size) - } + f.data_type().create_deserializer(self.builder.block_size) }) .collect::>(); diff --git a/query/src/interpreters/interpreter_insert.rs b/query/src/interpreters/interpreter_insert.rs index 724bc2ab24170..fed8bdad285c7 100644 --- a/query/src/interpreters/interpreter_insert.rs +++ b/query/src/interpreters/interpreter_insert.rs @@ -15,6 +15,7 @@ use std::collections::VecDeque; use std::sync::Arc; +use chrono_tz::Tz; use common_datavalues::DataType; use common_exception::ErrorCode; use common_exception::Result; @@ -120,6 +121,9 @@ impl InsertInterpreter { let tz = String::from_utf8(tz).map_err(|_| { ErrorCode::LogicalError("Timezone has been checked and should be valid.") })?; + let tz = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; let func_ctx = FunctionContext { tz }; pipeline.add_transform(|transform_input_port, transform_output_port| { TransformCastSchema::try_create( diff --git a/query/src/pipelines/transforms/transform_expression_executor.rs b/query/src/pipelines/transforms/transform_expression_executor.rs index a2c6d1e5ff131..32b47aa7a884f 100644 --- a/query/src/pipelines/transforms/transform_expression_executor.rs +++ b/query/src/pipelines/transforms/transform_expression_executor.rs @@ -14,6 +14,7 @@ use std::collections::HashMap; use std::sync::Arc; +use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::prelude::*; @@ -197,6 +198,9 @@ impl ExpressionExecutor { let tz = String::from_utf8(tz).map_err(|_| { ErrorCode::LogicalError("Timezone has beeen checked and should be valid.") })?; + let tz = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; let func_ctx = FunctionContext { tz }; let column = f.func.eval(func_ctx, &arg_columns, rows)?; Ok(ColumnWithField::new( diff --git a/query/src/pipelines/transforms/transform_sink.rs b/query/src/pipelines/transforms/transform_sink.rs index 44dfc58caa0b7..99f5b0afe971c 100644 --- a/query/src/pipelines/transforms/transform_sink.rs +++ b/query/src/pipelines/transforms/transform_sink.rs @@ -14,7 +14,7 @@ use std::any::Any; use 
std::sync::Arc; - +use chrono_tz::Tz; use common_datavalues::DataSchemaRef; use common_datavalues::DataType; use common_exception::ErrorCode; @@ -100,6 +100,9 @@ impl Processor for SinkTransform { let tz = String::from_utf8(tz).map_err(|_| { ErrorCode::LogicalError("Timezone has beeen checked and should be valid.") })?; + let tz = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; let func_ctx = FunctionContext { tz }; input_stream = Box::pin(CastStream::try_create( input_stream, diff --git a/query/src/servers/clickhouse/writers/query_writer.rs b/query/src/servers/clickhouse/writers/query_writer.rs index 1b297be8d6de1..fe5fc00db3835 100644 --- a/query/src/servers/clickhouse/writers/query_writer.rs +++ b/query/src/servers/clickhouse/writers/query_writer.rs @@ -152,16 +152,7 @@ pub fn to_clickhouse_block(block: DataBlock, format: &FormatSettings) -> Result< let column = block.column(column_index); let field = block.schema().field(column_index); let name = field.name(); - let serializer = if field.data_type().data_type_id() == TypeID::Timestamp { - let tz = String::from_utf8(format.timezone.clone()) - .map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; - let tz = tz.parse::().map_err(|_| { - ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") - })?; - field.data_type().create_serializer_with_tz(tz) - } else { - field.data_type().create_serializer() - }; + let serializer = field.data_type().create_serializer(); result.append_column(column::new_column( name, serializer.serialize_clickhouse_format(&column.convert_full_column(), format)?, diff --git a/query/src/sessions/query_ctx.rs b/query/src/sessions/query_ctx.rs index 7e6cdacb8623b..bfb3979f62288 100644 --- a/query/src/sessions/query_ctx.rs +++ b/query/src/sessions/query_ctx.rs @@ -18,7 +18,7 @@ use std::net::SocketAddr; use std::sync::atomic::Ordering; use std::sync::atomic::Ordering::Acquire; use std::sync::Arc; - +use chrono_tz::Tz; use common_base::tokio::task::JoinHandle; use common_base::Progress; use common_base::ProgressValues; @@ -397,11 +397,13 @@ impl QueryContext { } pub fn try_get_function_context(&self) -> Result { - Ok(FunctionContext { - tz: String::from_utf8(self.get_settings().get_timezone()?).map_err(|_| { - ErrorCode::LogicalError("Timezone has been checked and should be valid.") - })?, - }) + let tz = String::from_utf8(self.get_settings().get_timezone()?).map_err(|_| { + ErrorCode::LogicalError("Timezone has been checked and should be valid.") + })?; + let tz = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; + Ok(FunctionContext {tz}) } } diff --git a/query/src/sql/statements/value_source.rs b/query/src/sql/statements/value_source.rs index b7b03b6c4bb68..b1d5c30eb9b71 100644 --- a/query/src/sql/statements/value_source.rs +++ b/query/src/sql/statements/value_source.rs @@ -67,11 +67,7 @@ impl ValueSource { .fields() .iter() .map(|f| { - if f.data_type().data_type_id() == TypeID::Timestamp { - f.data_type().create_deserializer_with_tz(1024, tz) - } else { f.data_type().create_deserializer(1024) - } }) .collect::>(); diff --git a/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.result b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.result new file mode 100644 index 0000000000000..b4e0d14713cc5 --- /dev/null +++ b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.result @@ -0,0 +1,3 @@ +====CAST==== +2021-08-30 
10:47:42.000000 +2021-08-30 18:47:42.000000 diff --git a/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql new file mode 100644 index 0000000000000..46f35c2ee78d5 --- /dev/null +++ b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql @@ -0,0 +1,7 @@ +-- toDateTime with tz +select "====CAST====" +select toDateTime(1630320462000000); +select toDateTime('1000-01-01 00:00:00'); +set timezone='Asia/Shanghai'; +select toDateTime(1630320462000000); +select toDateTime('1000-01-01 08:00:00'); From df9218808bd6d59f442d96a475d3c7d03082394f Mon Sep 17 00:00:00 2001 From: Veeupup Date: Fri, 6 May 2022 16:32:00 +0800 Subject: [PATCH 10/15] save --- Cargo.lock | 1 + common/datablocks/Cargo.toml | 1 + .../src/kernels/data_block_group_by_hash.rs | 11 +++-- common/datavalues/src/types/data_type.rs | 2 +- .../src/types/deserializations/boolean.rs | 14 +++--- .../src/types/deserializations/date.rs | 20 ++++---- .../src/types/deserializations/mod.rs | 28 +++++------ .../src/types/deserializations/null.rs | 14 +++--- .../src/types/deserializations/nullable.rs | 46 +++++++++---------- .../src/types/deserializations/number.rs | 16 +++---- .../src/types/deserializations/string.rs | 16 +++---- .../src/types/deserializations/timestamp.rs | 20 ++++---- .../src/types/deserializations/variant.rs | 16 +++---- common/datavalues/src/types/type_nullable.rs | 2 +- .../functions/src/scalars/conditionals/if.rs | 2 +- .../src/scalars/dates/interval_function.rs | 2 +- common/functions/src/scalars/dates/macros.rs | 2 +- .../src/scalars/dates/number_function.rs | 24 +++++----- .../expressions/cast_from_datetimes.rs | 4 +- .../scalars/expressions/cast_from_string.rs | 4 +- .../scalars/expressions/cast_from_variant.rs | 2 +- .../src/scalars/expressions/cast_with_type.rs | 4 +- common/io/src/format_settings.rs | 7 +-- common/streams/src/sources/source_csv.rs | 12 ++--- common/streams/src/sources/source_ndjson.rs | 9 ++-- .../group_by/aggregator_groups_builder.rs | 4 +- query/src/servers/http/clickhouse_handler.rs | 4 +- query/src/servers/http/v1/load.rs | 6 +-- .../mysql/writers/query_result_writer.rs | 6 +-- query/src/sessions/query_ctx_shared.rs | 9 +++- query/src/sql/statements/value_source.rs | 11 ++--- query/src/storages/s3/s3_stage_source.rs | 8 +--- 32 files changed, 163 insertions(+), 164 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 10e0e941dca9f..0207e05a8d38b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -915,6 +915,7 @@ dependencies = [ "common-datavalues", "common-exception", "common-infallible", + "common-io", "pretty_assertions", "regex", ] diff --git a/common/datablocks/Cargo.toml b/common/datablocks/Cargo.toml index 2b6dd1fa2b810..833dcd9b1409c 100644 --- a/common/datablocks/Cargo.toml +++ b/common/datablocks/Cargo.toml @@ -12,6 +12,7 @@ test = false [dependencies] # In alphabetical order # Workspace dependencies +common-io = { path = "../io" } common-arrow = { path = "../arrow" } common-datavalues = { path = "../datavalues" } common-exception = { path = "../exception" } diff --git a/common/datablocks/src/kernels/data_block_group_by_hash.rs b/common/datablocks/src/kernels/data_block_group_by_hash.rs index dc6b19c6269d6..910176dcb67a4 100644 --- a/common/datablocks/src/kernels/data_block_group_by_hash.rs +++ b/common/datablocks/src/kernels/data_block_group_by_hash.rs @@ -18,6 +18,7 @@ use std::hash::Hash; use std::marker::PhantomData; use std::ops::Not; +use common_io::prelude::FormatSettings; use 
common_datavalues::prelude::*; use common_exception::Result; @@ -226,14 +227,14 @@ impl HashMethodSerializer { let mut keys: Vec<&[u8]> = keys.iter().map(|x| x.as_slice()).collect(); let rows = keys.len(); - + let format = FormatSettings::default(); let mut res = Vec::with_capacity(group_fields.len()); for f in group_fields.iter() { let data_type = f.data_type(); let mut deserializer = data_type.create_deserializer(rows); for (_row, key) in keys.iter_mut().enumerate() { - deserializer.de_binary(key)?; + deserializer.de_binary(key, &format)?; } res.push(deserializer.finish_to_column()); } @@ -325,9 +326,10 @@ where T: PrimitiveType let mut deserializer = non_null_type.create_deserializer(rows); let reader = vec8.as_slice(); + let format = FormatSettings::default(); let col = match f.is_nullable() { false => { - deserializer.de_fixed_binary_batch(&reader[offsize..], step, rows)?; + deserializer.de_fixed_binary_batch(&reader[offsize..], step, rows, &format)?; deserializer.finish_to_column() } @@ -337,6 +339,7 @@ where T: PrimitiveType &reader[null_offsize..], step, rows, + &format )?; null_offsize += 1; @@ -346,7 +349,7 @@ where T: PrimitiveType // we store 1 for nulls in fixed_hash let bitmap = col.values().not(); - deserializer.de_fixed_binary_batch(&reader[offsize..], step, rows)?; + deserializer.de_fixed_binary_batch(&reader[offsize..], step, rows, &format)?; let inner = deserializer.finish_to_column(); NullableColumn::wrap_inner(inner, Some(bitmap)) } diff --git a/common/datavalues/src/types/data_type.rs b/common/datavalues/src/types/data_type.rs index 536cec25e9334..a5fbd7f024100 100644 --- a/common/datavalues/src/types/data_type.rs +++ b/common/datavalues/src/types/data_type.rs @@ -15,7 +15,7 @@ use std::any::Any; use std::collections::BTreeMap; -use chrono_tz::Tz; + use common_arrow::arrow::datatypes::DataType as ArrowType; use common_arrow::arrow::datatypes::Field as ArrowField; use common_exception::Result; diff --git a/common/datavalues/src/types/deserializations/boolean.rs b/common/datavalues/src/types/deserializations/boolean.rs index 0185d5fa4e981..0edcd8e1c10f3 100644 --- a/common/datavalues/src/types/deserializations/boolean.rs +++ b/common/datavalues/src/types/deserializations/boolean.rs @@ -23,17 +23,17 @@ pub struct BooleanDeserializer { } impl TypeDeserializer for BooleanDeserializer { - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { let value: bool = reader.read_scalar()?; self.builder.append_value(value); Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder.append_value(false); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { for row in 0..rows { let mut reader = &reader[step * row..]; let value: bool = reader.read_scalar()?; @@ -43,7 +43,7 @@ impl TypeDeserializer for BooleanDeserializer { Ok(()) } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], _format: &FormatSettings) -> Result<()> { if reader.eq_ignore_ascii_case(b"true") { self.builder.append_value(true); } else if reader.eq_ignore_ascii_case(b"false") { @@ -54,7 +54,7 @@ impl TypeDeserializer for BooleanDeserializer { Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text(&mut self, reader: &mut 
CheckpointReader, _format: &FormatSettings) -> Result<()> { let v = if BufferReadExt::ignore_insensitive_bytes(reader, b"true")? { Ok(true) } else if BufferReadExt::ignore_insensitive_bytes(reader, b"false")? { @@ -67,7 +67,7 @@ impl TypeDeserializer for BooleanDeserializer { Ok(()) } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { match value { serde_json::Value::Bool(v) => self.builder.append_value(*v), _ => return Err(ErrorCode::BadBytes("Incorrect boolean value")), @@ -75,7 +75,7 @@ impl TypeDeserializer for BooleanDeserializer { Ok(()) } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, _format: &FormatSettings) -> Result<()> { self.builder.append_value(value.as_bool()?); Ok(()) } diff --git a/common/datavalues/src/types/deserializations/date.rs b/common/datavalues/src/types/deserializations/date.rs index 2c7cfdc102df3..615b4572e00e5 100644 --- a/common/datavalues/src/types/deserializations/date.rs +++ b/common/datavalues/src/types/deserializations/date.rs @@ -33,18 +33,18 @@ where T: PrimitiveType, T: Unmarshal + StatBuffer + FromLexical, { - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { let value: T = reader.read_scalar()?; let _ = check_date(value.as_i32())?; self.builder.append_value(value); Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder.append_value(T::default()); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { for row in 0..rows { let mut reader = &reader[step * row..]; let value: T = reader.read_scalar()?; @@ -54,7 +54,7 @@ where Ok(()) } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { match value { serde_json::Value::String(v) => { let mut reader = BufferReader::new(v.as_bytes()); @@ -68,7 +68,7 @@ where } } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], _format: &FormatSettings) -> Result<()> { let mut reader = BufferReader::new(reader); let date = reader.read_date_text()?; let days = uniform(date); @@ -78,7 +78,7 @@ where Ok(()) } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_quoted(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { reader.must_ignore_byte(b'\'')?; let date = reader.read_date_text()?; let days = uniform(date); @@ -89,7 +89,7 @@ where Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { let date = reader.read_date_text()?; let days = uniform(date); let _ = check_date(days.as_i32())?; @@ -97,7 +97,7 @@ where Ok(()) } - fn de_text_csv(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_csv(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { let maybe_quote = reader.ignore(|f| f == b'\'' || f == b'"')?; let date = reader.read_date_text()?; let days = uniform(date); @@ -109,7 +109,7 @@ where Ok(()) } - fn de_text_json(&mut self, reader: &mut 
CheckpointReader) -> Result<()> { + fn de_text_json(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { reader.must_ignore_byte(b'"')?; let date = reader.read_date_text()?; let days = uniform(date); @@ -120,7 +120,7 @@ where Ok(()) } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, _format: &FormatSettings) -> Result<()> { let v = value.as_i64()? as i32; let _ = check_date(v)?; self.builder.append_value(v.as_()); diff --git a/common/datavalues/src/types/deserializations/mod.rs b/common/datavalues/src/types/deserializations/mod.rs index f6ec37975cc0c..75e2be63092af 100644 --- a/common/datavalues/src/types/deserializations/mod.rs +++ b/common/datavalues/src/types/deserializations/mod.rs @@ -39,35 +39,35 @@ pub use variant::*; #[enum_dispatch] pub trait TypeDeserializer: Send + Sync { - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()>; + fn de_binary(&mut self, reader: &mut &[u8], format: &FormatSettings) -> Result<()>; - fn de_default(&mut self); + fn de_default(&mut self, format: &FormatSettings); - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()>; + fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, format: &FormatSettings) -> Result<()>; - fn de_json(&mut self, reader: &Value) -> Result<()>; + fn de_json(&mut self, reader: &Value, format: &FormatSettings) -> Result<()>; - fn de_null(&mut self) -> bool { + fn de_null(&mut self, _format: &FormatSettings) -> bool { false } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()>; + fn de_whole_text(&mut self, reader: &[u8], format: &FormatSettings) -> Result<()>; - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()>; + fn de_text(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()>; - fn de_text_csv(&mut self, reader: &mut CheckpointReader) -> Result<()> { - self.de_text(reader) + fn de_text_csv(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + self.de_text(reader, format) } - fn de_text_json(&mut self, reader: &mut CheckpointReader) -> Result<()> { - self.de_text(reader) + fn de_text_json(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + self.de_text(reader, format) } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader) -> Result<()> { - self.de_text(reader) + fn de_text_quoted(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + self.de_text(reader, format) } - fn append_data_value(&mut self, value: DataValue) -> Result<()>; + fn append_data_value(&mut self, value: DataValue, format: &FormatSettings) -> Result<()>; /// Note this method will return err only when inner builder is empty. 
fn pop_data_value(&mut self) -> Result; diff --git a/common/datavalues/src/types/deserializations/null.rs b/common/datavalues/src/types/deserializations/null.rs index 1529e5c495ed7..9612ecd13fefc 100644 --- a/common/datavalues/src/types/deserializations/null.rs +++ b/common/datavalues/src/types/deserializations/null.rs @@ -27,37 +27,37 @@ pub struct NullDeserializer { } impl TypeDeserializer for NullDeserializer { - fn de_binary(&mut self, _reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, _reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { self.builder.append_default(); Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder.append_default(); } - fn de_fixed_binary_batch(&mut self, _reader: &[u8], _step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch(&mut self, _reader: &[u8], _step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { for _ in 0..rows { self.builder.append_default(); } Ok(()) } - fn de_json(&mut self, _value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, _value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { self.builder.append_default(); Ok(()) } - fn de_whole_text(&mut self, _reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, _reader: &[u8], _format: &FormatSettings) -> Result<()> { Ok(()) } - fn de_text(&mut self, _reader: &mut CheckpointReader) -> Result<()> { + fn de_text(&mut self, _reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { self.builder.append_default(); Ok(()) } - fn append_data_value(&mut self, _value: DataValue) -> Result<()> { + fn append_data_value(&mut self, _value: DataValue, _format: &FormatSettings) -> Result<()> { self.builder.append_default(); Ok(()) } diff --git a/common/datavalues/src/types/deserializations/nullable.rs b/common/datavalues/src/types/deserializations/nullable.rs index 5b5e232c3fd67..36c7b45c19f66 100644 --- a/common/datavalues/src/types/deserializations/nullable.rs +++ b/common/datavalues/src/types/deserializations/nullable.rs @@ -29,84 +29,84 @@ pub struct NullableDeserializer { } impl TypeDeserializer for NullableDeserializer { - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], format: &FormatSettings) -> Result<()> { let valid: bool = reader.read_scalar()?; if valid { - self.inner.de_binary(reader)?; + self.inner.de_binary(reader, format)?; } else { - self.inner.de_default(); + self.inner.de_default(format); } self.bitmap.push(valid); Ok(()) } - fn de_default(&mut self) { - self.inner.de_default(); + fn de_default(&mut self, format: &FormatSettings) { + self.inner.de_default(format); self.bitmap.push(false); } - fn de_fixed_binary_batch(&mut self, _reader: &[u8], _step: usize, _rows: usize) -> Result<()> { + fn de_fixed_binary_batch(&mut self, _reader: &[u8], _step: usize, _rows: usize, _format: &FormatSettings) -> Result<()> { // it's covered outside unreachable!() } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, format: &FormatSettings) -> Result<()> { match value { serde_json::Value::Null => { - self.de_null(); + self.de_null(format); Ok(()) } other => { self.bitmap.push(true); - self.inner.de_json(other) + self.inner.de_json(other, format) } } } // TODO: support null text setting - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> 
Result<()> { if reader.ignore_insensitive_bytes(b"null")? { - self.de_default(); + self.de_default(format); return Ok(()); } - self.inner.de_text(reader)?; + self.inner.de_text(reader, format)?; self.bitmap.push(true); Ok(()) } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_quoted(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { if reader.ignore_insensitive_bytes(b"null")? { - self.de_default(); + self.de_default(format); return Ok(()); } - self.inner.de_text_quoted(reader)?; + self.inner.de_text_quoted(reader, format)?; self.bitmap.push(true); Ok(()) } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], format: &FormatSettings) -> Result<()> { if reader.eq_ignore_ascii_case(b"null") { - self.de_default(); + self.de_default(format); return Ok(()); } - self.inner.de_whole_text(reader)?; + self.inner.de_whole_text(reader, format)?; self.bitmap.push(true); Ok(()) } - fn de_null(&mut self) -> bool { - self.inner.de_default(); + fn de_null(&mut self, format: &FormatSettings) -> bool { + self.inner.de_default(format); self.bitmap.push(false); true } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, format: &FormatSettings) -> Result<()> { if value.is_null() { - self.inner.de_default(); + self.inner.de_default(format); self.bitmap.push(false); } else { - self.inner.append_data_value(value)?; + self.inner.append_data_value(value, format)?; self.bitmap.push(true); } Ok(()) diff --git a/common/datavalues/src/types/deserializations/number.rs b/common/datavalues/src/types/deserializations/number.rs index 6f5064e59b1c3..6a29e2580e53e 100644 --- a/common/datavalues/src/types/deserializations/number.rs +++ b/common/datavalues/src/types/deserializations/number.rs @@ -28,17 +28,17 @@ where T: PrimitiveType, T: Unmarshal + StatBuffer + FromLexical, { - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { let value: T = reader.read_scalar()?; self.builder.append_value(value); Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder.append_value(T::default()); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { for row in 0..rows { let mut reader = &reader[step * row..]; let value: T = reader.read_scalar()?; @@ -47,7 +47,7 @@ where Ok(()) } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { match value { serde_json::Value::Number(v) => { let v = v.to_string(); @@ -65,7 +65,7 @@ where } } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], _format: &FormatSettings) -> Result<()> { let mut reader = BufferReader::new(reader); let v: T = if !T::FLOATING { reader.read_int_text() @@ -78,7 +78,7 @@ where Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { let v: T = if !T::FLOATING { reader.read_int_text() } else { @@ -88,11 +88,11 @@ where Ok(()) } - fn de_null(&mut self) -> bool { + fn de_null(&mut self, _format: &FormatSettings) 
-> bool { false } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, _format: &FormatSettings) -> Result<()> { self.builder.append_data_value(value) } diff --git a/common/datavalues/src/types/deserializations/string.rs b/common/datavalues/src/types/deserializations/string.rs index f71e268b920f6..9e29b53c8cde0 100644 --- a/common/datavalues/src/types/deserializations/string.rs +++ b/common/datavalues/src/types/deserializations/string.rs @@ -37,7 +37,7 @@ impl StringDeserializer { impl TypeDeserializer for StringDeserializer { // See GroupHash.rs for StringColumn #[allow(clippy::uninit_vec)] - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { let offset: u64 = reader.read_uvarint()?; self.buffer.clear(); @@ -51,11 +51,11 @@ impl TypeDeserializer for StringDeserializer { Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder.append_value(""); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { for row in 0..rows { let reader = &reader[step * row..]; self.builder.append_value(reader); @@ -63,7 +63,7 @@ impl TypeDeserializer for StringDeserializer { Ok(()) } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { match value { serde_json::Value::String(s) => { self.builder.append_value(s); @@ -73,26 +73,26 @@ impl TypeDeserializer for StringDeserializer { } } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_quoted(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { self.buffer.clear(); reader.read_quoted_text(&mut self.buffer, b'\'')?; self.builder.append_value(self.buffer.as_slice()); Ok(()) } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], _format: &FormatSettings) -> Result<()> { self.builder.append_value(reader); Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { self.buffer.clear(); reader.read_escaped_string_text(&mut self.buffer)?; self.builder.append_value(self.buffer.as_slice()); Ok(()) } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, _format: &FormatSettings) -> Result<()> { self.builder.append_data_value(value) } diff --git a/common/datavalues/src/types/deserializations/timestamp.rs b/common/datavalues/src/types/deserializations/timestamp.rs index d33abcd80804b..8a661079103de 100644 --- a/common/datavalues/src/types/deserializations/timestamp.rs +++ b/common/datavalues/src/types/deserializations/timestamp.rs @@ -27,18 +27,18 @@ pub struct TimestampDeserializer { } impl TypeDeserializer for TimestampDeserializer { - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { let value: i64 = reader.read_scalar()?; let _ = check_timestamp(value)?; self.builder.append_value(value); Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { 
self.builder.append_value(i64::default()); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { for row in 0..rows { let mut reader = &reader[step * row..]; let value: i64 = reader.read_scalar()?; @@ -48,7 +48,7 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { match value { serde_json::Value::String(v) => { let v = v.clone(); @@ -64,7 +64,7 @@ impl TypeDeserializer for TimestampDeserializer { } } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_quoted(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { reader.must_ignore_byte(b'\'')?; let ts = reader.read_timestamp_text(&self.tz)?; let micros = ts.timestamp_micros(); @@ -74,7 +74,7 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], _format: &FormatSettings) -> Result<()> { let mut reader = BufferReader::new(reader); let ts = reader.read_timestamp_text(&self.tz)?; let micros = ts.timestamp_micros(); @@ -84,7 +84,7 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { let ts = reader.read_timestamp_text(&self.tz)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; @@ -92,7 +92,7 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_text_csv(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_csv(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { let maybe_quote = reader.ignore(|f| f == b'\'' || f == b'"')?; let ts = reader.read_timestamp_text(&self.tz)?; let micros = ts.timestamp_micros(); @@ -104,7 +104,7 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_text_json(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_json(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { reader.must_ignore_byte(b'"')?; let ts = reader.read_timestamp_text(&self.tz)?; let micros = ts.timestamp_micros(); @@ -115,7 +115,7 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, _format: &FormatSettings) -> Result<()> { let v = value.as_i64()?; let _ = check_timestamp(v)?; self.builder.append_value(v.as_()); diff --git a/common/datavalues/src/types/deserializations/variant.rs b/common/datavalues/src/types/deserializations/variant.rs index d84e5de6cf33b..18f0a3f1855d0 100644 --- a/common/datavalues/src/types/deserializations/variant.rs +++ b/common/datavalues/src/types/deserializations/variant.rs @@ -35,7 +35,7 @@ impl VariantDeserializer { impl TypeDeserializer for VariantDeserializer { #[allow(clippy::uninit_vec)] - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { let offset: u64 = reader.read_uvarint()?; self.buffer.clear(); @@ -50,12 +50,12 @@ impl TypeDeserializer for VariantDeserializer { 
Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder .append_value(VariantValue::from(serde_json::Value::Null)); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { for row in 0..rows { let reader = &reader[step * row..]; let val = serde_json::from_slice(reader)?; @@ -64,12 +64,12 @@ impl TypeDeserializer for VariantDeserializer { Ok(()) } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { self.builder.append_value(VariantValue::from(value)); Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { self.buffer.clear(); reader.read_escaped_string_text(&mut self.buffer)?; let val = serde_json::from_slice(self.buffer.as_slice())?; @@ -77,13 +77,13 @@ impl TypeDeserializer for VariantDeserializer { Ok(()) } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], _format: &FormatSettings) -> Result<()> { let val = serde_json::from_slice(reader)?; self.builder.append_value(val); Ok(()) } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_quoted(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { self.buffer.clear(); reader.read_quoted_text(&mut self.buffer, b'\'')?; @@ -92,7 +92,7 @@ impl TypeDeserializer for VariantDeserializer { Ok(()) } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, _format: &FormatSettings) -> Result<()> { self.builder.append_data_value(value) } diff --git a/common/datavalues/src/types/type_nullable.rs b/common/datavalues/src/types/type_nullable.rs index 8e6db84604b75..5595a5aa48731 100644 --- a/common/datavalues/src/types/type_nullable.rs +++ b/common/datavalues/src/types/type_nullable.rs @@ -15,7 +15,7 @@ use std::collections::BTreeMap; use std::sync::Arc; -use chrono_tz::Tz; + use common_arrow::arrow::datatypes::DataType as ArrowType; use common_arrow::bitmap::MutableBitmap; use common_exception::ErrorCode; diff --git a/common/functions/src/scalars/conditionals/if.rs b/common/functions/src/scalars/conditionals/if.rs index b76dca8d92a79..60b08977ec8cb 100644 --- a/common/functions/src/scalars/conditionals/if.rs +++ b/common/functions/src/scalars/conditionals/if.rs @@ -57,7 +57,7 @@ impl IfFunction { &self, cond_col: &ColumnRef, columns: &ColumnsWithField, - func_ctx: &FunctionContext, + _func_ctx: &FunctionContext, ) -> Result { debug_assert!(cond_col.is_const()); // whether nullable or not, we can use viewer to make it diff --git a/common/functions/src/scalars/dates/interval_function.rs b/common/functions/src/scalars/dates/interval_function.rs index ae0e072f351dc..e941eee098c5b 100644 --- a/common/functions/src/scalars/dates/interval_function.rs +++ b/common/functions/src/scalars/dates/interval_function.rs @@ -16,7 +16,7 @@ use std::fmt; use std::marker::PhantomData; use std::sync::Arc; -use chrono_tz::Tz; + use common_datavalues::chrono::Datelike; use common_datavalues::chrono::Duration; use common_datavalues::chrono::NaiveDate; diff --git a/common/functions/src/scalars/dates/macros.rs b/common/functions/src/scalars/dates/macros.rs index 
a9de4bf6523de..73a6097fcd177 100644 --- a/common/functions/src/scalars/dates/macros.rs +++ b/common/functions/src/scalars/dates/macros.rs @@ -39,7 +39,7 @@ macro_rules! impl_interval_year_month { #[macro_export] macro_rules! define_date_add_year_months { ($l: ident, $r: ident, $ctx: ident, $op: expr) => {{ - let tz = $ctx.tz; + let _tz = $ctx.tz; let factor = $ctx.factor; let epoch = NaiveDate::from_ymd(1970, 1, 1); let naive = epoch.checked_add_signed(Duration::days($l as i64)); diff --git a/common/functions/src/scalars/dates/number_function.rs b/common/functions/src/scalars/dates/number_function.rs index 213cc9ff2bd4d..1c7edf60fc85c 100644 --- a/common/functions/src/scalars/dates/number_function.rs +++ b/common/functions/src/scalars/dates/number_function.rs @@ -20,7 +20,7 @@ use common_datavalues::chrono::DateTime; use common_datavalues::chrono::Datelike; use common_datavalues::chrono::TimeZone; use common_datavalues::chrono::Timelike; -use common_datavalues::chrono::Utc; + use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; @@ -70,7 +70,7 @@ pub struct ToYYYYMM; impl NumberOperator for ToYYYYMM { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime, tz: &Tz) -> u32 { + fn to_number(value: DateTime, _tz: &Tz) -> u32 { value.year() as u32 * 100 + value.month() } } @@ -81,7 +81,7 @@ pub struct ToYYYYMMDD; impl NumberOperator for ToYYYYMMDD { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime, tz: &Tz) -> u32 { + fn to_number(value: DateTime, _tz: &Tz) -> u32 { value.year() as u32 * 10000 + value.month() * 100 + value.day() } } @@ -92,7 +92,7 @@ pub struct ToYYYYMMDDhhmmss; impl NumberOperator for ToYYYYMMDDhhmmss { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime, tz: &Tz) -> u64 { + fn to_number(value: DateTime, _tz: &Tz) -> u64 { value.year() as u64 * 10000000000 + value.month() as u64 * 100000000 + value.day() as u64 * 1000000 @@ -178,7 +178,7 @@ pub struct ToMonth; impl NumberOperator for ToMonth { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime, tz: &Tz) -> u8 { + fn to_number(value: DateTime, _tz: &Tz) -> u8 { value.month() as u8 } @@ -195,7 +195,7 @@ pub struct ToDayOfYear; impl NumberOperator for ToDayOfYear { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime, tz: &Tz) -> u16 { + fn to_number(value: DateTime, _tz: &Tz) -> u16 { value.ordinal() as u16 } @@ -212,7 +212,7 @@ pub struct ToDayOfMonth; impl NumberOperator for ToDayOfMonth { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime, tz: &Tz) -> u8 { + fn to_number(value: DateTime, _tz: &Tz) -> u8 { value.day() as u8 } @@ -229,7 +229,7 @@ pub struct ToDayOfWeek; impl NumberOperator for ToDayOfWeek { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime, tz: &Tz) -> u8 { + fn to_number(value: DateTime, _tz: &Tz) -> u8 { value.weekday().number_from_monday() as u8 } @@ -245,7 +245,7 @@ pub struct ToHour; impl NumberOperator for ToHour { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime, tz: &Tz) -> u8 { + fn to_number(value: DateTime, _tz: &Tz) -> u8 { value.hour() as u8 } @@ -262,7 +262,7 @@ pub struct ToMinute; impl NumberOperator for ToMinute { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime, tz: &Tz) -> u8 { + fn to_number(value: DateTime, _tz: &Tz) -> u8 { value.minute() as u8 } @@ -281,7 +281,7 @@ pub struct ToSecond; impl NumberOperator for ToSecond { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime, tz: 
&Tz) -> u8 { + fn to_number(value: DateTime, _tz: &Tz) -> u8 { value.second() as u8 } @@ -316,7 +316,7 @@ pub struct ToYear; impl NumberOperator for ToYear { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime, tz: &Tz) -> u16 { + fn to_number(value: DateTime, _tz: &Tz) -> u16 { value.year() as u16 } } diff --git a/common/functions/src/scalars/expressions/cast_from_datetimes.rs b/common/functions/src/scalars/expressions/cast_from_datetimes.rs index 8a1c9702ca915..4cdb4904ca9f6 100644 --- a/common/functions/src/scalars/expressions/cast_from_datetimes.rs +++ b/common/functions/src/scalars/expressions/cast_from_datetimes.rs @@ -18,9 +18,9 @@ use chrono_tz::Tz; use common_arrow::arrow::bitmap::Bitmap; use common_datavalues::chrono::DateTime; use common_datavalues::chrono::TimeZone; -use common_datavalues::chrono::Utc; + use common_datavalues::prelude::*; -use common_exception::ErrorCode; + use common_exception::Result; use super::cast_with_type::arrow_cast_compute; diff --git a/common/functions/src/scalars/expressions/cast_from_string.rs b/common/functions/src/scalars/expressions/cast_from_string.rs index 96ee2a4cb513c..f404fb87cfc2e 100644 --- a/common/functions/src/scalars/expressions/cast_from_string.rs +++ b/common/functions/src/scalars/expressions/cast_from_string.rs @@ -19,10 +19,10 @@ use common_datavalues::chrono::DateTime; use common_datavalues::chrono::Datelike; use common_datavalues::chrono::TimeZone; use common_datavalues::chrono::NaiveDate; -use common_datavalues::chrono::NaiveDateTime; + use common_datavalues::prelude::*; use common_exception::Result; -use common_exception::ErrorCode; + use super::cast_with_type::arrow_cast_compute; use super::cast_with_type::new_mutable_bitmap; diff --git a/common/functions/src/scalars/expressions/cast_from_variant.rs b/common/functions/src/scalars/expressions/cast_from_variant.rs index 9a67ea51b7f77..c845e79213343 100644 --- a/common/functions/src/scalars/expressions/cast_from_variant.rs +++ b/common/functions/src/scalars/expressions/cast_from_variant.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use chrono_tz::Tz; + use common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::temporal_conversions::EPOCH_DAYS_FROM_CE; use common_datavalues::chrono::Datelike; diff --git a/common/functions/src/scalars/expressions/cast_with_type.rs b/common/functions/src/scalars/expressions/cast_with_type.rs index 0ae0b61e55530..cb2527944e907 100644 --- a/common/functions/src/scalars/expressions/cast_with_type.rs +++ b/common/functions/src/scalars/expressions/cast_with_type.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use chrono_tz::Tz; + use common_arrow::arrow::array::ArrayRef; use common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::compute::cast; @@ -210,7 +210,7 @@ pub fn cast_to_variant( column: &ColumnRef, from_type: &DataTypeImpl, data_type: &DataTypeImpl, - func_ctx: &FunctionContext, + _func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { let column = Series::remove_nullable(column); let size = column.len(); diff --git a/common/io/src/format_settings.rs b/common/io/src/format_settings.rs index 67b2ab2005268..48bfbcae69c7a 100644 --- a/common/io/src/format_settings.rs +++ b/common/io/src/format_settings.rs @@ -14,19 +14,20 @@ use std::str::FromStr; +use chrono_tz::Tz; use common_exception::ErrorCode; use common_exception::Result; use serde::Deserialize; use serde::Serialize; -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub struct FormatSettings { pub record_delimiter: Vec, pub field_delimiter: Vec, pub empty_as_default: bool, pub skip_header: bool, pub compression: Compression, - pub timezone: Vec, + pub timezone: Tz, } impl Default for FormatSettings { @@ -37,7 +38,7 @@ impl Default for FormatSettings { empty_as_default: false, skip_header: false, compression: Compression::None, - timezone: "UTC".as_bytes().to_vec(), + timezone: "UTC".parse::().unwrap(), } } } diff --git a/common/streams/src/sources/source_csv.rs b/common/streams/src/sources/source_csv.rs index ae56f637cac0c..f911d82563739 100644 --- a/common/streams/src/sources/source_csv.rs +++ b/common/streams/src/sources/source_csv.rs @@ -62,9 +62,7 @@ impl CsvSourceBuilder { let empty_as_default = format_settings.empty_as_default; let skip_header = format_settings.skip_header; - - let tz = String::from_utf8(format_settings.timezone.clone()).unwrap(); - let tz = tz.parse::().unwrap(); + let tz = format_settings.timezone.clone(); CsvSourceBuilder { schema, @@ -183,16 +181,18 @@ where R: AsyncRead + Unpin + Send if record.is_empty() { break; } + let mut format = FormatSettings::default(); + format.timezone = self.builder.tz.clone(); for (col, pack) in packs.iter_mut().enumerate() { match record.get(col) { Some(bytes) => { if bytes.is_empty() && self.builder.empty_as_default { - pack.de_default(); + pack.de_default(&format); } else { - pack.de_whole_text(bytes)? + pack.de_whole_text(bytes, &format)? 
} } - None => pack.de_default(), + None => pack.de_default(&format), } } rows += 1; diff --git a/common/streams/src/sources/source_ndjson.rs b/common/streams/src/sources/source_ndjson.rs index 54e43bb6dc530..69a8c3e086fa8 100644 --- a/common/streams/src/sources/source_ndjson.rs +++ b/common/streams/src/sources/source_ndjson.rs @@ -24,6 +24,7 @@ use common_datavalues::TypeID; use common_exception::ErrorCode; use common_exception::Result; use common_exception::ToErrorCode; +use common_io::prelude::FormatSettings; use futures::AsyncBufRead; use futures::AsyncBufReadExt; @@ -34,16 +35,16 @@ pub struct NDJsonSourceBuilder { schema: DataSchemaRef, block_size: usize, size_limit: usize, - tz: Tz, + format: FormatSettings, } impl NDJsonSourceBuilder { - pub fn create(schema: DataSchemaRef, tz: Tz) -> Self { + pub fn create(schema: DataSchemaRef, format: FormatSettings) -> Self { NDJsonSourceBuilder { schema, block_size: 10000, size_limit: usize::MAX, - tz, + format } } @@ -149,7 +150,7 @@ where R: AsyncBufRead + Unpin + Send for ((name, type_name), deser) in fields.iter().zip(packs.iter_mut()) { let value = &json[name]; - deser.de_json(value).map_err(|e| { + deser.de_json(value, &self.builder.format).map_err(|e| { let value_str = format!("{:?}", value); ErrorCode::BadBytes(format!( "error at row {} column {}: type={}, err={}, value={}", diff --git a/query/src/pipelines/transforms/group_by/aggregator_groups_builder.rs b/query/src/pipelines/transforms/group_by/aggregator_groups_builder.rs index a105cabda530f..66c50e84cf177 100644 --- a/query/src/pipelines/transforms/group_by/aggregator_groups_builder.rs +++ b/query/src/pipelines/transforms/group_by/aggregator_groups_builder.rs @@ -23,6 +23,7 @@ use common_datavalues::PrimitiveType; use common_datavalues::ScalarColumnBuilder; use common_datavalues::TypeDeserializer; use common_exception::Result; +use common_io::prelude::FormatSettings; use crate::pipelines::new::processors::AggregatorParams; use crate::pipelines::transforms::group_by::keys_ref::KeysRef; @@ -100,12 +101,13 @@ impl GroupColumnsBuilder for SerializedKeysGroupColumnsBuilder { let rows = self.data.len(); let mut res = Vec::with_capacity(self.groups_fields.len()); + let format = FormatSettings::default(); for group_field in self.groups_fields.iter() { let data_type = group_field.data_type(); let mut deserializer = data_type.create_deserializer(rows); for (_, key) in keys.iter_mut().enumerate() { - deserializer.de_binary(key)?; + deserializer.de_binary(key, &format)?; } res.push(deserializer.finish_to_column()); } diff --git a/query/src/servers/http/clickhouse_handler.rs b/query/src/servers/http/clickhouse_handler.rs index 33fc04933e932..743fdb3eef462 100644 --- a/query/src/servers/http/clickhouse_handler.rs +++ b/query/src/servers/http/clickhouse_handler.rs @@ -19,6 +19,7 @@ use chrono_tz::Tz; use common_exception::ErrorCode; use common_exception::Result; use common_exception::ToErrorCode; +use common_io::prelude::FormatSettings; use common_planners::PlanNode; use common_streams::NDJsonSourceBuilder; use common_streams::SendableDataBlockStream; @@ -226,8 +227,7 @@ pub async fn clickhouse_handler_post( async fn build_ndjson_stream(plan: &PlanNode, body: Body) -> Result { // TODO(veeupup): HTTP with global session tz - let tz = "UTC".parse::().unwrap(); - let builder = NDJsonSourceBuilder::create(plan.schema(), tz); + let builder = NDJsonSourceBuilder::create(plan.schema(), FormatSettings::default()); let cursor = futures::io::Cursor::new( body.into_vec() .await diff --git 
a/query/src/servers/http/v1/load.rs b/query/src/servers/http/v1/load.rs index 578885db2d1b3..572e017d95558 100644 --- a/query/src/servers/http/v1/load.rs +++ b/query/src/servers/http/v1/load.rs @@ -280,8 +280,7 @@ fn build_ndjson_stream( plan: &PlanNode, mut multipart: Multipart, ) -> PoemResult { - let tz = "UTC".parse::().unwrap(); - let builder = NDJsonSourceBuilder::create(plan.schema(), tz); + let builder = NDJsonSourceBuilder::create(plan.schema(), FormatSettings::default()); let stream = stream! { while let Ok(Some(field)) = multipart.next_field().await { let bytes = field.bytes().await.map_err_to_code(ErrorCode::BadBytes, || "Read part to field bytes error")?; @@ -385,8 +384,7 @@ async fn ndjson_source_pipe_builder( plan: &PlanNode, mut multipart: Multipart, ) -> PoemResult { - let tz = "UTC".parse::().unwrap(); - let builder = NDJsonSourceBuilder::create(plan.schema(), tz); + let builder = NDJsonSourceBuilder::create(plan.schema(), FormatSettings::default()); let mut source_pipe_builder = SourcePipeBuilder::create(); while let Ok(Some(field)) = multipart.next_field().await { let bytes = field diff --git a/query/src/servers/mysql/writers/query_result_writer.rs b/query/src/servers/mysql/writers/query_result_writer.rs index 7920fc82c28de..122716fc29a31 100644 --- a/query/src/servers/mysql/writers/query_result_writer.rs +++ b/query/src/servers/mysql/writers/query_result_writer.rs @@ -114,11 +114,7 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { } let block = blocks[0].clone(); - let tz = String::from_utf8(format.timezone.clone()) - .map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; - let tz = tz.parse::().map_err(|_| { - ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") - })?; + let tz = format.timezone.clone(); match convert_schema(block.schema()) { Err(error) => Self::err(&error, dataset_writer), Ok(columns) => { diff --git a/query/src/sessions/query_ctx_shared.rs b/query/src/sessions/query_ctx_shared.rs index b77f5a8b6cdda..0ecd7f9646ac4 100644 --- a/query/src/sessions/query_ctx_shared.rs +++ b/query/src/sessions/query_ctx_shared.rs @@ -16,7 +16,7 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; use std::sync::atomic::AtomicUsize; use std::sync::Arc; - +use chrono_tz::Tz; use common_base::Progress; use common_base::Runtime; use common_contexts::DalContext; @@ -263,7 +263,12 @@ impl QueryContextShared { format.field_delimiter = settings.get_field_delimiter()?; format.empty_as_default = settings.get_empty_as_default()? > 0; format.skip_header = settings.get_skip_header()? 
> 0; - format.timezone = settings.get_timezone()?; + let tz = String::from_utf8(settings.get_timezone()?).map_err(|_| { + ErrorCode::LogicalError("Timezone has been checked and should be valid.") + })?; + format.timezone = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; } Ok(format) } diff --git a/query/src/sql/statements/value_source.rs b/query/src/sql/statements/value_source.rs index b1d5c30eb9b71..19ea405b8d014 100644 --- a/query/src/sql/statements/value_source.rs +++ b/query/src/sql/statements/value_source.rs @@ -57,11 +57,7 @@ impl ValueSource { pub async fn read<'a>(&self, reader: &mut CpBufferReader<'a>) -> Result { let format = self.ctx.get_format_settings()?; - let tz = String::from_utf8(format.timezone.clone()) - .map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; - let tz = tz.parse::().map_err(|_| { - ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") - })?; + let tz = format.timezone.clone(); let mut desers = self .schema .fields() @@ -120,6 +116,7 @@ impl ValueSource { )); } + let format = self.ctx.get_format_settings()?; for col_idx in 0..col_size { let _ = reader.ignore_white_spaces()?; let col_end = if col_idx + 1 == col_size { b')' } else { b',' }; @@ -131,7 +128,7 @@ impl ValueSource { // Disable backtrace here. self.backtrace_guard.disable(); let (need_fallback, pop_count) = deser - .de_text_quoted(reader) + .de_text_quoted(reader, &format) .and_then(|_| { let _ = reader.ignore_white_spaces()?; let need_fallback = reader.ignore_byte(col_end)?.not(); @@ -158,7 +155,7 @@ impl ValueSource { .await?; for (append_idx, deser) in desers.iter_mut().enumerate().take(col_size) { - deser.append_data_value(values[append_idx].clone())?; + deser.append_data_value(values[append_idx].clone(), &format)?; } return Ok(()); diff --git a/query/src/storages/s3/s3_stage_source.rs b/query/src/storages/s3/s3_stage_source.rs index 5a18b69f09e54..d57f0b184a3cd 100644 --- a/query/src/storages/s3/s3_stage_source.rs +++ b/query/src/storages/s3/s3_stage_source.rs @@ -123,13 +123,7 @@ impl StageSource { stage_info: &UserStageInfo, reader: BytesReader, ) -> Result> { - let format = ctx.get_format_settings()?; - let tz = String::from_utf8(format.timezone.clone()) - .map_err(|_| ErrorCode::LogicalError("timezone must be set"))?; - let tz = tz.parse::().map_err(|_| { - ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") - })?; - let mut builder = NDJsonSourceBuilder::create(schema, tz); + let mut builder = NDJsonSourceBuilder::create(schema, ctx.get_format_settings()?); let size_limit = stage_info.copy_options.size_limit; // Size limit. 
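
Note (not part of the patch series): a minimal sketch of how the reworked API above is meant to be used, with the session timezone carried by FormatSettings instead of a per-type tz field. It only uses the crate-internal names that appear in the hunks above (FormatSettings, DataTypeImpl, TypeDeserializer, de_whole_text, finish_to_column); the helper name and the single-row setup are illustrative assumptions, not code from the patches.

use chrono_tz::Tz;
use common_datavalues::prelude::*;
use common_exception::Result;
use common_io::prelude::FormatSettings;

// Illustrative helper: deserialize one whole-text value with a caller-supplied
// session timezone threaded through FormatSettings.
fn deserialize_with_session_tz(data_type: &DataTypeImpl, raw: &[u8], tz: Tz) -> Result<ColumnRef> {
    // FormatSettings.timezone is now a chrono_tz::Tz (see the format_settings.rs hunk),
    // so the settings are built once and passed to every de_* call.
    let format = FormatSettings {
        timezone: tz,
        ..Default::default()
    };
    let mut deserializer = data_type.create_deserializer(1);
    // Mirrors pack.de_whole_text(bytes, &format) in the source_csv.rs hunk above.
    deserializer.de_whole_text(raw, &format)?;
    Ok(deserializer.finish_to_column())
}

With this shape, the CSV/NDJSON sources and the MySQL writer above no longer parse a timezone string themselves; they read format.timezone directly from the settings built in QueryContextShared.
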
From 49d5d20a3f37d8af1b5aa5a36d815a4ad50d72b3 Mon Sep 17 00:00:00 2001 From: Veeupup Date: Fri, 6 May 2022 16:41:24 +0800 Subject: [PATCH 11/15] remove tz in de/se timestamp dt Signed-off-by: Veeupup --- .../src/types/deserializations/timestamp.rs | 25 +++++++------ .../src/types/serializations/timestamp.rs | 36 +++++++------------ common/datavalues/src/types/type_timestamp.rs | 2 -- 3 files changed, 24 insertions(+), 39 deletions(-) diff --git a/common/datavalues/src/types/deserializations/timestamp.rs b/common/datavalues/src/types/deserializations/timestamp.rs index 8a661079103de..6880b505bec8d 100644 --- a/common/datavalues/src/types/deserializations/timestamp.rs +++ b/common/datavalues/src/types/deserializations/timestamp.rs @@ -22,7 +22,6 @@ use crate::prelude::*; pub struct TimestampDeserializer { pub builder: MutablePrimitiveColumn, - pub tz: Tz, pub precision: usize, } @@ -48,12 +47,12 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, format: &FormatSettings) -> Result<()> { match value { serde_json::Value::String(v) => { let v = v.clone(); let mut reader = BufferReader::new(v.as_bytes()); - let ts = reader.read_timestamp_text(&self.tz)?; + let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; @@ -64,9 +63,9 @@ impl TypeDeserializer for TimestampDeserializer { } } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text_quoted(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { reader.must_ignore_byte(b'\'')?; - let ts = reader.read_timestamp_text(&self.tz)?; + let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; reader.must_ignore_byte(b'\'')?; @@ -74,9 +73,9 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_whole_text(&mut self, reader: &[u8], _format: &FormatSettings) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], format: &FormatSettings) -> Result<()> { let mut reader = BufferReader::new(reader); - let ts = reader.read_timestamp_text(&self.tz)?; + let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; reader.must_eof()?; @@ -84,17 +83,17 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { - let ts = reader.read_timestamp_text(&self.tz)?; + fn de_text(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; self.builder.append_value(micros.as_()); Ok(()) } - fn de_text_csv(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text_csv(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { let maybe_quote = reader.ignore(|f| f == b'\'' || f == b'"')?; - let ts = reader.read_timestamp_text(&self.tz)?; + let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; if maybe_quote { @@ -104,9 +103,9 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_text_json(&mut self, reader: &mut 
CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text_json(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { reader.must_ignore_byte(b'"')?; - let ts = reader.read_timestamp_text(&self.tz)?; + let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; reader.must_ignore_byte(b'"')?; diff --git a/common/datavalues/src/types/serializations/timestamp.rs b/common/datavalues/src/types/serializations/timestamp.rs index 8b60445e9c710..ab3627c1f91fd 100644 --- a/common/datavalues/src/types/serializations/timestamp.rs +++ b/common/datavalues/src/types/serializations/timestamp.rs @@ -22,59 +22,47 @@ use serde_json::Value; use crate::prelude::*; -#[derive(Debug, Clone)] -pub struct TimestampSerializer { - tz: Tz, -} +#[derive(Debug, Clone, Default)] +pub struct TimestampSerializer; -impl Default for TimestampSerializer { - fn default() -> Self { - let tz = "UTC".parse::().unwrap(); - Self { tz } - } -} impl TimestampSerializer { - pub fn new_with_tz(tz: Tz) -> Self { - Self { tz } - } - - pub fn to_timestamp(&self, value: &i64) -> DateTime { - value.to_timestamp(&self.tz) + pub fn to_timestamp(&self, value: &i64, tz: &Tz) -> DateTime { + value.to_timestamp(tz) } } const TIME_FMT: &str = "%Y-%m-%d %H:%M:%S"; impl TypeSerializer for TimestampSerializer { - fn serialize_value(&self, value: &DataValue, _format: &FormatSettings) -> Result { + fn serialize_value(&self, value: &DataValue, format: &FormatSettings) -> Result { let value = DFTryFrom::try_from(value.clone())?; - let dt = self.to_timestamp(&value); + let dt = self.to_timestamp(&value, &format.timezone); Ok(dt.format(TIME_FMT).to_string()) } fn serialize_column( &self, column: &ColumnRef, - _format: &FormatSettings, + format: &FormatSettings, ) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column .iter() .map(|v| { - let dt = self.to_timestamp(v); + let dt = self.to_timestamp(v,&format.timezone); dt.format(TIME_FMT).to_string() }) .collect(); Ok(result) } - fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { + fn serialize_json(&self, column: &ColumnRef, format: &FormatSettings) -> Result> { let array: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = array .iter() .map(|v| { - let dt = self.to_timestamp(v); + let dt = self.to_timestamp(v,&format.timezone); serde_json::to_value(dt.format(TIME_FMT).to_string()).unwrap() }) .collect(); @@ -84,10 +72,10 @@ impl TypeSerializer for TimestampSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, - _format: &FormatSettings, + format: &FormatSettings, ) -> Result { let array: &PrimitiveColumn = Series::check_get(column)?; - let values: Vec> = array.iter().map(|v| self.to_timestamp(v)).collect(); + let values: Vec> = array.iter().map(|v| self.to_timestamp(v,&format.timezone)).collect(); Ok(Vec::column_from::(values)) } } diff --git a/common/datavalues/src/types/type_timestamp.rs b/common/datavalues/src/types/type_timestamp.rs index 5b21d91aa409b..a0b34f2f958ab 100644 --- a/common/datavalues/src/types/type_timestamp.rs +++ b/common/datavalues/src/types/type_timestamp.rs @@ -151,10 +151,8 @@ impl DataType for TimestampType { } fn create_deserializer(&self, capacity: usize) -> TypeDeserializerImpl { - let tz = "UTC".parse::().unwrap(); TimestampDeserializer { builder: MutablePrimitiveColumn::::with_capacity(capacity), - tz, precision: self.precision, } .into() From 
71b5b00c9fa46daae09eb8c2077c90986d9405f0 Mon Sep 17 00:00:00 2001 From: Veeupup Date: Fri, 6 May 2022 17:31:09 +0800 Subject: [PATCH 12/15] add datetime tz stateless-test Signed-off-by: Veeupup --- .../src/scalars/dates/round_function.rs | 2 +- .../02_0012_function_datetimes.sql | 1 + .../02_0012_function_datetimes_tz.result | 56 +++++++++++++ .../02_0012_function_datetimes_tz.sql | 84 +++++++++++++++++-- 4 files changed, 135 insertions(+), 8 deletions(-) diff --git a/common/functions/src/scalars/dates/round_function.rs b/common/functions/src/scalars/dates/round_function.rs index 41c94c69e274d..453d23319a8ba 100644 --- a/common/functions/src/scalars/dates/round_function.rs +++ b/common/functions/src/scalars/dates/round_function.rs @@ -78,7 +78,7 @@ impl RoundFunction { Round::Second => dt.timestamp_micros(), Round::Minute => tz .ymd(dt.year(), dt.month(), dt.day()) - .and_hms_micro(dt.hour(), 0, 0, 0) + .and_hms_micro(dt.hour(), dt.minute(), 0, 0) .timestamp_micros(), Round::FiveMinutes => tz .ymd(dt.year(), dt.month(), dt.day()) diff --git a/tests/suites/0_stateless/02_function/02_0012_function_datetimes.sql b/tests/suites/0_stateless/02_function/02_0012_function_datetimes.sql index ee4d9c0743581..fbe9cd66221c3 100644 --- a/tests/suites/0_stateless/02_function/02_0012_function_datetimes.sql +++ b/tests/suites/0_stateless/02_function/02_0012_function_datetimes.sql @@ -1,3 +1,4 @@ +set timezone = 'UTC'; SELECT today() >= 18869; SELECT now() >= 1630295616; select toDateTime(1630320462000000), toInt64(toDateTime(1630320462000000)) = 1630320462000000; diff --git a/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.result b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.result index b4e0d14713cc5..be61cbb84437f 100644 --- a/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.result +++ b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.result @@ -1,3 +1,59 @@ ====CAST==== 2021-08-30 10:47:42.000000 +2000-01-01 00:00:00.000000 2021-08-30 18:47:42.000000 +2000-01-01 12:00:00.000000 +====INSERT_WITH_VALUES==== +2021-04-30 22:48:00.000000 +2021-04-30 22:48:00.000000 +2021-05-01 06:48:00.000000 +2021-05-01 06:48:00.000000 +====NUMBER_FUNCTION==== +==UTC== +202104 +20210430 +20210430220000 +2021-04-01 +4 +120 +30 +5 +==Asia/Shanghai== +202105 +20210501 +20210501060000 +2021-05-01 +5 +121 +1 +6 +====ROUNDER_FUNCTION==== +==UTC== +2021-04-30 22:48:31 +2021-04-30 22:48:00 +2021-04-30 22:45:00 +2021-04-30 22:40:00 +2021-04-30 22:45:00 +2021-04-30 22:30:00 +2021-04-30 22:00:00 +2021-04-30 00:00:00 +2021-04-25 +==Asia/Shanghai== +2021-05-01 06:48:31 +2021-05-01 06:48:00 +2021-05-01 06:45:00 +2021-05-01 06:40:00 +2021-05-01 06:45:00 +2021-05-01 06:30:00 +2021-05-01 06:00:00 +2021-05-01 00:00:00 +2021-04-25 +====INTERVAL_FUNCTION==== +==UTC== +2021-05-30 22:48:31.999000 +2020-02-29 22:00:00.000000 +2021-02-28 22:00:00.000000 +==Asia/Shanghai== +2021-06-01 14:48:31.999000 +2020-03-01 06:00:00.000000 +2021-03-01 14:00:00.000000 diff --git a/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql index 46f35c2ee78d5..7fc20020a0193 100644 --- a/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql +++ b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql @@ -1,7 +1,77 @@ --- toDateTime with tz -select "====CAST====" -select toDateTime(1630320462000000); -select toDateTime('1000-01-01 00:00:00'); -set 
timezone='Asia/Shanghai'; -select toDateTime(1630320462000000); -select toDateTime('1000-01-01 08:00:00'); +-- cast function +select "====CAST===="; +set timezone='UTC'; +select toTimestamp(1630320462000000); +select toTimestamp('2000-01-01 00:00:00'); +set timezone='Asia/Shanghai'; -- Asia/Shanghai: +8:00 +select toTimestamp(1630320462000000); +select toTimestamp('2000-01-01 12:00:00'); +-- insert into table, serialization and deserialization +select "====INSERT_WITH_VALUES===="; +set timezone = 'UTC'; +create table tt (a timestamp); +insert into table tt values ('2021-04-30 22:48:00'), (toTimestamp('2021-04-30 22:48:00')); +select * from tt; +set timezone = 'Asia/Shanghai'; +select * from tt; +-- number function +-- 1619820000000000 = 2021-04-30 22:00:00 +select "====NUMBER_FUNCTION===="; +select "==UTC=="; +set timezone = 'UTC'; +select toyyyymm(toTimestamp(1619820000000000)); +select toyyyymmdd(toTimestamp(1619820000000000)); +select toyyyymmddhhmmss(toTimestamp(1619820000000000)); +select tostartofmonth(toTimestamp(1619820000000000)); +select tomonth(toTimestamp(1619820000000000)); +select todayofyear(toTimestamp(1619820000000000)); +select todayofmonth(toTimestamp(1619820000000000)); +select todayofweek(toTimestamp(1619820000000000)); +set timezone = 'Asia/Shanghai'; +select "==Asia/Shanghai=="; +select toyyyymm(toTimestamp(1619820000000000)); +select toyyyymmdd(toTimestamp(1619820000000000)); +select toyyyymmddhhmmss(toTimestamp(1619820000000000)); +select tostartofmonth(toTimestamp(1619820000000000)); +select tomonth(toTimestamp(1619820000000000)); +select todayofyear(toTimestamp(1619820000000000)); +select todayofmonth(toTimestamp(1619820000000000)); +select todayofweek(toTimestamp(1619820000000000)); +-- round function +select "====ROUNDER_FUNCTION===="; +-- 1619822911999000 = 2021-04-30 22:48:31.999 +select "==UTC=="; +set timezone = 'UTC'; +select tostartofsecond(toTimestamp(1619822911999000)); +select tostartofminute(toTimestamp(1619822911999000)); +select tostartoffiveminutes(toTimestamp(1619822911999000)); +select tostartoftenminutes(toTimestamp(1619822911999000)); +select tostartoffifteenminutes(toTimestamp(1619822911999000)); +select timeslot(toTimestamp(1619822911999000)); +select tostartofhour(toTimestamp(1619822911999000)); +select tostartofday(toTimestamp(1619822911999000)); +select tostartofweek(toTimestamp(1619822911999000)); +set timezone = 'Asia/Shanghai'; +select "==Asia/Shanghai=="; +select tostartofsecond(toTimestamp(1619822911999000)); +select tostartofminute(toTimestamp(1619822911999000)); +select tostartoffiveminutes(toTimestamp(1619822911999000)); +select tostartoftenminutes(toTimestamp(1619822911999000)); +select tostartoffifteenminutes(toTimestamp(1619822911999000)); +select timeslot(toTimestamp(1619822911999000)); +select tostartofhour(toTimestamp(1619822911999000)); +select tostartofday(toTimestamp(1619822911999000)); +select tostartofweek(toTimestamp(1619822911999000)); +select "====INTERVAL_FUNCTION===="; +-- 1619822911999000 = 2021-04-30 22:48:31.999 +-- 1583013600000000 = 2020-02-29 22:00:00 +select "==UTC=="; +set timezone = 'UTC'; +select addMonths(totimestamp(1619822911999000), 1); +select totimestamp(1583013600000000); +select addYears(totimestamp(1583013600000000), 1); +select "==Asia/Shanghai=="; +set timezone = 'Asia/Shanghai'; +select addMonths(totimestamp(1619822911999000), 1); +select totimestamp(1583013600000000); +select addYears(totimestamp(1583013600000000), 1); \ No newline at end of file From 28fd15f4d03b14eba75493524bdd823226aa9b85 
Mon Sep 17 00:00:00 2001 From: Veeupup Date: Fri, 6 May 2022 18:08:19 +0800 Subject: [PATCH 13/15] make lint Signed-off-by: Veeupup --- .../src/kernels/data_block_group_by_hash.rs | 4 +-- common/datavalues/src/types/data_type.rs | 1 - .../src/types/deserializations/boolean.rs | 14 ++++++-- .../src/types/deserializations/date.rs | 32 ++++++++++++++--- .../src/types/deserializations/mod.rs | 34 +++++++++++++++---- .../src/types/deserializations/null.rs | 14 ++++++-- .../src/types/deserializations/nullable.rs | 20 +++++++++-- .../src/types/deserializations/number.rs | 14 ++++++-- .../src/types/deserializations/string.rs | 20 +++++++++-- .../src/types/deserializations/timestamp.rs | 33 ++++++++++++++---- .../src/types/deserializations/variant.rs | 20 +++++++++-- .../src/types/serializations/timestamp.rs | 16 ++++----- common/datavalues/src/types/type_nullable.rs | 1 - common/datavalues/src/types/type_timestamp.rs | 1 - .../tests/it/types/deserializations.rs | 4 ++- .../src/scalars/dates/interval_function.rs | 3 +- .../src/scalars/dates/number_function.rs | 1 - .../src/scalars/dates/round_function.rs | 6 ++-- .../expressions/cast_from_datetimes.rs | 2 -- .../scalars/expressions/cast_from_string.rs | 4 +-- .../scalars/expressions/cast_from_variant.rs | 5 ++- .../src/scalars/expressions/cast_with_type.rs | 1 - common/streams/src/sources/source_csv.rs | 13 ++++--- common/streams/src/sources/source_ndjson.rs | 8 ++--- .../streams/tests/it/sources/source_ndjson.rs | 4 +-- .../transform_expression_executor.rs | 2 +- .../pipelines/transforms/transform_sink.rs | 1 + .../servers/clickhouse/interactive_worker.rs | 6 ++-- .../clickhouse/writers/query_writer.rs | 1 - query/src/servers/http/clickhouse_handler.rs | 1 - query/src/servers/http/v1/load.rs | 1 - .../mysql/writers/query_result_writer.rs | 3 +- query/src/sessions/query_ctx.rs | 3 +- query/src/sessions/query_ctx_shared.rs | 1 + query/src/sql/statements/value_source.rs | 7 +--- query/src/storages/s3/s3_stage_source.rs | 1 - .../it/servers/http/formats/tsv_output.rs | 1 + query/tests/it/servers/http/json_block.rs | 25 ++++++++++++-- 38 files changed, 231 insertions(+), 97 deletions(-) diff --git a/common/datablocks/src/kernels/data_block_group_by_hash.rs b/common/datablocks/src/kernels/data_block_group_by_hash.rs index 910176dcb67a4..6467c8be54082 100644 --- a/common/datablocks/src/kernels/data_block_group_by_hash.rs +++ b/common/datablocks/src/kernels/data_block_group_by_hash.rs @@ -18,9 +18,9 @@ use std::hash::Hash; use std::marker::PhantomData; use std::ops::Not; -use common_io::prelude::FormatSettings; use common_datavalues::prelude::*; use common_exception::Result; +use common_io::prelude::FormatSettings; use crate::DataBlock; @@ -339,7 +339,7 @@ where T: PrimitiveType &reader[null_offsize..], step, rows, - &format + &format, )?; null_offsize += 1; diff --git a/common/datavalues/src/types/data_type.rs b/common/datavalues/src/types/data_type.rs index a5fbd7f024100..987f47c0fd23d 100644 --- a/common/datavalues/src/types/data_type.rs +++ b/common/datavalues/src/types/data_type.rs @@ -15,7 +15,6 @@ use std::any::Any; use std::collections::BTreeMap; - use common_arrow::arrow::datatypes::DataType as ArrowType; use common_arrow::arrow::datatypes::Field as ArrowField; use common_exception::Result; diff --git a/common/datavalues/src/types/deserializations/boolean.rs b/common/datavalues/src/types/deserializations/boolean.rs index 0edcd8e1c10f3..3bac9f8171b54 100644 --- a/common/datavalues/src/types/deserializations/boolean.rs +++ 
b/common/datavalues/src/types/deserializations/boolean.rs @@ -33,7 +33,13 @@ impl TypeDeserializer for BooleanDeserializer { self.builder.append_value(false); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for row in 0..rows { let mut reader = &reader[step * row..]; let value: bool = reader.read_scalar()?; @@ -54,7 +60,11 @@ impl TypeDeserializer for BooleanDeserializer { Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { let v = if BufferReadExt::ignore_insensitive_bytes(reader, b"true")? { Ok(true) } else if BufferReadExt::ignore_insensitive_bytes(reader, b"false")? { diff --git a/common/datavalues/src/types/deserializations/date.rs b/common/datavalues/src/types/deserializations/date.rs index 615b4572e00e5..2d8d6f4f93e1f 100644 --- a/common/datavalues/src/types/deserializations/date.rs +++ b/common/datavalues/src/types/deserializations/date.rs @@ -44,7 +44,13 @@ where self.builder.append_value(T::default()); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for row in 0..rows { let mut reader = &reader[step * row..]; let value: T = reader.read_scalar()?; @@ -78,7 +84,11 @@ where Ok(()) } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text_quoted( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { reader.must_ignore_byte(b'\'')?; let date = reader.read_date_text()?; let days = uniform(date); @@ -89,7 +99,11 @@ where Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { let date = reader.read_date_text()?; let days = uniform(date); let _ = check_date(days.as_i32())?; @@ -97,7 +111,11 @@ where Ok(()) } - fn de_text_csv(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text_csv( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { let maybe_quote = reader.ignore(|f| f == b'\'' || f == b'"')?; let date = reader.read_date_text()?; let days = uniform(date); @@ -109,7 +127,11 @@ where Ok(()) } - fn de_text_json(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text_json( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { reader.must_ignore_byte(b'"')?; let date = reader.read_date_text()?; let days = uniform(date); diff --git a/common/datavalues/src/types/deserializations/mod.rs b/common/datavalues/src/types/deserializations/mod.rs index 75e2be63092af..6adf6f9d151be 100644 --- a/common/datavalues/src/types/deserializations/mod.rs +++ b/common/datavalues/src/types/deserializations/mod.rs @@ -43,7 +43,13 @@ pub trait TypeDeserializer: Send + Sync { fn de_default(&mut self, format: &FormatSettings); - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, format: 
&FormatSettings) -> Result<()>; + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + format: &FormatSettings, + ) -> Result<()>; fn de_json(&mut self, reader: &Value, format: &FormatSettings) -> Result<()>; @@ -53,17 +59,33 @@ pub trait TypeDeserializer: Send + Sync { fn de_whole_text(&mut self, reader: &[u8], format: &FormatSettings) -> Result<()>; - fn de_text(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()>; - - fn de_text_csv(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()>; + + fn de_text_csv( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { self.de_text(reader, format) } - fn de_text_json(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + fn de_text_json( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { self.de_text(reader, format) } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + fn de_text_quoted( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { self.de_text(reader, format) } diff --git a/common/datavalues/src/types/deserializations/null.rs b/common/datavalues/src/types/deserializations/null.rs index 9612ecd13fefc..f6f6cd7c42b2a 100644 --- a/common/datavalues/src/types/deserializations/null.rs +++ b/common/datavalues/src/types/deserializations/null.rs @@ -36,7 +36,13 @@ impl TypeDeserializer for NullDeserializer { self.builder.append_default(); } - fn de_fixed_binary_batch(&mut self, _reader: &[u8], _step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + _reader: &[u8], + _step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for _ in 0..rows { self.builder.append_default(); } @@ -52,7 +58,11 @@ impl TypeDeserializer for NullDeserializer { Ok(()) } - fn de_text(&mut self, _reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text( + &mut self, + _reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { self.builder.append_default(); Ok(()) } diff --git a/common/datavalues/src/types/deserializations/nullable.rs b/common/datavalues/src/types/deserializations/nullable.rs index 36c7b45c19f66..c5cff5dabac20 100644 --- a/common/datavalues/src/types/deserializations/nullable.rs +++ b/common/datavalues/src/types/deserializations/nullable.rs @@ -45,7 +45,13 @@ impl TypeDeserializer for NullableDeserializer { self.bitmap.push(false); } - fn de_fixed_binary_batch(&mut self, _reader: &[u8], _step: usize, _rows: usize, _format: &FormatSettings) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + _reader: &[u8], + _step: usize, + _rows: usize, + _format: &FormatSettings, + ) -> Result<()> { // it's covered outside unreachable!() } @@ -64,7 +70,11 @@ impl TypeDeserializer for NullableDeserializer { } // TODO: support null text setting - fn de_text(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { if reader.ignore_insensitive_bytes(b"null")? 
{ self.de_default(format); return Ok(()); @@ -74,7 +84,11 @@ impl TypeDeserializer for NullableDeserializer { Ok(()) } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + fn de_text_quoted( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { if reader.ignore_insensitive_bytes(b"null")? { self.de_default(format); return Ok(()); diff --git a/common/datavalues/src/types/deserializations/number.rs b/common/datavalues/src/types/deserializations/number.rs index 6a29e2580e53e..e6cb80095cfda 100644 --- a/common/datavalues/src/types/deserializations/number.rs +++ b/common/datavalues/src/types/deserializations/number.rs @@ -38,7 +38,13 @@ where self.builder.append_value(T::default()); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for row in 0..rows { let mut reader = &reader[step * row..]; let value: T = reader.read_scalar()?; @@ -78,7 +84,11 @@ where Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { let v: T = if !T::FLOATING { reader.read_int_text() } else { diff --git a/common/datavalues/src/types/deserializations/string.rs b/common/datavalues/src/types/deserializations/string.rs index 9e29b53c8cde0..a3e4f9c13df0f 100644 --- a/common/datavalues/src/types/deserializations/string.rs +++ b/common/datavalues/src/types/deserializations/string.rs @@ -55,7 +55,13 @@ impl TypeDeserializer for StringDeserializer { self.builder.append_value(""); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for row in 0..rows { let reader = &reader[step * row..]; self.builder.append_value(reader); @@ -73,7 +79,11 @@ impl TypeDeserializer for StringDeserializer { } } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text_quoted( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { self.buffer.clear(); reader.read_quoted_text(&mut self.buffer, b'\'')?; self.builder.append_value(self.buffer.as_slice()); @@ -85,7 +95,11 @@ impl TypeDeserializer for StringDeserializer { Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { self.buffer.clear(); reader.read_escaped_string_text(&mut self.buffer)?; self.builder.append_value(self.buffer.as_slice()); diff --git a/common/datavalues/src/types/deserializations/timestamp.rs b/common/datavalues/src/types/deserializations/timestamp.rs index 6880b505bec8d..f48721945b22b 100644 --- a/common/datavalues/src/types/deserializations/timestamp.rs +++ b/common/datavalues/src/types/deserializations/timestamp.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use chrono_tz::Tz; use common_exception::*; use common_io::prelude::*; use num::cast::AsPrimitive; @@ -37,7 +36,13 @@ impl TypeDeserializer for TimestampDeserializer { self.builder.append_value(i64::default()); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for row in 0..rows { let mut reader = &reader[step * row..]; let value: i64 = reader.read_scalar()?; @@ -63,7 +68,11 @@ impl TypeDeserializer for TimestampDeserializer { } } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + fn de_text_quoted( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { reader.must_ignore_byte(b'\'')?; let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); @@ -83,7 +92,11 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; @@ -91,7 +104,11 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_text_csv(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + fn de_text_csv( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { let maybe_quote = reader.ignore(|f| f == b'\'' || f == b'"')?; let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); @@ -103,7 +120,11 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_text_json(&mut self, reader: &mut CheckpointReader, format: &FormatSettings) -> Result<()> { + fn de_text_json( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { reader.must_ignore_byte(b'"')?; let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); diff --git a/common/datavalues/src/types/deserializations/variant.rs b/common/datavalues/src/types/deserializations/variant.rs index 18f0a3f1855d0..af13c92ee3a10 100644 --- a/common/datavalues/src/types/deserializations/variant.rs +++ b/common/datavalues/src/types/deserializations/variant.rs @@ -55,7 +55,13 @@ impl TypeDeserializer for VariantDeserializer { .append_value(VariantValue::from(serde_json::Value::Null)); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize, _format: &FormatSettings) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for row in 0..rows { let reader = &reader[step * row..]; let val = serde_json::from_slice(reader)?; @@ -69,7 +75,11 @@ impl TypeDeserializer for VariantDeserializer { Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { self.buffer.clear(); reader.read_escaped_string_text(&mut self.buffer)?; let val = serde_json::from_slice(self.buffer.as_slice())?; @@ -83,7 +93,11 @@ impl TypeDeserializer for VariantDeserializer { Ok(()) } - fn de_text_quoted(&mut self, 
reader: &mut CheckpointReader, _format: &FormatSettings) -> Result<()> { + fn de_text_quoted( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { self.buffer.clear(); reader.read_quoted_text(&mut self.buffer, b'\'')?; diff --git a/common/datavalues/src/types/serializations/timestamp.rs b/common/datavalues/src/types/serializations/timestamp.rs index ab3627c1f91fd..bbde1680641be 100644 --- a/common/datavalues/src/types/serializations/timestamp.rs +++ b/common/datavalues/src/types/serializations/timestamp.rs @@ -25,7 +25,6 @@ use crate::prelude::*; #[derive(Debug, Clone, Default)] pub struct TimestampSerializer; - impl TimestampSerializer { pub fn to_timestamp(&self, value: &i64, tz: &Tz) -> DateTime { value.to_timestamp(tz) @@ -41,16 +40,12 @@ impl TypeSerializer for TimestampSerializer { Ok(dt.format(TIME_FMT).to_string()) } - fn serialize_column( - &self, - column: &ColumnRef, - format: &FormatSettings, - ) -> Result> { + fn serialize_column(&self, column: &ColumnRef, format: &FormatSettings) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column .iter() .map(|v| { - let dt = self.to_timestamp(v,&format.timezone); + let dt = self.to_timestamp(v, &format.timezone); dt.format(TIME_FMT).to_string() }) .collect(); @@ -62,7 +57,7 @@ impl TypeSerializer for TimestampSerializer { let result: Vec = array .iter() .map(|v| { - let dt = self.to_timestamp(v,&format.timezone); + let dt = self.to_timestamp(v, &format.timezone); serde_json::to_value(dt.format(TIME_FMT).to_string()).unwrap() }) .collect(); @@ -75,7 +70,10 @@ impl TypeSerializer for TimestampSerializer { format: &FormatSettings, ) -> Result { let array: &PrimitiveColumn = Series::check_get(column)?; - let values: Vec> = array.iter().map(|v| self.to_timestamp(v,&format.timezone)).collect(); + let values: Vec> = array + .iter() + .map(|v| self.to_timestamp(v, &format.timezone)) + .collect(); Ok(Vec::column_from::(values)) } } diff --git a/common/datavalues/src/types/type_nullable.rs b/common/datavalues/src/types/type_nullable.rs index 5595a5aa48731..903d682bea9b2 100644 --- a/common/datavalues/src/types/type_nullable.rs +++ b/common/datavalues/src/types/type_nullable.rs @@ -15,7 +15,6 @@ use std::collections::BTreeMap; use std::sync::Arc; - use common_arrow::arrow::datatypes::DataType as ArrowType; use common_arrow::bitmap::MutableBitmap; use common_exception::ErrorCode; diff --git a/common/datavalues/src/types/type_timestamp.rs b/common/datavalues/src/types/type_timestamp.rs index a0b34f2f958ab..65b28edb31db6 100644 --- a/common/datavalues/src/types/type_timestamp.rs +++ b/common/datavalues/src/types/type_timestamp.rs @@ -18,7 +18,6 @@ use std::sync::Arc; use chrono::DateTime; use chrono::TimeZone; use chrono::Utc; -use chrono_tz::Tz; use common_arrow::arrow::datatypes::DataType as ArrowType; use common_exception::ErrorCode; use common_exception::Result; diff --git a/common/datavalues/tests/it/types/deserializations.rs b/common/datavalues/tests/it/types/deserializations.rs index 14928d33f4095..cbcf6ef993527 100644 --- a/common/datavalues/tests/it/types/deserializations.rs +++ b/common/datavalues/tests/it/types/deserializations.rs @@ -14,6 +14,7 @@ use common_datavalues::prelude::*; use common_exception::Result; +use common_io::prelude::FormatSettings; #[test] fn test_nullable_deserializer_pop() -> Result<()> { @@ -25,10 +26,11 @@ fn test_nullable_deserializer_pop() -> Result<()> { ]; let data_type = NullableType::new_impl(BooleanType::new_impl()); let mut 
deserializer = data_type.create_deserializer(4); + let format = FormatSettings::default(); // Append data value for value in values_vec.iter() { - deserializer.append_data_value(value.clone())?; + deserializer.append_data_value(value.clone(), &format)?; } // Pop all data value diff --git a/common/functions/src/scalars/dates/interval_function.rs b/common/functions/src/scalars/dates/interval_function.rs index e941eee098c5b..a9aef2bffe4f4 100644 --- a/common/functions/src/scalars/dates/interval_function.rs +++ b/common/functions/src/scalars/dates/interval_function.rs @@ -16,7 +16,6 @@ use std::fmt; use std::marker::PhantomData; use std::sync::Arc; - use common_datavalues::chrono::Datelike; use common_datavalues::chrono::Duration; use common_datavalues::chrono::NaiveDate; @@ -148,7 +147,7 @@ where _input_rows: usize, ) -> Result { // Todo(zhyass): define the ctx out of the eval. - let mut ctx = EvalContext::new(self.factor, self.precision, None, func_ctx.tz.clone()); + let mut ctx = EvalContext::new(self.factor, self.precision, None, func_ctx.tz); let col = scalar_binary_op( columns[0].column(), columns[1].column(), diff --git a/common/functions/src/scalars/dates/number_function.rs b/common/functions/src/scalars/dates/number_function.rs index 1c7edf60fc85c..4c708056516e5 100644 --- a/common/functions/src/scalars/dates/number_function.rs +++ b/common/functions/src/scalars/dates/number_function.rs @@ -20,7 +20,6 @@ use common_datavalues::chrono::DateTime; use common_datavalues::chrono::Datelike; use common_datavalues::chrono::TimeZone; use common_datavalues::chrono::Timelike; - use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; diff --git a/common/functions/src/scalars/dates/round_function.rs b/common/functions/src/scalars/dates/round_function.rs index 453d23319a8ba..7e0477942c4f4 100644 --- a/common/functions/src/scalars/dates/round_function.rs +++ b/common/functions/src/scalars/dates/round_function.rs @@ -124,8 +124,10 @@ impl Function for RoundFunction { _input_rows: usize, ) -> Result { let func = |val: i64, ctx: &mut EvalContext| self.execute(val, &ctx.tz); - let mut eval_context = EvalContext::default(); - eval_context.tz = func_ctx.tz.clone(); + let mut eval_context = EvalContext { + tz: func_ctx.tz, + ..Default::default() + }; let col = scalar_unary_op::(columns[0].column(), func, &mut eval_context)?; for micros in col.iter() { let _ = check_timestamp(*micros)?; diff --git a/common/functions/src/scalars/expressions/cast_from_datetimes.rs b/common/functions/src/scalars/expressions/cast_from_datetimes.rs index 4cdb4904ca9f6..8c94d21f3a60d 100644 --- a/common/functions/src/scalars/expressions/cast_from_datetimes.rs +++ b/common/functions/src/scalars/expressions/cast_from_datetimes.rs @@ -18,9 +18,7 @@ use chrono_tz::Tz; use common_arrow::arrow::bitmap::Bitmap; use common_datavalues::chrono::DateTime; use common_datavalues::chrono::TimeZone; - use common_datavalues::prelude::*; - use common_exception::Result; use super::cast_with_type::arrow_cast_compute; diff --git a/common/functions/src/scalars/expressions/cast_from_string.rs b/common/functions/src/scalars/expressions/cast_from_string.rs index f404fb87cfc2e..67507b4598fb7 100644 --- a/common/functions/src/scalars/expressions/cast_from_string.rs +++ b/common/functions/src/scalars/expressions/cast_from_string.rs @@ -17,13 +17,11 @@ use common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::temporal_conversions::EPOCH_DAYS_FROM_CE; use common_datavalues::chrono::DateTime; use 
common_datavalues::chrono::Datelike; -use common_datavalues::chrono::TimeZone; use common_datavalues::chrono::NaiveDate; - +use common_datavalues::chrono::TimeZone; use common_datavalues::prelude::*; use common_exception::Result; - use super::cast_with_type::arrow_cast_compute; use super::cast_with_type::new_mutable_bitmap; use super::cast_with_type::CastOptions; diff --git a/common/functions/src/scalars/expressions/cast_from_variant.rs b/common/functions/src/scalars/expressions/cast_from_variant.rs index c845e79213343..974334d571a93 100644 --- a/common/functions/src/scalars/expressions/cast_from_variant.rs +++ b/common/functions/src/scalars/expressions/cast_from_variant.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. - use common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::temporal_conversions::EPOCH_DAYS_FROM_CE; use common_datavalues::chrono::Datelike; @@ -20,12 +19,12 @@ use common_datavalues::prelude::*; use common_datavalues::with_match_primitive_type_id; use common_exception::ErrorCode; use common_exception::Result; - use serde_json::Value as JsonValue; -use crate::scalars::FunctionContext; + use super::cast_from_string::string_to_date; use super::cast_from_string::string_to_timestamp; use super::cast_with_type::new_mutable_bitmap; +use crate::scalars::FunctionContext; pub fn cast_from_variant( column: &ColumnRef, diff --git a/common/functions/src/scalars/expressions/cast_with_type.rs b/common/functions/src/scalars/expressions/cast_with_type.rs index cb2527944e907..622dc552599ed 100644 --- a/common/functions/src/scalars/expressions/cast_with_type.rs +++ b/common/functions/src/scalars/expressions/cast_with_type.rs @@ -14,7 +14,6 @@ use std::sync::Arc; - use common_arrow::arrow::array::ArrayRef; use common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::compute::cast; diff --git a/common/streams/src/sources/source_csv.rs b/common/streams/src/sources/source_csv.rs index f911d82563739..7a2225f4550cd 100644 --- a/common/streams/src/sources/source_csv.rs +++ b/common/streams/src/sources/source_csv.rs @@ -18,7 +18,6 @@ use common_datablocks::DataBlock; use common_datavalues::DataSchemaRef; use common_datavalues::DataType; use common_datavalues::TypeDeserializer; -use common_datavalues::TypeID; use common_exception::ErrorCode; use common_exception::Result; use common_exception::ToErrorCode; @@ -62,7 +61,7 @@ impl CsvSourceBuilder { let empty_as_default = format_settings.empty_as_default; let skip_header = format_settings.skip_header; - let tz = format_settings.timezone.clone(); + let tz = format_settings.timezone; CsvSourceBuilder { schema, @@ -165,9 +164,7 @@ where R: AsyncRead + Unpin + Send .schema .fields() .iter() - .map(|f| { - f.data_type().create_deserializer(self.builder.block_size) - }) + .map(|f| f.data_type().create_deserializer(self.builder.block_size)) .collect::>(); let mut rows = 0; @@ -181,8 +178,10 @@ where R: AsyncRead + Unpin + Send if record.is_empty() { break; } - let mut format = FormatSettings::default(); - format.timezone = self.builder.tz.clone(); + let format = FormatSettings { + timezone: self.builder.tz, + ..Default::default() + }; for (col, pack) in packs.iter_mut().enumerate() { match record.get(col) { Some(bytes) => { diff --git a/common/streams/src/sources/source_ndjson.rs b/common/streams/src/sources/source_ndjson.rs index 69a8c3e086fa8..459ab6fa349f4 100644 --- a/common/streams/src/sources/source_ndjson.rs +++ b/common/streams/src/sources/source_ndjson.rs @@ 
-15,12 +15,10 @@ use std::borrow::Cow; use async_trait::async_trait; -use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::DataSchemaRef; use common_datavalues::DataType; use common_datavalues::TypeDeserializer; -use common_datavalues::TypeID; use common_exception::ErrorCode; use common_exception::Result; use common_exception::ToErrorCode; @@ -44,7 +42,7 @@ impl NDJsonSourceBuilder { schema, block_size: 10000, size_limit: usize::MAX, - format + format, } } @@ -112,9 +110,7 @@ where R: AsyncBufRead + Unpin + Send .schema .fields() .iter() - .map(|f| { - f.data_type().create_deserializer(self.builder.block_size) - }) + .map(|f| f.data_type().create_deserializer(self.builder.block_size)) .collect::>(); let fields = self diff --git a/common/streams/tests/it/sources/source_ndjson.rs b/common/streams/tests/it/sources/source_ndjson.rs index 06c21e12ea9ab..3ef7ef3799848 100644 --- a/common/streams/tests/it/sources/source_ndjson.rs +++ b/common/streams/tests/it/sources/source_ndjson.rs @@ -15,6 +15,7 @@ use common_base::tokio; use common_datablocks::assert_blocks_eq; use common_exception::Result; +use common_io::prelude::FormatSettings; use common_streams::NDJsonSourceBuilder; use common_streams::Source; @@ -36,8 +37,7 @@ async fn test_source_ndjson() -> Result<()> { .as_bytes(); let reader = futures::io::Cursor::new(bytes); - - let builder = NDJsonSourceBuilder::create(schema); + let builder = NDJsonSourceBuilder::create(schema, FormatSettings::default()); let mut json_source = builder.build(reader).unwrap(); // expects `page_nums_expects` blocks, and while let Some(block) = json_source.read().await? { diff --git a/query/src/pipelines/transforms/transform_expression_executor.rs b/query/src/pipelines/transforms/transform_expression_executor.rs index 32b47aa7a884f..bc9f4ff40443f 100644 --- a/query/src/pipelines/transforms/transform_expression_executor.rs +++ b/query/src/pipelines/transforms/transform_expression_executor.rs @@ -14,8 +14,8 @@ use std::collections::HashMap; use std::sync::Arc; -use chrono_tz::Tz; +use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::ErrorCode; diff --git a/query/src/pipelines/transforms/transform_sink.rs b/query/src/pipelines/transforms/transform_sink.rs index 99f5b0afe971c..cf44570581a77 100644 --- a/query/src/pipelines/transforms/transform_sink.rs +++ b/query/src/pipelines/transforms/transform_sink.rs @@ -14,6 +14,7 @@ use std::any::Any; use std::sync::Arc; + use chrono_tz::Tz; use common_datavalues::DataSchemaRef; use common_datavalues::DataType; diff --git a/query/src/servers/clickhouse/interactive_worker.rs b/query/src/servers/clickhouse/interactive_worker.rs index f965c7cce701c..c14d3783322e9 100644 --- a/query/src/servers/clickhouse/interactive_worker.rs +++ b/query/src/servers/clickhouse/interactive_worker.rs @@ -54,10 +54,8 @@ impl ClickHouseSession for InteractiveWorker { .session .get_shared_query_context() .await - .map_err(|err| to_clickhouse_err(err))?; - let format = query_ctx - .get_format_settings() - .map_err(|err| to_clickhouse_err(err))?; + .map_err(to_clickhouse_err)?; + let format = query_ctx.get_format_settings().map_err(to_clickhouse_err)?; if let Err(cause) = query_writer.write(get_query_result.await, &format).await { let new_error = cause.add_message(&ctx.state.query); return Err(to_clickhouse_err(new_error)); diff --git a/query/src/servers/clickhouse/writers/query_writer.rs b/query/src/servers/clickhouse/writers/query_writer.rs index fe5fc00db3835..51c3ae86a0fa3 
100644 --- a/query/src/servers/clickhouse/writers/query_writer.rs +++ b/query/src/servers/clickhouse/writers/query_writer.rs @@ -14,7 +14,6 @@ use std::borrow::Cow; -use chrono_tz::Tz; use common_base::ProgressValues; use common_datablocks::DataBlock; use common_datavalues::prelude::*; diff --git a/query/src/servers/http/clickhouse_handler.rs b/query/src/servers/http/clickhouse_handler.rs index 743fdb3eef462..6effe027a6a0c 100644 --- a/query/src/servers/http/clickhouse_handler.rs +++ b/query/src/servers/http/clickhouse_handler.rs @@ -15,7 +15,6 @@ use std::sync::Arc; use async_stream::stream; -use chrono_tz::Tz; use common_exception::ErrorCode; use common_exception::Result; use common_exception::ToErrorCode; diff --git a/query/src/servers/http/v1/load.rs b/query/src/servers/http/v1/load.rs index 572e017d95558..bee51b2675554 100644 --- a/query/src/servers/http/v1/load.rs +++ b/query/src/servers/http/v1/load.rs @@ -16,7 +16,6 @@ use std::sync::Arc; use async_compat::CompatExt; use async_stream::stream; -use chrono_tz::Tz; use common_base::ProgressValues; use common_exception::ErrorCode; use common_exception::ToErrorCode; diff --git a/query/src/servers/mysql/writers/query_result_writer.rs b/query/src/servers/mysql/writers/query_result_writer.rs index 122716fc29a31..7a10789bc49aa 100644 --- a/query/src/servers/mysql/writers/query_result_writer.rs +++ b/query/src/servers/mysql/writers/query_result_writer.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::prelude::TypeID; use common_datavalues::remove_nullable; @@ -114,7 +113,7 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { } let block = blocks[0].clone(); - let tz = format.timezone.clone(); + let tz = format.timezone; match convert_schema(block.schema()) { Err(error) => Self::err(&error, dataset_writer), Ok(columns) => { diff --git a/query/src/sessions/query_ctx.rs b/query/src/sessions/query_ctx.rs index bfb3979f62288..9184806442b64 100644 --- a/query/src/sessions/query_ctx.rs +++ b/query/src/sessions/query_ctx.rs @@ -18,6 +18,7 @@ use std::net::SocketAddr; use std::sync::atomic::Ordering; use std::sync::atomic::Ordering::Acquire; use std::sync::Arc; + use chrono_tz::Tz; use common_base::tokio::task::JoinHandle; use common_base::Progress; @@ -403,7 +404,7 @@ impl QueryContext { let tz = tz.parse::().map_err(|_| { ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") })?; - Ok(FunctionContext {tz}) + Ok(FunctionContext { tz }) } } diff --git a/query/src/sessions/query_ctx_shared.rs b/query/src/sessions/query_ctx_shared.rs index 0ecd7f9646ac4..b04cc76fd2906 100644 --- a/query/src/sessions/query_ctx_shared.rs +++ b/query/src/sessions/query_ctx_shared.rs @@ -16,6 +16,7 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; use std::sync::atomic::AtomicUsize; use std::sync::Arc; + use chrono_tz::Tz; use common_base::Progress; use common_base::Runtime; diff --git a/query/src/sql/statements/value_source.rs b/query/src/sql/statements/value_source.rs index 19ea405b8d014..6e35aaa7d7d97 100644 --- a/query/src/sql/statements/value_source.rs +++ b/query/src/sql/statements/value_source.rs @@ -16,7 +16,6 @@ use std::ops::Not; use std::sync::Arc; -use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::BacktraceGuard; @@ -56,15 +55,11 @@ impl ValueSource { } pub async fn read<'a>(&self, reader: &mut 
CpBufferReader<'a>) -> Result { - let format = self.ctx.get_format_settings()?; - let tz = format.timezone.clone(); let mut desers = self .schema .fields() .iter() - .map(|f| { - f.data_type().create_deserializer(1024) - }) + .map(|f| f.data_type().create_deserializer(1024)) .collect::>(); let col_size = desers.len(); diff --git a/query/src/storages/s3/s3_stage_source.rs b/query/src/storages/s3/s3_stage_source.rs index d57f0b184a3cd..56251992328a6 100644 --- a/query/src/storages/s3/s3_stage_source.rs +++ b/query/src/storages/s3/s3_stage_source.rs @@ -16,7 +16,6 @@ use std::collections::VecDeque; use std::future::Future; use std::sync::Arc; -use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; diff --git a/query/tests/it/servers/http/formats/tsv_output.rs b/query/tests/it/servers/http/formats/tsv_output.rs index 105756bb1bb83..253d0dae03b40 100644 --- a/query/tests/it/servers/http/formats/tsv_output.rs +++ b/query/tests/it/servers/http/formats/tsv_output.rs @@ -16,6 +16,7 @@ use common_arrow::bitmap::MutableBitmap; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; +use common_io::prelude::FormatSettings; use databend_query::servers::http::formats::tsv_output::block_to_tsv; use pretty_assertions::assert_eq; diff --git a/query/tests/it/servers/http/json_block.rs b/query/tests/it/servers/http/json_block.rs index 3d1efa4e74219..f9745c9641b28 100644 --- a/query/tests/it/servers/http/json_block.rs +++ b/query/tests/it/servers/http/json_block.rs @@ -16,6 +16,7 @@ use common_arrow::bitmap::MutableBitmap; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; +use common_io::prelude::FormatSettings; use databend_query::servers::http::v1::json_block::JsonBlock; use pretty_assertions::assert_eq; use serde::Serialize; @@ -70,9 +71,27 @@ fn test_data_block(is_nullable: bool) -> Result<()> { let format = FormatSettings::default(); let json_block = JsonBlock::new(&block, &format)?; let expect = vec![ - vec![val(1), val("a"), val(true), val(1.1), val("1970-01-02")], - vec![val(2), val("b"), val(true), val(2.2), val("1970-01-03")], - vec![val(3), val("c"), val(false), val(3.3), val("1970-01-04")], + vec![ + val(1_i32), + val("a"), + val(true), + val(1.1_f64), + val("1970-01-02"), + ], + vec![ + val(2_i32), + val("b"), + val(true), + val(2.2_f64), + val("1970-01-03"), + ], + vec![ + val(3_i32), + val("c"), + val(false), + val(3.3_f64), + val("1970-01-04"), + ], ]; assert_eq!(json_block.data().clone(), expect); From 2ed34315a8cede1f34f1fbdbb7b7116a0d0fc1a9 Mon Sep 17 00:00:00 2001 From: Veeupup Date: Fri, 6 May 2022 19:49:28 +0800 Subject: [PATCH 14/15] make taplo happy Signed-off-by: Veeupup --- common/datablocks/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/datablocks/Cargo.toml b/common/datablocks/Cargo.toml index 833dcd9b1409c..d7d33a2d24c95 100644 --- a/common/datablocks/Cargo.toml +++ b/common/datablocks/Cargo.toml @@ -12,11 +12,11 @@ test = false [dependencies] # In alphabetical order # Workspace dependencies -common-io = { path = "../io" } common-arrow = { path = "../arrow" } common-datavalues = { path = "../datavalues" } common-exception = { path = "../exception" } common-infallible = { path = "../infallible" } +common-io = { path = "../io" } # Github dependencies From 5a9580bd25e5df8985f5c6b09511cbb66c4bdb73 Mon Sep 17 00:00:00 2001 From: Veeupup Date: Fri, 6 May 2022 21:50:40 +0800 Subject: [PATCH 
15/15] fix test Signed-off-by: Veeupup --- .../src/scalars/dates/number_function.rs | 2 +- .../02_0012_function_datetimes_tz.sql | 90 +++++++++---------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/common/functions/src/scalars/dates/number_function.rs b/common/functions/src/scalars/dates/number_function.rs index 7dc4d6ea3767f..cdc257443fe3b 100644 --- a/common/functions/src/scalars/dates/number_function.rs +++ b/common/functions/src/scalars/dates/number_function.rs @@ -291,7 +291,7 @@ impl NumberOperator for ToSecond { RoundFunction::try_create( "toStartOfMinute", &[&TimestampType::new_impl(0)], - Round::Second, + Round::Minute, ) .unwrap(), ) diff --git a/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql index 7fc20020a0193..6f63e00166d72 100644 --- a/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql +++ b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql @@ -1,16 +1,16 @@ -- cast function select "====CAST===="; set timezone='UTC'; -select toTimestamp(1630320462000000); -select toTimestamp('2000-01-01 00:00:00'); +select to_timestamp(1630320462000000); +select to_timestamp('2000-01-01 00:00:00'); set timezone='Asia/Shanghai'; -- Asia/Shanghai: +8:00 -select toTimestamp(1630320462000000); -select toTimestamp('2000-01-01 12:00:00'); +select to_timestamp(1630320462000000); +select to_timestamp('2000-01-01 12:00:00'); -- insert into table, serialization and deserialization select "====INSERT_WITH_VALUES===="; set timezone = 'UTC'; create table tt (a timestamp); -insert into table tt values ('2021-04-30 22:48:00'), (toTimestamp('2021-04-30 22:48:00')); +insert into table tt values ('2021-04-30 22:48:00'), (to_timestamp('2021-04-30 22:48:00')); select * from tt; set timezone = 'Asia/Shanghai'; select * from tt; @@ -19,59 +19,59 @@ select * from tt; select "====NUMBER_FUNCTION===="; select "==UTC=="; set timezone = 'UTC'; -select toyyyymm(toTimestamp(1619820000000000)); -select toyyyymmdd(toTimestamp(1619820000000000)); -select toyyyymmddhhmmss(toTimestamp(1619820000000000)); -select tostartofmonth(toTimestamp(1619820000000000)); -select tomonth(toTimestamp(1619820000000000)); -select todayofyear(toTimestamp(1619820000000000)); -select todayofmonth(toTimestamp(1619820000000000)); -select todayofweek(toTimestamp(1619820000000000)); +select toyyyymm(to_timestamp(1619820000000000)); +select toyyyymmdd(to_timestamp(1619820000000000)); +select toyyyymmddhhmmss(to_timestamp(1619820000000000)); +select tostartofmonth(to_timestamp(1619820000000000)); +select tomonth(to_timestamp(1619820000000000)); +select todayofyear(to_timestamp(1619820000000000)); +select todayofmonth(to_timestamp(1619820000000000)); +select todayofweek(to_timestamp(1619820000000000)); set timezone = 'Asia/Shanghai'; select "==Asia/Shanghai=="; -select toyyyymm(toTimestamp(1619820000000000)); -select toyyyymmdd(toTimestamp(1619820000000000)); -select toyyyymmddhhmmss(toTimestamp(1619820000000000)); -select tostartofmonth(toTimestamp(1619820000000000)); -select tomonth(toTimestamp(1619820000000000)); -select todayofyear(toTimestamp(1619820000000000)); -select todayofmonth(toTimestamp(1619820000000000)); -select todayofweek(toTimestamp(1619820000000000)); +select toyyyymm(to_timestamp(1619820000000000)); +select toyyyymmdd(to_timestamp(1619820000000000)); +select toyyyymmddhhmmss(to_timestamp(1619820000000000)); +select tostartofmonth(to_timestamp(1619820000000000)); +select 
tomonth(to_timestamp(1619820000000000)); +select todayofyear(to_timestamp(1619820000000000)); +select todayofmonth(to_timestamp(1619820000000000)); +select todayofweek(to_timestamp(1619820000000000)); -- round function select "====ROUNDER_FUNCTION===="; -- 1619822911999000 = 2021-04-30 22:48:31.999 select "==UTC=="; set timezone = 'UTC'; -select tostartofsecond(toTimestamp(1619822911999000)); -select tostartofminute(toTimestamp(1619822911999000)); -select tostartoffiveminutes(toTimestamp(1619822911999000)); -select tostartoftenminutes(toTimestamp(1619822911999000)); -select tostartoffifteenminutes(toTimestamp(1619822911999000)); -select timeslot(toTimestamp(1619822911999000)); -select tostartofhour(toTimestamp(1619822911999000)); -select tostartofday(toTimestamp(1619822911999000)); -select tostartofweek(toTimestamp(1619822911999000)); +select tostartofsecond(to_timestamp(1619822911999000)); +select tostartofminute(to_timestamp(1619822911999000)); +select tostartoffiveminutes(to_timestamp(1619822911999000)); +select tostartoftenminutes(to_timestamp(1619822911999000)); +select tostartoffifteenminutes(to_timestamp(1619822911999000)); +select timeslot(to_timestamp(1619822911999000)); +select tostartofhour(to_timestamp(1619822911999000)); +select tostartofday(to_timestamp(1619822911999000)); +select tostartofweek(to_timestamp(1619822911999000)); set timezone = 'Asia/Shanghai'; select "==Asia/Shanghai=="; -select tostartofsecond(toTimestamp(1619822911999000)); -select tostartofminute(toTimestamp(1619822911999000)); -select tostartoffiveminutes(toTimestamp(1619822911999000)); -select tostartoftenminutes(toTimestamp(1619822911999000)); -select tostartoffifteenminutes(toTimestamp(1619822911999000)); -select timeslot(toTimestamp(1619822911999000)); -select tostartofhour(toTimestamp(1619822911999000)); -select tostartofday(toTimestamp(1619822911999000)); -select tostartofweek(toTimestamp(1619822911999000)); +select tostartofsecond(to_timestamp(1619822911999000)); +select tostartofminute(to_timestamp(1619822911999000)); +select tostartoffiveminutes(to_timestamp(1619822911999000)); +select tostartoftenminutes(to_timestamp(1619822911999000)); +select tostartoffifteenminutes(to_timestamp(1619822911999000)); +select timeslot(to_timestamp(1619822911999000)); +select tostartofhour(to_timestamp(1619822911999000)); +select tostartofday(to_timestamp(1619822911999000)); +select tostartofweek(to_timestamp(1619822911999000)); select "====INTERVAL_FUNCTION===="; -- 1619822911999000 = 2021-04-30 22:48:31.999 -- 1583013600000000 = 2020-02-29 22:00:00 select "==UTC=="; set timezone = 'UTC'; -select addMonths(totimestamp(1619822911999000), 1); -select totimestamp(1583013600000000); -select addYears(totimestamp(1583013600000000), 1); +select addMonths(to_timestamp(1619822911999000), 1); +select to_timestamp(1583013600000000); +select addYears(to_timestamp(1583013600000000), 1); select "==Asia/Shanghai=="; set timezone = 'Asia/Shanghai'; -select addMonths(totimestamp(1619822911999000), 1); -select totimestamp(1583013600000000); -select addYears(totimestamp(1583013600000000), 1); \ No newline at end of file +select addMonths(to_timestamp(1619822911999000), 1); +select to_timestamp(1583013600000000); +select addYears(to_timestamp(1583013600000000), 1); \ No newline at end of file
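
Taken together, the hunks above converge on one pattern: a FormatSettings value, carrying at least the session timezone, is threaded into every serialize/deserialize call instead of being defaulted inside the type code. A minimal sketch of how a caller would use it follows. It is an illustration only, not code from this series: it leans on calls visible in the diffs (create_deserializer(n), append_data_value(value, &format), FormatSettings { timezone, ..Default::default() }), while the helper name, its signature, and the use of DataField are assumptions, and the snippet has not been compiled against this branch.

    use chrono_tz::Tz;
    use common_datavalues::prelude::*;
    use common_exception::Result;
    use common_io::prelude::FormatSettings;

    // Hypothetical helper: feed a batch of DataValues through a field's
    // deserializer while honouring the session timezone.
    fn append_values(field: &DataField, values: &[DataValue], tz: Tz) -> Result<()> {
        let mut deserializer = field.data_type().create_deserializer(values.len());

        // Only the timezone differs from the defaults, mirroring the
        // struct-update construction used in the source_csv.rs hunk above.
        let format = FormatSettings {
            timezone: tz,
            ..Default::default()
        };

        for value in values {
            deserializer.append_data_value(value.clone(), &format)?;
        }
        // The caller would then pop the values back out or finish the column,
        // as the serialization tests in this series do.
        Ok(())
    }

Passing the whole FormatSettings rather than a bare Tz appears deliberate: the CSV source builder above already reads empty_as_default and skip_header from the same struct, so per-session format options stay in one place.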