diff --git a/Cargo.lock b/Cargo.lock index d97ffddd73785..425ebe26300c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -915,6 +915,7 @@ dependencies = [ "common-datavalues", "common-exception", "common-infallible", + "common-io", "pretty_assertions", "regex", ] @@ -974,6 +975,7 @@ dependencies = [ "bstr", "bumpalo", "bytes 1.1.0", + "chrono-tz", "common-arrow", "common-datablocks", "common-datavalues", @@ -1294,6 +1296,7 @@ version = "0.1.0" dependencies = [ "async-stream", "async-trait", + "chrono-tz", "common-arrow", "common-base", "common-datablocks", diff --git a/common/datablocks/Cargo.toml b/common/datablocks/Cargo.toml index 2b6dd1fa2b810..d7d33a2d24c95 100644 --- a/common/datablocks/Cargo.toml +++ b/common/datablocks/Cargo.toml @@ -16,6 +16,7 @@ common-arrow = { path = "../arrow" } common-datavalues = { path = "../datavalues" } common-exception = { path = "../exception" } common-infallible = { path = "../infallible" } +common-io = { path = "../io" } # Github dependencies diff --git a/common/datablocks/src/kernels/data_block_group_by_hash.rs b/common/datablocks/src/kernels/data_block_group_by_hash.rs index dc6b19c6269d6..6467c8be54082 100644 --- a/common/datablocks/src/kernels/data_block_group_by_hash.rs +++ b/common/datablocks/src/kernels/data_block_group_by_hash.rs @@ -20,6 +20,7 @@ use std::ops::Not; use common_datavalues::prelude::*; use common_exception::Result; +use common_io::prelude::FormatSettings; use crate::DataBlock; @@ -226,14 +227,14 @@ impl HashMethodSerializer { let mut keys: Vec<&[u8]> = keys.iter().map(|x| x.as_slice()).collect(); let rows = keys.len(); - + let format = FormatSettings::default(); let mut res = Vec::with_capacity(group_fields.len()); for f in group_fields.iter() { let data_type = f.data_type(); let mut deserializer = data_type.create_deserializer(rows); for (_row, key) in keys.iter_mut().enumerate() { - deserializer.de_binary(key)?; + deserializer.de_binary(key, &format)?; } res.push(deserializer.finish_to_column()); } @@ -325,9 +326,10 @@ where T: PrimitiveType let mut deserializer = non_null_type.create_deserializer(rows); let reader = vec8.as_slice(); + let format = FormatSettings::default(); let col = match f.is_nullable() { false => { - deserializer.de_fixed_binary_batch(&reader[offsize..], step, rows)?; + deserializer.de_fixed_binary_batch(&reader[offsize..], step, rows, &format)?; deserializer.finish_to_column() } @@ -337,6 +339,7 @@ where T: PrimitiveType &reader[null_offsize..], step, rows, + &format, )?; null_offsize += 1; @@ -346,7 +349,7 @@ where T: PrimitiveType // we store 1 for nulls in fixed_hash let bitmap = col.values().not(); - deserializer.de_fixed_binary_batch(&reader[offsize..], step, rows)?; + deserializer.de_fixed_binary_batch(&reader[offsize..], step, rows, &format)?; let inner = deserializer.finish_to_column(); NullableColumn::wrap_inner(inner, Some(bitmap)) } diff --git a/common/datavalues/src/types/data_type.rs b/common/datavalues/src/types/data_type.rs index f24b27235c706..987f47c0fd23d 100644 --- a/common/datavalues/src/types/data_type.rs +++ b/common/datavalues/src/types/data_type.rs @@ -124,7 +124,9 @@ pub trait DataType: std::fmt::Debug + Sync + Send + DynClone { } fn create_mutable(&self, capacity: usize) -> Box; + fn create_serializer(&self) -> TypeSerializerImpl; + fn create_deserializer(&self, capacity: usize) -> TypeDeserializerImpl; } diff --git a/common/datavalues/src/types/deserializations/boolean.rs b/common/datavalues/src/types/deserializations/boolean.rs index 0185d5fa4e981..3bac9f8171b54 100644 --- a/common/datavalues/src/types/deserializations/boolean.rs +++ b/common/datavalues/src/types/deserializations/boolean.rs @@ -23,17 +23,23 @@ pub struct BooleanDeserializer { } impl TypeDeserializer for BooleanDeserializer { - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { let value: bool = reader.read_scalar()?; self.builder.append_value(value); Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder.append_value(false); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for row in 0..rows { let mut reader = &reader[step * row..]; let value: bool = reader.read_scalar()?; @@ -43,7 +49,7 @@ impl TypeDeserializer for BooleanDeserializer { Ok(()) } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], _format: &FormatSettings) -> Result<()> { if reader.eq_ignore_ascii_case(b"true") { self.builder.append_value(true); } else if reader.eq_ignore_ascii_case(b"false") { @@ -54,7 +60,11 @@ impl TypeDeserializer for BooleanDeserializer { Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { let v = if BufferReadExt::ignore_insensitive_bytes(reader, b"true")? { Ok(true) } else if BufferReadExt::ignore_insensitive_bytes(reader, b"false")? { @@ -67,7 +77,7 @@ impl TypeDeserializer for BooleanDeserializer { Ok(()) } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { match value { serde_json::Value::Bool(v) => self.builder.append_value(*v), _ => return Err(ErrorCode::BadBytes("Incorrect boolean value")), @@ -75,7 +85,7 @@ impl TypeDeserializer for BooleanDeserializer { Ok(()) } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, _format: &FormatSettings) -> Result<()> { self.builder.append_value(value.as_bool()?); Ok(()) } diff --git a/common/datavalues/src/types/deserializations/date.rs b/common/datavalues/src/types/deserializations/date.rs index 2c7cfdc102df3..2d8d6f4f93e1f 100644 --- a/common/datavalues/src/types/deserializations/date.rs +++ b/common/datavalues/src/types/deserializations/date.rs @@ -33,18 +33,24 @@ where T: PrimitiveType, T: Unmarshal + StatBuffer + FromLexical, { - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { let value: T = reader.read_scalar()?; let _ = check_date(value.as_i32())?; self.builder.append_value(value); Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder.append_value(T::default()); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for row in 0..rows { let mut reader = &reader[step * row..]; let value: T = reader.read_scalar()?; @@ -54,7 +60,7 @@ where Ok(()) } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { match value { serde_json::Value::String(v) => { let mut reader = BufferReader::new(v.as_bytes()); @@ -68,7 +74,7 @@ where } } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], _format: &FormatSettings) -> Result<()> { let mut reader = BufferReader::new(reader); let date = reader.read_date_text()?; let days = uniform(date); @@ -78,7 +84,11 @@ where Ok(()) } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_quoted( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { reader.must_ignore_byte(b'\'')?; let date = reader.read_date_text()?; let days = uniform(date); @@ -89,7 +99,11 @@ where Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { let date = reader.read_date_text()?; let days = uniform(date); let _ = check_date(days.as_i32())?; @@ -97,7 +111,11 @@ where Ok(()) } - fn de_text_csv(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_csv( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { let maybe_quote = reader.ignore(|f| f == b'\'' || f == b'"')?; let date = reader.read_date_text()?; let days = uniform(date); @@ -109,7 +127,11 @@ where Ok(()) } - fn de_text_json(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_json( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { reader.must_ignore_byte(b'"')?; let date = reader.read_date_text()?; let days = uniform(date); @@ -120,7 +142,7 @@ where Ok(()) } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, _format: &FormatSettings) -> Result<()> { let v = value.as_i64()? as i32; let _ = check_date(v)?; self.builder.append_value(v.as_()); diff --git a/common/datavalues/src/types/deserializations/mod.rs b/common/datavalues/src/types/deserializations/mod.rs index f6ec37975cc0c..6adf6f9d151be 100644 --- a/common/datavalues/src/types/deserializations/mod.rs +++ b/common/datavalues/src/types/deserializations/mod.rs @@ -39,35 +39,57 @@ pub use variant::*; #[enum_dispatch] pub trait TypeDeserializer: Send + Sync { - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()>; + fn de_binary(&mut self, reader: &mut &[u8], format: &FormatSettings) -> Result<()>; - fn de_default(&mut self); + fn de_default(&mut self, format: &FormatSettings); - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()>; + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + format: &FormatSettings, + ) -> Result<()>; - fn de_json(&mut self, reader: &Value) -> Result<()>; + fn de_json(&mut self, reader: &Value, format: &FormatSettings) -> Result<()>; - fn de_null(&mut self) -> bool { + fn de_null(&mut self, _format: &FormatSettings) -> bool { false } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()>; + fn de_whole_text(&mut self, reader: &[u8], format: &FormatSettings) -> Result<()>; - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()>; + fn de_text( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()>; - fn de_text_csv(&mut self, reader: &mut CheckpointReader) -> Result<()> { - self.de_text(reader) + fn de_text_csv( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { + self.de_text(reader, format) } - fn de_text_json(&mut self, reader: &mut CheckpointReader) -> Result<()> { - self.de_text(reader) + fn de_text_json( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { + self.de_text(reader, format) } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader) -> Result<()> { - self.de_text(reader) + fn de_text_quoted( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { + self.de_text(reader, format) } - fn append_data_value(&mut self, value: DataValue) -> Result<()>; + fn append_data_value(&mut self, value: DataValue, format: &FormatSettings) -> Result<()>; /// Note this method will return err only when inner builder is empty. fn pop_data_value(&mut self) -> Result; diff --git a/common/datavalues/src/types/deserializations/null.rs b/common/datavalues/src/types/deserializations/null.rs index 1529e5c495ed7..f6f6cd7c42b2a 100644 --- a/common/datavalues/src/types/deserializations/null.rs +++ b/common/datavalues/src/types/deserializations/null.rs @@ -27,37 +27,47 @@ pub struct NullDeserializer { } impl TypeDeserializer for NullDeserializer { - fn de_binary(&mut self, _reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, _reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { self.builder.append_default(); Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder.append_default(); } - fn de_fixed_binary_batch(&mut self, _reader: &[u8], _step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + _reader: &[u8], + _step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for _ in 0..rows { self.builder.append_default(); } Ok(()) } - fn de_json(&mut self, _value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, _value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { self.builder.append_default(); Ok(()) } - fn de_whole_text(&mut self, _reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, _reader: &[u8], _format: &FormatSettings) -> Result<()> { Ok(()) } - fn de_text(&mut self, _reader: &mut CheckpointReader) -> Result<()> { + fn de_text( + &mut self, + _reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { self.builder.append_default(); Ok(()) } - fn append_data_value(&mut self, _value: DataValue) -> Result<()> { + fn append_data_value(&mut self, _value: DataValue, _format: &FormatSettings) -> Result<()> { self.builder.append_default(); Ok(()) } diff --git a/common/datavalues/src/types/deserializations/nullable.rs b/common/datavalues/src/types/deserializations/nullable.rs index 5008d9a25ccd2..7557d90aee28d 100644 --- a/common/datavalues/src/types/deserializations/nullable.rs +++ b/common/datavalues/src/types/deserializations/nullable.rs @@ -29,84 +29,98 @@ pub struct NullableDeserializer { } impl TypeDeserializer for NullableDeserializer { - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], format: &FormatSettings) -> Result<()> { let valid: bool = reader.read_scalar()?; if valid { - self.inner.de_binary(reader)?; + self.inner.de_binary(reader, format)?; } else { - self.inner.de_default(); + self.inner.de_default(format); } self.bitmap.push(valid); Ok(()) } - fn de_default(&mut self) { - self.inner.de_default(); + fn de_default(&mut self, format: &FormatSettings) { + self.inner.de_default(format); self.bitmap.push(false); } - fn de_fixed_binary_batch(&mut self, _reader: &[u8], _step: usize, _rows: usize) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + _reader: &[u8], + _step: usize, + _rows: usize, + _format: &FormatSettings, + ) -> Result<()> { // it's covered outside unreachable!() } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, format: &FormatSettings) -> Result<()> { match value { serde_json::Value::Null => { - self.de_null(); + self.de_null(format); Ok(()) } other => { self.bitmap.push(true); - self.inner.de_json(other) + self.inner.de_json(other, format) } } } // TODO: support null text setting - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { if reader.ignore_insensitive_bytes(b"null")? { - self.de_default(); + self.de_default(format); return Ok(()); } - self.inner.de_text(reader)?; + self.inner.de_text(reader, format)?; self.bitmap.push(true); Ok(()) } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_quoted( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { if reader.ignore_insensitive_bytes(b"null")? { - self.de_default(); + self.de_default(format); return Ok(()); } - self.inner.de_text_quoted(reader)?; + self.inner.de_text_quoted(reader, format)?; self.bitmap.push(true); Ok(()) } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], format: &FormatSettings) -> Result<()> { if reader.eq_ignore_ascii_case(b"null") { - self.de_default(); + self.de_default(format); return Ok(()); } - self.inner.de_whole_text(reader)?; + self.inner.de_whole_text(reader, format)?; self.bitmap.push(true); Ok(()) } - fn de_null(&mut self) -> bool { - self.inner.de_default(); + fn de_null(&mut self, format: &FormatSettings) -> bool { + self.inner.de_default(format); self.bitmap.push(false); true } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, format: &FormatSettings) -> Result<()> { if value.is_null() { - self.inner.de_default(); + self.inner.de_default(format); self.bitmap.push(false); } else { - self.inner.append_data_value(value)?; + self.inner.append_data_value(value, format)?; self.bitmap.push(true); } Ok(()) diff --git a/common/datavalues/src/types/deserializations/number.rs b/common/datavalues/src/types/deserializations/number.rs index 6f5064e59b1c3..e6cb80095cfda 100644 --- a/common/datavalues/src/types/deserializations/number.rs +++ b/common/datavalues/src/types/deserializations/number.rs @@ -28,17 +28,23 @@ where T: PrimitiveType, T: Unmarshal + StatBuffer + FromLexical, { - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { let value: T = reader.read_scalar()?; self.builder.append_value(value); Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder.append_value(T::default()); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for row in 0..rows { let mut reader = &reader[step * row..]; let value: T = reader.read_scalar()?; @@ -47,7 +53,7 @@ where Ok(()) } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { match value { serde_json::Value::Number(v) => { let v = v.to_string(); @@ -65,7 +71,7 @@ where } } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], _format: &FormatSettings) -> Result<()> { let mut reader = BufferReader::new(reader); let v: T = if !T::FLOATING { reader.read_int_text() @@ -78,7 +84,11 @@ where Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { let v: T = if !T::FLOATING { reader.read_int_text() } else { @@ -88,11 +98,11 @@ where Ok(()) } - fn de_null(&mut self) -> bool { + fn de_null(&mut self, _format: &FormatSettings) -> bool { false } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, _format: &FormatSettings) -> Result<()> { self.builder.append_data_value(value) } diff --git a/common/datavalues/src/types/deserializations/string.rs b/common/datavalues/src/types/deserializations/string.rs index f71e268b920f6..a3e4f9c13df0f 100644 --- a/common/datavalues/src/types/deserializations/string.rs +++ b/common/datavalues/src/types/deserializations/string.rs @@ -37,7 +37,7 @@ impl StringDeserializer { impl TypeDeserializer for StringDeserializer { // See GroupHash.rs for StringColumn #[allow(clippy::uninit_vec)] - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { let offset: u64 = reader.read_uvarint()?; self.buffer.clear(); @@ -51,11 +51,17 @@ impl TypeDeserializer for StringDeserializer { Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder.append_value(""); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for row in 0..rows { let reader = &reader[step * row..]; self.builder.append_value(reader); @@ -63,7 +69,7 @@ impl TypeDeserializer for StringDeserializer { Ok(()) } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { match value { serde_json::Value::String(s) => { self.builder.append_value(s); @@ -73,26 +79,34 @@ impl TypeDeserializer for StringDeserializer { } } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_quoted( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { self.buffer.clear(); reader.read_quoted_text(&mut self.buffer, b'\'')?; self.builder.append_value(self.buffer.as_slice()); Ok(()) } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], _format: &FormatSettings) -> Result<()> { self.builder.append_value(reader); Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { self.buffer.clear(); reader.read_escaped_string_text(&mut self.buffer)?; self.builder.append_value(self.buffer.as_slice()); Ok(()) } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, _format: &FormatSettings) -> Result<()> { self.builder.append_data_value(value) } diff --git a/common/datavalues/src/types/deserializations/timestamp.rs b/common/datavalues/src/types/deserializations/timestamp.rs index d33abcd80804b..f48721945b22b 100644 --- a/common/datavalues/src/types/deserializations/timestamp.rs +++ b/common/datavalues/src/types/deserializations/timestamp.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use chrono_tz::Tz; use common_exception::*; use common_io::prelude::*; use num::cast::AsPrimitive; @@ -22,23 +21,28 @@ use crate::prelude::*; pub struct TimestampDeserializer { pub builder: MutablePrimitiveColumn, - pub tz: Tz, pub precision: usize, } impl TypeDeserializer for TimestampDeserializer { - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { let value: i64 = reader.read_scalar()?; let _ = check_timestamp(value)?; self.builder.append_value(value); Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder.append_value(i64::default()); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for row in 0..rows { let mut reader = &reader[step * row..]; let value: i64 = reader.read_scalar()?; @@ -48,12 +52,12 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, format: &FormatSettings) -> Result<()> { match value { serde_json::Value::String(v) => { let v = v.clone(); let mut reader = BufferReader::new(v.as_bytes()); - let ts = reader.read_timestamp_text(&self.tz)?; + let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; @@ -64,9 +68,13 @@ impl TypeDeserializer for TimestampDeserializer { } } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_quoted( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { reader.must_ignore_byte(b'\'')?; - let ts = reader.read_timestamp_text(&self.tz)?; + let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; reader.must_ignore_byte(b'\'')?; @@ -74,9 +82,9 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], format: &FormatSettings) -> Result<()> { let mut reader = BufferReader::new(reader); - let ts = reader.read_timestamp_text(&self.tz)?; + let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; reader.must_eof()?; @@ -84,17 +92,25 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { - let ts = reader.read_timestamp_text(&self.tz)?; + fn de_text( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { + let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; self.builder.append_value(micros.as_()); Ok(()) } - fn de_text_csv(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_csv( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { let maybe_quote = reader.ignore(|f| f == b'\'' || f == b'"')?; - let ts = reader.read_timestamp_text(&self.tz)?; + let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; if maybe_quote { @@ -104,9 +120,13 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn de_text_json(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_json( + &mut self, + reader: &mut CheckpointReader, + format: &FormatSettings, + ) -> Result<()> { reader.must_ignore_byte(b'"')?; - let ts = reader.read_timestamp_text(&self.tz)?; + let ts = reader.read_timestamp_text(&format.timezone)?; let micros = ts.timestamp_micros(); let _ = check_timestamp(micros)?; reader.must_ignore_byte(b'"')?; @@ -115,7 +135,7 @@ impl TypeDeserializer for TimestampDeserializer { Ok(()) } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, _format: &FormatSettings) -> Result<()> { let v = value.as_i64()?; let _ = check_timestamp(v)?; self.builder.append_value(v.as_()); diff --git a/common/datavalues/src/types/deserializations/variant.rs b/common/datavalues/src/types/deserializations/variant.rs index d84e5de6cf33b..af13c92ee3a10 100644 --- a/common/datavalues/src/types/deserializations/variant.rs +++ b/common/datavalues/src/types/deserializations/variant.rs @@ -35,7 +35,7 @@ impl VariantDeserializer { impl TypeDeserializer for VariantDeserializer { #[allow(clippy::uninit_vec)] - fn de_binary(&mut self, reader: &mut &[u8]) -> Result<()> { + fn de_binary(&mut self, reader: &mut &[u8], _format: &FormatSettings) -> Result<()> { let offset: u64 = reader.read_uvarint()?; self.buffer.clear(); @@ -50,12 +50,18 @@ impl TypeDeserializer for VariantDeserializer { Ok(()) } - fn de_default(&mut self) { + fn de_default(&mut self, _format: &FormatSettings) { self.builder .append_value(VariantValue::from(serde_json::Value::Null)); } - fn de_fixed_binary_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { + fn de_fixed_binary_batch( + &mut self, + reader: &[u8], + step: usize, + rows: usize, + _format: &FormatSettings, + ) -> Result<()> { for row in 0..rows { let reader = &reader[step * row..]; let val = serde_json::from_slice(reader)?; @@ -64,12 +70,16 @@ impl TypeDeserializer for VariantDeserializer { Ok(()) } - fn de_json(&mut self, value: &serde_json::Value) -> Result<()> { + fn de_json(&mut self, value: &serde_json::Value, _format: &FormatSettings) -> Result<()> { self.builder.append_value(VariantValue::from(value)); Ok(()) } - fn de_text(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { self.buffer.clear(); reader.read_escaped_string_text(&mut self.buffer)?; let val = serde_json::from_slice(self.buffer.as_slice())?; @@ -77,13 +87,17 @@ impl TypeDeserializer for VariantDeserializer { Ok(()) } - fn de_whole_text(&mut self, reader: &[u8]) -> Result<()> { + fn de_whole_text(&mut self, reader: &[u8], _format: &FormatSettings) -> Result<()> { let val = serde_json::from_slice(reader)?; self.builder.append_value(val); Ok(()) } - fn de_text_quoted(&mut self, reader: &mut CheckpointReader) -> Result<()> { + fn de_text_quoted( + &mut self, + reader: &mut CheckpointReader, + _format: &FormatSettings, + ) -> Result<()> { self.buffer.clear(); reader.read_quoted_text(&mut self.buffer, b'\'')?; @@ -92,7 +106,7 @@ impl TypeDeserializer for VariantDeserializer { Ok(()) } - fn append_data_value(&mut self, value: DataValue) -> Result<()> { + fn append_data_value(&mut self, value: DataValue, _format: &FormatSettings) -> Result<()> { self.builder.append_data_value(value) } diff --git a/common/datavalues/src/types/serializations/array.rs b/common/datavalues/src/types/serializations/array.rs index 417985d510cf1..816008dc36b2a 100644 --- a/common/datavalues/src/types/serializations/array.rs +++ b/common/datavalues/src/types/serializations/array.rs @@ -14,6 +14,7 @@ use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use serde_json::Value; use crate::prelude::*; @@ -25,7 +26,7 @@ pub struct ArraySerializer { } impl TypeSerializer for ArraySerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, format: &FormatSettings) -> Result { if let DataValue::Array(vals) = value { let mut res = String::new(); res.push('['); @@ -37,7 +38,7 @@ impl TypeSerializer for ArraySerializer { } first = false; - let s = self.inner.serialize_value(val)?; + let s = self.inner.serialize_value(val, format)?; if quoted { res.push_str(&format!("'{}'", s)); } else { @@ -51,24 +52,25 @@ impl TypeSerializer for ArraySerializer { } } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column(&self, column: &ColumnRef, format: &FormatSettings) -> Result> { let column: &ArrayColumn = Series::check_get(column)?; let mut result = Vec::with_capacity(column.len()); for i in 0..column.len() { let val = column.get(i); - let s = self.serialize_value(&val)?; + let s = self.serialize_value(&val, format)?; result.push(s); } Ok(result) } - fn serialize_json(&self, _column: &ColumnRef) -> Result> { + fn serialize_json(&self, _column: &ColumnRef, _format: &FormatSettings) -> Result> { todo!() } fn serialize_clickhouse_format( &self, _column: &ColumnRef, + _format: &FormatSettings, ) -> Result { todo!() } diff --git a/common/datavalues/src/types/serializations/boolean.rs b/common/datavalues/src/types/serializations/boolean.rs index df4feae432927..1ca1a43052638 100644 --- a/common/datavalues/src/types/serializations/boolean.rs +++ b/common/datavalues/src/types/serializations/boolean.rs @@ -15,6 +15,7 @@ use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use opensrv_clickhouse::types::column::ArcColumnWrapper; use opensrv_clickhouse::types::column::ColumnFrom; use serde_json::Value; @@ -28,7 +29,7 @@ const TRUE_STR: &str = "1"; const FALSE_STR: &str = "0"; impl TypeSerializer for BooleanSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, _format: &FormatSettings) -> Result { if let DataValue::Boolean(x) = value { if *x { Ok(TRUE_STR.to_owned()) @@ -40,7 +41,11 @@ impl TypeSerializer for BooleanSerializer { } } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: &FormatSettings, + ) -> Result> { let array: &BooleanColumn = Series::check_get(column)?; let result: Vec = array @@ -56,7 +61,7 @@ impl TypeSerializer for BooleanSerializer { Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let array: &BooleanColumn = Series::check_get(column)?; let result: Vec = array .iter() @@ -68,6 +73,7 @@ impl TypeSerializer for BooleanSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: &FormatSettings, ) -> Result { let col: &BooleanColumn = Series::check_get(column)?; let values: Vec = col.iter().map(|c| c as u8).collect(); @@ -78,13 +84,15 @@ impl TypeSerializer for BooleanSerializer { &self, column: &ColumnRef, _valids: Option<&Bitmap>, + format: &FormatSettings, ) -> Result> { - self.serialize_json(column) + self.serialize_json(column, format) } fn serialize_json_object_suppress_error( &self, column: &ColumnRef, + _format: &FormatSettings, ) -> Result>> { let column: &BooleanColumn = Series::check_get(column)?; let result: Vec> = column diff --git a/common/datavalues/src/types/serializations/date.rs b/common/datavalues/src/types/serializations/date.rs index b926cd679c6f3..7441977e066e5 100644 --- a/common/datavalues/src/types/serializations/date.rs +++ b/common/datavalues/src/types/serializations/date.rs @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - use std::marker::PhantomData; use std::ops::AddAssign; @@ -20,6 +19,7 @@ use chrono::Duration; use chrono::NaiveDate; use chrono_tz::Tz; use common_exception::*; +use common_io::prelude::FormatSettings; use num::cast::AsPrimitive; use opensrv_clickhouse::types::column::ArcColumnWrapper; use opensrv_clickhouse::types::column::ColumnFrom; @@ -43,14 +43,18 @@ impl> Default for DateSerializer { const DATE_FMT: &str = "%Y-%m-%d"; impl> TypeSerializer for DateSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, _format: &FormatSettings) -> Result { let mut date = NaiveDate::from_ymd(1970, 1, 1); let d = Duration::days(value.as_i64()?); date.add_assign(d); Ok(date.format(DATE_FMT).to_string()) } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: &FormatSettings, + ) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column @@ -65,7 +69,7 @@ impl> TypeSerializer for DateSerializer { Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let array: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = array .iter() @@ -83,6 +87,7 @@ impl> TypeSerializer for DateSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: &FormatSettings, ) -> Result { let array: &PrimitiveColumn = Series::check_get(column)?; let tz: Tz = "UTC".parse().unwrap(); diff --git a/common/datavalues/src/types/serializations/mod.rs b/common/datavalues/src/types/serializations/mod.rs index aa52b456d8408..c1ab933c7b5ac 100644 --- a/common/datavalues/src/types/serializations/mod.rs +++ b/common/datavalues/src/types/serializations/mod.rs @@ -15,6 +15,7 @@ use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use enum_dispatch::enum_dispatch; use opensrv_clickhouse::types::column::ArcColumnData; use serde_json::Value; @@ -44,15 +45,20 @@ pub use variant::*; #[enum_dispatch] pub trait TypeSerializer: Send + Sync { - fn serialize_value(&self, value: &DataValue) -> Result; - fn serialize_json(&self, column: &ColumnRef) -> Result>; - fn serialize_column(&self, column: &ColumnRef) -> Result>; - fn serialize_clickhouse_format(&self, column: &ColumnRef) -> Result; + fn serialize_value(&self, value: &DataValue, format: &FormatSettings) -> Result; + fn serialize_json(&self, column: &ColumnRef, format: &FormatSettings) -> Result>; + fn serialize_column(&self, column: &ColumnRef, format: &FormatSettings) -> Result>; + fn serialize_clickhouse_format( + &self, + column: &ColumnRef, + _format: &FormatSettings, + ) -> Result; fn serialize_json_object( &self, _column: &ColumnRef, _valids: Option<&Bitmap>, + _format: &FormatSettings, ) -> Result> { Err(ErrorCode::BadDataValueType( "Error parsing JSON: unsupported data type", @@ -62,6 +68,7 @@ pub trait TypeSerializer: Send + Sync { fn serialize_json_object_suppress_error( &self, _column: &ColumnRef, + _format: &FormatSettings, ) -> Result>> { Err(ErrorCode::BadDataValueType( "Error parsing JSON: unsupported data type", diff --git a/common/datavalues/src/types/serializations/null.rs b/common/datavalues/src/types/serializations/null.rs index c096b04ad5775..e2097a1e92c1f 100644 --- a/common/datavalues/src/types/serializations/null.rs +++ b/common/datavalues/src/types/serializations/null.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use common_exception::Result; +use common_io::prelude::FormatSettings; use opensrv_clickhouse::types::column::ArcColumnWrapper; use opensrv_clickhouse::types::column::ColumnFrom; use opensrv_clickhouse::types::column::NullableColumnData; @@ -30,16 +31,20 @@ pub struct NullSerializer {} const NULL_STR: &str = "NULL"; impl TypeSerializer for NullSerializer { - fn serialize_value(&self, _value: &DataValue) -> Result { + fn serialize_value(&self, _value: &DataValue, _format: &FormatSettings) -> Result { Ok(NULL_STR.to_owned()) } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: &FormatSettings, + ) -> Result> { let result: Vec = vec![NULL_STR.to_owned(); column.len()]; Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let null = Value::Null; let result: Vec = vec![null; column.len()]; Ok(result) @@ -48,6 +53,7 @@ impl TypeSerializer for NullSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: &FormatSettings, ) -> Result { let nulls = vec![1u8; column.len()]; let inner = Vec::column_from::(vec![1u8; column.len()]); diff --git a/common/datavalues/src/types/serializations/nullable.rs b/common/datavalues/src/types/serializations/nullable.rs index bb7c0bdd5ea47..b09ade25bf42d 100644 --- a/common/datavalues/src/types/serializations/nullable.rs +++ b/common/datavalues/src/types/serializations/nullable.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use common_exception::Result; +use common_io::prelude::FormatSettings; use opensrv_clickhouse::types::column::NullableColumnData; use serde_json::Value; @@ -32,18 +33,18 @@ pub struct NullableSerializer { } impl TypeSerializer for NullableSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, format: &FormatSettings) -> Result { if value.is_null() { Ok("NULL".to_owned()) } else { - self.inner.serialize_value(value) + self.inner.serialize_value(value, format) } } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column(&self, column: &ColumnRef, format: &FormatSettings) -> Result> { let column: &NullableColumn = Series::check_get(column)?; let rows = column.len(); - let mut res = self.inner.serialize_column(column.inner())?; + let mut res = self.inner.serialize_column(column.inner(), format)?; (0..rows).for_each(|row| { if column.null_at(row) { @@ -53,10 +54,10 @@ impl TypeSerializer for NullableSerializer { Ok(res) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json(&self, column: &ColumnRef, format: &FormatSettings) -> Result> { let column: &NullableColumn = Series::check_get(column)?; let rows = column.len(); - let mut res = self.inner.serialize_json(column.inner())?; + let mut res = self.inner.serialize_json(column.inner(), format)?; (0..rows).for_each(|row| { if column.null_at(row) { @@ -69,9 +70,12 @@ impl TypeSerializer for NullableSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + format: &FormatSettings, ) -> Result { let column: &NullableColumn = Series::check_get(column)?; - let inner = self.inner.serialize_clickhouse_format(column.inner())?; + let inner = self + .inner + .serialize_clickhouse_format(column.inner(), format)?; let nulls = column.ensure_validity().iter().map(|v| !v as u8).collect(); let data = NullableColumnData { nulls, inner }; diff --git a/common/datavalues/src/types/serializations/number.rs b/common/datavalues/src/types/serializations/number.rs index de37347f62d2c..15f37053bc672 100644 --- a/common/datavalues/src/types/serializations/number.rs +++ b/common/datavalues/src/types/serializations/number.rs @@ -16,6 +16,7 @@ use std::marker::PhantomData; use common_arrow::arrow::bitmap::Bitmap; use common_exception::Result; +use common_io::prelude::FormatSettings; use common_io::prelude::Marshal; use common_io::prelude::Unmarshal; use opensrv_clickhouse::types::column::ArcColumnWrapper; @@ -49,17 +50,21 @@ where T: PrimitiveType + opensrv_clickhouse::io::Marshal + opensrv_clickhouse::io::Unmarshal { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, _format: &FormatSettings) -> Result { Ok(format!("{:?}", value)) } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: &FormatSettings, + ) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column.iter().map(|x| format!("{}", x)).collect(); Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column .iter() @@ -71,6 +76,7 @@ where T: PrimitiveType fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: &FormatSettings, ) -> Result { let col: &PrimitiveColumn = Series::check_get(column)?; let values: Vec = col.iter().map(|c| c.to_owned()).collect(); @@ -81,13 +87,15 @@ where T: PrimitiveType &self, column: &ColumnRef, _valids: Option<&Bitmap>, + format: &FormatSettings, ) -> Result> { - self.serialize_json(column) + self.serialize_json(column, format) } fn serialize_json_object_suppress_error( &self, column: &ColumnRef, + _format: &FormatSettings, ) -> Result>> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec> = column diff --git a/common/datavalues/src/types/serializations/string.rs b/common/datavalues/src/types/serializations/string.rs index d16019d8a0d1b..092e2958b1693 100644 --- a/common/datavalues/src/types/serializations/string.rs +++ b/common/datavalues/src/types/serializations/string.rs @@ -15,6 +15,7 @@ use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use opensrv_clickhouse::types::column::ArcColumnWrapper; use opensrv_clickhouse::types::column::ColumnFrom; use serde_json::Value; @@ -25,7 +26,7 @@ use crate::prelude::*; pub struct StringSerializer {} impl TypeSerializer for StringSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, _format: &FormatSettings) -> Result { if let DataValue::String(x) = value { Ok(String::from_utf8_lossy(x).to_string()) } else { @@ -33,7 +34,11 @@ impl TypeSerializer for StringSerializer { } } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: &FormatSettings, + ) -> Result> { let column: &StringColumn = Series::check_get(column)?; let result: Vec = column .iter() @@ -42,7 +47,7 @@ impl TypeSerializer for StringSerializer { Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let column: &StringColumn = Series::check_get(column)?; let result: Vec = column .iter() @@ -54,6 +59,7 @@ impl TypeSerializer for StringSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: &FormatSettings, ) -> Result { let column: &StringColumn = Series::check_get(column)?; let values: Vec<&[u8]> = column.iter().collect(); @@ -64,6 +70,7 @@ impl TypeSerializer for StringSerializer { &self, column: &ColumnRef, valids: Option<&Bitmap>, + _format: &FormatSettings, ) -> Result> { let column: &StringColumn = Series::check_get(column)?; let mut result: Vec = Vec::new(); @@ -98,6 +105,7 @@ impl TypeSerializer for StringSerializer { fn serialize_json_object_suppress_error( &self, column: &ColumnRef, + _format: &FormatSettings, ) -> Result>> { let column: &StringColumn = Series::check_get(column)?; let result: Vec> = column diff --git a/common/datavalues/src/types/serializations/struct_.rs b/common/datavalues/src/types/serializations/struct_.rs index fa71936801ceb..e4b12bbed38ff 100644 --- a/common/datavalues/src/types/serializations/struct_.rs +++ b/common/datavalues/src/types/serializations/struct_.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use itertools::izip; use opensrv_clickhouse::types::column::ArcColumnData; use opensrv_clickhouse::types::column::TupleColumnData; @@ -31,7 +32,7 @@ pub struct StructSerializer { } impl TypeSerializer for StructSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, format: &FormatSettings) -> Result { if let DataValue::Struct(vals) = value { let mut res = String::new(); res.push('('); @@ -43,7 +44,7 @@ impl TypeSerializer for StructSerializer { } first = false; - let s = inner.serialize_value(val)?; + let s = inner.serialize_value(val, format)?; if typ.data_type_id().is_quoted() { res.push_str(&format!("'{}'", s)); } else { @@ -57,36 +58,32 @@ impl TypeSerializer for StructSerializer { } } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column(&self, column: &ColumnRef, format: &FormatSettings) -> Result> { let column: &StructColumn = Series::check_get(column)?; let mut result = Vec::with_capacity(column.len()); for i in 0..column.len() { let val = column.get(i); - let s = self.serialize_value(&val)?; + let s = self.serialize_value(&val, format)?; result.push(s); } Ok(result) } - fn serialize_json(&self, _column: &ColumnRef) -> Result> { - // let column: &StructColumn = Series::check_get(column)?; - // let inner_columns = column.values(); - // let result = self - // .inners - // .iter() - // .zip(inner_columns.iter()) - // .map(|(inner, col)| inner.serialize_json(col)) - // .collect::>>>()?; + fn serialize_json(&self, _column: &ColumnRef, _format: &FormatSettings) -> Result> { todo!() } - fn serialize_clickhouse_format(&self, column: &ColumnRef) -> Result { + fn serialize_clickhouse_format( + &self, + column: &ColumnRef, + format: &FormatSettings, + ) -> Result { let column: &StructColumn = Series::check_get(column)?; let result = self .inners .iter() .zip(column.values().iter()) - .map(|(inner, col)| inner.serialize_clickhouse_format(col)) + .map(|(inner, col)| inner.serialize_clickhouse_format(col, format)) .collect::>>()?; let data = TupleColumnData { inner: result }; diff --git a/common/datavalues/src/types/serializations/timestamp.rs b/common/datavalues/src/types/serializations/timestamp.rs index 2b132d00f3f2c..bbde1680641be 100644 --- a/common/datavalues/src/types/serializations/timestamp.rs +++ b/common/datavalues/src/types/serializations/timestamp.rs @@ -15,57 +15,49 @@ use chrono::DateTime; use chrono_tz::Tz; use common_exception::*; +use common_io::prelude::FormatSettings; use opensrv_clickhouse::types::column::ArcColumnWrapper; use opensrv_clickhouse::types::column::ColumnFrom; use serde_json::Value; use crate::prelude::*; -#[derive(Debug, Clone)] -pub struct TimestampSerializer { - tz: Tz, -} - -impl Default for TimestampSerializer { - fn default() -> Self { - let tz = "UTC".parse::().unwrap(); - Self { tz } - } -} +#[derive(Debug, Clone, Default)] +pub struct TimestampSerializer; impl TimestampSerializer { - pub fn to_timestamp(&self, value: &i64) -> DateTime { - value.to_timestamp(&self.tz) + pub fn to_timestamp(&self, value: &i64, tz: &Tz) -> DateTime { + value.to_timestamp(tz) } } const TIME_FMT: &str = "%Y-%m-%d %H:%M:%S"; impl TypeSerializer for TimestampSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, format: &FormatSettings) -> Result { let value = DFTryFrom::try_from(value.clone())?; - let dt = self.to_timestamp(&value); + let dt = self.to_timestamp(&value, &format.timezone); Ok(dt.format(TIME_FMT).to_string()) } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column(&self, column: &ColumnRef, format: &FormatSettings) -> Result> { let column: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = column .iter() .map(|v| { - let dt = self.to_timestamp(v); + let dt = self.to_timestamp(v, &format.timezone); dt.format(TIME_FMT).to_string() }) .collect(); Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json(&self, column: &ColumnRef, format: &FormatSettings) -> Result> { let array: &PrimitiveColumn = Series::check_get(column)?; let result: Vec = array .iter() .map(|v| { - let dt = self.to_timestamp(v); + let dt = self.to_timestamp(v, &format.timezone); serde_json::to_value(dt.format(TIME_FMT).to_string()).unwrap() }) .collect(); @@ -75,9 +67,13 @@ impl TypeSerializer for TimestampSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + format: &FormatSettings, ) -> Result { let array: &PrimitiveColumn = Series::check_get(column)?; - let values: Vec> = array.iter().map(|v| self.to_timestamp(v)).collect(); + let values: Vec> = array + .iter() + .map(|v| self.to_timestamp(v, &format.timezone)) + .collect(); Ok(Vec::column_from::(values)) } } diff --git a/common/datavalues/src/types/serializations/variant.rs b/common/datavalues/src/types/serializations/variant.rs index aaa4be3ada9ec..175b8fcb492b8 100644 --- a/common/datavalues/src/types/serializations/variant.rs +++ b/common/datavalues/src/types/serializations/variant.rs @@ -15,6 +15,7 @@ use common_arrow::arrow::bitmap::Bitmap; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use opensrv_clickhouse::types::column::ArcColumnWrapper; use opensrv_clickhouse::types::column::ColumnFrom; use serde_json; @@ -26,7 +27,7 @@ use crate::prelude::*; pub struct VariantSerializer {} impl TypeSerializer for VariantSerializer { - fn serialize_value(&self, value: &DataValue) -> Result { + fn serialize_value(&self, value: &DataValue, _format: &FormatSettings) -> Result { if let DataValue::Variant(v) = value { Ok(v.to_string()) } else { @@ -34,13 +35,17 @@ impl TypeSerializer for VariantSerializer { } } - fn serialize_column(&self, column: &ColumnRef) -> Result> { + fn serialize_column( + &self, + column: &ColumnRef, + _format: &FormatSettings, + ) -> Result> { let column: &VariantColumn = Series::check_get(column)?; let result: Vec = column.iter().map(|v| v.to_string()).collect(); Ok(result) } - fn serialize_json(&self, column: &ColumnRef) -> Result> { + fn serialize_json(&self, column: &ColumnRef, _format: &FormatSettings) -> Result> { let column: &VariantColumn = Series::check_get(column)?; let result: Vec = column.iter().map(|v| v.as_ref().to_owned()).collect(); Ok(result) @@ -49,6 +54,7 @@ impl TypeSerializer for VariantSerializer { fn serialize_clickhouse_format( &self, column: &ColumnRef, + _format: &FormatSettings, ) -> Result { let column: &VariantColumn = Series::check_get(column)?; let values: Vec = column.iter().map(|v| v.to_string()).collect(); @@ -60,6 +66,7 @@ impl TypeSerializer for VariantSerializer { &self, column: &ColumnRef, valids: Option<&Bitmap>, + _format: &FormatSettings, ) -> Result> { let column: &VariantColumn = Series::check_get(column)?; let mut result: Vec = Vec::new(); @@ -89,6 +96,7 @@ impl TypeSerializer for VariantSerializer { fn serialize_json_object_suppress_error( &self, column: &ColumnRef, + _format: &FormatSettings, ) -> Result>> { let column: &VariantColumn = Series::check_get(column)?; let result: Vec> = column diff --git a/common/datavalues/src/types/type_timestamp.rs b/common/datavalues/src/types/type_timestamp.rs index 5b21d91aa409b..65b28edb31db6 100644 --- a/common/datavalues/src/types/type_timestamp.rs +++ b/common/datavalues/src/types/type_timestamp.rs @@ -18,7 +18,6 @@ use std::sync::Arc; use chrono::DateTime; use chrono::TimeZone; use chrono::Utc; -use chrono_tz::Tz; use common_arrow::arrow::datatypes::DataType as ArrowType; use common_exception::ErrorCode; use common_exception::Result; @@ -151,10 +150,8 @@ impl DataType for TimestampType { } fn create_deserializer(&self, capacity: usize) -> TypeDeserializerImpl { - let tz = "UTC".parse::().unwrap(); TimestampDeserializer { builder: MutablePrimitiveColumn::::with_capacity(capacity), - tz, precision: self.precision, } .into() diff --git a/common/datavalues/tests/it/types/deserializations.rs b/common/datavalues/tests/it/types/deserializations.rs index 14928d33f4095..cbcf6ef993527 100644 --- a/common/datavalues/tests/it/types/deserializations.rs +++ b/common/datavalues/tests/it/types/deserializations.rs @@ -14,6 +14,7 @@ use common_datavalues::prelude::*; use common_exception::Result; +use common_io::prelude::FormatSettings; #[test] fn test_nullable_deserializer_pop() -> Result<()> { @@ -25,10 +26,11 @@ fn test_nullable_deserializer_pop() -> Result<()> { ]; let data_type = NullableType::new_impl(BooleanType::new_impl()); let mut deserializer = data_type.create_deserializer(4); + let format = FormatSettings::default(); // Append data value for value in values_vec.iter() { - deserializer.append_data_value(value.clone())?; + deserializer.append_data_value(value.clone(), &format)?; } // Pop all data value diff --git a/common/datavalues/tests/it/types/serializations.rs b/common/datavalues/tests/it/types/serializations.rs index 23d8c04102e5c..0610bfe9b643e 100644 --- a/common/datavalues/tests/it/types/serializations.rs +++ b/common/datavalues/tests/it/types/serializations.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use common_datavalues::prelude::*; use common_exception::Result; +use common_io::prelude::FormatSettings; use pretty_assertions::assert_eq; use serde_json::json; @@ -144,12 +145,13 @@ fn test_serializers() -> Result<()> { }, ]; + let format = FormatSettings::default(); for test in tests { let serializer = test.data_type.create_serializer(); - let val_res = serializer.serialize_value(&test.value)?; + let val_res = serializer.serialize_value(&test.value, &format)?; assert_eq!(&val_res, test.val_str, "case: {:#?}", test.name); - let col_res = serializer.serialize_column(&test.column)?; + let col_res = serializer.serialize_column(&test.column, &format)?; assert_eq!(col_res, test.col_str, "case: {:#?}", test.name); } @@ -175,7 +177,7 @@ fn test_serializers() -> Result<()> { DataValue::Boolean(true), DataValue::UInt64(18869), ]); - let result = serializer.serialize_value(&value)?; + let result = serializer.serialize_value(&value, &format)?; let expect = "(1.2, 'hello', 1, '2021-08-30')"; assert_eq!(&result, expect); } diff --git a/common/functions/Cargo.toml b/common/functions/Cargo.toml index cb0fee2fc441f..730c2bcd34c48 100644 --- a/common/functions/Cargo.toml +++ b/common/functions/Cargo.toml @@ -24,6 +24,7 @@ blake3 = "1.3.1" bstr = "0.2.17" bumpalo = "3.9.1" bytes = "1.1.0" +chrono-tz = "0.6.1" crc32fast = "1.3.2" dyn-clone = "1.0.5" hex = "0.4.3" diff --git a/common/functions/src/scalars/comparisons/comparison.rs b/common/functions/src/scalars/comparisons/comparison.rs index 68f0765be58a5..da7a5fae48ea0 100644 --- a/common/functions/src/scalars/comparisons/comparison.rs +++ b/common/functions/src/scalars/comparisons/comparison.rs @@ -297,14 +297,15 @@ where F: Fn(T::Simd, T::Simd) -> u8 + Send + Sync + Clone, { fn eval(&self, l: &ColumnWithField, r: &ColumnWithField) -> Result { + let func_ctx = FunctionContext::default(); let lhs = if self.need_cast && l.data_type() != &self.least_supertype { - cast_column_field(l, l.data_type(), &self.least_supertype)? + cast_column_field(l, l.data_type(), &self.least_supertype, &func_ctx)? } else { l.column().clone() }; let rhs = if self.need_cast && r.data_type() != &self.least_supertype { - cast_column_field(r, r.data_type(), &self.least_supertype)? + cast_column_field(r, r.data_type(), &self.least_supertype, &func_ctx)? } else { r.column().clone() }; diff --git a/common/functions/src/scalars/conditionals/if.rs b/common/functions/src/scalars/conditionals/if.rs index 8e9ca350d9420..60b08977ec8cb 100644 --- a/common/functions/src/scalars/conditionals/if.rs +++ b/common/functions/src/scalars/conditionals/if.rs @@ -57,6 +57,7 @@ impl IfFunction { &self, cond_col: &ColumnRef, columns: &ColumnsWithField, + _func_ctx: &FunctionContext, ) -> Result { debug_assert!(cond_col.is_const()); // whether nullable or not, we can use viewer to make it @@ -77,6 +78,7 @@ impl IfFunction { cond_col: &BooleanColumn, columns: &ColumnsWithField, input_rows: usize, + func_ctx: &FunctionContext, ) -> Result { debug_assert!(columns[0].column().is_const() || columns[1].column().is_const()); let (lhs_col, rhs_col, reverse) = if columns[0].column().is_const() { @@ -85,8 +87,18 @@ impl IfFunction { (&columns[1], &columns[0], true) }; - let lhs = cast_column_field(lhs_col, lhs_col.data_type(), &self.least_supertype)?; - let rhs = cast_column_field(rhs_col, rhs_col.data_type(), &self.least_supertype)?; + let lhs = cast_column_field( + lhs_col, + lhs_col.data_type(), + &self.least_supertype, + func_ctx, + )?; + let rhs = cast_column_field( + rhs_col, + rhs_col.data_type(), + &self.least_supertype, + func_ctx, + )?; let type_id = remove_nullable(&lhs.data_type()).data_type_id(); @@ -181,12 +193,23 @@ impl IfFunction { cond_col: &BooleanColumn, columns: &ColumnsWithField, input_rows: usize, + func_ctx: &FunctionContext, ) -> Result { let lhs_col = &columns[0]; let rhs_col = &columns[1]; - let lhs = cast_column_field(lhs_col, lhs_col.data_type(), &self.least_supertype)?; - let rhs = cast_column_field(rhs_col, rhs_col.data_type(), &self.least_supertype)?; + let lhs = cast_column_field( + lhs_col, + lhs_col.data_type(), + &self.least_supertype, + func_ctx, + )?; + let rhs = cast_column_field( + rhs_col, + rhs_col.data_type(), + &self.least_supertype, + func_ctx, + )?; let type_id = remove_nullable(&self.least_supertype).data_type_id(); @@ -217,12 +240,23 @@ impl IfFunction { &self, cond_col: &BooleanColumn, columns: &ColumnsWithField, + func_ctx: &FunctionContext, ) -> Result { let lhs_col = &columns[0]; let rhs_col = &columns[1]; - let lhs = cast_column_field(lhs_col, lhs_col.data_type(), &self.least_supertype)?; - let rhs = cast_column_field(rhs_col, rhs_col.data_type(), &self.least_supertype)?; + let lhs = cast_column_field( + lhs_col, + lhs_col.data_type(), + &self.least_supertype, + func_ctx, + )?; + let rhs = cast_column_field( + rhs_col, + rhs_col.data_type(), + &self.least_supertype, + func_ctx, + )?; debug_assert!(!self.least_supertype.is_nullable()); let type_id = self.least_supertype.data_type_id(); @@ -256,7 +290,7 @@ impl Function for IfFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -265,24 +299,24 @@ impl Function for IfFunction { // 1. fast path for cond nullable or const or null column if cond_col.is_const() { - return self.eval_cond_const(&cond_col, &columns[1..]); + return self.eval_cond_const(&cond_col, &columns[1..], &func_ctx); } let cond_col = Series::check_get_scalar::(&cond_col)?; // 2. handle when lhs / rhs is const if columns[1].column().is_const() || columns[2].column().is_const() { - return self.eval_const(cond_col, &columns[1..], input_rows); + return self.eval_const(cond_col, &columns[1..], input_rows, &func_ctx); } // 3. handle nullable column let whether_nullable = |col: &ColumnRef| col.is_nullable() || col.data_type().is_null(); if whether_nullable(columns[1].column()) || whether_nullable(columns[2].column()) { - return self.eval_nullable(cond_col, &columns[1..], input_rows); + return self.eval_nullable(cond_col, &columns[1..], input_rows, &func_ctx); } // 4. all normal type and are not nullable/const - self.eval_generic(cond_col, &columns[1..]) + self.eval_generic(cond_col, &columns[1..], &func_ctx) } } diff --git a/common/functions/src/scalars/conditionals/in_basic.rs b/common/functions/src/scalars/conditionals/in_basic.rs index d78c664d1fdac..845c26b11f138 100644 --- a/common/functions/src/scalars/conditionals/in_basic.rs +++ b/common/functions/src/scalars/conditionals/in_basic.rs @@ -58,11 +58,11 @@ impl InFunction { } macro_rules! scalar_contains { - ($T: ident, $INPUT_COL: expr, $ROWS: expr, $COLUMNS: expr, $CAST_TYPE: ident) => {{ + ($T: ident, $INPUT_COL: expr, $ROWS: expr, $COLUMNS: expr, $CAST_TYPE: ident, $FUNC_CTX: expr) => {{ let mut builder: ColumnBuilder = ColumnBuilder::with_capacity($ROWS); let mut vals_set = HashSet::with_capacity($ROWS - 1); for col in &$COLUMNS[1..] { - let col = cast_column_field(col, col.data_type(), &$CAST_TYPE)?; + let col = cast_column_field(col, col.data_type(), &$CAST_TYPE, &$FUNC_CTX)?; let col_viewer = $T::try_create_viewer(&col)?; if col_viewer.valid_at(0) { let val = col_viewer.value_at(0).to_owned_scalar(); @@ -80,11 +80,11 @@ macro_rules! scalar_contains { } macro_rules! float_contains { - ($T: ident, $INPUT_COL: expr, $ROWS: expr, $COLUMNS: expr, $CAST_TYPE: ident) => {{ + ($T: ident, $INPUT_COL: expr, $ROWS: expr, $COLUMNS: expr, $CAST_TYPE: ident, $FUNC_CTX: expr) => {{ let mut builder: ColumnBuilder = ColumnBuilder::with_capacity($ROWS); let mut vals_set = HashSet::with_capacity($ROWS - 1); for col in &$COLUMNS[1..] { - let col = cast_column_field(col, col.data_type(), &$CAST_TYPE)?; + let col = cast_column_field(col, col.data_type(), &$CAST_TYPE, &$FUNC_CTX)?; let col_viewer = $T::try_create_viewer(&col)?; if col_viewer.valid_at(0) { let val = col_viewer.value_at(0); @@ -115,7 +115,7 @@ impl Function for InFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -149,50 +149,139 @@ impl Function for InFunction { } let least_super_type_id = remove_nullable(&least_super_dt).data_type_id(); - let input_col = cast_column_field(&columns[0], columns[0].data_type(), &least_super_dt)?; + let input_col = cast_column_field( + &columns[0], + columns[0].data_type(), + &least_super_dt, + &func_ctx, + )?; match least_super_type_id { TypeID::Boolean => { - scalar_contains!(bool, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + bool, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::UInt8 => { - scalar_contains!(u8, input_col, input_rows, columns, least_super_dt) + scalar_contains!(u8, input_col, input_rows, columns, least_super_dt, func_ctx) } TypeID::UInt16 => { - scalar_contains!(u16, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + u16, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::UInt32 => { - scalar_contains!(u32, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + u32, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::UInt64 => { - scalar_contains!(u64, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + u64, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Int8 => { - scalar_contains!(i8, input_col, input_rows, columns, least_super_dt) + scalar_contains!(i8, input_col, input_rows, columns, least_super_dt, func_ctx) } TypeID::Int16 => { - scalar_contains!(i16, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + i16, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Int32 => { - scalar_contains!(i32, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + i32, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Int64 => { - scalar_contains!(i64, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + i64, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::String => { - scalar_contains!(Vu8, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + Vu8, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Float32 => { - float_contains!(f32, input_col, input_rows, columns, least_super_dt) + float_contains!( + f32, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Float64 => { - float_contains!(f64, input_col, input_rows, columns, least_super_dt) + float_contains!( + f64, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Date => { - scalar_contains!(i32, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + i32, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } TypeID::Timestamp => { - scalar_contains!(i64, input_col, input_rows, columns, least_super_dt) + scalar_contains!( + i64, + input_col, + input_rows, + columns, + least_super_dt, + func_ctx + ) } _ => Result::Err(ErrorCode::BadDataValueType(format!( "{} type is not supported for IN now", diff --git a/common/functions/src/scalars/dates/date.rs b/common/functions/src/scalars/dates/date.rs index 5e6beb9851c49..28f0db55d0e36 100644 --- a/common/functions/src/scalars/dates/date.rs +++ b/common/functions/src/scalars/dates/date.rs @@ -15,6 +15,7 @@ use super::now::NowFunction; use super::number_function::ToMondayFunction; use super::number_function::ToYearFunction; +use super::round_function::Round; use super::AddDaysFunction; use super::AddMonthsFunction; use super::AddTimesFunction; @@ -47,7 +48,7 @@ use crate::scalars::FunctionFeatures; pub struct DateFunction {} impl DateFunction { - fn round_function_creator(round: u32) -> FunctionDescription { + fn round_function_creator(round: Round) -> FunctionDescription { let creator: FactoryCreator = Box::new(move |display_name, args| { RoundFunction::try_create(display_name, args, round) }); @@ -84,17 +85,29 @@ impl DateFunction { factory.register("toYear", ToYearFunction::desc()); // rounders - factory.register("toStartOfSecond", Self::round_function_creator(1)); - factory.register("toStartOfMinute", Self::round_function_creator(60)); - factory.register("toStartOfFiveMinutes", Self::round_function_creator(5 * 60)); - factory.register("toStartOfTenMinutes", Self::round_function_creator(10 * 60)); + factory.register( + "toStartOfSecond", + Self::round_function_creator(Round::Second), + ); + factory.register( + "toStartOfMinute", + Self::round_function_creator(Round::Minute), + ); + factory.register( + "toStartOfFiveMinutes", + Self::round_function_creator(Round::FiveMinutes), + ); + factory.register( + "toStartOfTenMinutes", + Self::round_function_creator(Round::TenMinutes), + ); factory.register( "toStartOfFifteenMinutes", - Self::round_function_creator(15 * 60), + Self::round_function_creator(Round::FifteenMinutes), ); - factory.register("timeSlot", Self::round_function_creator(30 * 60)); - factory.register("toStartOfHour", Self::round_function_creator(60 * 60)); - factory.register("toStartOfDay", Self::round_function_creator(60 * 60 * 24)); + factory.register("timeSlot", Self::round_function_creator(Round::TimeSlot)); + factory.register("toStartOfHour", Self::round_function_creator(Round::Hour)); + factory.register("toStartOfDay", Self::round_function_creator(Round::Day)); factory.register("toStartOfWeek", ToStartOfWeekFunction::desc()); diff --git a/common/functions/src/scalars/dates/interval_function.rs b/common/functions/src/scalars/dates/interval_function.rs index 72bc4244a646e..a9aef2bffe4f4 100644 --- a/common/functions/src/scalars/dates/interval_function.rs +++ b/common/functions/src/scalars/dates/interval_function.rs @@ -20,6 +20,7 @@ use common_datavalues::chrono::Datelike; use common_datavalues::chrono::Duration; use common_datavalues::chrono::NaiveDate; use common_datavalues::chrono::NaiveDateTime; +use common_datavalues::chrono::TimeZone; use common_datavalues::prelude::*; use common_datavalues::with_match_primitive_types_error; use common_exception::ErrorCode; @@ -141,12 +142,12 @@ where fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, _input_rows: usize, ) -> Result { // Todo(zhyass): define the ctx out of the eval. - let mut ctx = EvalContext::new(self.factor, self.precision, None); + let mut ctx = EvalContext::new(self.factor, self.precision, None, func_ctx.tz); let col = scalar_binary_op( columns[0].column(), columns[1].column(), diff --git a/common/functions/src/scalars/dates/macros.rs b/common/functions/src/scalars/dates/macros.rs index 2bad39a3a0c70..73a6097fcd177 100644 --- a/common/functions/src/scalars/dates/macros.rs +++ b/common/functions/src/scalars/dates/macros.rs @@ -39,6 +39,7 @@ macro_rules! impl_interval_year_month { #[macro_export] macro_rules! define_date_add_year_months { ($l: ident, $r: ident, $ctx: ident, $op: expr) => {{ + let _tz = $ctx.tz; let factor = $ctx.factor; let epoch = NaiveDate::from_ymd(1970, 1, 1); let naive = epoch.checked_add_signed(Duration::days($l as i64)); @@ -65,21 +66,11 @@ macro_rules! define_date_add_year_months { #[macro_export] macro_rules! define_timestamp_add_year_months { ($l: ident, $r: ident, $ctx: ident, $op: expr) => {{ + let tz = $ctx.tz; let factor = $ctx.factor; let micros = $l; - let naive = NaiveDateTime::from_timestamp_opt( - micros / 1_000_000, - (micros % 1_000_000 * 1000) as u32, - ); - if naive.is_none() { - $ctx.set_error(ErrorCode::Overflow(format!( - "Overflow on datetime with microseconds {}", - $l - ))); - return 0; - }; + let date = tz.timestamp(micros / 1_000_000, (micros % 1_000_000 * 1000) as u32); - let date = naive.unwrap(); let new_date = $op(date.year(), date.month(), date.day(), $r.as_() * factor); new_date.map_or_else( |e| { diff --git a/common/functions/src/scalars/dates/number_function.rs b/common/functions/src/scalars/dates/number_function.rs index 6cfe7bf9e4e51..cdc257443fe3b 100644 --- a/common/functions/src/scalars/dates/number_function.rs +++ b/common/functions/src/scalars/dates/number_function.rs @@ -15,15 +15,16 @@ use std::fmt; use std::marker::PhantomData; +use chrono_tz::Tz; use common_datavalues::chrono::DateTime; use common_datavalues::chrono::Datelike; use common_datavalues::chrono::TimeZone; use common_datavalues::chrono::Timelike; -use common_datavalues::chrono::Utc; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; +use super::round_function::Round; use crate::scalars::function_factory::FunctionDescription; use crate::scalars::scalar_unary_op; use crate::scalars::CastFunction; @@ -47,7 +48,7 @@ pub struct NumberFunction { pub trait NumberOperator { const IS_DETERMINISTIC: bool; - fn to_number(_value: DateTime) -> R; + fn to_number(_value: DateTime, tz: &Tz) -> R; // Used to check the monotonicity of the function. // For example, ToDayOfYear is monotonous only when the time range is the same year. @@ -69,7 +70,7 @@ pub struct ToYYYYMM; impl NumberOperator for ToYYYYMM { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u32 { + fn to_number(value: DateTime, _tz: &Tz) -> u32 { value.year() as u32 * 100 + value.month() } } @@ -80,7 +81,7 @@ pub struct ToYYYYMMDD; impl NumberOperator for ToYYYYMMDD { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u32 { + fn to_number(value: DateTime, _tz: &Tz) -> u32 { value.year() as u32 * 10000 + value.month() * 100 + value.day() } } @@ -91,7 +92,7 @@ pub struct ToYYYYMMDDhhmmss; impl NumberOperator for ToYYYYMMDDhhmmss { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u64 { + fn to_number(value: DateTime, _tz: &Tz) -> u64 { value.year() as u64 * 10000000000 + value.month() as u64 * 100000000 + value.day() as u64 * 1000000 @@ -107,9 +108,9 @@ pub struct ToStartOfYear; impl NumberOperator for ToStartOfYear { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> i32 { - let end: DateTime = Utc.ymd(value.year(), 1, 1).and_hms(0, 0, 0); - get_day(end) as i32 + fn to_number(value: DateTime, tz: &Tz) -> i32 { + let end = tz.ymd(value.year(), 1, 1).and_hms(0, 0, 0); + get_day(end, tz) as i32 } fn return_type() -> Option { @@ -123,14 +124,14 @@ pub struct ToStartOfISOYear; impl NumberOperator for ToStartOfISOYear { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> i32 { + fn to_number(value: DateTime, tz: &Tz) -> i32 { let week_day = value.weekday().num_days_from_monday(); let iso_week = value.iso_week(); let iso_week_num = iso_week.week(); let sub_days = (iso_week_num - 1) * 7 + week_day; let result = value.timestamp_millis() - sub_days as i64 * 24 * 3600 * 1000; - let end: DateTime = Utc.timestamp_millis(result); - get_day(end) as i32 + let end = tz.timestamp_millis(result); + get_day(end, tz) as i32 } fn return_type() -> Option { @@ -144,10 +145,10 @@ pub struct ToStartOfQuarter; impl NumberOperator for ToStartOfQuarter { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> i32 { + fn to_number(value: DateTime, tz: &Tz) -> i32 { let new_month = value.month0() / 3 * 3 + 1; - let date = Utc.ymd(value.year(), new_month, 1).and_hms(0, 0, 0); - get_day(date) as i32 + let date = tz.ymd(value.year(), new_month, 1).and_hms(0, 0, 0); + get_day(date, tz) as i32 } fn return_type() -> Option { @@ -161,9 +162,9 @@ pub struct ToStartOfMonth; impl NumberOperator for ToStartOfMonth { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> i32 { - let date = Utc.ymd(value.year(), value.month(), 1).and_hms(0, 0, 0); - get_day(date) as i32 + fn to_number(value: DateTime, tz: &Tz) -> i32 { + let date = tz.ymd(value.year(), value.month(), 1).and_hms(0, 0, 0); + get_day(date, tz) as i32 } fn return_type() -> Option { @@ -177,7 +178,7 @@ pub struct ToMonth; impl NumberOperator for ToMonth { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u8 { + fn to_number(value: DateTime, _tz: &Tz) -> u8 { value.month() as u8 } @@ -194,7 +195,7 @@ pub struct ToDayOfYear; impl NumberOperator for ToDayOfYear { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u16 { + fn to_number(value: DateTime, _tz: &Tz) -> u16 { value.ordinal() as u16 } @@ -211,7 +212,7 @@ pub struct ToDayOfMonth; impl NumberOperator for ToDayOfMonth { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u8 { + fn to_number(value: DateTime, _tz: &Tz) -> u8 { value.day() as u8 } @@ -228,7 +229,7 @@ pub struct ToDayOfWeek; impl NumberOperator for ToDayOfWeek { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u8 { + fn to_number(value: DateTime, _tz: &Tz) -> u8 { value.weekday().number_from_monday() as u8 } @@ -244,7 +245,7 @@ pub struct ToHour; impl NumberOperator for ToHour { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u8 { + fn to_number(value: DateTime, _tz: &Tz) -> u8 { value.hour() as u8 } @@ -261,14 +262,14 @@ pub struct ToMinute; impl NumberOperator for ToMinute { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u8 { + fn to_number(value: DateTime, _tz: &Tz) -> u8 { value.minute() as u8 } // ToMinute is NOT a monotonic function in general, unless the time range is within the same hour. fn factor_function(_input_type: DataTypeImpl) -> Option> { Some( - RoundFunction::try_create("toStartOfHour", &[&TimestampType::new_impl(0)], 60 * 60) + RoundFunction::try_create("toStartOfHour", &[&TimestampType::new_impl(0)], Round::Hour) .unwrap(), ) } @@ -280,15 +281,19 @@ pub struct ToSecond; impl NumberOperator for ToSecond { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u8 { + fn to_number(value: DateTime, _tz: &Tz) -> u8 { value.second() as u8 } // ToSecond is NOT a monotonic function in general, unless the time range is within the same minute. fn factor_function(_input_type: DataTypeImpl) -> Option> { Some( - RoundFunction::try_create("toStartOfMinute", &[&TimestampType::new_impl(0)], 60) - .unwrap(), + RoundFunction::try_create( + "toStartOfMinute", + &[&TimestampType::new_impl(0)], + Round::Minute, + ) + .unwrap(), ) } } @@ -299,9 +304,9 @@ pub struct ToMonday; impl NumberOperator for ToMonday { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u16 { + fn to_number(value: DateTime, tz: &Tz) -> u16 { let weekday = value.weekday(); - (get_day(value) as u32 - weekday.num_days_from_monday()) as u16 + (get_day(value, tz) as u32 - weekday.num_days_from_monday()) as u16 } } @@ -311,7 +316,7 @@ pub struct ToYear; impl NumberOperator for ToYear { const IS_DETERMINISTIC: bool = true; - fn to_number(value: DateTime) -> u16 { + fn to_number(value: DateTime, _tz: &Tz) -> u16 { value.year() as u16 } } @@ -362,7 +367,7 @@ where fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &common_datavalues::ColumnsWithField, _input_rows: usize, ) -> Result { @@ -370,9 +375,10 @@ where let number_array = match type_id { TypeID::Date => { + let tz = "UTC".parse::().unwrap(); let func = |v: i32, _ctx: &mut EvalContext| { - let date_time = Utc.timestamp(v as i64 * 24 * 3600, 0_u32); - T::to_number(date_time) + let date_time = tz.timestamp(v as i64 * 24 * 3600, 0_u32); + T::to_number(date_time, &tz) }; let col = scalar_unary_op::( columns[0].column(), @@ -382,9 +388,13 @@ where Ok(col.arc()) } TypeID::Timestamp => { + // round_func need to calcute it with origin timezone + // such as in UTC: 2022-03-31 22:00 and in +8:00 time is 2022-04-01 6:00 + // then the result of to the month of should be 2022-04-01 6:00 rather than 2022-03-01 22:00 + let tz = func_ctx.tz; let func = |v: i64, _ctx: &mut EvalContext| { - let date_time = Utc.timestamp(v / 1_000_000, 0_u32); - T::to_number(date_time) + let date_time = tz.timestamp(v / 1_000_000, 0_u32); + T::to_number(date_time, &tz) }; let col = scalar_unary_op::( columns[0].column(), @@ -444,8 +454,8 @@ impl fmt::Display for NumberFunction { } } -fn get_day(date: DateTime) -> i64 { - let start: DateTime = Utc.ymd(1970, 1, 1).and_hms(0, 0, 0); +fn get_day(date: DateTime, tz: &Tz) -> i64 { + let start = tz.ymd(1970, 1, 1).and_hms(0, 0, 0); let duration = date.signed_duration_since(start); duration.num_days() } diff --git a/common/functions/src/scalars/dates/round_function.rs b/common/functions/src/scalars/dates/round_function.rs index 591349171c83c..7e0477942c4f4 100644 --- a/common/functions/src/scalars/dates/round_function.rs +++ b/common/functions/src/scalars/dates/round_function.rs @@ -14,6 +14,10 @@ use std::fmt; +use chrono_tz::Tz; +use common_datavalues::chrono::Datelike; +use common_datavalues::chrono::TimeZone; +use common_datavalues::chrono::Timelike; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; @@ -24,17 +28,29 @@ use crate::scalars::Function; use crate::scalars::FunctionContext; use crate::scalars::Monotonicity; +#[derive(Clone, Copy)] +pub enum Round { + Second, + Minute, + FiveMinutes, + TenMinutes, + FifteenMinutes, + TimeSlot, + Hour, + Day, +} + #[derive(Clone)] pub struct RoundFunction { display_name: String, - round: u32, + round: Round, } impl RoundFunction { pub fn try_create( display_name: &str, args: &[&DataTypeImpl], - round: u32, + round: Round, ) -> Result> { if args[0].data_type_id() != TypeID::Timestamp { return Err(ErrorCode::BadDataValueType(format!( @@ -56,9 +72,39 @@ impl RoundFunction { // Consider about the timezones/offsets // Currently: assuming timezone offset is a multiple of round. #[inline] - fn execute(&self, time: i64) -> i64 { - let round = self.round as i64; - time / MICROSECONDS / round * round * MICROSECONDS + fn execute(&self, time: i64, tz: &Tz) -> i64 { + let dt = tz.timestamp(time / MICROSECONDS, 0_u32); + match self.round { + Round::Second => dt.timestamp_micros(), + Round::Minute => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(dt.hour(), dt.minute(), 0, 0) + .timestamp_micros(), + Round::FiveMinutes => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(dt.hour(), dt.minute() / 5 * 5, 0, 0) + .timestamp_micros(), + Round::TenMinutes => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(dt.hour(), dt.minute() / 10 * 10, 0, 0) + .timestamp_micros(), + Round::FifteenMinutes => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(dt.hour(), dt.minute() / 15 * 15, 0, 0) + .timestamp_micros(), + Round::TimeSlot => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(dt.hour(), dt.minute() / 30 * 30, 0, 0) + .timestamp_micros(), + Round::Hour => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(dt.hour(), 0, 0, 0) + .timestamp_micros(), + Round::Day => tz + .ymd(dt.year(), dt.month(), dt.day()) + .and_hms_micro(0, 0, 0, 0) + .timestamp_micros(), + } } } @@ -73,13 +119,16 @@ impl Function for RoundFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &common_datavalues::ColumnsWithField, _input_rows: usize, ) -> Result { - let func = |val: i64, _ctx: &mut EvalContext| self.execute(val); - let col = - scalar_unary_op::(columns[0].column(), func, &mut EvalContext::default())?; + let func = |val: i64, ctx: &mut EvalContext| self.execute(val, &ctx.tz); + let mut eval_context = EvalContext { + tz: func_ctx.tz, + ..Default::default() + }; + let col = scalar_unary_op::(columns[0].column(), func, &mut eval_context)?; for micros in col.iter() { let _ = check_timestamp(*micros)?; } diff --git a/common/functions/src/scalars/dates/week_date.rs b/common/functions/src/scalars/dates/week_date.rs index 5932e0e068411..4c5308839fa17 100644 --- a/common/functions/src/scalars/dates/week_date.rs +++ b/common/functions/src/scalars/dates/week_date.rs @@ -16,11 +16,11 @@ use std::fmt; use std::marker::PhantomData; use std::ops::Sub; +use chrono_tz::Tz; use common_datavalues::chrono::DateTime; use common_datavalues::chrono::Datelike; use common_datavalues::chrono::Duration; use common_datavalues::chrono::TimeZone; -use common_datavalues::chrono::Utc; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; @@ -45,7 +45,7 @@ pub trait WeekResultFunction { const IS_DETERMINISTIC: bool; fn return_type() -> DataTypeImpl; - fn to_number(_value: DateTime, mode: u64) -> R; + fn to_number(_value: DateTime, mode: u64, tz: &Tz) -> R; fn factor_function() -> Option> { None } @@ -60,7 +60,7 @@ impl WeekResultFunction for ToStartOfWeek { fn return_type() -> DataTypeImpl { DateType::new_impl() } - fn to_number(value: DateTime, week_mode: u64) -> i32 { + fn to_number(value: DateTime, week_mode: u64, tz: &Tz) -> i32 { let mut weekday = value.weekday().number_from_sunday(); if week_mode & 1 == 1 { weekday = value.weekday().number_from_monday(); @@ -68,7 +68,7 @@ impl WeekResultFunction for ToStartOfWeek { weekday -= 1; let duration = Duration::days(weekday as i64); let result = value.sub(duration); - get_day(result) + get_day(result, tz) } } @@ -124,7 +124,7 @@ where fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -147,12 +147,14 @@ where mode = week_mode; } + let tz = func_ctx.tz; + match columns[0].data_type().data_type_id() { TypeID::Date => { let col: &Int32Column = Series::check_get(columns[0].column())?; let iter = col.scalar_iter().map(|v| { - let date_time = Utc.timestamp(v as i64 * 24 * 3600, 0_u32); - T::to_number(date_time, mode) + let date_time = tz.timestamp(v as i64 * 24 * 3600, 0_u32); + T::to_number(date_time, mode, &tz) }); let col = PrimitiveColumn::::from_owned_iterator(iter).arc(); let viewer = i32::try_create_viewer(&col)?; @@ -164,8 +166,8 @@ where TypeID::Timestamp => { let col: &Int64Column = Series::check_get(columns[0].column())?; let iter = col.scalar_iter().map(|v| { - let date_time = Utc.timestamp(v / 1_000_000, 0_u32); - T::to_number(date_time, mode) + let date_time = tz.timestamp(v / 1_000_000, 0_u32); + T::to_number(date_time, mode, &tz) }); let col = PrimitiveColumn::::from_owned_iterator(iter).arc(); let viewer = i32::try_create_viewer(&col)?; @@ -222,8 +224,8 @@ impl fmt::Display for WeekFunction { } } -fn get_day(date: DateTime) -> i32 { - let start: DateTime = Utc.ymd(1970, 1, 1).and_hms(0, 0, 0); +fn get_day(date: DateTime, tz: &Tz) -> i32 { + let start = tz.ymd(1970, 1, 1).and_hms(0, 0, 0); let duration = date.signed_duration_since(start); duration.num_days() as i32 } diff --git a/common/functions/src/scalars/expressions/cast.rs b/common/functions/src/scalars/expressions/cast.rs index 4369cb7da9b89..f7cc788a2491c 100644 --- a/common/functions/src/scalars/expressions/cast.rs +++ b/common/functions/src/scalars/expressions/cast.rs @@ -82,11 +82,11 @@ impl Function for CastFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, _input_rows: usize, ) -> Result { - cast_column_field(&columns[0], &self.from_type, &self.target_type) + cast_column_field(&columns[0], &self.from_type, &self.target_type, &func_ctx) } } diff --git a/common/functions/src/scalars/expressions/cast_from_datetimes.rs b/common/functions/src/scalars/expressions/cast_from_datetimes.rs index 71d80ea6c96e3..8c94d21f3a60d 100644 --- a/common/functions/src/scalars/expressions/cast_from_datetimes.rs +++ b/common/functions/src/scalars/expressions/cast_from_datetimes.rs @@ -14,15 +14,16 @@ use std::sync::Arc; +use chrono_tz::Tz; use common_arrow::arrow::bitmap::Bitmap; use common_datavalues::chrono::DateTime; use common_datavalues::chrono::TimeZone; -use common_datavalues::chrono::Utc; use common_datavalues::prelude::*; use common_exception::Result; use super::cast_with_type::arrow_cast_compute; use super::cast_with_type::CastOptions; +use crate::scalars::FunctionContext; const DATE_FMT: &str = "%Y-%m-%d"; // const TIME_FMT: &str = "%Y-%m-%d %H:%M:%S"; @@ -32,6 +33,7 @@ pub fn cast_from_date( from_type: &DataTypeImpl, data_type: &DataTypeImpl, cast_options: &CastOptions, + func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { let c = Series::remove_nullable(column); let c: &Int32Column = Series::check_get(&c)?; @@ -42,7 +44,8 @@ pub fn cast_from_date( let mut builder = ColumnBuilder::::with_capacity(size); for v in c.iter() { - let s = timestamp_to_string(Utc.timestamp(*v as i64 * 24 * 3600, 0_u32), DATE_FMT); + let utc = "UTC".parse::().unwrap(); + let s = timestamp_to_string(utc.timestamp(*v as i64 * 24 * 3600, 0_u32), DATE_FMT); builder.append(s.as_bytes()); } Ok((builder.build(size), None)) @@ -54,7 +57,7 @@ pub fn cast_from_date( Ok((result, None)) } - _ => arrow_cast_compute(column, from_type, data_type, cast_options), + _ => arrow_cast_compute(column, from_type, data_type, cast_options, func_ctx), } } @@ -63,6 +66,7 @@ pub fn cast_from_timestamp( from_type: &DataTypeImpl, data_type: &DataTypeImpl, cast_options: &CastOptions, + func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { let c = Series::remove_nullable(column); let c: &Int64Column = Series::check_get(&c)?; @@ -73,9 +77,10 @@ pub fn cast_from_timestamp( match data_type.data_type_id() { TypeID::String => { let mut builder = MutableStringColumn::with_capacity(size); + let tz = func_ctx.tz; for v in c.iter() { let s = timestamp_to_string( - date_time64.utc_timestamp(*v), + tz.timestamp(*v / 1_000_000, (*v % 1_000_000 * 1_000) as u32), date_time64.format_string().as_str(), ); builder.append_value(s.as_bytes()); @@ -97,11 +102,11 @@ pub fn cast_from_timestamp( Ok((result, None)) } - _ => arrow_cast_compute(column, from_type, data_type, cast_options), + _ => arrow_cast_compute(column, from_type, data_type, cast_options, func_ctx), } } #[inline] -fn timestamp_to_string(date: DateTime, fmt: &str) -> String { +fn timestamp_to_string(date: DateTime, fmt: &str) -> String { date.format(fmt).to_string() } diff --git a/common/functions/src/scalars/expressions/cast_from_string.rs b/common/functions/src/scalars/expressions/cast_from_string.rs index 7c962bb5c5582..67507b4598fb7 100644 --- a/common/functions/src/scalars/expressions/cast_from_string.rs +++ b/common/functions/src/scalars/expressions/cast_from_string.rs @@ -12,24 +12,27 @@ // See the License for the specific language governing permissions and // limitations under the License. -// use chrono_tz::Tz; +use chrono_tz::Tz; use common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::temporal_conversions::EPOCH_DAYS_FROM_CE; +use common_datavalues::chrono::DateTime; use common_datavalues::chrono::Datelike; use common_datavalues::chrono::NaiveDate; -use common_datavalues::chrono::NaiveDateTime; +use common_datavalues::chrono::TimeZone; use common_datavalues::prelude::*; use common_exception::Result; use super::cast_with_type::arrow_cast_compute; use super::cast_with_type::new_mutable_bitmap; use super::cast_with_type::CastOptions; +use crate::scalars::FunctionContext; pub fn cast_from_string( column: &ColumnRef, from_type: &DataTypeImpl, data_type: &DataTypeImpl, cast_options: &CastOptions, + func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { let str_column = Series::remove_nullable(column); let str_column: &StringColumn = Series::check_get(&str_column)?; @@ -53,9 +56,9 @@ pub fn cast_from_string( TypeID::Timestamp => { let mut builder = ColumnBuilder::::with_capacity(size); - + let tz = func_ctx.tz; for (row, v) in str_column.iter().enumerate() { - match string_to_timestamp(v) { + match string_to_timestamp(v, &tz) { Some(d) => { builder.append(d.timestamp_micros()); } @@ -78,15 +81,15 @@ pub fn cast_from_string( Ok((builder.build(size), Some(bitmap.into()))) } TypeID::Interval => todo!(), - _ => arrow_cast_compute(column, from_type, data_type, cast_options), + _ => arrow_cast_compute(column, from_type, data_type, cast_options, func_ctx), } } // TODO support timezone #[inline] -pub fn string_to_timestamp(date_str: impl AsRef<[u8]>) -> Option { +pub fn string_to_timestamp(date_str: impl AsRef<[u8]>, tz: &Tz) -> Option> { let s = std::str::from_utf8(date_str.as_ref()).ok(); - s.and_then(|c| NaiveDateTime::parse_from_str(c, "%Y-%m-%d %H:%M:%S%.9f").ok()) + s.and_then(|c| tz.datetime_from_str(c, "%Y-%m-%d %H:%M:%S%.f").ok()) } #[inline] diff --git a/common/functions/src/scalars/expressions/cast_from_variant.rs b/common/functions/src/scalars/expressions/cast_from_variant.rs index 7da68fbe25a98..974334d571a93 100644 --- a/common/functions/src/scalars/expressions/cast_from_variant.rs +++ b/common/functions/src/scalars/expressions/cast_from_variant.rs @@ -24,10 +24,12 @@ use serde_json::Value as JsonValue; use super::cast_from_string::string_to_date; use super::cast_from_string::string_to_timestamp; use super::cast_with_type::new_mutable_bitmap; +use crate::scalars::FunctionContext; pub fn cast_from_variant( column: &ColumnRef, data_type: &DataTypeImpl, + func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { let column = Series::remove_nullable(column); let json_column: &VariantColumn = if column.is_const() { @@ -134,12 +136,12 @@ pub fn cast_from_variant( TypeID::Timestamp => { // TODO(veeupup): support datetime with precision let mut builder = ColumnBuilder::::with_capacity(size); - + let tz = func_ctx.tz; for (row, value) in json_column.iter().enumerate() { match value.as_ref() { JsonValue::Null => bitmap.set(row, false), JsonValue::String(v) => { - if let Some(d) = string_to_timestamp(v) { + if let Some(d) = string_to_timestamp(v, &tz) { builder.append(d.timestamp_micros()); } else { bitmap.set(row, false); diff --git a/common/functions/src/scalars/expressions/cast_with_type.rs b/common/functions/src/scalars/expressions/cast_with_type.rs index 46c643b1fb957..50b06c0d8dd43 100644 --- a/common/functions/src/scalars/expressions/cast_with_type.rs +++ b/common/functions/src/scalars/expressions/cast_with_type.rs @@ -22,11 +22,13 @@ use common_arrow::arrow::compute::cast::CastOptions as ArrowOption; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use super::cast_from_datetimes::cast_from_date; use super::cast_from_string::cast_from_string; use super::cast_from_variant::cast_from_variant; use crate::scalars::expressions::cast_from_datetimes::cast_from_timestamp; +use crate::scalars::FunctionContext; #[derive(PartialEq, Eq, Debug, Clone, Copy)] pub struct CastOptions { @@ -66,23 +68,27 @@ pub fn cast_column_field( column_with_field: &ColumnWithField, from_type: &DataTypeImpl, target_type: &DataTypeImpl, + func_ctx: &FunctionContext, ) -> Result { cast_with_type( column_with_field.column(), from_type, target_type, &DEFAULT_CAST_OPTIONS, + func_ctx, ) } // No logical type is specified // Use Default options pub fn default_column_cast(column: &ColumnRef, data_type: &DataTypeImpl) -> Result { + let func_ctx = FunctionContext::default(); cast_with_type( column, &column.data_type(), data_type, &DEFAULT_CAST_OPTIONS, + &func_ctx, ) } @@ -91,6 +97,7 @@ pub fn cast_with_type( from_type: &DataTypeImpl, target_type: &DataTypeImpl, cast_options: &CastOptions, + func_ctx: &FunctionContext, ) -> Result { // they are pyhsically the same type if &column.data_type() == target_type { @@ -114,7 +121,7 @@ pub fn cast_with_type( if column.is_const() { let col: &ConstColumn = Series::check_get(column)?; let inner = col.inner(); - let res = cast_with_type(inner, from_type, target_type, cast_options)?; + let res = cast_with_type(inner, from_type, target_type, cast_options, func_ctx)?; return Ok(ConstColumn::new(res, column.len()).arc()); } @@ -122,18 +129,37 @@ pub fn cast_with_type( let nonull_data_type = remove_nullable(target_type); let (result, valids) = match nonull_from_type.data_type_id() { - TypeID::String => { - cast_from_string(column, &nonull_from_type, &nonull_data_type, cast_options) - } - TypeID::Date => cast_from_date(column, &nonull_from_type, &nonull_data_type, cast_options), - TypeID::Timestamp => { - cast_from_timestamp(column, &nonull_from_type, &nonull_data_type, cast_options) - } + TypeID::String => cast_from_string( + column, + &nonull_from_type, + &nonull_data_type, + cast_options, + func_ctx, + ), + TypeID::Date => cast_from_date( + column, + &nonull_from_type, + &nonull_data_type, + cast_options, + func_ctx, + ), + TypeID::Timestamp => cast_from_timestamp( + column, + &nonull_from_type, + &nonull_data_type, + cast_options, + func_ctx, + ), TypeID::Variant | TypeID::VariantArray | TypeID::VariantObject => { - cast_from_variant(column, &nonull_data_type) + cast_from_variant(column, &nonull_data_type, func_ctx) } - // TypeID::Interval => arrow_cast_compute(column, &nonull_data_type, cast_options), - _ => arrow_cast_compute(column, &nonull_from_type, &nonull_data_type, cast_options), + _ => arrow_cast_compute( + column, + &nonull_from_type, + &nonull_data_type, + cast_options, + func_ctx, + ), }?; // check date/timestamp bound @@ -183,6 +209,7 @@ pub fn cast_to_variant( column: &ColumnRef, from_type: &DataTypeImpl, data_type: &DataTypeImpl, + _func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { let column = Series::remove_nullable(column); let size = column.len(); @@ -201,7 +228,8 @@ pub fn cast_to_variant( let mut builder = ColumnBuilder::::with_capacity(size); if from_type.data_type_id().is_numeric() || from_type.data_type_id() == TypeID::Boolean { let serializer = from_type.create_serializer(); - match serializer.serialize_json_object(&column, None) { + let format = FormatSettings::default(); + match serializer.serialize_json_object(&column, None, &format) { Ok(values) => { for v in values { builder.append(&VariantValue::from(v)); @@ -224,9 +252,10 @@ pub fn arrow_cast_compute( from_type: &DataTypeImpl, data_type: &DataTypeImpl, cast_options: &CastOptions, + func_ctx: &FunctionContext, ) -> Result<(ColumnRef, Option)> { if data_type.data_type_id().is_variant() { - return cast_to_variant(column, from_type, data_type); + return cast_to_variant(column, from_type, data_type, func_ctx); } let arrow_array = column.as_arrow_array(); diff --git a/common/functions/src/scalars/expressions/ctx.rs b/common/functions/src/scalars/expressions/ctx.rs index b3029f8f826dd..10f5cca1c98c5 100644 --- a/common/functions/src/scalars/expressions/ctx.rs +++ b/common/functions/src/scalars/expressions/ctx.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use chrono_tz::Tz; use common_exception::ErrorCode; #[derive(Debug, Clone)] @@ -19,24 +20,28 @@ pub struct EvalContext { pub factor: i64, pub precision: usize, pub error: Option, + pub tz: Tz, } impl Default for EvalContext { fn default() -> Self { + let tz = "UTC".parse::().unwrap(); Self { factor: 1, precision: 0, error: None, + tz, } } } impl EvalContext { - pub fn new(factor: i64, precision: usize, error: Option) -> Self { + pub fn new(factor: i64, precision: usize, error: Option, tz: Tz) -> Self { Self { factor, precision, error, + tz, } } diff --git a/common/functions/src/scalars/function.rs b/common/functions/src/scalars/function.rs index 7e33f67845d85..13e2800f42dd8 100644 --- a/common/functions/src/scalars/function.rs +++ b/common/functions/src/scalars/function.rs @@ -14,6 +14,7 @@ use std::fmt; +use chrono_tz::Tz; use common_datavalues::ColumnRef; use common_datavalues::ColumnsWithField; use common_datavalues::DataTypeImpl; @@ -25,13 +26,13 @@ use super::Monotonicity; /// for now, this is only store Timezone #[derive(Clone)] pub struct FunctionContext { - pub tz: String, + pub tz: Tz, } impl Default for FunctionContext { fn default() -> Self { Self { - tz: "UTC".to_string(), + tz: "UTC".parse::().unwrap(), } } } diff --git a/common/functions/src/scalars/function_monotonic.rs b/common/functions/src/scalars/function_monotonic.rs index c85da52d6c084..2142fd46c1201 100644 --- a/common/functions/src/scalars/function_monotonic.rs +++ b/common/functions/src/scalars/function_monotonic.rs @@ -17,6 +17,7 @@ use common_exception::ErrorCode; use common_exception::Result; use super::cast_column_field; +use super::FunctionContext; #[derive(Clone)] pub struct Monotonicity { @@ -102,7 +103,8 @@ impl Monotonicity { }; if let (Some(max), Some(min)) = (max, min) { - let col = cast_column_field(&min, min.data_type(), &f64::to_data_type())?; + let func_ctx = FunctionContext::default(); + let col = cast_column_field(&min, min.data_type(), &f64::to_data_type(), &func_ctx)?; let min_val = col.get_f64(0)?; if min_val >= 0.0 { @@ -113,7 +115,7 @@ impl Monotonicity { return Ok(-1); } - let col = cast_column_field(&max, max.data_type(), &f64::to_data_type())?; + let col = cast_column_field(&max, max.data_type(), &f64::to_data_type(), &func_ctx)?; let max_val = col.get_f64(0)?; if max_val <= 0.0 { diff --git a/common/functions/src/scalars/hashes/city64_with_seed.rs b/common/functions/src/scalars/hashes/city64_with_seed.rs index 72a4a6397438c..584c41b110576 100644 --- a/common/functions/src/scalars/hashes/city64_with_seed.rs +++ b/common/functions/src/scalars/hashes/city64_with_seed.rs @@ -114,7 +114,7 @@ impl Function for City64WithSeedFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &common_datavalues::ColumnsWithField, _input_rows: usize, ) -> Result { @@ -137,8 +137,12 @@ impl Function for City64WithSeedFunction { }); Ok(Arc::new(result_col)) } else { - let seed_col = - cast_column_field(&columns[1], columns[1].data_type(), &UInt64Type::new_impl())?; + let seed_col = cast_column_field( + &columns[1], + columns[1].data_type(), + &UInt64Type::new_impl(), + &func_ctx, + )?; let seed_viewer = u64::try_create_viewer(&seed_col)?; let result_col = with_match_scalar_types_error!(physical_data_type, |$S| { diff --git a/common/functions/src/scalars/hashes/sha2hash.rs b/common/functions/src/scalars/hashes/sha2hash.rs index d40da97429ebd..ea8524f878f5a 100644 --- a/common/functions/src/scalars/hashes/sha2hash.rs +++ b/common/functions/src/scalars/hashes/sha2hash.rs @@ -71,7 +71,7 @@ impl Function for Sha2HashFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &common_datavalues::ColumnsWithField, _input_rows: usize, ) -> Result { @@ -124,8 +124,12 @@ impl Function for Sha2HashFunction { Ok(Arc::new(col)) } else { - let l = - cast_column_field(&columns[1], columns[1].data_type(), &UInt16Type::new_impl())?; + let l = cast_column_field( + &columns[1], + columns[1].data_type(), + &UInt16Type::new_impl(), + &func_ctx, + )?; let l_viewer = u16::try_create_viewer(&l)?; let mut col_builder = MutableStringColumn::with_capacity(l.len()); diff --git a/common/functions/src/scalars/logics/and.rs b/common/functions/src/scalars/logics/and.rs index bfa8f895d14e5..eef2d0027a2de 100644 --- a/common/functions/src/scalars/logics/and.rs +++ b/common/functions/src/scalars/logics/and.rs @@ -22,6 +22,7 @@ use crate::calcute; use crate::impl_logic_expression; use crate::scalars::cast_column_field; use crate::scalars::Function; +use crate::scalars::FunctionContext; use crate::scalars::FunctionDescription; use crate::scalars::FunctionFeatures; diff --git a/common/functions/src/scalars/logics/logic.rs b/common/functions/src/scalars/logics/logic.rs index c62e4de6791d5..c1e6d10bd8f7f 100644 --- a/common/functions/src/scalars/logics/logic.rs +++ b/common/functions/src/scalars/logics/logic.rs @@ -53,7 +53,12 @@ pub struct LogicFunctionImpl { } pub trait LogicExpression: Sync + Send { - fn eval(columns: &ColumnsWithField, input_rows: usize, nullable: bool) -> Result; + fn eval( + func_ctx: FunctionContext, + columns: &ColumnsWithField, + input_rows: usize, + nullable: bool, + ) -> Result; } impl LogicFunctionImpl @@ -97,11 +102,11 @@ where F: LogicExpression + Clone fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { - F::eval(columns, input_rows, self.nullable) + F::eval(func_ctx, columns, input_rows, self.nullable) } } diff --git a/common/functions/src/scalars/logics/macros.rs b/common/functions/src/scalars/logics/macros.rs index 3ada8655fd7ef..8b5b544e163f8 100644 --- a/common/functions/src/scalars/logics/macros.rs +++ b/common/functions/src/scalars/logics/macros.rs @@ -28,15 +28,15 @@ macro_rules! impl_logic_expression { pub struct $name; impl LogicExpression for $name { - fn eval(columns: &ColumnsWithField, input_rows: usize, nullable: bool) -> Result { + fn eval(func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, nullable: bool) -> Result { let dt = if nullable { NullableType::new_impl(BooleanType::new_impl()) } else { BooleanType::new_impl() }; - let lhs = cast_column_field(&columns[0], columns[0].data_type(), &dt)?; - let rhs = cast_column_field(&columns[1], columns[1].data_type(), &dt)?; + let lhs = cast_column_field(&columns[0], columns[0].data_type(), &dt, &func_ctx)?; + let rhs = cast_column_field(&columns[1], columns[1].data_type(), &dt, &func_ctx)?; if nullable { let lhs_viewer = bool::try_create_viewer(&lhs)?; diff --git a/common/functions/src/scalars/logics/not.rs b/common/functions/src/scalars/logics/not.rs index 283c45ff7eae2..e485e8c73df81 100644 --- a/common/functions/src/scalars/logics/not.rs +++ b/common/functions/src/scalars/logics/not.rs @@ -20,6 +20,7 @@ use super::logic::LogicFunctionImpl; use super::logic::LogicOperator; use crate::scalars::cast_column_field; use crate::scalars::Function; +use crate::scalars::FunctionContext; use crate::scalars::FunctionDescription; use crate::scalars::FunctionFeatures; @@ -27,11 +28,17 @@ use crate::scalars::FunctionFeatures; pub struct LogicNotExpression; impl LogicExpression for LogicNotExpression { - fn eval(columns: &ColumnsWithField, input_rows: usize, _nullable: bool) -> Result { + fn eval( + func_ctx: FunctionContext, + columns: &ColumnsWithField, + input_rows: usize, + _nullable: bool, + ) -> Result { let col = cast_column_field( &columns[0], columns[0].data_type(), &BooleanType::new_impl(), + &func_ctx, )?; let col_viewer = bool::try_create_viewer(&col)?; diff --git a/common/functions/src/scalars/logics/or.rs b/common/functions/src/scalars/logics/or.rs index 5518e3c436778..2a960277059ca 100644 --- a/common/functions/src/scalars/logics/or.rs +++ b/common/functions/src/scalars/logics/or.rs @@ -22,6 +22,7 @@ use crate::calcute; use crate::impl_logic_expression; use crate::scalars::cast_column_field; use crate::scalars::Function; +use crate::scalars::FunctionContext; use crate::scalars::FunctionDescription; use crate::scalars::FunctionFeatures; diff --git a/common/functions/src/scalars/logics/xor.rs b/common/functions/src/scalars/logics/xor.rs index 3c87c53e5b9cc..f2c72799a5f20 100644 --- a/common/functions/src/scalars/logics/xor.rs +++ b/common/functions/src/scalars/logics/xor.rs @@ -21,6 +21,7 @@ use super::logic::LogicOperator; use crate::calcute; use crate::scalars::cast_column_field; use crate::scalars::Function; +use crate::scalars::FunctionContext; use crate::scalars::FunctionDescription; use crate::scalars::FunctionFeatures; @@ -28,16 +29,23 @@ use crate::scalars::FunctionFeatures; pub struct LogicXorExpression; impl LogicExpression for LogicXorExpression { - fn eval(columns: &ColumnsWithField, input_rows: usize, _nullable: bool) -> Result { + fn eval( + func_ctx: FunctionContext, + columns: &ColumnsWithField, + input_rows: usize, + _nullable: bool, + ) -> Result { let lhs = cast_column_field( &columns[0], columns[0].data_type(), &BooleanType::new_impl(), + &func_ctx, )?; let rhs = cast_column_field( &columns[1], columns[1].data_type(), &BooleanType::new_impl(), + &func_ctx, )?; let lhs_viewer = bool::try_create_viewer(&lhs)?; let rhs_viewer = bool::try_create_viewer(&rhs)?; diff --git a/common/functions/src/scalars/others/inet_ntoa.rs b/common/functions/src/scalars/others/inet_ntoa.rs index 2a3c4269491aa..9081bbe9ff539 100644 --- a/common/functions/src/scalars/others/inet_ntoa.rs +++ b/common/functions/src/scalars/others/inet_ntoa.rs @@ -70,7 +70,7 @@ impl Function for InetNtoaFunctionImpl Result { @@ -86,6 +86,7 @@ impl Function for InetNtoaFunctionImpl Function for InetNtoaFunctionImpl::with_capacity(input_rows); let serializer = data_type.create_serializer(); - match serializer.serialize_json_object(columns[0].column(), None) { + // TODO(veeupup): check if we can use default format_settings + let format = FormatSettings::default(); + match serializer.serialize_json_object(columns[0].column(), None, &format) { Ok(values) => { for v in values { builder.append(&VariantValue::from(v)); diff --git a/common/functions/src/scalars/semi_structureds/parse_json.rs b/common/functions/src/scalars/semi_structureds/parse_json.rs index 3c27a701435b5..85da7d5992982 100644 --- a/common/functions/src/scalars/semi_structureds/parse_json.rs +++ b/common/functions/src/scalars/semi_structureds/parse_json.rs @@ -17,6 +17,7 @@ use std::fmt; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use crate::scalars::Function; use crate::scalars::FunctionContext; @@ -91,6 +92,8 @@ impl Function for ParseJsonFunctionImpl::with_capacity(input_rows); @@ -100,7 +103,7 @@ impl Function for ParseJsonFunctionImpl { for v in values { match v { @@ -132,7 +135,7 @@ impl Function for ParseJsonFunctionImpl { for (i, v) in values.iter().enumerate() { if let Some(valids) = valids { @@ -162,7 +165,9 @@ impl Function for ParseJsonFunctionImpl { for v in values { builder.append(&VariantValue::from(v)); diff --git a/common/functions/src/scalars/strings/bin.rs b/common/functions/src/scalars/strings/bin.rs index 2088488224dc2..d5707e812e372 100644 --- a/common/functions/src/scalars/strings/bin.rs +++ b/common/functions/src/scalars/strings/bin.rs @@ -55,7 +55,7 @@ impl Function for BinFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -67,6 +67,7 @@ impl Function for BinFunction { &columns[0], columns[0].data_type(), &UInt64Type::new_impl(), + &func_ctx, )?; let col = col.as_any().downcast_ref::().unwrap(); for val in col.iter() { @@ -74,8 +75,12 @@ impl Function for BinFunction { } } TypeID::Int8 | TypeID::Int16 | TypeID::Int32 | TypeID::Int64 => { - let col = - cast_column_field(&columns[0], columns[0].data_type(), &Int64Type::new_impl())?; + let col = cast_column_field( + &columns[0], + columns[0].data_type(), + &Int64Type::new_impl(), + &func_ctx, + )?; let col = col.as_any().downcast_ref::().unwrap(); for val in col.iter() { builder.append(format!("{:b}", val).as_bytes()); @@ -86,6 +91,7 @@ impl Function for BinFunction { &columns[0], columns[0].data_type(), &Float64Type::new_impl(), + &func_ctx, )?; let col = col.as_any().downcast_ref::().unwrap(); for val in col.iter() { diff --git a/common/functions/src/scalars/strings/export_set.rs b/common/functions/src/scalars/strings/export_set.rs index 4ca96eb340ce0..3233c010be416 100644 --- a/common/functions/src/scalars/strings/export_set.rs +++ b/common/functions/src/scalars/strings/export_set.rs @@ -72,7 +72,7 @@ impl Function for ExportSetFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -94,6 +94,7 @@ impl Function for ExportSetFunction { &number_bits_column.data_type(), &t, &DEFAULT_CAST_OPTIONS, + &func_ctx, )?; let bits_column = cast_with_type( @@ -101,6 +102,7 @@ impl Function for ExportSetFunction { &columns[0].column().data_type(), &t, &DEFAULT_CAST_OPTIONS, + &func_ctx, )?; if input_rows != 1 diff --git a/common/functions/src/scalars/strings/hex.rs b/common/functions/src/scalars/strings/hex.rs index 2e3abaced0b8c..91dee529619a8 100644 --- a/common/functions/src/scalars/strings/hex.rs +++ b/common/functions/src/scalars/strings/hex.rs @@ -62,7 +62,7 @@ impl Function for HexFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, _input_rows: usize, ) -> Result { @@ -72,14 +72,19 @@ impl Function for HexFunction { &columns[0], columns[0].data_type(), &UInt64Type::new_impl(), + &func_ctx, )?; let col = col.as_any().downcast_ref::().unwrap(); let iter = col.iter().map(|val| format!("{:x}", val).into_bytes()); Ok(Arc::new(StringColumn::from_owned_iterator(iter))) } TypeID::Int8 | TypeID::Int16 | TypeID::Int32 | TypeID::Int64 => { - let col = - cast_column_field(&columns[0], columns[0].data_type(), &Int64Type::new_impl())?; + let col = cast_column_field( + &columns[0], + columns[0].data_type(), + &Int64Type::new_impl(), + &func_ctx, + )?; let col = col.as_any().downcast_ref::().unwrap(); let iter = col.iter().map(|val| match val.cmp(&0) { Ordering::Less => format!("-{:x}", val.unsigned_abs()).into_bytes(), @@ -92,6 +97,7 @@ impl Function for HexFunction { &columns[0], columns[0].data_type(), &StringType::new_impl(), + &func_ctx, )?; let col = col.as_any().downcast_ref::().unwrap(); let iter = col.iter().map(|val| { diff --git a/common/functions/src/scalars/strings/oct.rs b/common/functions/src/scalars/strings/oct.rs index f3f941dab028b..ea0fd71d30f56 100644 --- a/common/functions/src/scalars/strings/oct.rs +++ b/common/functions/src/scalars/strings/oct.rs @@ -81,7 +81,7 @@ impl Function for OctFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -93,6 +93,7 @@ impl Function for OctFunction { &columns[0], columns[0].data_type(), &UInt64Type::new_impl(), + &func_ctx, )?; let col = col.as_any().downcast_ref::().unwrap(); for val in col.iter() { @@ -100,8 +101,12 @@ impl Function for OctFunction { } } _ => { - let col = - cast_column_field(&columns[0], columns[0].data_type(), &Int64Type::new_impl())?; + let col = cast_column_field( + &columns[0], + columns[0].data_type(), + &Int64Type::new_impl(), + &func_ctx, + )?; let col = col.as_any().downcast_ref::().unwrap(); for val in col.iter() { builder.append(val.oct_string().as_bytes()); diff --git a/common/functions/src/scalars/strings/regexp_instr.rs b/common/functions/src/scalars/strings/regexp_instr.rs index e8534a8339610..4829981ba9e82 100644 --- a/common/functions/src/scalars/strings/regexp_instr.rs +++ b/common/functions/src/scalars/strings/regexp_instr.rs @@ -80,7 +80,7 @@ impl Function for RegexpInStrFunction { // Notes: https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-instr fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -101,6 +101,7 @@ impl Function for RegexpInStrFunction { &columns[2], columns[2].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } 3 => { @@ -108,6 +109,7 @@ impl Function for RegexpInStrFunction { &columns[3], columns[3].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } 4 => { @@ -115,6 +117,7 @@ impl Function for RegexpInStrFunction { &columns[4], columns[4].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } _ => { @@ -122,6 +125,7 @@ impl Function for RegexpInStrFunction { &columns[5], columns[5].data_type(), &NullableType::new_impl(StringType::new_impl()), + &func_ctx, )? } } diff --git a/common/functions/src/scalars/strings/regexp_replace.rs b/common/functions/src/scalars/strings/regexp_replace.rs index cec90aa64d715..a85b2f87b325a 100644 --- a/common/functions/src/scalars/strings/regexp_replace.rs +++ b/common/functions/src/scalars/strings/regexp_replace.rs @@ -81,7 +81,7 @@ impl Function for RegexpReplaceFunction { // Notes: https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-replace fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -101,6 +101,7 @@ impl Function for RegexpReplaceFunction { &columns[3], columns[3].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } 4 => { @@ -108,6 +109,7 @@ impl Function for RegexpReplaceFunction { &columns[4], columns[4].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } _ => { @@ -115,6 +117,7 @@ impl Function for RegexpReplaceFunction { &columns[5], columns[5].data_type(), &NullableType::new_impl(StringType::new_impl()), + &func_ctx, )? } } diff --git a/common/functions/src/scalars/strings/regexp_substr.rs b/common/functions/src/scalars/strings/regexp_substr.rs index 115163be11f67..52b7ed2fb8097 100644 --- a/common/functions/src/scalars/strings/regexp_substr.rs +++ b/common/functions/src/scalars/strings/regexp_substr.rs @@ -81,7 +81,7 @@ impl Function for RegexpSubStrFunction { // Notes: https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-substr fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { @@ -101,6 +101,7 @@ impl Function for RegexpSubStrFunction { &columns[2], columns[2].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } 3 => { @@ -108,6 +109,7 @@ impl Function for RegexpSubStrFunction { &columns[3], columns[3].data_type(), &NullableType::new_impl(Int64Type::new_impl()), + &func_ctx, )? } _ => { @@ -115,6 +117,7 @@ impl Function for RegexpSubStrFunction { &columns[4], columns[4].data_type(), &NullableType::new_impl(StringType::new_impl()), + &func_ctx, )? } } diff --git a/common/functions/src/scalars/strings/repeat.rs b/common/functions/src/scalars/strings/repeat.rs index 4e2d1848bc709..4c22ff8f294c0 100644 --- a/common/functions/src/scalars/strings/repeat.rs +++ b/common/functions/src/scalars/strings/repeat.rs @@ -69,13 +69,18 @@ impl Function for RepeatFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { let col1_viewer = Vu8::try_create_viewer(columns[0].column())?; - let col2 = cast_column_field(&columns[1], columns[1].data_type(), &UInt64Type::new_impl())?; + let col2 = cast_column_field( + &columns[1], + columns[1].data_type(), + &UInt64Type::new_impl(), + &func_ctx, + )?; let col2_viewer = u64::try_create_viewer(&col2)?; let mut builder = ColumnBuilder::::with_capacity(input_rows); diff --git a/common/functions/src/scalars/strings/substring.rs b/common/functions/src/scalars/strings/substring.rs index bde4b465ce782..e6a05feceb2f1 100644 --- a/common/functions/src/scalars/strings/substring.rs +++ b/common/functions/src/scalars/strings/substring.rs @@ -77,23 +77,35 @@ impl Function for SubstringFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { - let s_column = - cast_column_field(&columns[0], columns[0].data_type(), &StringType::new_impl())?; + let s_column = cast_column_field( + &columns[0], + columns[0].data_type(), + &StringType::new_impl(), + &func_ctx, + )?; let s_viewer = Vu8::try_create_viewer(&s_column)?; - let p_column = - cast_column_field(&columns[1], columns[1].data_type(), &Int64Type::new_impl())?; + let p_column = cast_column_field( + &columns[1], + columns[1].data_type(), + &Int64Type::new_impl(), + &func_ctx, + )?; let p_viewer = i64::try_create_viewer(&p_column)?; let mut builder = ColumnBuilder::::with_capacity(input_rows); if columns.len() > 2 { - let p2_column = - cast_column_field(&columns[2], columns[2].data_type(), &UInt64Type::new_impl())?; + let p2_column = cast_column_field( + &columns[2], + columns[2].data_type(), + &UInt64Type::new_impl(), + &func_ctx, + )?; let p2_viewer = u64::try_create_viewer(&p2_column)?; let iter = izip!(s_viewer, p_viewer, p2_viewer); diff --git a/common/functions/src/scalars/strings/substring_index.rs b/common/functions/src/scalars/strings/substring_index.rs index 3c68eb6571865..aa9d345fa90e1 100644 --- a/common/functions/src/scalars/strings/substring_index.rs +++ b/common/functions/src/scalars/strings/substring_index.rs @@ -72,20 +72,32 @@ impl Function for SubstringIndexFunction { fn eval( &self, - _func_ctx: FunctionContext, + func_ctx: FunctionContext, columns: &ColumnsWithField, input_rows: usize, ) -> Result { - let s_column = - cast_column_field(&columns[0], columns[0].data_type(), &StringType::new_impl())?; + let s_column = cast_column_field( + &columns[0], + columns[0].data_type(), + &StringType::new_impl(), + &func_ctx, + )?; let s_viewer = Vu8::try_create_viewer(&s_column)?; - let d_column = - cast_column_field(&columns[1], columns[1].data_type(), &StringType::new_impl())?; + let d_column = cast_column_field( + &columns[1], + columns[1].data_type(), + &StringType::new_impl(), + &func_ctx, + )?; let d_viewer = Vu8::try_create_viewer(&d_column)?; - let c_column = - cast_column_field(&columns[2], columns[2].data_type(), &Int64Type::new_impl())?; + let c_column = cast_column_field( + &columns[2], + columns[2].data_type(), + &Int64Type::new_impl(), + &func_ctx, + )?; let c_viewer = i64::try_create_viewer(&c_column)?; let iter = izip!(s_viewer, d_viewer, c_viewer); diff --git a/common/io/src/format_settings.rs b/common/io/src/format_settings.rs index 39d04a86d77d9..48bfbcae69c7a 100644 --- a/common/io/src/format_settings.rs +++ b/common/io/src/format_settings.rs @@ -14,18 +14,20 @@ use std::str::FromStr; +use chrono_tz::Tz; use common_exception::ErrorCode; use common_exception::Result; use serde::Deserialize; use serde::Serialize; -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub struct FormatSettings { pub record_delimiter: Vec, pub field_delimiter: Vec, pub empty_as_default: bool, pub skip_header: bool, pub compression: Compression, + pub timezone: Tz, } impl Default for FormatSettings { @@ -36,6 +38,7 @@ impl Default for FormatSettings { empty_as_default: false, skip_header: false, compression: Compression::None, + timezone: "UTC".parse::().unwrap(), } } } diff --git a/common/streams/Cargo.toml b/common/streams/Cargo.toml index e70b83393842f..80e48ffc242d5 100644 --- a/common/streams/Cargo.toml +++ b/common/streams/Cargo.toml @@ -27,6 +27,7 @@ common-tracing = { path = "../tracing" } # Crates.io dependencies async-stream = "0.3.3" async-trait = "0.1.53" +chrono-tz = "0.6.1" csv-async = "1.2.4" futures = "0.3.21" pin-project-lite = "0.2.8" diff --git a/common/streams/src/sources/source_csv.rs b/common/streams/src/sources/source_csv.rs index d8d0eef036fa4..7a2225f4550cd 100644 --- a/common/streams/src/sources/source_csv.rs +++ b/common/streams/src/sources/source_csv.rs @@ -13,6 +13,7 @@ // limitations under the License. use async_trait::async_trait; +use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::DataSchemaRef; use common_datavalues::DataType; @@ -38,6 +39,7 @@ pub struct CsvSourceBuilder { size_limit: usize, field_delimiter: u8, record_delimiter: Terminator, + tz: Tz, } impl CsvSourceBuilder { @@ -59,6 +61,7 @@ impl CsvSourceBuilder { let empty_as_default = format_settings.empty_as_default; let skip_header = format_settings.skip_header; + let tz = format_settings.timezone; CsvSourceBuilder { schema, @@ -68,6 +71,7 @@ impl CsvSourceBuilder { empty_as_default, block_size: 10000, size_limit: usize::MAX, + tz, } } @@ -174,16 +178,20 @@ where R: AsyncRead + Unpin + Send if record.is_empty() { break; } + let format = FormatSettings { + timezone: self.builder.tz, + ..Default::default() + }; for (col, pack) in packs.iter_mut().enumerate() { match record.get(col) { Some(bytes) => { if bytes.is_empty() && self.builder.empty_as_default { - pack.de_default(); + pack.de_default(&format); } else { - pack.de_whole_text(bytes)? + pack.de_whole_text(bytes, &format)? } } - None => pack.de_default(), + None => pack.de_default(&format), } } rows += 1; diff --git a/common/streams/src/sources/source_ndjson.rs b/common/streams/src/sources/source_ndjson.rs index 7879ae70f1056..459ab6fa349f4 100644 --- a/common/streams/src/sources/source_ndjson.rs +++ b/common/streams/src/sources/source_ndjson.rs @@ -22,6 +22,7 @@ use common_datavalues::TypeDeserializer; use common_exception::ErrorCode; use common_exception::Result; use common_exception::ToErrorCode; +use common_io::prelude::FormatSettings; use futures::AsyncBufRead; use futures::AsyncBufReadExt; @@ -32,14 +33,16 @@ pub struct NDJsonSourceBuilder { schema: DataSchemaRef, block_size: usize, size_limit: usize, + format: FormatSettings, } impl NDJsonSourceBuilder { - pub fn create(schema: DataSchemaRef) -> Self { + pub fn create(schema: DataSchemaRef, format: FormatSettings) -> Self { NDJsonSourceBuilder { schema, block_size: 10000, size_limit: usize::MAX, + format, } } @@ -143,7 +146,7 @@ where R: AsyncBufRead + Unpin + Send for ((name, type_name), deser) in fields.iter().zip(packs.iter_mut()) { let value = &json[name]; - deser.de_json(value).map_err(|e| { + deser.de_json(value, &self.builder.format).map_err(|e| { let value_str = format!("{:?}", value); ErrorCode::BadBytes(format!( "error at row {} column {}: type={}, err={}, value={}", diff --git a/common/streams/tests/it/sources/source_ndjson.rs b/common/streams/tests/it/sources/source_ndjson.rs index 06c21e12ea9ab..3ef7ef3799848 100644 --- a/common/streams/tests/it/sources/source_ndjson.rs +++ b/common/streams/tests/it/sources/source_ndjson.rs @@ -15,6 +15,7 @@ use common_base::tokio; use common_datablocks::assert_blocks_eq; use common_exception::Result; +use common_io::prelude::FormatSettings; use common_streams::NDJsonSourceBuilder; use common_streams::Source; @@ -36,8 +37,7 @@ async fn test_source_ndjson() -> Result<()> { .as_bytes(); let reader = futures::io::Cursor::new(bytes); - - let builder = NDJsonSourceBuilder::create(schema); + let builder = NDJsonSourceBuilder::create(schema, FormatSettings::default()); let mut json_source = builder.build(reader).unwrap(); // expects `page_nums_expects` blocks, and while let Some(block) = json_source.read().await? { diff --git a/query/src/interpreters/interpreter_insert.rs b/query/src/interpreters/interpreter_insert.rs index 724bc2ab24170..fed8bdad285c7 100644 --- a/query/src/interpreters/interpreter_insert.rs +++ b/query/src/interpreters/interpreter_insert.rs @@ -15,6 +15,7 @@ use std::collections::VecDeque; use std::sync::Arc; +use chrono_tz::Tz; use common_datavalues::DataType; use common_exception::ErrorCode; use common_exception::Result; @@ -120,6 +121,9 @@ impl InsertInterpreter { let tz = String::from_utf8(tz).map_err(|_| { ErrorCode::LogicalError("Timezone has been checked and should be valid.") })?; + let tz = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; let func_ctx = FunctionContext { tz }; pipeline.add_transform(|transform_input_port, transform_output_port| { TransformCastSchema::try_create( diff --git a/query/src/pipelines/transforms/group_by/aggregator_groups_builder.rs b/query/src/pipelines/transforms/group_by/aggregator_groups_builder.rs index a105cabda530f..66c50e84cf177 100644 --- a/query/src/pipelines/transforms/group_by/aggregator_groups_builder.rs +++ b/query/src/pipelines/transforms/group_by/aggregator_groups_builder.rs @@ -23,6 +23,7 @@ use common_datavalues::PrimitiveType; use common_datavalues::ScalarColumnBuilder; use common_datavalues::TypeDeserializer; use common_exception::Result; +use common_io::prelude::FormatSettings; use crate::pipelines::new::processors::AggregatorParams; use crate::pipelines::transforms::group_by::keys_ref::KeysRef; @@ -100,12 +101,13 @@ impl GroupColumnsBuilder for SerializedKeysGroupColumnsBuilder { let rows = self.data.len(); let mut res = Vec::with_capacity(self.groups_fields.len()); + let format = FormatSettings::default(); for group_field in self.groups_fields.iter() { let data_type = group_field.data_type(); let mut deserializer = data_type.create_deserializer(rows); for (_, key) in keys.iter_mut().enumerate() { - deserializer.de_binary(key)?; + deserializer.de_binary(key, &format)?; } res.push(deserializer.finish_to_column()); } diff --git a/query/src/pipelines/transforms/transform_expression_executor.rs b/query/src/pipelines/transforms/transform_expression_executor.rs index a2c6d1e5ff131..bc9f4ff40443f 100644 --- a/query/src/pipelines/transforms/transform_expression_executor.rs +++ b/query/src/pipelines/transforms/transform_expression_executor.rs @@ -15,6 +15,7 @@ use std::collections::HashMap; use std::sync::Arc; +use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::ErrorCode; @@ -197,6 +198,9 @@ impl ExpressionExecutor { let tz = String::from_utf8(tz).map_err(|_| { ErrorCode::LogicalError("Timezone has beeen checked and should be valid.") })?; + let tz = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; let func_ctx = FunctionContext { tz }; let column = f.func.eval(func_ctx, &arg_columns, rows)?; Ok(ColumnWithField::new( diff --git a/query/src/pipelines/transforms/transform_sink.rs b/query/src/pipelines/transforms/transform_sink.rs index 44dfc58caa0b7..cf44570581a77 100644 --- a/query/src/pipelines/transforms/transform_sink.rs +++ b/query/src/pipelines/transforms/transform_sink.rs @@ -15,6 +15,7 @@ use std::any::Any; use std::sync::Arc; +use chrono_tz::Tz; use common_datavalues::DataSchemaRef; use common_datavalues::DataType; use common_exception::ErrorCode; @@ -100,6 +101,9 @@ impl Processor for SinkTransform { let tz = String::from_utf8(tz).map_err(|_| { ErrorCode::LogicalError("Timezone has beeen checked and should be valid.") })?; + let tz = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; let func_ctx = FunctionContext { tz }; input_stream = Box::pin(CastStream::try_create( input_stream, diff --git a/query/src/servers/clickhouse/interactive_worker.rs b/query/src/servers/clickhouse/interactive_worker.rs index 4d2d2a98a70cc..c14d3783322e9 100644 --- a/query/src/servers/clickhouse/interactive_worker.rs +++ b/query/src/servers/clickhouse/interactive_worker.rs @@ -50,7 +50,13 @@ impl ClickHouseSession for InteractiveWorker { let session = self.session.clone(); let get_query_result = InteractiveWorkerBase::do_query(ctx, session); - if let Err(cause) = query_writer.write(get_query_result.await).await { + let query_ctx = self + .session + .get_shared_query_context() + .await + .map_err(to_clickhouse_err)?; + let format = query_ctx.get_format_settings().map_err(to_clickhouse_err)?; + if let Err(cause) = query_writer.write(get_query_result.await, &format).await { let new_error = cause.add_message(&ctx.state.query); return Err(to_clickhouse_err(new_error)); } diff --git a/query/src/servers/clickhouse/writers/query_writer.rs b/query/src/servers/clickhouse/writers/query_writer.rs index d01c2fb74e652..51c3ae86a0fa3 100644 --- a/query/src/servers/clickhouse/writers/query_writer.rs +++ b/query/src/servers/clickhouse/writers/query_writer.rs @@ -19,6 +19,7 @@ use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use common_tracing::tracing; use futures::channel::mpsc::Receiver; use futures::StreamExt; @@ -46,11 +47,15 @@ impl<'a> QueryWriter<'a> { } } - pub async fn write(&mut self, receiver: Result>) -> Result<()> { + pub async fn write( + &mut self, + receiver: Result>, + format: &FormatSettings, + ) -> Result<()> { match receiver { Err(error) => self.write_error(error).await, Ok(receiver) => { - let write_data = self.write_data(receiver); + let write_data = self.write_data(receiver, format); write_data.await } } @@ -85,8 +90,8 @@ impl<'a> QueryWriter<'a> { } } - async fn write_block(&mut self, block: DataBlock) -> Result<()> { - let block = to_clickhouse_block(block)?; + async fn write_block(&mut self, block: DataBlock, format: &FormatSettings) -> Result<()> { + let block = to_clickhouse_block(block, format)?; match self.conn.write_block(&block).await { Ok(_) => Ok(()), @@ -94,7 +99,11 @@ impl<'a> QueryWriter<'a> { } } - async fn write_data(&mut self, mut receiver: Receiver) -> Result<()> { + async fn write_data( + &mut self, + mut receiver: Receiver, + format: &FormatSettings, + ) -> Result<()> { loop { match receiver.next().await { None => { @@ -106,13 +115,13 @@ impl<'a> QueryWriter<'a> { return Ok(()); } Some(BlockItem::Block(Ok(block))) => { - self.write_block(block).await?; + self.write_block(block, format).await?; } Some(BlockItem::InsertSample(block)) => { let schema = block.schema(); let header = DataBlock::empty_with_schema(schema.clone()); - self.write_block(header).await?; + self.write_block(header, format).await?; } } } @@ -132,7 +141,7 @@ pub fn from_clickhouse_err(res: opensrv_clickhouse::errors::Error) -> ErrorCode ErrorCode::LogicalError(format!("clickhouse-srv expception: {:?}", res)) } -pub fn to_clickhouse_block(block: DataBlock) -> Result { +pub fn to_clickhouse_block(block: DataBlock, format: &FormatSettings) -> Result { let mut result = Block::new(); if block.num_columns() == 0 { return Ok(result); @@ -145,7 +154,7 @@ pub fn to_clickhouse_block(block: DataBlock) -> Result { let serializer = field.data_type().create_serializer(); result.append_column(column::new_column( name, - serializer.serialize_clickhouse_format(&column.convert_full_column())?, + serializer.serialize_clickhouse_format(&column.convert_full_column(), format)?, )); } Ok(result) diff --git a/query/src/servers/http/clickhouse_handler.rs b/query/src/servers/http/clickhouse_handler.rs index e37dc0ae2d8dd..6effe027a6a0c 100644 --- a/query/src/servers/http/clickhouse_handler.rs +++ b/query/src/servers/http/clickhouse_handler.rs @@ -18,6 +18,7 @@ use async_stream::stream; use common_exception::ErrorCode; use common_exception::Result; use common_exception::ToErrorCode; +use common_io::prelude::FormatSettings; use common_planners::PlanNode; use common_streams::NDJsonSourceBuilder; use common_streams::SendableDataBlockStream; @@ -88,12 +89,12 @@ async fn execute( interpreter.execute(input_stream).await? }; let mut data_stream = ctx.try_create_abortable(data_stream)?; - + let format = ctx.get_format_settings()?; let stream = stream! { while let Some(block) = data_stream.next().await { match block{ Ok(block) => { - yield(block_to_tsv(&block)) + yield(block_to_tsv(&block, &format)) }, Err(err) => yield(Err(err)), }; @@ -224,7 +225,8 @@ pub async fn clickhouse_handler_post( } async fn build_ndjson_stream(plan: &PlanNode, body: Body) -> Result { - let builder = NDJsonSourceBuilder::create(plan.schema()); + // TODO(veeupup): HTTP with global session tz + let builder = NDJsonSourceBuilder::create(plan.schema(), FormatSettings::default()); let cursor = futures::io::Cursor::new( body.into_vec() .await diff --git a/query/src/servers/http/formats/tsv_output.rs b/query/src/servers/http/formats/tsv_output.rs index 5d85fedab8356..452248487c5dc 100644 --- a/query/src/servers/http/formats/tsv_output.rs +++ b/query/src/servers/http/formats/tsv_output.rs @@ -17,11 +17,12 @@ use common_datavalues::DataType; use common_datavalues::TypeSerializer; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; const FIELD_DELIMITER: u8 = b'\t'; const ROW_DELIMITER: u8 = b'\n'; -pub fn block_to_tsv(block: &DataBlock) -> Result> { +pub fn block_to_tsv(block: &DataBlock, format: &FormatSettings) -> Result> { let rows_size = block.column(0).len(); let columns_size = block.num_columns(); @@ -33,7 +34,7 @@ pub fn block_to_tsv(block: &DataBlock) -> Result> { let data_type = field.data_type(); let serializer = data_type.create_serializer(); // todo(youngsofun): escape - col_table.push(serializer.serialize_column(&column).map_err(|e| { + col_table.push(serializer.serialize_column(&column, format).map_err(|e| { ErrorCode::UnexpectedError(format!( "fail to serialize filed {}, error = {}", field.name(), diff --git a/query/src/servers/http/v1/http_query_handlers.rs b/query/src/servers/http/v1/http_query_handlers.rs index 7d07291da653b..59c208f1774d5 100644 --- a/query/src/servers/http/v1/http_query_handlers.rs +++ b/query/src/servers/http/v1/http_query_handlers.rs @@ -15,6 +15,7 @@ use common_base::ProgressValues; use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; +use common_io::prelude::FormatSettings; use common_tracing::tracing; use poem::error::Error as PoemError; use poem::error::Result as PoemResult; @@ -175,9 +176,11 @@ async fn query_page_handler( let http_query_manager = ctx.session_mgr.get_http_query_manager(); match http_query_manager.get_query(&query_id).await { Some(query) => { + // TODO(veeupup): get query_ctx here to get format_settings + let format = FormatSettings::default(); query.clear_expire_time().await; let resp = query - .get_response_page(page_no) + .get_response_page(page_no, &format) .await .map_err(|err| poem::Error::from_string(err.message(), StatusCode::NOT_FOUND))?; query.update_expire_time().await; @@ -196,10 +199,12 @@ pub(crate) async fn query_handler( let http_query_manager = ctx.session_mgr.get_http_query_manager(); let query = http_query_manager.try_create_query(ctx, req).await; + // TODO(veeupup): get global query_ctx's format_settings, because we cann't set session settings now + let format = FormatSettings::default(); match query { Ok(query) => { let resp = query - .get_response_page(0) + .get_response_page(0, &format) .await .map_err(|err| poem::Error::from_string(err.message(), StatusCode::NOT_FOUND))?; query.update_expire_time().await; diff --git a/query/src/servers/http/v1/json_block.rs b/query/src/servers/http/v1/json_block.rs index 8336b01bd8404..c4cf94cd41de3 100644 --- a/query/src/servers/http/v1/json_block.rs +++ b/query/src/servers/http/v1/json_block.rs @@ -21,6 +21,7 @@ use common_datavalues::DataType; use common_datavalues::TypeSerializer; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use serde_json::Value as JsonValue; #[derive(Debug, Clone)] @@ -39,7 +40,7 @@ impl JsonBlock { } } - pub fn new(block: &DataBlock) -> Result { + pub fn new(block: &DataBlock, format: &FormatSettings) -> Result { let mut col_table = Vec::new(); let columns_size = block.columns().len(); for col_index in 0..columns_size { @@ -48,7 +49,7 @@ impl JsonBlock { let field = block.schema().field(col_index); let data_type = field.data_type(); let serializer = data_type.create_serializer(); - col_table.push(serializer.serialize_json(&column).map_err(|e| { + col_table.push(serializer.serialize_json(&column, format).map_err(|e| { ErrorCode::UnexpectedError(format!( "fail to serialize filed {}, error = {}", field.name(), diff --git a/query/src/servers/http/v1/load.rs b/query/src/servers/http/v1/load.rs index 2bc435d30a434..bee51b2675554 100644 --- a/query/src/servers/http/v1/load.rs +++ b/query/src/servers/http/v1/load.rs @@ -279,7 +279,7 @@ fn build_ndjson_stream( plan: &PlanNode, mut multipart: Multipart, ) -> PoemResult { - let builder = NDJsonSourceBuilder::create(plan.schema()); + let builder = NDJsonSourceBuilder::create(plan.schema(), FormatSettings::default()); let stream = stream! { while let Ok(Some(field)) = multipart.next_field().await { let bytes = field.bytes().await.map_err_to_code(ErrorCode::BadBytes, || "Read part to field bytes error")?; @@ -383,7 +383,7 @@ async fn ndjson_source_pipe_builder( plan: &PlanNode, mut multipart: Multipart, ) -> PoemResult { - let builder = NDJsonSourceBuilder::create(plan.schema()); + let builder = NDJsonSourceBuilder::create(plan.schema(), FormatSettings::default()); let mut source_pipe_builder = SourcePipeBuilder::create(); while let Ok(Some(field)) = multipart.next_field().await { let bytes = field diff --git a/query/src/servers/http/v1/query/http_query.rs b/query/src/servers/http/v1/query/http_query.rs index 47da0f60947f3..04fbed64097ce 100644 --- a/query/src/servers/http/v1/query/http_query.rs +++ b/query/src/servers/http/v1/query/http_query.rs @@ -23,6 +23,7 @@ use common_base::tokio::sync::RwLock; use common_base::ProgressValues; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use serde::Deserialize; use super::HttpQueryContext; @@ -177,9 +178,13 @@ impl HttpQuery { self.request.pagination.wait_time_secs == 0 } - pub async fn get_response_page(&self, page_no: usize) -> Result { + pub async fn get_response_page( + &self, + page_no: usize, + format: &FormatSettings, + ) -> Result { Ok(HttpQueryResponseInternal { - data: Some(self.get_page(page_no).await?), + data: Some(self.get_page(page_no, format).await?), session_id: self.session_id.clone(), state: self.get_state().await, }) @@ -204,10 +209,10 @@ impl HttpQuery { } } - async fn get_page(&self, page_no: usize) -> Result { + async fn get_page(&self, page_no: usize, format: &FormatSettings) -> Result { let mut data = self.data.lock().await; let page = data - .get_a_page(page_no, &self.request.pagination.get_wait_type()) + .get_a_page(page_no, &self.request.pagination.get_wait_type(), format) .await?; let response = ResponseData { page, diff --git a/query/src/servers/http/v1/query/result_data_manager.rs b/query/src/servers/http/v1/query/result_data_manager.rs index 10c58921bdf81..010a53203ff21 100644 --- a/query/src/servers/http/v1/query/result_data_manager.rs +++ b/query/src/servers/http/v1/query/result_data_manager.rs @@ -20,6 +20,7 @@ use common_base::tokio::sync::mpsc::error::TryRecvError; use common_datablocks::DataBlock; use common_exception::ErrorCode; use common_exception::Result; +use common_io::prelude::FormatSettings; use common_tracing::tracing; use crate::servers::http::v1::JsonBlock; @@ -71,10 +72,15 @@ impl ResultDataManager { } } - pub async fn get_a_page(&mut self, page_no: usize, tp: &Wait) -> Result { + pub async fn get_a_page( + &mut self, + page_no: usize, + tp: &Wait, + format: &FormatSettings, + ) -> Result { let next_no = self.total_pages; if page_no == next_no && !self.end { - let (block, end) = self.collect_new_page(tp).await?; + let (block, end) = self.collect_new_page(tp, format).await?; let num_row = block.num_rows(); self.total_rows += num_row; let page = Page { @@ -120,7 +126,11 @@ impl ResultDataManager { } } - pub async fn collect_new_page(&mut self, tp: &Wait) -> Result<(JsonBlock, bool)> { + pub async fn collect_new_page( + &mut self, + tp: &Wait, + format: &FormatSettings, + ) -> Result<(JsonBlock, bool)> { let mut results: Vec = Vec::new(); let mut rows = 0; let block_rx = &mut self.block_rx; @@ -130,7 +140,7 @@ impl ResultDataManager { match ResultDataManager::receive(block_rx, tp).await { Ok(block) => { rows += block.num_rows(); - results.push(JsonBlock::new(&block)?); + results.push(JsonBlock::new(&block, format)?); // TODO(youngsofun): set it in post if needed if rows >= TARGET_ROWS_PER_PAGE { break; diff --git a/query/src/servers/mysql/mysql_interactive_worker.rs b/query/src/servers/mysql/mysql_interactive_worker.rs index c25c36c58f876..4ce4af49c760a 100644 --- a/query/src/servers/mysql/mysql_interactive_worker.rs +++ b/query/src/servers/mysql/mysql_interactive_worker.rs @@ -179,7 +179,12 @@ impl AsyncMysqlShim for InteractiveWorker let instant = Instant::now(); let blocks = self.base.do_query(query).await; - let mut write_result = writer.write(blocks); + let format = self + .session + .get_shared_query_context() + .await? + .get_format_settings()?; + let mut write_result = writer.write(blocks, &format); if let Err(cause) = write_result { let suffix = format!("(while in query {})", query); diff --git a/query/src/servers/mysql/writers/query_result_writer.rs b/query/src/servers/mysql/writers/query_result_writer.rs index d2ca5a3474f55..7a10789bc49aa 100644 --- a/query/src/servers/mysql/writers/query_result_writer.rs +++ b/query/src/servers/mysql/writers/query_result_writer.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use chrono_tz::Tz; use common_datablocks::DataBlock; use common_datavalues::prelude::TypeID; use common_datavalues::remove_nullable; @@ -27,6 +26,7 @@ use common_exception::ErrorCode; use common_exception::Result; use common_exception::ABORT_QUERY; use common_exception::ABORT_SESSION; +use common_io::prelude::FormatSettings; use common_tracing::tracing; use opensrv_mysql::*; @@ -39,10 +39,14 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { DFQueryResultWriter::<'a, W> { inner: Some(inner) } } - pub fn write(&mut self, query_result: Result<(Vec, String)>) -> Result<()> { + pub fn write( + &mut self, + query_result: Result<(Vec, String)>, + format: &FormatSettings, + ) -> Result<()> { if let Some(writer) = self.inner.take() { match query_result { - Ok((blocks, extra_info)) => Self::ok(blocks, extra_info, writer)?, + Ok((blocks, extra_info)) => Self::ok(blocks, extra_info, writer, format)?, Err(error) => Self::err(&error, writer)?, } } @@ -53,6 +57,7 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { blocks: Vec, extra_info: String, dataset_writer: QueryResultWriter<'a, W>, + format: &FormatSettings, ) -> Result<()> { // XXX: num_columns == 0 may is error? let default_response = OkResponse { @@ -108,7 +113,7 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { } let block = blocks[0].clone(); - let tz: Tz = "UTC".parse().unwrap(); + let tz = format.timezone; match convert_schema(block.schema()) { Err(error) => Self::err(&error, dataset_writer), Ok(columns) => { @@ -151,19 +156,23 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { } (TypeID::Struct, DataValue::Struct(_)) => { let serializer = data_type.create_serializer(); - row_writer.write_col(serializer.serialize_value(&val)?)? + row_writer + .write_col(serializer.serialize_value(&val, format)?)? } (TypeID::Variant, DataValue::Variant(_)) => { let serializer = data_type.create_serializer(); - row_writer.write_col(serializer.serialize_value(&val)?)? + row_writer + .write_col(serializer.serialize_value(&val, format)?)? } (TypeID::VariantArray, DataValue::Variant(_)) => { let serializer = data_type.create_serializer(); - row_writer.write_col(serializer.serialize_value(&val)?)? + row_writer + .write_col(serializer.serialize_value(&val, format)?)? } (TypeID::VariantObject, DataValue::Variant(_)) => { let serializer = data_type.create_serializer(); - row_writer.write_col(serializer.serialize_value(&val)?)? + row_writer + .write_col(serializer.serialize_value(&val, format)?)? } (_, DataValue::Int64(v)) => row_writer.write_col(v)?, diff --git a/query/src/sessions/query_ctx.rs b/query/src/sessions/query_ctx.rs index 7e6cdacb8623b..9184806442b64 100644 --- a/query/src/sessions/query_ctx.rs +++ b/query/src/sessions/query_ctx.rs @@ -19,6 +19,7 @@ use std::sync::atomic::Ordering; use std::sync::atomic::Ordering::Acquire; use std::sync::Arc; +use chrono_tz::Tz; use common_base::tokio::task::JoinHandle; use common_base::Progress; use common_base::ProgressValues; @@ -397,11 +398,13 @@ impl QueryContext { } pub fn try_get_function_context(&self) -> Result { - Ok(FunctionContext { - tz: String::from_utf8(self.get_settings().get_timezone()?).map_err(|_| { - ErrorCode::LogicalError("Timezone has been checked and should be valid.") - })?, - }) + let tz = String::from_utf8(self.get_settings().get_timezone()?).map_err(|_| { + ErrorCode::LogicalError("Timezone has been checked and should be valid.") + })?; + let tz = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; + Ok(FunctionContext { tz }) } } diff --git a/query/src/sessions/query_ctx_shared.rs b/query/src/sessions/query_ctx_shared.rs index 3da38e24efbe7..b04cc76fd2906 100644 --- a/query/src/sessions/query_ctx_shared.rs +++ b/query/src/sessions/query_ctx_shared.rs @@ -17,6 +17,7 @@ use std::collections::HashMap; use std::sync::atomic::AtomicUsize; use std::sync::Arc; +use chrono_tz::Tz; use common_base::Progress; use common_base::Runtime; use common_contexts::DalContext; @@ -263,6 +264,12 @@ impl QueryContextShared { format.field_delimiter = settings.get_field_delimiter()?; format.empty_as_default = settings.get_empty_as_default()? > 0; format.skip_header = settings.get_skip_header()? > 0; + let tz = String::from_utf8(settings.get_timezone()?).map_err(|_| { + ErrorCode::LogicalError("Timezone has been checked and should be valid.") + })?; + format.timezone = tz.parse::().map_err(|_| { + ErrorCode::InvalidTimezone("Timezone has been checked and should be valid") + })?; } Ok(format) } diff --git a/query/src/sql/statements/value_source.rs b/query/src/sql/statements/value_source.rs index 421e2a5e070fc..467e6861238b3 100644 --- a/query/src/sql/statements/value_source.rs +++ b/query/src/sql/statements/value_source.rs @@ -108,6 +108,7 @@ impl ValueSource { )); } + let format = self.ctx.get_format_settings()?; for col_idx in 0..col_size { let _ = reader.ignore_white_spaces()?; let col_end = if col_idx + 1 == col_size { b')' } else { b',' }; @@ -117,7 +118,7 @@ impl ValueSource { .ok_or_else(|| ErrorCode::BadBytes("Deserializer is None"))?; let (need_fallback, pop_count) = deser - .de_text_quoted(reader) + .de_text_quoted(reader, &format) .and_then(|_| { let _ = reader.ignore_white_spaces()?; let need_fallback = reader.ignore_byte(col_end)?.not(); @@ -142,7 +143,7 @@ impl ValueSource { .await?; for (append_idx, deser) in desers.iter_mut().enumerate().take(col_size) { - deser.append_data_value(values[append_idx].clone())?; + deser.append_data_value(values[append_idx].clone(), &format)?; } return Ok(()); diff --git a/query/src/storages/s3/s3_stage_source.rs b/query/src/storages/s3/s3_stage_source.rs index 868c788300433..56251992328a6 100644 --- a/query/src/storages/s3/s3_stage_source.rs +++ b/query/src/storages/s3/s3_stage_source.rs @@ -122,7 +122,7 @@ impl StageSource { stage_info: &UserStageInfo, reader: BytesReader, ) -> Result> { - let mut builder = NDJsonSourceBuilder::create(schema); + let mut builder = NDJsonSourceBuilder::create(schema, ctx.get_format_settings()?); let size_limit = stage_info.copy_options.size_limit; // Size limit. diff --git a/query/tests/it/servers/http/formats/tsv_output.rs b/query/tests/it/servers/http/formats/tsv_output.rs index e7412c9ed2f8a..0a9714b0ad1f3 100644 --- a/query/tests/it/servers/http/formats/tsv_output.rs +++ b/query/tests/it/servers/http/formats/tsv_output.rs @@ -16,6 +16,7 @@ use common_arrow::arrow::bitmap::MutableBitmap; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; +use common_io::prelude::FormatSettings; use databend_query::servers::http::formats::tsv_output::block_to_tsv; use pretty_assertions::assert_eq; @@ -59,8 +60,8 @@ fn test_data_block(is_nullable: bool) -> Result<()> { } else { block }; - - let json_block = String::from_utf8(block_to_tsv(&block)?)?; + let format = FormatSettings::default(); + let json_block = String::from_utf8(block_to_tsv(&block, &format)?)?; let expect = "1\ta\t1\t1.1\t1970-01-02\n\ 2\tb\t1\t2.2\t1970-01-03\n\ 3\tc\t0\t3.3\t1970-01-04\n"; diff --git a/query/tests/it/servers/http/json_block.rs b/query/tests/it/servers/http/json_block.rs index e23e227b02561..a4141be74db29 100644 --- a/query/tests/it/servers/http/json_block.rs +++ b/query/tests/it/servers/http/json_block.rs @@ -16,6 +16,7 @@ use common_arrow::arrow::bitmap::MutableBitmap; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_exception::Result; +use common_io::prelude::FormatSettings; use databend_query::servers::http::v1::json_block::JsonBlock; use pretty_assertions::assert_eq; use serde::Serialize; @@ -67,12 +68,30 @@ fn test_data_block(is_nullable: bool) -> Result<()> { } else { block }; - - let json_block = JsonBlock::new(&block)?; + let format = FormatSettings::default(); + let json_block = JsonBlock::new(&block, &format)?; let expect = vec![ - vec![val(1), val("a"), val(true), val(1.1), val("1970-01-02")], - vec![val(2), val("b"), val(true), val(2.2), val("1970-01-03")], - vec![val(3), val("c"), val(false), val(3.3), val("1970-01-04")], + vec![ + val(1_i32), + val("a"), + val(true), + val(1.1_f64), + val("1970-01-02"), + ], + vec![ + val(2_i32), + val("b"), + val(true), + val(2.2_f64), + val("1970-01-03"), + ], + vec![ + val(3_i32), + val("c"), + val(false), + val(3.3_f64), + val("1970-01-04"), + ], ]; assert_eq!(json_block.data().clone(), expect); diff --git a/tests/suites/0_stateless/02_function/02_0012_function_datetimes.sql b/tests/suites/0_stateless/02_function/02_0012_function_datetimes.sql index c34efe05218bd..56469fe0e9cc3 100644 --- a/tests/suites/0_stateless/02_function/02_0012_function_datetimes.sql +++ b/tests/suites/0_stateless/02_function/02_0012_function_datetimes.sql @@ -1,3 +1,4 @@ +set timezone = 'UTC'; SELECT today() >= 18869; SELECT now() >= 1630295616; select to_datetime(1630320462000000), to_int64(to_datetime(1630320462000000)) = 1630320462000000; diff --git a/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.result b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.result new file mode 100644 index 0000000000000..be61cbb84437f --- /dev/null +++ b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.result @@ -0,0 +1,59 @@ +====CAST==== +2021-08-30 10:47:42.000000 +2000-01-01 00:00:00.000000 +2021-08-30 18:47:42.000000 +2000-01-01 12:00:00.000000 +====INSERT_WITH_VALUES==== +2021-04-30 22:48:00.000000 +2021-04-30 22:48:00.000000 +2021-05-01 06:48:00.000000 +2021-05-01 06:48:00.000000 +====NUMBER_FUNCTION==== +==UTC== +202104 +20210430 +20210430220000 +2021-04-01 +4 +120 +30 +5 +==Asia/Shanghai== +202105 +20210501 +20210501060000 +2021-05-01 +5 +121 +1 +6 +====ROUNDER_FUNCTION==== +==UTC== +2021-04-30 22:48:31 +2021-04-30 22:48:00 +2021-04-30 22:45:00 +2021-04-30 22:40:00 +2021-04-30 22:45:00 +2021-04-30 22:30:00 +2021-04-30 22:00:00 +2021-04-30 00:00:00 +2021-04-25 +==Asia/Shanghai== +2021-05-01 06:48:31 +2021-05-01 06:48:00 +2021-05-01 06:45:00 +2021-05-01 06:40:00 +2021-05-01 06:45:00 +2021-05-01 06:30:00 +2021-05-01 06:00:00 +2021-05-01 00:00:00 +2021-04-25 +====INTERVAL_FUNCTION==== +==UTC== +2021-05-30 22:48:31.999000 +2020-02-29 22:00:00.000000 +2021-02-28 22:00:00.000000 +==Asia/Shanghai== +2021-06-01 14:48:31.999000 +2020-03-01 06:00:00.000000 +2021-03-01 14:00:00.000000 diff --git a/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql new file mode 100644 index 0000000000000..6f63e00166d72 --- /dev/null +++ b/tests/suites/0_stateless/02_function/02_0012_function_datetimes_tz.sql @@ -0,0 +1,77 @@ +-- cast function +select "====CAST===="; +set timezone='UTC'; +select to_timestamp(1630320462000000); +select to_timestamp('2000-01-01 00:00:00'); +set timezone='Asia/Shanghai'; -- Asia/Shanghai: +8:00 +select to_timestamp(1630320462000000); +select to_timestamp('2000-01-01 12:00:00'); +-- insert into table, serialization and deserialization +select "====INSERT_WITH_VALUES===="; +set timezone = 'UTC'; +create table tt (a timestamp); +insert into table tt values ('2021-04-30 22:48:00'), (to_timestamp('2021-04-30 22:48:00')); +select * from tt; +set timezone = 'Asia/Shanghai'; +select * from tt; +-- number function +-- 1619820000000000 = 2021-04-30 22:00:00 +select "====NUMBER_FUNCTION===="; +select "==UTC=="; +set timezone = 'UTC'; +select toyyyymm(to_timestamp(1619820000000000)); +select toyyyymmdd(to_timestamp(1619820000000000)); +select toyyyymmddhhmmss(to_timestamp(1619820000000000)); +select tostartofmonth(to_timestamp(1619820000000000)); +select tomonth(to_timestamp(1619820000000000)); +select todayofyear(to_timestamp(1619820000000000)); +select todayofmonth(to_timestamp(1619820000000000)); +select todayofweek(to_timestamp(1619820000000000)); +set timezone = 'Asia/Shanghai'; +select "==Asia/Shanghai=="; +select toyyyymm(to_timestamp(1619820000000000)); +select toyyyymmdd(to_timestamp(1619820000000000)); +select toyyyymmddhhmmss(to_timestamp(1619820000000000)); +select tostartofmonth(to_timestamp(1619820000000000)); +select tomonth(to_timestamp(1619820000000000)); +select todayofyear(to_timestamp(1619820000000000)); +select todayofmonth(to_timestamp(1619820000000000)); +select todayofweek(to_timestamp(1619820000000000)); +-- round function +select "====ROUNDER_FUNCTION===="; +-- 1619822911999000 = 2021-04-30 22:48:31.999 +select "==UTC=="; +set timezone = 'UTC'; +select tostartofsecond(to_timestamp(1619822911999000)); +select tostartofminute(to_timestamp(1619822911999000)); +select tostartoffiveminutes(to_timestamp(1619822911999000)); +select tostartoftenminutes(to_timestamp(1619822911999000)); +select tostartoffifteenminutes(to_timestamp(1619822911999000)); +select timeslot(to_timestamp(1619822911999000)); +select tostartofhour(to_timestamp(1619822911999000)); +select tostartofday(to_timestamp(1619822911999000)); +select tostartofweek(to_timestamp(1619822911999000)); +set timezone = 'Asia/Shanghai'; +select "==Asia/Shanghai=="; +select tostartofsecond(to_timestamp(1619822911999000)); +select tostartofminute(to_timestamp(1619822911999000)); +select tostartoffiveminutes(to_timestamp(1619822911999000)); +select tostartoftenminutes(to_timestamp(1619822911999000)); +select tostartoffifteenminutes(to_timestamp(1619822911999000)); +select timeslot(to_timestamp(1619822911999000)); +select tostartofhour(to_timestamp(1619822911999000)); +select tostartofday(to_timestamp(1619822911999000)); +select tostartofweek(to_timestamp(1619822911999000)); +select "====INTERVAL_FUNCTION===="; +-- 1619822911999000 = 2021-04-30 22:48:31.999 +-- 1583013600000000 = 2020-02-29 22:00:00 +select "==UTC=="; +set timezone = 'UTC'; +select addMonths(to_timestamp(1619822911999000), 1); +select to_timestamp(1583013600000000); +select addYears(to_timestamp(1583013600000000), 1); +select "==Asia/Shanghai=="; +set timezone = 'Asia/Shanghai'; +select addMonths(to_timestamp(1619822911999000), 1); +select to_timestamp(1583013600000000); +select addYears(to_timestamp(1583013600000000), 1); \ No newline at end of file