From 157c33f8d11630c46898e54dfcd208e1d8d9732d Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Wed, 22 Sep 2021 21:30:28 +0530 Subject: [PATCH 01/17] Add `month` and `day` temporal extractors --- src/compute/temporal.rs | 156 ++++++++++++++++++++++++++--------- tests/it/compute/temporal.rs | 20 +++++ 2 files changed, 138 insertions(+), 38 deletions(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index b098c890778..d882d798cde 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -100,21 +100,31 @@ fn chrono_tz_hour( #[cfg(feature = "chrono-tz")] #[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))] -fn chrono_tz_year( +fn chrono_tz( array: &PrimitiveArray, time_unit: TimeUnit, timezone_str: &str, -) -> Result> { + op: F, +) -> Result> +where + O: NativeType, + F: Fn(chrono::DateTime) -> O, +{ let timezone = parse_offset_tz(timezone_str)?; - Ok(extract_impl(array, time_unit, timezone, |x| x.year())) + Ok(extract_impl(array, time_unit, timezone, op)) } #[cfg(not(feature = "chrono-tz"))] -fn chrono_tz_year( +fn chrono_tz( _: &PrimitiveArray, _: TimeUnit, timezone_str: &str, -) -> Result> { + _: F, +) -> Result> +where + O: NativeType, + F: Fn(chrono::DateTime) -> O, +{ Err(ArrowError::InvalidArgumentError(format!( "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)", timezone_str @@ -249,59 +259,115 @@ pub fn can_hour(data_type: &DataType) -> bool { /// Extracts the years of a temporal array as [`PrimitiveArray`]. /// Use [`can_year`] to check if this operation is supported for the target [`DataType`]. 
pub fn year(array: &dyn Array) -> Result> { - let final_data_type = DataType::Int32; + match array.data_type() { + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { + date_like(array, DataType::Int32, |x| x.year()) + } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.year())) + } else { + chrono_tz(array, time_unit, timezone_str, |x| x.year()) + } + } + dt => Err(ArrowError::NotYetImplemented(format!( + "\"year\" does not support type {:?}", + dt + ))), + } +} + +/// Extracts the months of a temporal array as [`PrimitiveArray`]. +/// Use [`can_month`] to check if this operation is supported for the target [`DataType`]. +pub fn month(array: &dyn Array) -> Result> { + match array.data_type() { + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { + date_like(array, DataType::UInt32, |x| x.month()) + } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.month())) + } else { + chrono_tz(array, time_unit, timezone_str, |x| x.month()) + } + } + dt => Err(ArrowError::NotYetImplemented(format!( + "\"month\" does not support type {:?}", + dt + ))), + } +} + +/// Extracts the days of a temporal array as [`PrimitiveArray`]. +/// Use [`can_day`] to check if this operation is supported for the target [`DataType`]. 
+pub fn day(array: &dyn Array) -> Result> { + match array.data_type() { + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { + date_like(array, DataType::UInt32, |x| x.day()) + } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.day())) + } else { + chrono_tz(array, time_unit, timezone_str, |x| x.day()) + } + } + dt => Err(ArrowError::NotYetImplemented(format!( + "\"day\" does not support type {:?}", + dt + ))), + } +} + +pub fn date_like(array: &dyn Array, data_type: DataType, op: F) -> Result> +where + O: NativeType, + F: Fn(chrono::NaiveDateTime) -> O, +{ match array.data_type() { DataType::Date32 => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary( - array, - |x| date32_to_datetime(x).year(), - final_data_type, - )) + Ok(unary(array, |x| op(date32_to_datetime(x)), data_type)) } DataType::Date64 => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary( - array, - |x| date64_to_datetime(x).year(), - final_data_type, - )) + Ok(unary(array, |x| op(date64_to_datetime(x)), data_type)) } DataType::Timestamp(time_unit, None) => { let array = array .as_any() .downcast_ref::>() .unwrap(); - let op = match time_unit { - TimeUnit::Second => |x| timestamp_s_to_datetime(x).year(), - TimeUnit::Millisecond => |x| timestamp_ms_to_datetime(x).year(), - TimeUnit::Microsecond => |x| timestamp_us_to_datetime(x).year(), - TimeUnit::Nanosecond => |x| timestamp_ns_to_datetime(x).year(), + let func = match time_unit { + TimeUnit::Second => timestamp_s_to_datetime, + TimeUnit::Millisecond => timestamp_ms_to_datetime, + TimeUnit::Microsecond => timestamp_us_to_datetime, + TimeUnit::Nanosecond => timestamp_ns_to_datetime, }; - Ok(unary(array, op, final_data_type)) + Ok(unary(array, |x| op(func(x)), 
data_type)) } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - - let array = array.as_any().downcast_ref().unwrap(); - - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.year())) - } else { - chrono_tz_year(array, time_unit, timezone_str) - } - } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"year\" does not support type {:?}", - dt - ))), + _ => unreachable!(), } } @@ -324,3 +390,17 @@ pub fn can_year(data_type: &DataType) -> bool { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) ) } + +pub fn can_month(data_type: &DataType) -> bool { + matches!( + data_type, + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) + ) +} + +pub fn can_day(data_type: &DataType) -> bool { + matches!( + data_type, + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) + ) +} diff --git a/tests/it/compute/temporal.rs b/tests/it/compute/temporal.rs index 160d880a91a..2e9da140a2f 100644 --- a/tests/it/compute/temporal.rs +++ b/tests/it/compute/temporal.rs @@ -78,6 +78,26 @@ fn naive_timestamp_micro_year() { assert_eq!(result, expected); } +#[test] +fn date64_month() { + let array = Int64Array::from(&[Some(1514764800000), None]).to(DataType::Date64); + let result = month(&array).unwrap(); + + let expected = UInt32Array::from(&[Some(1), None]); + + assert_eq!(result, expected); +} + +#[test] +fn date64_day() { + let array = Int64Array::from(&[Some(1614764800000), None]).to(DataType::Date64); + let result = day(&array).unwrap(); + + let expected = UInt32Array::from(&[Some(3), None]); + + assert_eq!(result, expected); +} + #[test] fn timestamp_micro_hour() { let array = Int64Array::from(&[Some(1621877130000000), None]).to(DataType::Timestamp( From 4c93c9e83981ee79d9f571b8ad69561a99755ad4 Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Thu, 23 Sep 2021 11:18:17 +0530 Subject: [PATCH 02/17] Add `minute` and `second` 
temporal extractors --- src/compute/temporal.rs | 211 ++++++++++++++++++++--------------- tests/it/compute/temporal.rs | 20 ++++ 2 files changed, 140 insertions(+), 91 deletions(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index d882d798cde..1866875751a 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -75,29 +75,6 @@ where } } -#[cfg(feature = "chrono-tz")] -#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))] -fn chrono_tz_hour( - array: &PrimitiveArray, - time_unit: TimeUnit, - timezone_str: &str, -) -> Result> { - let timezone = parse_offset_tz(timezone_str)?; - Ok(extract_impl(array, time_unit, timezone, |x| x.hour())) -} - -#[cfg(not(feature = "chrono-tz"))] -fn chrono_tz_hour( - _: &PrimitiveArray, - _: TimeUnit, - timezone_str: &str, -) -> Result> { - Err(ArrowError::InvalidArgumentError(format!( - "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)", - timezone_str - ))) -} - #[cfg(feature = "chrono-tz")] #[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))] fn chrono_tz( @@ -134,99 +111,125 @@ where /// Extracts the hours of a temporal array as [`PrimitiveArray`]. /// Use [`can_hour`] to check if this operation is supported for the target [`DataType`]. 
pub fn hour(array: &dyn Array) -> Result> { - let final_data_type = DataType::UInt32; + match array.data_type() { + DataType::Date32 | DataType::Date64 | &DataType::Timestamp(_, None) => { + date_like(array, DataType::UInt32, |x| x.hour()) + } + DataType::Time32(_) | DataType::Time64(_) => { + time_like(array, DataType::UInt32, |x| x.hour()) + } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.hour())) + } else { + chrono_tz(array, time_unit, timezone_str, |x| x.hour()) + } + } + dt => Err(ArrowError::NotYetImplemented(format!( + "\"hour\" does not support type {:?}", + dt + ))), + } +} + +/// Extracts the minutes of a temporal array as [`PrimitiveArray`]. +/// Use [`can_minute`] to check if this operation is supported for the target [`DataType`]. +pub fn minute(array: &dyn Array) -> Result> { + match array.data_type() { + DataType::Date32 | DataType::Date64 | &DataType::Timestamp(_, None) => { + date_like(array, DataType::UInt32, |x| x.minute()) + } + DataType::Time32(_) | DataType::Time64(_) => { + time_like(array, DataType::UInt32, |x| x.minute()) + } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.minute())) + } else { + chrono_tz(array, time_unit, timezone_str, |x| x.minute()) + } + } + dt => Err(ArrowError::NotYetImplemented(format!( + "\"minute\" does not support type {:?}", + dt + ))), + } +} + +/// Extracts the seconds of a temporal array as [`PrimitiveArray`]. +/// Use [`can_second`] to check if this operation is supported for the target [`DataType`]. 
+pub fn second(array: &dyn Array) -> Result> { + match array.data_type() { + DataType::Date32 | DataType::Date64 | &DataType::Timestamp(_, None) => { + date_like(array, DataType::UInt32, |x| x.second()) + } + DataType::Time32(_) | DataType::Time64(_) => { + time_like(array, DataType::UInt32, |x| x.second()) + } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.second())) + } else { + chrono_tz(array, time_unit, timezone_str, |x| x.second()) + } + } + dt => Err(ArrowError::NotYetImplemented(format!( + "\"second\" does not support type {:?}", + dt + ))), + } +} + +fn time_like(array: &dyn Array, data_type: DataType, op: F) -> Result> +where + O: NativeType, + F: Fn(chrono::NaiveTime) -> O, +{ match array.data_type() { DataType::Time32(TimeUnit::Second) => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary(array, |x| time32s_to_time(x).hour(), final_data_type)) + Ok(unary(array, |x| op(time32s_to_time(x)), data_type)) } DataType::Time32(TimeUnit::Millisecond) => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary( - array, - |x| time32ms_to_time(x).hour(), - final_data_type, - )) + Ok(unary(array, |x| op(time32ms_to_time(x)), data_type)) } DataType::Time64(TimeUnit::Microsecond) => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary( - array, - |x| time64us_to_time(x).hour(), - final_data_type, - )) + Ok(unary(array, |x| op(time64us_to_time(x)), data_type)) } DataType::Time64(TimeUnit::Nanosecond) => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary( - array, - |x| time64ns_to_time(x).hour(), - final_data_type, - )) + Ok(unary(array, |x| op(time64ns_to_time(x)), data_type)) } - DataType::Date32 => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - 
Ok(unary( - array, - |x| date32_to_datetime(x).hour(), - final_data_type, - )) - } - DataType::Date64 => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(unary( - array, - |x| date64_to_datetime(x).hour(), - final_data_type, - )) - } - DataType::Timestamp(time_unit, None) => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - let op = match time_unit { - TimeUnit::Second => |x| timestamp_s_to_datetime(x).hour(), - TimeUnit::Millisecond => |x| timestamp_ms_to_datetime(x).hour(), - TimeUnit::Microsecond => |x| timestamp_us_to_datetime(x).hour(), - TimeUnit::Nanosecond => |x| timestamp_ns_to_datetime(x).hour(), - }; - Ok(unary(array, op, final_data_type)) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - - let array = array.as_any().downcast_ref().unwrap(); - - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.hour())) - } else { - chrono_tz_hour(array, time_unit, timezone_str) - } - } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"hour\" does not support type {:?}", - dt - ))), + _ => unreachable!(), } } @@ -256,6 +259,32 @@ pub fn can_hour(data_type: &DataType) -> bool { ) } +pub fn can_minute(data_type: &DataType) -> bool { + matches!( + data_type, + DataType::Time32(TimeUnit::Second) + | DataType::Time32(TimeUnit::Millisecond) + | DataType::Time64(TimeUnit::Microsecond) + | DataType::Time64(TimeUnit::Nanosecond) + | DataType::Date32 + | DataType::Date64 + | DataType::Timestamp(_, _) + ) +} + +pub fn can_second(data_type: &DataType) -> bool { + matches!( + data_type, + DataType::Time32(TimeUnit::Second) + | DataType::Time32(TimeUnit::Millisecond) + | DataType::Time64(TimeUnit::Microsecond) + | DataType::Time64(TimeUnit::Nanosecond) + | DataType::Date32 + | DataType::Date64 + | DataType::Timestamp(_, _) + ) +} + /// Extracts the years of a temporal array as [`PrimitiveArray`]. 
/// Use [`can_year`] to check if this operation is supported for the target [`DataType`]. pub fn year(array: &dyn Array) -> Result> { diff --git a/tests/it/compute/temporal.rs b/tests/it/compute/temporal.rs index 2e9da140a2f..842fdfe6d46 100644 --- a/tests/it/compute/temporal.rs +++ b/tests/it/compute/temporal.rs @@ -2,6 +2,26 @@ use arrow2::array::*; use arrow2::compute::temporal::*; use arrow2::datatypes::*; +#[test] +fn date64_second() { + let array = + Int64Array::from(&[Some(1514764800000), None, Some(1550636625000)]).to(DataType::Date64); + + let result = second(&array).unwrap(); + let expected = UInt32Array::from(&[Some(0), None, Some(45)]); + assert_eq!(result, expected); +} + +#[test] +fn date64_minute() { + let array = + Int64Array::from(&[Some(1514764800000), None, Some(1550636625000)]).to(DataType::Date64); + + let result = minute(&array).unwrap(); + let expected = UInt32Array::from(&[Some(0), None, Some(23)]); + assert_eq!(result, expected); +} + #[test] fn date64_hour() { let array = From b758df9df0c30cfdf07b2ecb303f6d2609287fef Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Thu, 23 Sep 2021 11:37:32 +0530 Subject: [PATCH 03/17] Rearrange functions in temporal compute --- src/compute/temporal.rs | 438 ++++++++++++++++++++-------------------- 1 file changed, 219 insertions(+), 219 deletions(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index 1866875751a..fa1210f4ebc 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -28,84 +28,82 @@ use crate::types::NaturalDataType; use super::arity::unary; -fn extract_impl( - array: &PrimitiveArray, - time_unit: TimeUnit, - timezone: T, - extract: F, -) -> PrimitiveArray -where - T: chrono::TimeZone, - A: NativeType + NaturalDataType, - F: Fn(chrono::DateTime) -> A, -{ - match time_unit { - TimeUnit::Second => { - let op = |x| { - let datetime = timestamp_s_to_datetime(x); - let offset = timezone.offset_from_utc_datetime(&datetime); - 
extract(chrono::DateTime::::from_utc(datetime, offset)) - }; - unary(array, op, DataType::UInt32) +/// Extracts the years of a temporal array as [`PrimitiveArray`]. +/// Use [`can_year`] to check if this operation is supported for the target [`DataType`]. +pub fn year(array: &dyn Array) -> Result> { + match array.data_type() { + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { + date_like(array, DataType::Int32, |x| x.year()) } - TimeUnit::Millisecond => { - let op = |x| { - let datetime = timestamp_ms_to_datetime(x); - let offset = timezone.offset_from_utc_datetime(&datetime); - extract(chrono::DateTime::::from_utc(datetime, offset)) - }; - unary(array, op, A::DATA_TYPE) + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.year())) + } else { + chrono_tz(array, time_unit, timezone_str, |x| x.year()) + } } - TimeUnit::Microsecond => { - let op = |x| { - let datetime = timestamp_us_to_datetime(x); - let offset = timezone.offset_from_utc_datetime(&datetime); - extract(chrono::DateTime::::from_utc(datetime, offset)) - }; - unary(array, op, A::DATA_TYPE) + dt => Err(ArrowError::NotYetImplemented(format!( + "\"year\" does not support type {:?}", + dt + ))), + } +} + +/// Extracts the months of a temporal array as [`PrimitiveArray`]. +/// Use [`can_month`] to check if this operation is supported for the target [`DataType`]. 
+pub fn month(array: &dyn Array) -> Result> { + match array.data_type() { + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { + date_like(array, DataType::UInt32, |x| x.month()) } - TimeUnit::Nanosecond => { - let op = |x| { - let datetime = timestamp_ns_to_datetime(x); - let offset = timezone.offset_from_utc_datetime(&datetime); - extract(chrono::DateTime::::from_utc(datetime, offset)) - }; - unary(array, op, A::DATA_TYPE) + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.month())) + } else { + chrono_tz(array, time_unit, timezone_str, |x| x.month()) + } } + dt => Err(ArrowError::NotYetImplemented(format!( + "\"month\" does not support type {:?}", + dt + ))), } } -#[cfg(feature = "chrono-tz")] -#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))] -fn chrono_tz( - array: &PrimitiveArray, - time_unit: TimeUnit, - timezone_str: &str, - op: F, -) -> Result> -where - O: NativeType, - F: Fn(chrono::DateTime) -> O, -{ - let timezone = parse_offset_tz(timezone_str)?; - Ok(extract_impl(array, time_unit, timezone, op)) -} +/// Extracts the days of a temporal array as [`PrimitiveArray`]. +/// Use [`can_day`] to check if this operation is supported for the target [`DataType`]. 
+pub fn day(array: &dyn Array) -> Result> { + match array.data_type() { + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { + date_like(array, DataType::UInt32, |x| x.day()) + } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); -#[cfg(not(feature = "chrono-tz"))] -fn chrono_tz( - _: &PrimitiveArray, - _: TimeUnit, - timezone_str: &str, - _: F, -) -> Result> -where - O: NativeType, - F: Fn(chrono::DateTime) -> O, -{ - Err(ArrowError::InvalidArgumentError(format!( - "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)", - timezone_str - ))) + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.day())) + } else { + chrono_tz(array, time_unit, timezone_str, |x| x.day()) + } + } + dt => Err(ArrowError::NotYetImplemented(format!( + "\"day\" does not support type {:?}", + dt + ))), + } } /// Extracts the hours of a temporal array as [`PrimitiveArray`]. 
@@ -195,6 +193,43 @@ pub fn second(array: &dyn Array) -> Result> { } } +pub fn date_like(array: &dyn Array, data_type: DataType, op: F) -> Result> +where + O: NativeType, + F: Fn(chrono::NaiveDateTime) -> O, +{ + match array.data_type() { + DataType::Date32 => { + let array = array + .as_any() + .downcast_ref::>() + .unwrap(); + Ok(unary(array, |x| op(date32_to_datetime(x)), data_type)) + } + DataType::Date64 => { + let array = array + .as_any() + .downcast_ref::>() + .unwrap(); + Ok(unary(array, |x| op(date64_to_datetime(x)), data_type)) + } + DataType::Timestamp(time_unit, None) => { + let array = array + .as_any() + .downcast_ref::>() + .unwrap(); + let func = match time_unit { + TimeUnit::Second => timestamp_s_to_datetime, + TimeUnit::Millisecond => timestamp_ms_to_datetime, + TimeUnit::Microsecond => timestamp_us_to_datetime, + TimeUnit::Nanosecond => timestamp_ns_to_datetime, + }; + Ok(unary(array, |x| op(func(x)), data_type)) + } + _ => unreachable!(), + } +} + fn time_like(array: &dyn Array, data_type: DataType, op: F) -> Result> where O: NativeType, @@ -233,6 +268,120 @@ where } } +#[cfg(feature = "chrono-tz")] +#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))] +fn chrono_tz( + array: &PrimitiveArray, + time_unit: TimeUnit, + timezone_str: &str, + op: F, +) -> Result> +where + O: NativeType, + F: Fn(chrono::DateTime) -> O, +{ + let timezone = parse_offset_tz(timezone_str)?; + Ok(extract_impl(array, time_unit, timezone, op)) +} + +#[cfg(not(feature = "chrono-tz"))] +fn chrono_tz( + _: &PrimitiveArray, + _: TimeUnit, + timezone_str: &str, + _: F, +) -> Result> +where + O: NativeType, + F: Fn(chrono::DateTime) -> O, +{ + Err(ArrowError::InvalidArgumentError(format!( + "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)", + timezone_str + ))) +} + +fn extract_impl( + array: &PrimitiveArray, + time_unit: TimeUnit, + timezone: T, + extract: F, +) -> PrimitiveArray +where + T: chrono::TimeZone, + A: NativeType + NaturalDataType, + F: 
Fn(chrono::DateTime) -> A, +{ + match time_unit { + TimeUnit::Second => { + let op = |x| { + let datetime = timestamp_s_to_datetime(x); + let offset = timezone.offset_from_utc_datetime(&datetime); + extract(chrono::DateTime::::from_utc(datetime, offset)) + }; + unary(array, op, DataType::UInt32) + } + TimeUnit::Millisecond => { + let op = |x| { + let datetime = timestamp_ms_to_datetime(x); + let offset = timezone.offset_from_utc_datetime(&datetime); + extract(chrono::DateTime::::from_utc(datetime, offset)) + }; + unary(array, op, A::DATA_TYPE) + } + TimeUnit::Microsecond => { + let op = |x| { + let datetime = timestamp_us_to_datetime(x); + let offset = timezone.offset_from_utc_datetime(&datetime); + extract(chrono::DateTime::::from_utc(datetime, offset)) + }; + unary(array, op, A::DATA_TYPE) + } + TimeUnit::Nanosecond => { + let op = |x| { + let datetime = timestamp_ns_to_datetime(x); + let offset = timezone.offset_from_utc_datetime(&datetime); + extract(chrono::DateTime::::from_utc(datetime, offset)) + }; + unary(array, op, A::DATA_TYPE) + } + } +} + +/// Checks if an array of type `datatype` can perform year operation +/// +/// # Examples +/// ``` +/// use arrow2::compute::temporal::can_year; +/// use arrow2::datatypes::{DataType}; +/// +/// let data_type = DataType::Date32; +/// assert_eq!(can_year(&data_type), true); + +/// let data_type = DataType::Int8; +/// assert_eq!(can_year(&data_type), false); +/// ``` +pub fn can_year(data_type: &DataType) -> bool { + matches!( + data_type, + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) + ) +} + +pub fn can_month(data_type: &DataType) -> bool { + matches!( + data_type, + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) + ) +} + +pub fn can_day(data_type: &DataType) -> bool { + matches!( + data_type, + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) + ) +} + /// Checks if an array of type `datatype` can perform hour operation /// /// # Examples @@ -284,152 +433,3 @@ pub 
fn can_second(data_type: &DataType) -> bool { | DataType::Timestamp(_, _) ) } - -/// Extracts the years of a temporal array as [`PrimitiveArray`]. -/// Use [`can_year`] to check if this operation is supported for the target [`DataType`]. -pub fn year(array: &dyn Array) -> Result> { - match array.data_type() { - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { - date_like(array, DataType::Int32, |x| x.year()) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - - let array = array.as_any().downcast_ref().unwrap(); - - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.year())) - } else { - chrono_tz(array, time_unit, timezone_str, |x| x.year()) - } - } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"year\" does not support type {:?}", - dt - ))), - } -} - -/// Extracts the months of a temporal array as [`PrimitiveArray`]. -/// Use [`can_month`] to check if this operation is supported for the target [`DataType`]. -pub fn month(array: &dyn Array) -> Result> { - match array.data_type() { - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { - date_like(array, DataType::UInt32, |x| x.month()) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - - let array = array.as_any().downcast_ref().unwrap(); - - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.month())) - } else { - chrono_tz(array, time_unit, timezone_str, |x| x.month()) - } - } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"month\" does not support type {:?}", - dt - ))), - } -} - -/// Extracts the days of a temporal array as [`PrimitiveArray`]. -/// Use [`can_day`] to check if this operation is supported for the target [`DataType`]. 
-pub fn day(array: &dyn Array) -> Result> { - match array.data_type() { - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { - date_like(array, DataType::UInt32, |x| x.day()) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - - let array = array.as_any().downcast_ref().unwrap(); - - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.day())) - } else { - chrono_tz(array, time_unit, timezone_str, |x| x.day()) - } - } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"day\" does not support type {:?}", - dt - ))), - } -} - -pub fn date_like(array: &dyn Array, data_type: DataType, op: F) -> Result> -where - O: NativeType, - F: Fn(chrono::NaiveDateTime) -> O, -{ - match array.data_type() { - DataType::Date32 => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(unary(array, |x| op(date32_to_datetime(x)), data_type)) - } - DataType::Date64 => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(unary(array, |x| op(date64_to_datetime(x)), data_type)) - } - DataType::Timestamp(time_unit, None) => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - let func = match time_unit { - TimeUnit::Second => timestamp_s_to_datetime, - TimeUnit::Millisecond => timestamp_ms_to_datetime, - TimeUnit::Microsecond => timestamp_us_to_datetime, - TimeUnit::Nanosecond => timestamp_ns_to_datetime, - }; - Ok(unary(array, |x| op(func(x)), data_type)) - } - _ => unreachable!(), - } -} - -/// Checks if an array of type `datatype` can perform year operation -/// -/// # Examples -/// ``` -/// use arrow2::compute::temporal::can_year; -/// use arrow2::datatypes::{DataType}; -/// -/// let data_type = DataType::Date32; -/// assert_eq!(can_year(&data_type), true); - -/// let data_type = DataType::Int8; -/// assert_eq!(can_year(&data_type), false); -/// ``` -pub fn can_year(data_type: &DataType) -> 
bool { - matches!( - data_type, - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) - ) -} - -pub fn can_month(data_type: &DataType) -> bool { - matches!( - data_type, - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) - ) -} - -pub fn can_day(data_type: &DataType) -> bool { - matches!( - data_type, - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) - ) -} From 8696c2d1c7b81355cbec121462861c0708dafa1b Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Thu, 23 Sep 2021 11:42:05 +0530 Subject: [PATCH 04/17] Avoid repetitions in `can_*` functions --- src/compute/temporal.rs | 92 ++++++++++++++++++++++++++++------------- 1 file changed, 64 insertions(+), 28 deletions(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index fa1210f4ebc..635852e3861 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -362,20 +362,44 @@ where /// assert_eq!(can_year(&data_type), false); /// ``` pub fn can_year(data_type: &DataType) -> bool { - matches!( - data_type, - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) - ) + can_date(data_type) } +/// Checks if an array of type `datatype` can perform month operation +/// +/// # Examples +/// ``` +/// use arrow2::compute::temporal::can_month; +/// use arrow2::datatypes::{DataType}; +/// +/// let data_type = DataType::Date32; +/// assert_eq!(can_month(&data_type), true); + +/// let data_type = DataType::Int8; +/// assert_eq!(can_month(&data_type), false); +/// ``` pub fn can_month(data_type: &DataType) -> bool { - matches!( - data_type, - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) - ) + can_date(data_type) } +/// Checks if an array of type `datatype` can perform day operation +/// +/// # Examples +/// ``` +/// use arrow2::compute::temporal::can_day; +/// use arrow2::datatypes::{DataType}; +/// +/// let data_type = DataType::Date32; +/// assert_eq!(can_day(&data_type), true); + +/// let data_type = DataType::Int8; +/// 
assert_eq!(can_day(&data_type), false); +/// ``` pub fn can_day(data_type: &DataType) -> bool { + can_date(data_type) +} + +fn can_date(data_type: &DataType) -> bool { matches!( data_type, DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) @@ -396,32 +420,44 @@ pub fn can_day(data_type: &DataType) -> bool { /// assert_eq!(can_hour(&data_type), false); /// ``` pub fn can_hour(data_type: &DataType) -> bool { - matches!( - data_type, - DataType::Time32(TimeUnit::Second) - | DataType::Time32(TimeUnit::Millisecond) - | DataType::Time64(TimeUnit::Microsecond) - | DataType::Time64(TimeUnit::Nanosecond) - | DataType::Date32 - | DataType::Date64 - | DataType::Timestamp(_, _) - ) + can_time(data_type) } +/// Checks if an array of type `datatype` can perform minute operation +/// +/// # Examples +/// ``` +/// use arrow2::compute::temporal::can_minute; +/// use arrow2::datatypes::{DataType, TimeUnit}; +/// +/// let data_type = DataType::Time32(TimeUnit::Second); +/// assert_eq!(can_minute(&data_type), true); + +/// let data_type = DataType::Int8; +/// assert_eq!(can_minute(&data_type), false); +/// ``` pub fn can_minute(data_type: &DataType) -> bool { - matches!( - data_type, - DataType::Time32(TimeUnit::Second) - | DataType::Time32(TimeUnit::Millisecond) - | DataType::Time64(TimeUnit::Microsecond) - | DataType::Time64(TimeUnit::Nanosecond) - | DataType::Date32 - | DataType::Date64 - | DataType::Timestamp(_, _) - ) + can_time(data_type) } +/// Checks if an array of type `datatype` can perform second operation +/// +/// # Examples +/// ``` +/// use arrow2::compute::temporal::can_second; +/// use arrow2::datatypes::{DataType, TimeUnit}; +/// +/// let data_type = DataType::Time32(TimeUnit::Second); +/// assert_eq!(can_second(&data_type), true); + +/// let data_type = DataType::Int8; +/// assert_eq!(can_second(&data_type), false); +/// ``` pub fn can_second(data_type: &DataType) -> bool { + can_time(data_type) +} + +fn can_time(data_type: &DataType) -> bool { 
matches!( data_type, DataType::Time32(TimeUnit::Second) From 6581e7b5a0b167c0e9899d895648e2039de27abd Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Fri, 24 Sep 2021 10:39:51 +0530 Subject: [PATCH 05/17] Rename `(time|date)_like` functions as `(time|date)_variants` --- src/compute/temporal.rs | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index 635852e3861..0b4e8709cb6 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -33,7 +33,7 @@ use super::arity::unary; pub fn year(array: &dyn Array) -> Result> { match array.data_type() { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { - date_like(array, DataType::Int32, |x| x.year()) + date_variants(array, DataType::Int32, |x| x.year()) } DataType::Timestamp(time_unit, Some(timezone_str)) => { let time_unit = *time_unit; @@ -59,7 +59,7 @@ pub fn year(array: &dyn Array) -> Result> { pub fn month(array: &dyn Array) -> Result> { match array.data_type() { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { - date_like(array, DataType::UInt32, |x| x.month()) + date_variants(array, DataType::UInt32, |x| x.month()) } DataType::Timestamp(time_unit, Some(timezone_str)) => { let time_unit = *time_unit; @@ -85,7 +85,7 @@ pub fn month(array: &dyn Array) -> Result> { pub fn day(array: &dyn Array) -> Result> { match array.data_type() { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { - date_like(array, DataType::UInt32, |x| x.day()) + date_variants(array, DataType::UInt32, |x| x.day()) } DataType::Timestamp(time_unit, Some(timezone_str)) => { let time_unit = *time_unit; @@ -111,10 +111,10 @@ pub fn day(array: &dyn Array) -> Result> { pub fn hour(array: &dyn Array) -> Result> { match array.data_type() { DataType::Date32 | DataType::Date64 | &DataType::Timestamp(_, None) => { - date_like(array, DataType::UInt32, |x| x.hour()) + date_variants(array, 
DataType::UInt32, |x| x.hour()) } DataType::Time32(_) | DataType::Time64(_) => { - time_like(array, DataType::UInt32, |x| x.hour()) + time_variants(array, DataType::UInt32, |x| x.hour()) } DataType::Timestamp(time_unit, Some(timezone_str)) => { let time_unit = *time_unit; @@ -140,10 +140,10 @@ pub fn hour(array: &dyn Array) -> Result> { pub fn minute(array: &dyn Array) -> Result> { match array.data_type() { DataType::Date32 | DataType::Date64 | &DataType::Timestamp(_, None) => { - date_like(array, DataType::UInt32, |x| x.minute()) + date_variants(array, DataType::UInt32, |x| x.minute()) } DataType::Time32(_) | DataType::Time64(_) => { - time_like(array, DataType::UInt32, |x| x.minute()) + time_variants(array, DataType::UInt32, |x| x.minute()) } DataType::Timestamp(time_unit, Some(timezone_str)) => { let time_unit = *time_unit; @@ -169,10 +169,10 @@ pub fn minute(array: &dyn Array) -> Result> { pub fn second(array: &dyn Array) -> Result> { match array.data_type() { DataType::Date32 | DataType::Date64 | &DataType::Timestamp(_, None) => { - date_like(array, DataType::UInt32, |x| x.second()) + date_variants(array, DataType::UInt32, |x| x.second()) } DataType::Time32(_) | DataType::Time64(_) => { - time_like(array, DataType::UInt32, |x| x.second()) + time_variants(array, DataType::UInt32, |x| x.second()) } DataType::Timestamp(time_unit, Some(timezone_str)) => { let time_unit = *time_unit; @@ -193,7 +193,11 @@ pub fn second(array: &dyn Array) -> Result> { } } -pub fn date_like(array: &dyn Array, data_type: DataType, op: F) -> Result> +pub fn date_variants( + array: &dyn Array, + data_type: DataType, + op: F, +) -> Result> where O: NativeType, F: Fn(chrono::NaiveDateTime) -> O, @@ -230,7 +234,7 @@ where } } -fn time_like(array: &dyn Array, data_type: DataType, op: F) -> Result> +fn time_variants(array: &dyn Array, data_type: DataType, op: F) -> Result> where O: NativeType, F: Fn(chrono::NaiveTime) -> O, From 33a2ce535e3dbc622270b5162f4d96a3e6ce4125 Mon Sep 17 00:00:00 
2001 From: VasanthakumarV Date: Fri, 24 Sep 2021 13:23:57 +0530 Subject: [PATCH 06/17] Reduce little bit of boilerplate --- src/compute/temporal.rs | 56 ++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 37 deletions(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index 0b4e8709cb6..a3ea0ff7c0c 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -36,15 +36,12 @@ pub fn year(array: &dyn Array) -> Result> { date_variants(array, DataType::Int32, |x| x.year()) } DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - let array = array.as_any().downcast_ref().unwrap(); - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.year())) + if let Ok(timezone) = parse_offset(timezone_str) { + Ok(extract_impl(array, *time_unit, timezone, |x| x.year())) } else { - chrono_tz(array, time_unit, timezone_str, |x| x.year()) + chrono_tz(array, *time_unit, timezone_str, |x| x.year()) } } dt => Err(ArrowError::NotYetImplemented(format!( @@ -62,15 +59,12 @@ pub fn month(array: &dyn Array) -> Result> { date_variants(array, DataType::UInt32, |x| x.month()) } DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - let array = array.as_any().downcast_ref().unwrap(); - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.month())) + if let Ok(timezone) = parse_offset(timezone_str) { + Ok(extract_impl(array, *time_unit, timezone, |x| x.month())) } else { - chrono_tz(array, time_unit, timezone_str, |x| x.month()) + chrono_tz(array, *time_unit, timezone_str, |x| x.month()) } } dt => Err(ArrowError::NotYetImplemented(format!( @@ -88,15 +82,12 @@ pub fn day(array: &dyn Array) -> Result> { date_variants(array, DataType::UInt32, |x| x.day()) } DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = 
*time_unit; - let timezone = parse_offset(timezone_str); - let array = array.as_any().downcast_ref().unwrap(); - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.day())) + if let Ok(timezone) = parse_offset(timezone_str) { + Ok(extract_impl(array, *time_unit, timezone, |x| x.day())) } else { - chrono_tz(array, time_unit, timezone_str, |x| x.day()) + chrono_tz(array, *time_unit, timezone_str, |x| x.day()) } } dt => Err(ArrowError::NotYetImplemented(format!( @@ -117,15 +108,12 @@ pub fn hour(array: &dyn Array) -> Result> { time_variants(array, DataType::UInt32, |x| x.hour()) } DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - let array = array.as_any().downcast_ref().unwrap(); - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.hour())) + if let Ok(timezone) = parse_offset(timezone_str) { + Ok(extract_impl(array, *time_unit, timezone, |x| x.hour())) } else { - chrono_tz(array, time_unit, timezone_str, |x| x.hour()) + chrono_tz(array, *time_unit, timezone_str, |x| x.hour()) } } dt => Err(ArrowError::NotYetImplemented(format!( @@ -146,15 +134,12 @@ pub fn minute(array: &dyn Array) -> Result> { time_variants(array, DataType::UInt32, |x| x.minute()) } DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - let array = array.as_any().downcast_ref().unwrap(); - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.minute())) + if let Ok(timezone) = parse_offset(timezone_str) { + Ok(extract_impl(array, *time_unit, timezone, |x| x.minute())) } else { - chrono_tz(array, time_unit, timezone_str, |x| x.minute()) + chrono_tz(array, *time_unit, timezone_str, |x| x.minute()) } } dt => Err(ArrowError::NotYetImplemented(format!( @@ -175,15 +160,12 @@ pub fn second(array: &dyn Array) -> Result> { time_variants(array, 
DataType::UInt32, |x| x.second()) } DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - let array = array.as_any().downcast_ref().unwrap(); - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.second())) + if let Ok(timezone) = parse_offset(timezone_str) { + Ok(extract_impl(array, *time_unit, timezone, |x| x.second())) } else { - chrono_tz(array, time_unit, timezone_str, |x| x.second()) + chrono_tz(array, *time_unit, timezone_str, |x| x.second()) } } dt => Err(ArrowError::NotYetImplemented(format!( @@ -297,7 +279,7 @@ fn chrono_tz( ) -> Result> where O: NativeType, - F: Fn(chrono::DateTime) -> O, + F: Fn(chrono::DateTime) -> O, { Err(ArrowError::InvalidArgumentError(format!( "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)", From d6722299c6bf3dbd801cdeacc327403d6f7f42ae Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Fri, 24 Sep 2021 14:33:50 +0530 Subject: [PATCH 07/17] Use `macro_rules` to reduce boilerplate --- src/compute/temporal.rs | 168 +++++++++++++--------------------------- 1 file changed, 54 insertions(+), 114 deletions(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index a3ea0ff7c0c..18d3c5ac6fd 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -28,151 +28,91 @@ use crate::types::NaturalDataType; use super::arity::unary; +macro_rules! 
date_like { + ($extract:ident, $array:ident, $data_type:path) => { + match $array.data_type() { + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { + date_variants($array, $data_type, |x| x.$extract()) + } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let array = $array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = parse_offset(timezone_str) { + Ok(extract_impl(array, *time_unit, timezone, |x| x.$extract())) + } else { + chrono_tz(array, *time_unit, timezone_str, |x| x.$extract()) + } + } + dt => Err(ArrowError::NotYetImplemented(format!( + "\"{}\" does not support type {:?}", + stringify!($extract), + dt + ))), + } + }; +} + /// Extracts the years of a temporal array as [`PrimitiveArray`]. /// Use [`can_year`] to check if this operation is supported for the target [`DataType`]. pub fn year(array: &dyn Array) -> Result> { - match array.data_type() { - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { - date_variants(array, DataType::Int32, |x| x.year()) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let array = array.as_any().downcast_ref().unwrap(); - - if let Ok(timezone) = parse_offset(timezone_str) { - Ok(extract_impl(array, *time_unit, timezone, |x| x.year())) - } else { - chrono_tz(array, *time_unit, timezone_str, |x| x.year()) - } - } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"year\" does not support type {:?}", - dt - ))), - } + date_like!(year, array, DataType::Int32) } /// Extracts the months of a temporal array as [`PrimitiveArray`]. /// Use [`can_month`] to check if this operation is supported for the target [`DataType`]. 
pub fn month(array: &dyn Array) -> Result> { - match array.data_type() { - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { - date_variants(array, DataType::UInt32, |x| x.month()) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let array = array.as_any().downcast_ref().unwrap(); - - if let Ok(timezone) = parse_offset(timezone_str) { - Ok(extract_impl(array, *time_unit, timezone, |x| x.month())) - } else { - chrono_tz(array, *time_unit, timezone_str, |x| x.month()) - } - } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"month\" does not support type {:?}", - dt - ))), - } + date_like!(month, array, DataType::UInt32) } /// Extracts the days of a temporal array as [`PrimitiveArray`]. /// Use [`can_day`] to check if this operation is supported for the target [`DataType`]. pub fn day(array: &dyn Array) -> Result> { - match array.data_type() { - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { - date_variants(array, DataType::UInt32, |x| x.day()) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let array = array.as_any().downcast_ref().unwrap(); + date_like!(day, array, DataType::UInt32) +} - if let Ok(timezone) = parse_offset(timezone_str) { - Ok(extract_impl(array, *time_unit, timezone, |x| x.day())) - } else { - chrono_tz(array, *time_unit, timezone_str, |x| x.day()) +macro_rules! 
time_like { + ($extract:ident, $array:ident, $data_type:path) => { + match $array.data_type() { + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { + date_variants($array, $data_type, |x| x.$extract()) + } + DataType::Time32(_) | DataType::Time64(_) => { + time_variants($array, DataType::UInt32, |x| x.$extract()) + } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let array = $array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = parse_offset(timezone_str) { + Ok(extract_impl(array, *time_unit, timezone, |x| x.$extract())) + } else { + chrono_tz(array, *time_unit, timezone_str, |x| x.$extract()) + } } + dt => Err(ArrowError::NotYetImplemented(format!( + "\"{}\" does not support type {:?}", + stringify!($extract), + dt + ))), } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"day\" does not support type {:?}", - dt - ))), - } + }; } /// Extracts the hours of a temporal array as [`PrimitiveArray`]. /// Use [`can_hour`] to check if this operation is supported for the target [`DataType`]. pub fn hour(array: &dyn Array) -> Result> { - match array.data_type() { - DataType::Date32 | DataType::Date64 | &DataType::Timestamp(_, None) => { - date_variants(array, DataType::UInt32, |x| x.hour()) - } - DataType::Time32(_) | DataType::Time64(_) => { - time_variants(array, DataType::UInt32, |x| x.hour()) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let array = array.as_any().downcast_ref().unwrap(); - - if let Ok(timezone) = parse_offset(timezone_str) { - Ok(extract_impl(array, *time_unit, timezone, |x| x.hour())) - } else { - chrono_tz(array, *time_unit, timezone_str, |x| x.hour()) - } - } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"hour\" does not support type {:?}", - dt - ))), - } + time_like!(hour, array, DataType::UInt32) } /// Extracts the minutes of a temporal array as [`PrimitiveArray`]. /// Use [`can_minute`] to check if this operation is supported for the target [`DataType`]. 
pub fn minute(array: &dyn Array) -> Result> { - match array.data_type() { - DataType::Date32 | DataType::Date64 | &DataType::Timestamp(_, None) => { - date_variants(array, DataType::UInt32, |x| x.minute()) - } - DataType::Time32(_) | DataType::Time64(_) => { - time_variants(array, DataType::UInt32, |x| x.minute()) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let array = array.as_any().downcast_ref().unwrap(); - - if let Ok(timezone) = parse_offset(timezone_str) { - Ok(extract_impl(array, *time_unit, timezone, |x| x.minute())) - } else { - chrono_tz(array, *time_unit, timezone_str, |x| x.minute()) - } - } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"minute\" does not support type {:?}", - dt - ))), - } + time_like!(minute, array, DataType::UInt32) } /// Extracts the seconds of a temporal array as [`PrimitiveArray`]. /// Use [`can_second`] to check if this operation is supported for the target [`DataType`]. pub fn second(array: &dyn Array) -> Result> { - match array.data_type() { - DataType::Date32 | DataType::Date64 | &DataType::Timestamp(_, None) => { - date_variants(array, DataType::UInt32, |x| x.second()) - } - DataType::Time32(_) | DataType::Time64(_) => { - time_variants(array, DataType::UInt32, |x| x.second()) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let array = array.as_any().downcast_ref().unwrap(); - - if let Ok(timezone) = parse_offset(timezone_str) { - Ok(extract_impl(array, *time_unit, timezone, |x| x.second())) - } else { - chrono_tz(array, *time_unit, timezone_str, |x| x.second()) - } - } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"second\" does not support type {:?}", - dt - ))), - } + time_like!(second, array, DataType::UInt32) } pub fn date_variants( From c0ca677d9a2f4bab8b6b979334bbea6be54439d2 Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Fri, 24 Sep 2021 23:36:52 +0530 Subject: [PATCH 08/17] Add `weekday`, `iso_week` and `nanosecond` extractors --- src/compute/temporal.rs | 53 
++++++++++++++++++++++++++++++++++++ tests/it/compute/temporal.rs | 24 ++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index 18d3c5ac6fd..38c802c7c23 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -28,6 +28,24 @@ use crate::types::NaturalDataType; use super::arity::unary; +trait U32Weekday: Datelike { + fn u32_weekday(&self) -> u32 { + self.weekday().number_from_monday() + } +} + +impl U32Weekday for chrono::NaiveDateTime {} +impl U32Weekday for chrono::DateTime {} + +trait U32IsoWeek: Datelike { + fn u32_iso_week(&self) -> u32 { + self.iso_week().week() + } +} + +impl U32IsoWeek for chrono::NaiveDateTime {} +impl U32IsoWeek for chrono::DateTime {} + macro_rules! date_like { ($extract:ident, $array:ident, $data_type:path) => { match $array.data_type() { @@ -70,6 +88,20 @@ pub fn day(array: &dyn Array) -> Result> { date_like!(day, array, DataType::UInt32) } +/// Extracts weekday of a temporal array as [`PrimitiveArray`]. +/// Monday is 1, Tuesday is 2, ... Sunday is 7 +/// Use [`can_weekday`] to check if this operation is supported for the target [`DataType`] +pub fn weekday(array: &dyn Array) -> Result> { + date_like!(u32_weekday, array, DataType::UInt32) +} + +/// Extracts ISO week of a temporal array as [`PrimitiveArray`] +/// Value ranges from 1 to 53. +/// Use [`can_iso_week`] to check if this operation is supported for the target [`DataType`] +pub fn iso_week(array: &dyn Array) -> Result> { + date_like!(u32_iso_week, array, DataType::UInt32) +} + macro_rules! time_like { ($extract:ident, $array:ident, $data_type:path) => { match $array.data_type() { @@ -115,6 +147,12 @@ pub fn second(array: &dyn Array) -> Result> { time_like!(second, array, DataType::UInt32) } +/// Extracts the nanoseconds of a temporal array as [`PrimitiveArray`]. +/// Use [`can_nanosecond`] to check if this operation is supported for the target [`DataType`]. 
+pub fn nanosecond(array: &dyn Array) -> Result> { + time_like!(nanosecond, array, DataType::UInt32) +} + pub fn date_variants( array: &dyn Array, data_type: DataType, @@ -325,6 +363,16 @@ pub fn can_day(data_type: &DataType) -> bool { can_date(data_type) } +/// Checks if an array of type `data_type` can perform weekday operation +pub fn can_weekday(data_type: &DataType) -> bool { + can_date(data_type) +} + +/// Checks if an array of type `data_type` can perform ISO week operation +pub fn can_iso_week(data_type: &DataType) -> bool { + can_date(data_type) +} + fn can_date(data_type: &DataType) -> bool { matches!( data_type, @@ -383,6 +431,11 @@ pub fn can_second(data_type: &DataType) -> bool { can_time(data_type) } +/// Checks if an array of type `datatype` can perform nanosecond operation +pub fn can_nanosecond(data_type: &DataType) -> bool { + can_time(data_type) +} + fn can_time(data_type: &DataType) -> bool { matches!( data_type, diff --git a/tests/it/compute/temporal.rs b/tests/it/compute/temporal.rs index 842fdfe6d46..b6ed7c229a5 100644 --- a/tests/it/compute/temporal.rs +++ b/tests/it/compute/temporal.rs @@ -70,6 +70,30 @@ fn naive_timestamp_micro_hour() { assert_eq!(result, expected); } +#[test] +fn date64_weekday() { + let array = Int64Array::from(&[Some(1514764800000), None, Some(86400000)]).to(DataType::Date64); + + let result = weekday(&array).unwrap(); + let expected = UInt32Array::from(&[Some(1), None, Some(5)]); + assert_eq!(result, expected); +} + +#[test] +fn date64_iso_week() { + let array = Int64Array::from(&[ + Some(1514764800000), + None, + Some(1515456000000), + Some(1514678400000), + ]) + .to(DataType::Date64); + + let result = iso_week(&array).unwrap(); + let expected = UInt32Array::from(&[Some(1), None, Some(2), Some(52)]); + assert_eq!(result, expected); +} + #[test] fn date64_year() { let array = Int64Array::from(&[Some(1514764800000), None]).to(DataType::Date64); From aeb806bdec26881f10611ea6856a78e122753fd9 Mon Sep 17 00:00:00 2001 From: 
VasanthakumarV Date: Sat, 25 Sep 2021 11:24:36 +0530 Subject: [PATCH 09/17] Refactor and add more consistency checks --- tests/it/compute/temporal.rs | 93 +++++++++++++++++------------------- 1 file changed, 44 insertions(+), 49 deletions(-) diff --git a/tests/it/compute/temporal.rs b/tests/it/compute/temporal.rs index b6ed7c229a5..f0e3306c4af 100644 --- a/tests/it/compute/temporal.rs +++ b/tests/it/compute/temporal.rs @@ -180,59 +180,54 @@ fn timestamp_micro_hour_tz() { #[test] fn consistency_hour() { - use arrow2::array::new_null_array; - use arrow2::datatypes::DataType::*; - use arrow2::datatypes::TimeUnit; + consistency_check(can_hour, hour); +} - let datatypes = vec![ - Null, - Boolean, - UInt8, - UInt16, - UInt32, - UInt64, - Int8, - Int16, - Int32, - Int64, - Float32, - Float64, - Timestamp(TimeUnit::Second, None), - Timestamp(TimeUnit::Millisecond, None), - Timestamp(TimeUnit::Microsecond, None), - Timestamp(TimeUnit::Nanosecond, None), - Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())), - Time64(TimeUnit::Microsecond), - Time64(TimeUnit::Nanosecond), - Date32, - Time32(TimeUnit::Second), - Time32(TimeUnit::Millisecond), - Date64, - Utf8, - LargeUtf8, - Binary, - LargeBinary, - Duration(TimeUnit::Second), - Duration(TimeUnit::Millisecond), - Duration(TimeUnit::Microsecond), - Duration(TimeUnit::Nanosecond), - ]; +#[test] +fn consistency_minute() { + consistency_check(can_minute, minute); +} - datatypes.into_iter().for_each(|d1| { - let array = new_null_array(d1.clone(), 10); - if can_hour(&d1) { - assert!(hour(array.as_ref()).is_ok()); - } else { - assert!(hour(array.as_ref()).is_err()); - } - }); +#[test] +fn consistency_second() { + consistency_check(can_second, second); +} + +#[test] +fn consistency_nanosecond() { + consistency_check(can_nanosecond, nanosecond); } #[test] fn consistency_year() { - use arrow2::array::new_null_array; + consistency_check(can_year, year); +} + +#[test] +fn consistency_month() { + consistency_check(can_month, 
month); +} + +#[test] +fn consistency_day() { + consistency_check(can_day, day); +} + +#[test] +fn consistency_weekday() { + consistency_check(can_weekday, weekday); +} + +#[test] +fn consistency_iso_week() { + consistency_check(can_iso_week, iso_week); +} + +fn consistency_check( + can_extract: fn(&DataType) -> bool, + extract: fn(&dyn Array) -> arrow2::error::Result>, +) { use arrow2::datatypes::DataType::*; - use arrow2::datatypes::TimeUnit; let datatypes = vec![ Null, @@ -270,10 +265,10 @@ fn consistency_year() { datatypes.into_iter().for_each(|d1| { let array = new_null_array(d1.clone(), 10); - if can_year(&d1) { - assert!(year(array.as_ref()).is_ok()); + if can_extract(&d1) { + assert!(extract(array.as_ref()).is_ok()); } else { - assert!(year(array.as_ref()).is_err()); + assert!(extract(array.as_ref()).is_err()); } }); } From 26a0d777b63fca6a10152cedd4442a66c0634671 Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Sat, 25 Sep 2021 17:26:04 +0530 Subject: [PATCH 10/17] Add/Remove code comments --- src/compute/temporal.rs | 56 ++++++----------------------------------- 1 file changed, 8 insertions(+), 48 deletions(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index 38c802c7c23..1fd96bcd40e 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -28,6 +28,8 @@ use crate::types::NaturalDataType; use super::arity::unary; +// Create and implement a trait that converts chrono's `Weekday` +// type into `u32` trait U32Weekday: Datelike { fn u32_weekday(&self) -> u32 { self.weekday().number_from_monday() @@ -37,6 +39,8 @@ trait U32Weekday: Datelike { impl U32Weekday for chrono::NaiveDateTime {} impl U32Weekday for chrono::DateTime {} +// Create and implement a trait that converts chrono's `IsoWeek` +// type into `u32` trait U32IsoWeek: Datelike { fn u32_iso_week(&self) -> u32 { self.iso_week().week() @@ -46,6 +50,8 @@ trait U32IsoWeek: Datelike { impl U32IsoWeek for chrono::NaiveDateTime {} impl U32IsoWeek for chrono::DateTime {} +// 
Macro to avoid repetition in functions, that apply +// `chrono::Datelike` methods on Arrays macro_rules! date_like { ($extract:ident, $array:ident, $data_type:path) => { match $array.data_type() { @@ -102,6 +108,8 @@ pub fn iso_week(array: &dyn Array) -> Result> { date_like!(u32_iso_week, array, DataType::UInt32) } +// Macro to avoid repetition in functions, that apply +// `chrono::Timelike` methods on Arrays macro_rules! time_like { ($extract:ident, $array:ident, $data_type:path) => { match $array.data_type() { @@ -330,35 +338,11 @@ pub fn can_year(data_type: &DataType) -> bool { } /// Checks if an array of type `datatype` can perform month operation -/// -/// # Examples -/// ``` -/// use arrow2::compute::temporal::can_month; -/// use arrow2::datatypes::{DataType}; -/// -/// let data_type = DataType::Date32; -/// assert_eq!(can_month(&data_type), true); - -/// let data_type = DataType::Int8; -/// assert_eq!(can_month(&data_type), false); -/// ``` pub fn can_month(data_type: &DataType) -> bool { can_date(data_type) } /// Checks if an array of type `datatype` can perform day operation -/// -/// # Examples -/// ``` -/// use arrow2::compute::temporal::can_day; -/// use arrow2::datatypes::{DataType}; -/// -/// let data_type = DataType::Date32; -/// assert_eq!(can_day(&data_type), true); - -/// let data_type = DataType::Int8; -/// assert_eq!(can_day(&data_type), false); -/// ``` pub fn can_day(data_type: &DataType) -> bool { can_date(data_type) } @@ -398,35 +382,11 @@ pub fn can_hour(data_type: &DataType) -> bool { } /// Checks if an array of type `datatype` can perform minute operation -/// -/// # Examples -/// ``` -/// use arrow2::compute::temporal::can_minute; -/// use arrow2::datatypes::{DataType, TimeUnit}; -/// -/// let data_type = DataType::Time32(TimeUnit::Second); -/// assert_eq!(can_minute(&data_type), true); - -/// let data_type = DataType::Int8; -/// assert_eq!(can_minute(&data_type), false); -/// ``` pub fn can_minute(data_type: &DataType) -> bool { 
can_time(data_type) } /// Checks if an array of type `datatype` can perform second operation -/// -/// # Examples -/// ``` -/// use arrow2::compute::temporal::can_second; -/// use arrow2::datatypes::{DataType, TimeUnit}; -/// -/// let data_type = DataType::Time32(TimeUnit::Second); -/// assert_eq!(can_second(&data_type), true); - -/// let data_type = DataType::Int8; -/// assert_eq!(can_second(&data_type), false); -/// ``` pub fn can_second(data_type: &DataType) -> bool { can_time(data_type) } From c49451ab55ffe71a95a7d0439363efed9541ec72 Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Sat, 25 Sep 2021 17:26:25 +0530 Subject: [PATCH 11/17] Add and refactor tests --- tests/it/compute/temporal.rs | 349 +++++++++++++++++++++++------------ 1 file changed, 228 insertions(+), 121 deletions(-) diff --git a/tests/it/compute/temporal.rs b/tests/it/compute/temporal.rs index f0e3306c4af..7619e280983 100644 --- a/tests/it/compute/temporal.rs +++ b/tests/it/compute/temporal.rs @@ -3,155 +3,262 @@ use arrow2::compute::temporal::*; use arrow2::datatypes::*; #[test] -fn date64_second() { - let array = - Int64Array::from(&[Some(1514764800000), None, Some(1550636625000)]).to(DataType::Date64); - - let result = second(&array).unwrap(); - let expected = UInt32Array::from(&[Some(0), None, Some(45)]); - assert_eq!(result, expected); +fn temporal_hour() { + for data_type in TestData::available_time_like_types() { + let data = TestData::data(&data_type); + let result = hour(&*data.input).unwrap(); + + assert_eq!( + result, + data.hour.unwrap(), + "\"hour\" failed on type: {:?}", + data_type + ); + } } #[test] -fn date64_minute() { - let array = - Int64Array::from(&[Some(1514764800000), None, Some(1550636625000)]).to(DataType::Date64); - - let result = minute(&array).unwrap(); - let expected = UInt32Array::from(&[Some(0), None, Some(23)]); - assert_eq!(result, expected); +fn temporal_minute() { + for data_type in TestData::available_time_like_types() { + let data = 
TestData::data(&data_type); + let result = minute(&*data.input).unwrap(); + + assert_eq!( + result, + data.minute.unwrap(), + "\"hour\" failed on type: {:?}", + data_type + ); + } } #[test] -fn date64_hour() { - let array = - Int64Array::from(&[Some(1514764800000), None, Some(1550636625000)]).to(DataType::Date64); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(0), None, Some(4)]); - assert_eq!(result, expected); +fn temporal_second() { + for data_type in TestData::available_time_like_types() { + let data = TestData::data(&data_type); + let result = second(&*data.input).unwrap(); + + assert_eq!( + result, + data.second.unwrap(), + "\"second\" failed on type: {:?}", + data_type + ); + } } #[test] -fn date32_hour() { - let array = Int32Array::from(&[Some(15147), None, Some(15148)]).to(DataType::Date32); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(0), None, Some(0)]); - assert_eq!(result, expected); +fn temporal_nanosecond() { + for data_type in TestData::available_time_like_types() { + let data = TestData::data(&data_type); + let result = nanosecond(&*data.input).unwrap(); + + assert_eq!( + result, + data.nanosecond.unwrap(), + "\"nanosecond\" failed on type: {:?}", + data_type + ); + } } #[test] -fn time32_second_hour() { - let array = Int32Array::from(&[Some(37800), None]).to(DataType::Time32(TimeUnit::Second)); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(10), None]); - assert_eq!(result, expected); +fn temporal_year() { + for data_type in TestData::available_date_like_types() { + let data = TestData::data(&data_type); + let result = year(&*data.input).unwrap(); + + assert_eq!( + result, + data.year.unwrap(), + "\"year\" failed on type: {:?}", + data_type + ); + } } #[test] -fn time64_micro_hour() { - let array = - Int64Array::from(&[Some(37800000000), None]).to(DataType::Time64(TimeUnit::Microsecond)); - - let result = hour(&array).unwrap(); - let expected 
= UInt32Array::from(&[Some(10), None]); - assert_eq!(result, expected); +fn temporal_month() { + for data_type in TestData::available_date_like_types() { + let data = TestData::data(&data_type); + let result = month(&*data.input).unwrap(); + + assert_eq!( + result, + data.month.unwrap(), + "\"month\" failed on type: {:?}", + data_type + ); + } } #[test] -fn naive_timestamp_micro_hour() { - let array = Int64Array::from(&[Some(37800000000), None]) - .to(DataType::Timestamp(TimeUnit::Microsecond, None)); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(10), None]); - assert_eq!(result, expected); +fn temporal_day() { + for data_type in TestData::available_date_like_types() { + let data = TestData::data(&data_type); + let result = day(&*data.input).unwrap(); + + assert_eq!( + result, + data.day.unwrap(), + "\"day\" failed on type: {:?}", + data_type + ); + } } #[test] -fn date64_weekday() { - let array = Int64Array::from(&[Some(1514764800000), None, Some(86400000)]).to(DataType::Date64); - - let result = weekday(&array).unwrap(); - let expected = UInt32Array::from(&[Some(1), None, Some(5)]); - assert_eq!(result, expected); +fn temporal_weekday() { + for data_type in TestData::available_date_like_types() { + let data = TestData::data(&data_type); + let result = weekday(&*data.input).unwrap(); + + assert_eq!( + result, + data.weekday.unwrap(), + "\"weekday\" failed on type: {:?}", + data_type + ); + } } #[test] -fn date64_iso_week() { - let array = Int64Array::from(&[ - Some(1514764800000), - None, - Some(1515456000000), - Some(1514678400000), - ]) - .to(DataType::Date64); - - let result = iso_week(&array).unwrap(); - let expected = UInt32Array::from(&[Some(1), None, Some(2), Some(52)]); - assert_eq!(result, expected); +fn temporal_iso_week() { + for data_type in TestData::available_date_like_types() { + let data = TestData::data(&data_type); + let result = iso_week(&*data.input).unwrap(); + + assert_eq!( + result, + 
data.iso_week.unwrap(), + "\"iso_week\" failed on type: {:?}", + data_type + ); + } } -#[test] -fn date64_year() { - let array = Int64Array::from(&[Some(1514764800000), None]).to(DataType::Date64); - - let result = year(&array).unwrap(); - let expected = Int32Array::from(&[Some(2018), None]); - assert_eq!(result, expected); +struct TestData { + input: Box, + year: Option, + month: Option, + day: Option, + weekday: Option, + iso_week: Option, + hour: Option, + minute: Option, + second: Option, + nanosecond: Option, } -#[test] -fn naive_timestamp_date32_year() { - let array = Int32Array::from(&[Some(15147), None]).to(DataType::Date32); - - let result = year(&array).unwrap(); - let expected = Int32Array::from(&[Some(2011), None]); - assert_eq!(result, expected); -} - -#[test] -fn naive_timestamp_micro_year() { - let array = Int64Array::from(&[Some(1612025847000000), None]) - .to(DataType::Timestamp(TimeUnit::Microsecond, None)); - - let result = year(&array).unwrap(); - let expected = Int32Array::from(&[Some(2021), None]); - assert_eq!(result, expected); -} - -#[test] -fn date64_month() { - let array = Int64Array::from(&[Some(1514764800000), None]).to(DataType::Date64); - let result = month(&array).unwrap(); - - let expected = UInt32Array::from(&[Some(1), None]); - - assert_eq!(result, expected); -} - -#[test] -fn date64_day() { - let array = Int64Array::from(&[Some(1614764800000), None]).to(DataType::Date64); - let result = day(&array).unwrap(); - - let expected = UInt32Array::from(&[Some(3), None]); - - assert_eq!(result, expected); -} - -#[test] -fn timestamp_micro_hour() { - let array = Int64Array::from(&[Some(1621877130000000), None]).to(DataType::Timestamp( - TimeUnit::Microsecond, - Some("+01:00".to_string()), - )); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(18), None]); - assert_eq!(result, expected); +impl TestData { + fn data(data_type: &DataType) -> TestData { + match data_type { + DataType::Date64 => TestData { + 
input: Box::new( + Int64Array::from(&[Some(1514764800000), None, Some(1550636625000)]) + .to(data_type.clone()), + ), + year: Some(Int32Array::from(&[Some(2018), None, Some(2019)])), + month: Some(UInt32Array::from(&[Some(1), None, Some(2)])), + day: Some(UInt32Array::from(&[Some(1), None, Some(20)])), + weekday: Some(UInt32Array::from(&[Some(1), None, Some(3)])), + iso_week: Some(UInt32Array::from(&[Some(1), None, Some(8)])), + hour: Some(UInt32Array::from(&[Some(0), None, Some(4)])), + minute: Some(UInt32Array::from(&[Some(0), None, Some(23)])), + second: Some(UInt32Array::from(&[Some(0), None, Some(45)])), + nanosecond: Some(UInt32Array::from(&[Some(0), None, Some(0)])), + }, + DataType::Date32 => TestData { + input: Box::new(Int32Array::from(&[Some(15147), None]).to(data_type.clone())), + year: Some(Int32Array::from(&[Some(2011), None])), + month: Some(UInt32Array::from(&[Some(6), None])), + day: Some(UInt32Array::from(&[Some(22), None])), + weekday: Some(UInt32Array::from(&[Some(3), None])), + iso_week: Some(UInt32Array::from(&[Some(25), None])), + hour: Some(UInt32Array::from(&[Some(0), None])), + minute: Some(UInt32Array::from(&[Some(0), None])), + second: Some(UInt32Array::from(&[Some(0), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + DataType::Time32(TimeUnit::Second) => TestData { + input: Box::new(Int32Array::from(&[Some(37800), None]).to(data_type.clone())), + year: None, + month: None, + day: None, + weekday: None, + iso_week: None, + hour: Some(UInt32Array::from(&[Some(10), None])), + minute: Some(UInt32Array::from(&[Some(30), None])), + second: Some(UInt32Array::from(&[Some(0), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + DataType::Time64(TimeUnit::Microsecond) => TestData { + input: Box::new(Int64Array::from(&[Some(378000000), None]).to(data_type.clone())), + year: None, + month: None, + day: None, + weekday: None, + iso_week: None, + hour: Some(UInt32Array::from(&[Some(0), None])), + minute: 
Some(UInt32Array::from(&[Some(6), None])), + second: Some(UInt32Array::from(&[Some(18), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + DataType::Timestamp(TimeUnit::Microsecond, None) => TestData { + input: Box::new( + Int64Array::from(&[Some(1612025847000000), None]).to(data_type.clone()), + ), + year: Some(Int32Array::from(&[Some(2021), None])), + month: Some(UInt32Array::from(&[Some(1), None])), + day: Some(UInt32Array::from(&[Some(30), None])), + weekday: Some(UInt32Array::from(&[Some(6), None])), + iso_week: Some(UInt32Array::from(&[Some(4), None])), + hour: Some(UInt32Array::from(&[Some(16), None])), + minute: Some(UInt32Array::from(&[Some(57), None])), + second: Some(UInt32Array::from(&[Some(27), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + DataType::Timestamp(TimeUnit::Microsecond, Some(_)) => TestData { + // NOTE We hardcode the timezone as an offset, we ignore + // the zone sent through `data_type` + input: Box::new(Int64Array::from(&[Some(1621877130000000), None]).to( + DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".to_string())), + )), + year: Some(Int32Array::from(&[Some(2021), None])), + month: Some(UInt32Array::from(&[Some(5), None])), + day: Some(UInt32Array::from(&[Some(24), None])), + weekday: Some(UInt32Array::from(&[Some(1), None])), + iso_week: Some(UInt32Array::from(&[Some(21), None])), + hour: Some(UInt32Array::from(&[Some(18), None])), + minute: Some(UInt32Array::from(&[Some(25), None])), + second: Some(UInt32Array::from(&[Some(30), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + _ => todo!(), + } + } + + fn available_time_like_types() -> Vec { + vec![ + DataType::Date32, + DataType::Date64, + DataType::Time32(TimeUnit::Second), + DataType::Time64(TimeUnit::Microsecond), + DataType::Timestamp(TimeUnit::Microsecond, None), + // NOTE The timezone value will be ignored + DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".to_string())), + ] + } + + fn 
available_date_like_types() -> Vec { + vec![ + DataType::Date32, + DataType::Date64, + DataType::Timestamp(TimeUnit::Microsecond, None), + // NOTE The timezone value will be ignored + DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".to_string())), + ] + } } #[cfg(feature = "chrono-tz")] From e7ae7accd3bcbdd8f6e139189f49f569dfc76854 Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Sat, 25 Sep 2021 17:55:44 +0530 Subject: [PATCH 12/17] Add code review changes --- src/compute/temporal.rs | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index 1fd96bcd40e..7f7e3deb3e3 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -54,7 +54,7 @@ impl U32IsoWeek for chrono::DateTime {} // `chrono::Datelike` methods on Arrays macro_rules! date_like { ($extract:ident, $array:ident, $data_type:path) => { - match $array.data_type() { + match $array.data_type().to_logical_type() { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { date_variants($array, $data_type, |x| x.$extract()) } @@ -112,7 +112,7 @@ pub fn iso_week(array: &dyn Array) -> Result> { // `chrono::Timelike` methods on Arrays macro_rules! 
time_like { ($extract:ident, $array:ident, $data_type:path) => { - match $array.data_type() { + match $array.data_type().to_logical_type() { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { date_variants($array, $data_type, |x| x.$extract()) } @@ -161,16 +161,12 @@ pub fn nanosecond(array: &dyn Array) -> Result> { time_like!(nanosecond, array, DataType::UInt32) } -pub fn date_variants( - array: &dyn Array, - data_type: DataType, - op: F, -) -> Result> +fn date_variants(array: &dyn Array, data_type: DataType, op: F) -> Result> where O: NativeType, F: Fn(chrono::NaiveDateTime) -> O, { - match array.data_type() { + match array.data_type().to_logical_type() { DataType::Date32 => { let array = array .as_any() @@ -207,7 +203,7 @@ where O: NativeType, F: Fn(chrono::NaiveTime) -> O, { - match array.data_type() { + match array.data_type().to_logical_type() { DataType::Time32(TimeUnit::Second) => { let array = array .as_any() @@ -241,7 +237,6 @@ where } #[cfg(feature = "chrono-tz")] -#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))] fn chrono_tz( array: &PrimitiveArray, time_unit: TimeUnit, @@ -327,11 +322,8 @@ where /// use arrow2::compute::temporal::can_year; /// use arrow2::datatypes::{DataType}; /// -/// let data_type = DataType::Date32; -/// assert_eq!(can_year(&data_type), true); - -/// let data_type = DataType::Int8; -/// assert_eq!(can_year(&data_type), false); +/// assert_eq!(can_year(&DataType::Date32), true); +/// assert_eq!(can_year(&DataType::Int8), false); /// ``` pub fn can_year(data_type: &DataType) -> bool { can_date(data_type) @@ -371,11 +363,8 @@ fn can_date(data_type: &DataType) -> bool { /// use arrow2::compute::temporal::can_hour; /// use arrow2::datatypes::{DataType, TimeUnit}; /// -/// let data_type = DataType::Time32(TimeUnit::Second); -/// assert_eq!(can_hour(&data_type), true); - -/// let data_type = DataType::Int8; -/// assert_eq!(can_hour(&data_type), false); +/// assert_eq!(can_hour(&DataType::Time32(TimeUnit::Second)), 
true); +/// assert_eq!(can_hour(&DataType::Int8), false); /// ``` pub fn can_hour(data_type: &DataType) -> bool { can_time(data_type) From 898003db857ab75ca32d5609a31a9a90105453df Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Sat, 25 Sep 2021 19:15:15 +0530 Subject: [PATCH 13/17] Fix bug when extracting year from `TimeStamp` with Second as `TimeUnit` --- src/compute/temporal.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index 7f7e3deb3e3..c0d749a3401 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -286,7 +286,7 @@ where let offset = timezone.offset_from_utc_datetime(&datetime); extract(chrono::DateTime::::from_utc(datetime, offset)) }; - unary(array, op, DataType::UInt32) + unary(array, op, A::DATA_TYPE) } TimeUnit::Millisecond => { let op = |x| { From 1a86a0efbc069240fa2bd7aee8fca40df198ffec Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Sat, 25 Sep 2021 19:21:40 +0530 Subject: [PATCH 14/17] Add timezone tests, covering special cases --- tests/it/compute/temporal.rs | 255 ++++++++++++++++++++++++++++------- 1 file changed, 204 insertions(+), 51 deletions(-) diff --git a/tests/it/compute/temporal.rs b/tests/it/compute/temporal.rs index 7619e280983..3d9a50f696b 100644 --- a/tests/it/compute/temporal.rs +++ b/tests/it/compute/temporal.rs @@ -205,36 +205,22 @@ impl TestData { nanosecond: Some(UInt32Array::from(&[Some(0), None])), }, DataType::Timestamp(TimeUnit::Microsecond, None) => TestData { + // 68216970000000 (Epoch Microsecond) has 29th Feb (leap year) input: Box::new( - Int64Array::from(&[Some(1612025847000000), None]).to(data_type.clone()), + Int64Array::from(&[Some(1612025847000000), None, Some(68216970000000)]) + .to(data_type.clone()), ), - year: Some(Int32Array::from(&[Some(2021), None])), - month: Some(UInt32Array::from(&[Some(1), None])), - day: Some(UInt32Array::from(&[Some(30), None])), - weekday: Some(UInt32Array::from(&[Some(6), None])), - 
iso_week: Some(UInt32Array::from(&[Some(4), None])), - hour: Some(UInt32Array::from(&[Some(16), None])), - minute: Some(UInt32Array::from(&[Some(57), None])), - second: Some(UInt32Array::from(&[Some(27), None])), - nanosecond: Some(UInt32Array::from(&[Some(0), None])), - }, - DataType::Timestamp(TimeUnit::Microsecond, Some(_)) => TestData { - // NOTE We hardcode the timezone as an offset, we ignore - // the zone sent through `data_type` - input: Box::new(Int64Array::from(&[Some(1621877130000000), None]).to( - DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".to_string())), - )), - year: Some(Int32Array::from(&[Some(2021), None])), - month: Some(UInt32Array::from(&[Some(5), None])), - day: Some(UInt32Array::from(&[Some(24), None])), - weekday: Some(UInt32Array::from(&[Some(1), None])), - iso_week: Some(UInt32Array::from(&[Some(21), None])), - hour: Some(UInt32Array::from(&[Some(18), None])), - minute: Some(UInt32Array::from(&[Some(25), None])), - second: Some(UInt32Array::from(&[Some(30), None])), - nanosecond: Some(UInt32Array::from(&[Some(0), None])), + year: Some(Int32Array::from(&[Some(2021), None, Some(1972)])), + month: Some(UInt32Array::from(&[Some(1), None, Some(2)])), + day: Some(UInt32Array::from(&[Some(30), None, Some(29)])), + weekday: Some(UInt32Array::from(&[Some(6), None, Some(2)])), + iso_week: Some(UInt32Array::from(&[Some(4), None, Some(9)])), + hour: Some(UInt32Array::from(&[Some(16), None, Some(13)])), + minute: Some(UInt32Array::from(&[Some(57), None, Some(9)])), + second: Some(UInt32Array::from(&[Some(27), None, Some(30)])), + nanosecond: Some(UInt32Array::from(&[Some(0), None, Some(0)])), }, - _ => todo!(), + _ => unreachable!(), } } @@ -245,8 +231,6 @@ impl TestData { DataType::Time32(TimeUnit::Second), DataType::Time64(TimeUnit::Microsecond), DataType::Timestamp(TimeUnit::Microsecond, None), - // NOTE The timezone value will be ignored - DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".to_string())), ] } @@ -255,34 +239,203 @@ 
impl TestData { DataType::Date32, DataType::Date64, DataType::Timestamp(TimeUnit::Microsecond, None), - // NOTE The timezone value will be ignored - DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".to_string())), ] } } #[cfg(feature = "chrono-tz")] #[test] -fn timestamp_micro_hour_tz() { - let timestamp = 1621877130000000; // Mon May 24 2021 17:25:30 GMT+0000 - let array = Int64Array::from(&[Some(timestamp), None]).to(DataType::Timestamp( - TimeUnit::Microsecond, - Some("GMT".to_string()), - )); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(17), None]); - assert_eq!(result, expected); - - // (Western European Summer Time in Lisbon) => +1 hour - let array = Int64Array::from(&[Some(timestamp), None]).to(DataType::Timestamp( - TimeUnit::Microsecond, - Some("Europe/Lisbon".to_string()), - )); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(18), None]); - assert_eq!(result, expected); +fn temporal_tz_hour() { + let test_data = test_data_tz(); + + for data in test_data { + let result = hour(&*data.input).unwrap(); + + assert_eq!(result, data.hour.unwrap()); + } +} + +#[cfg(feature = "chrono-tz")] +#[test] +fn temporal_tz_minute() { + let test_data = test_data_tz(); + + for data in test_data { + let result = minute(&*data.input).unwrap(); + + assert_eq!(result, data.minute.unwrap()); + } +} + +#[cfg(feature = "chrono-tz")] +#[test] +fn temporal_tz_second() { + let test_data = test_data_tz(); + + for data in test_data { + let result = second(&*data.input).unwrap(); + + assert_eq!(result, data.second.unwrap()); + } +} + +#[cfg(feature = "chrono-tz")] +#[test] +fn temporal_tz_nanosecond() { + let test_data = test_data_tz(); + + for data in test_data { + let result = nanosecond(&*data.input).unwrap(); + + assert_eq!(result, data.nanosecond.unwrap()); + } +} + +#[cfg(feature = "chrono-tz")] +#[test] +fn temporal_tz_year() { + let test_data = test_data_tz(); + + for data in test_data { + let result = 
year(&*data.input).unwrap(); + + assert_eq!(result, data.year.unwrap()); + } +} + +#[cfg(feature = "chrono-tz")] +#[test] +fn temporal_tz_month() { + let test_data = test_data_tz(); + + for data in test_data { + let result = month(&*data.input).unwrap(); + + assert_eq!(result, data.month.unwrap()); + } +} + +#[cfg(feature = "chrono-tz")] +#[test] +fn temporal_tz_day() { + let test_data = test_data_tz(); + + for data in test_data { + let result = day(&*data.input).unwrap(); + + assert_eq!(result, data.day.unwrap()); + } +} + +#[cfg(feature = "chrono-tz")] +#[test] +fn temporal_tz_weekday() { + let test_data = test_data_tz(); + + for data in test_data { + let result = weekday(&*data.input).unwrap(); + + assert_eq!(result, data.weekday.unwrap()); + } +} + +#[cfg(feature = "chrono-tz")] +#[test] +fn temporal_tz_iso_week() { + let test_data = test_data_tz(); + + for data in test_data { + let result = iso_week(&*data.input).unwrap(); + + assert_eq!(result, data.iso_week.unwrap()); + } +} + +fn test_data_tz() -> Vec { + vec![ + TestData { + input: Box::new( + // Mon May 24 2021 17:25:30 GMT+0000 + Int64Array::from(&[Some(1621877130000000), None]).to(DataType::Timestamp( + TimeUnit::Microsecond, + Some("GMT".to_string()), + )), + ), + year: Some(Int32Array::from(&[Some(2021), None])), + month: Some(UInt32Array::from(&[Some(5), None])), + day: Some(UInt32Array::from(&[Some(24), None])), + weekday: Some(UInt32Array::from(&[Some(1), None])), + iso_week: Some(UInt32Array::from(&[Some(21), None])), + hour: Some(UInt32Array::from(&[Some(17), None])), + minute: Some(UInt32Array::from(&[Some(25), None])), + second: Some(UInt32Array::from(&[Some(30), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + TestData { + input: Box::new(Int64Array::from(&[Some(1621877130000000), None]).to( + DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".to_string())), + )), + year: Some(Int32Array::from(&[Some(2021), None])), + month: Some(UInt32Array::from(&[Some(5), 
None])), + day: Some(UInt32Array::from(&[Some(24), None])), + weekday: Some(UInt32Array::from(&[Some(1), None])), + iso_week: Some(UInt32Array::from(&[Some(21), None])), + hour: Some(UInt32Array::from(&[Some(18), None])), + minute: Some(UInt32Array::from(&[Some(25), None])), + second: Some(UInt32Array::from(&[Some(30), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + TestData { + input: Box::new(Int64Array::from(&[Some(1621877130000000), None]).to( + DataType::Timestamp(TimeUnit::Microsecond, Some("Europe/Lisbon".to_string())), + )), + year: Some(Int32Array::from(&[Some(2021), None])), + month: Some(UInt32Array::from(&[Some(5), None])), + day: Some(UInt32Array::from(&[Some(24), None])), + weekday: Some(UInt32Array::from(&[Some(1), None])), + iso_week: Some(UInt32Array::from(&[Some(21), None])), + hour: Some(UInt32Array::from(&[Some(18), None])), + minute: Some(UInt32Array::from(&[Some(25), None])), + second: Some(UInt32Array::from(&[Some(30), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + TestData { + input: Box::new( + // Sun Mar 29 2020 00:00:00 GMT+0000 (Western European Standard Time) + Int64Array::from(&[Some(1585440000), None]).to(DataType::Timestamp( + TimeUnit::Second, + Some("Europe/Lisbon".to_string()), + )), + ), + year: Some(Int32Array::from(&[Some(2020), None])), + month: Some(UInt32Array::from(&[Some(3), None])), + day: Some(UInt32Array::from(&[Some(29), None])), + weekday: Some(UInt32Array::from(&[Some(7), None])), + iso_week: Some(UInt32Array::from(&[Some(13), None])), + hour: Some(UInt32Array::from(&[Some(0), None])), + minute: Some(UInt32Array::from(&[Some(0), None])), + second: Some(UInt32Array::from(&[Some(0), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + TestData { + input: Box::new( + // Sun Mar 29 2020 02:00:00 GMT+0100 (Western European Summer Time) + Int64Array::from(&[Some(1585443600), None]).to(DataType::Timestamp( + TimeUnit::Second, + 
Some("Europe/Lisbon".to_string()), + )), + ), + year: Some(Int32Array::from(&[Some(2020), None])), + month: Some(UInt32Array::from(&[Some(3), None])), + day: Some(UInt32Array::from(&[Some(29), None])), + weekday: Some(UInt32Array::from(&[Some(7), None])), + iso_week: Some(UInt32Array::from(&[Some(13), None])), + hour: Some(UInt32Array::from(&[Some(2), None])), + minute: Some(UInt32Array::from(&[Some(0), None])), + second: Some(UInt32Array::from(&[Some(0), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + ] } #[test] From 8576ec13a0128cf2b585e14a2d9dfe41a28e39a3 Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Sat, 25 Sep 2021 19:42:17 +0530 Subject: [PATCH 15/17] Use macro in test to avoid repetition --- tests/it/compute/temporal.rs | 295 +++++++---------------------------- 1 file changed, 60 insertions(+), 235 deletions(-) diff --git a/tests/it/compute/temporal.rs b/tests/it/compute/temporal.rs index 3d9a50f696b..9e2b9b380c6 100644 --- a/tests/it/compute/temporal.rs +++ b/tests/it/compute/temporal.rs @@ -2,140 +2,47 @@ use arrow2::array::*; use arrow2::compute::temporal::*; use arrow2::datatypes::*; -#[test] -fn temporal_hour() { - for data_type in TestData::available_time_like_types() { - let data = TestData::data(&data_type); - let result = hour(&*data.input).unwrap(); - - assert_eq!( - result, - data.hour.unwrap(), - "\"hour\" failed on type: {:?}", - data_type - ); - } -} - -#[test] -fn temporal_minute() { - for data_type in TestData::available_time_like_types() { - let data = TestData::data(&data_type); - let result = minute(&*data.input).unwrap(); - - assert_eq!( - result, - data.minute.unwrap(), - "\"hour\" failed on type: {:?}", - data_type - ); - } -} - -#[test] -fn temporal_second() { - for data_type in TestData::available_time_like_types() { - let data = TestData::data(&data_type); - let result = second(&*data.input).unwrap(); - - assert_eq!( - result, - data.second.unwrap(), - "\"second\" failed on type: {:?}", - data_type - 
); - } -} - -#[test] -fn temporal_nanosecond() { - for data_type in TestData::available_time_like_types() { - let data = TestData::data(&data_type); - let result = nanosecond(&*data.input).unwrap(); - - assert_eq!( - result, - data.nanosecond.unwrap(), - "\"nanosecond\" failed on type: {:?}", - data_type - ); - } -} - -#[test] -fn temporal_year() { - for data_type in TestData::available_date_like_types() { - let data = TestData::data(&data_type); - let result = year(&*data.input).unwrap(); - - assert_eq!( - result, - data.year.unwrap(), - "\"year\" failed on type: {:?}", - data_type - ); - } -} - -#[test] -fn temporal_month() { - for data_type in TestData::available_date_like_types() { - let data = TestData::data(&data_type); - let result = month(&*data.input).unwrap(); - - assert_eq!( - result, - data.month.unwrap(), - "\"month\" failed on type: {:?}", - data_type - ); - } -} - -#[test] -fn temporal_day() { - for data_type in TestData::available_date_like_types() { - let data = TestData::data(&data_type); - let result = day(&*data.input).unwrap(); - - assert_eq!( - result, - data.day.unwrap(), - "\"day\" failed on type: {:?}", - data_type - ); - } -} - -#[test] -fn temporal_weekday() { - for data_type in TestData::available_date_like_types() { - let data = TestData::data(&data_type); - let result = weekday(&*data.input).unwrap(); - - assert_eq!( - result, - data.weekday.unwrap(), - "\"weekday\" failed on type: {:?}", - data_type - ); - } +macro_rules! 
temporal_test { + ($func:ident, $extract:ident, $data_types:path) => { + #[test] + fn $func() { + for data_type in $data_types() { + let data = TestData::data(&data_type); + let result = $extract(&*data.input).unwrap(); + + assert_eq!( + result, + data.$extract.unwrap(), + "\"{}\" failed on type: {:?}", + stringify!($extract), + data_type + ); + } + } + }; } -#[test] -fn temporal_iso_week() { - for data_type in TestData::available_date_like_types() { - let data = TestData::data(&data_type); - let result = iso_week(&*data.input).unwrap(); - - assert_eq!( - result, - data.iso_week.unwrap(), - "\"iso_week\" failed on type: {:?}", - data_type - ); - } -} +temporal_test!(temporal_hour, hour, TestData::available_time_like_types); +temporal_test!(temporal_minute, minute, TestData::available_time_like_types); +temporal_test!(temporal_second, second, TestData::available_time_like_types); +temporal_test!( + temporal_nanosecond, + nanosecond, + TestData::available_time_like_types +); +temporal_test!(temporal_year, year, TestData::available_date_like_types); +temporal_test!(temporal_month, month, TestData::available_date_like_types); +temporal_test!(temporal_day, day, TestData::available_date_like_types); +temporal_test!( + temporal_weekday, + weekday, + TestData::available_date_like_types +); +temporal_test!( + temporal_iso_week, + iso_week, + TestData::available_date_like_types +); struct TestData { input: Box, @@ -243,113 +150,31 @@ impl TestData { } } -#[cfg(feature = "chrono-tz")] -#[test] -fn temporal_tz_hour() { - let test_data = test_data_tz(); - - for data in test_data { - let result = hour(&*data.input).unwrap(); +macro_rules! 
temporal_tz_test { + ($func:ident, $extract:ident) => { + #[cfg(feature = "chrono-tz")] + #[test] + fn $func() { + let test_data = test_data_tz(); - assert_eq!(result, data.hour.unwrap()); - } -} + for data in test_data { + let result = $extract(&*data.input).unwrap(); -#[cfg(feature = "chrono-tz")] -#[test] -fn temporal_tz_minute() { - let test_data = test_data_tz(); - - for data in test_data { - let result = minute(&*data.input).unwrap(); - - assert_eq!(result, data.minute.unwrap()); - } -} - -#[cfg(feature = "chrono-tz")] -#[test] -fn temporal_tz_second() { - let test_data = test_data_tz(); - - for data in test_data { - let result = second(&*data.input).unwrap(); - - assert_eq!(result, data.second.unwrap()); - } -} - -#[cfg(feature = "chrono-tz")] -#[test] -fn temporal_tz_nanosecond() { - let test_data = test_data_tz(); - - for data in test_data { - let result = nanosecond(&*data.input).unwrap(); - - assert_eq!(result, data.nanosecond.unwrap()); - } -} - -#[cfg(feature = "chrono-tz")] -#[test] -fn temporal_tz_year() { - let test_data = test_data_tz(); - - for data in test_data { - let result = year(&*data.input).unwrap(); - - assert_eq!(result, data.year.unwrap()); - } -} - -#[cfg(feature = "chrono-tz")] -#[test] -fn temporal_tz_month() { - let test_data = test_data_tz(); - - for data in test_data { - let result = month(&*data.input).unwrap(); - - assert_eq!(result, data.month.unwrap()); - } -} - -#[cfg(feature = "chrono-tz")] -#[test] -fn temporal_tz_day() { - let test_data = test_data_tz(); - - for data in test_data { - let result = day(&*data.input).unwrap(); - - assert_eq!(result, data.day.unwrap()); - } -} - -#[cfg(feature = "chrono-tz")] -#[test] -fn temporal_tz_weekday() { - let test_data = test_data_tz(); - - for data in test_data { - let result = weekday(&*data.input).unwrap(); - - assert_eq!(result, data.weekday.unwrap()); - } + assert_eq!(result, data.$extract.unwrap()); + } + } + }; } -#[cfg(feature = "chrono-tz")] -#[test] -fn temporal_tz_iso_week() 
{ - let test_data = test_data_tz(); - - for data in test_data { - let result = iso_week(&*data.input).unwrap(); - - assert_eq!(result, data.iso_week.unwrap()); - } -} +temporal_tz_test!(temporal_tz_hour, hour); +temporal_tz_test!(temporal_tz_minute, minute); +temporal_tz_test!(temporal_tz_second, second); +temporal_tz_test!(temporal_tz_nanosecond, nanosecond); +temporal_tz_test!(temporal_tz_year, year); +temporal_tz_test!(temporal_tz_month, month); +temporal_tz_test!(temporal_tz_day, day); +temporal_tz_test!(temporal_tz_weekday, weekday); +temporal_tz_test!(temporal_tz_iso_week, iso_week); fn test_data_tz() -> Vec { vec![ From a8ab26d18d07219c26c69d5fdb646496bafa059c Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Sat, 25 Sep 2021 21:48:48 +0530 Subject: [PATCH 16/17] Specify range of output values possible for each extractor --- src/compute/temporal.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index c0d749a3401..6ed750b65fc 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -83,26 +83,28 @@ pub fn year(array: &dyn Array) -> Result> { } /// Extracts the months of a temporal array as [`PrimitiveArray`]. +/// Value ranges from 1 to 12. /// Use [`can_month`] to check if this operation is supported for the target [`DataType`]. pub fn month(array: &dyn Array) -> Result> { date_like!(month, array, DataType::UInt32) } /// Extracts the days of a temporal array as [`PrimitiveArray`]. +/// Value ranges from 1 to 31 (Last day depends on month). /// Use [`can_day`] to check if this operation is supported for the target [`DataType`]. pub fn day(array: &dyn Array) -> Result> { date_like!(day, array, DataType::UInt32) } /// Extracts weekday of a temporal array as [`PrimitiveArray`]. -/// Monday is 1, Tuesday is 2, ... Sunday is 7 +/// Monday is 1, Tuesday is 2, ..., Sunday is 7.
/// Use [`can_weekday`] to check if this operation is supported for the target [`DataType`] pub fn weekday(array: &dyn Array) -> Result> { date_like!(u32_weekday, array, DataType::UInt32) } /// Extracts ISO week of a temporal array as [`PrimitiveArray`] -/// Value ranges from 1 to 53. +/// Value ranges from 1 to 53 (Last week depends on the year). /// Use [`can_iso_week`] to check if this operation is supported for the target [`DataType`] pub fn iso_week(array: &dyn Array) -> Result> { date_like!(u32_iso_week, array, DataType::UInt32) @@ -138,18 +140,21 @@ macro_rules! time_like { } /// Extracts the hours of a temporal array as [`PrimitiveArray`]. +/// Value ranges from 0 to 23. /// Use [`can_hour`] to check if this operation is supported for the target [`DataType`]. pub fn hour(array: &dyn Array) -> Result> { time_like!(hour, array, DataType::UInt32) } /// Extracts the minutes of a temporal array as [`PrimitiveArray`]. +/// Value ranges from 0 to 59. /// Use [`can_minute`] to check if this operation is supported for the target [`DataType`]. pub fn minute(array: &dyn Array) -> Result> { time_like!(minute, array, DataType::UInt32) } /// Extracts the seconds of a temporal array as [`PrimitiveArray`]. +/// Value ranges from 0 to 59. /// Use [`can_second`] to check if this operation is supported for the target [`DataType`]. 
pub fn second(array: &dyn Array) -> Result> { time_like!(second, array, DataType::UInt32) From 9a85313acf47a809c90c93545587670c42fabb33 Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Sun, 26 Sep 2021 16:15:14 +0530 Subject: [PATCH 17/17] Add simple test for `Time64` with `TimeUnit::Nanosecond` --- tests/it/compute/temporal.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/it/compute/temporal.rs b/tests/it/compute/temporal.rs index 9e2b9b380c6..748a4dbfe60 100644 --- a/tests/it/compute/temporal.rs +++ b/tests/it/compute/temporal.rs @@ -111,6 +111,20 @@ impl TestData { second: Some(UInt32Array::from(&[Some(18), None])), nanosecond: Some(UInt32Array::from(&[Some(0), None])), }, + DataType::Time64(TimeUnit::Nanosecond) => TestData { + input: Box::new( + Int64Array::from(&[Some(378000000100), None]).to(data_type.clone()), + ), + year: None, + month: None, + day: None, + weekday: None, + iso_week: None, + hour: Some(UInt32Array::from(&[Some(0), None])), + minute: Some(UInt32Array::from(&[Some(6), None])), + second: Some(UInt32Array::from(&[Some(18), None])), + nanosecond: Some(UInt32Array::from(&[Some(100), None])), + }, DataType::Timestamp(TimeUnit::Microsecond, None) => TestData { // 68216970000000 (Epoch Microsecond) has 29th Feb (leap year) input: Box::new( @@ -137,6 +151,7 @@ impl TestData { DataType::Date64, DataType::Time32(TimeUnit::Second), DataType::Time64(TimeUnit::Microsecond), + DataType::Time64(TimeUnit::Nanosecond), DataType::Timestamp(TimeUnit::Microsecond, None), ] }