-
Notifications
You must be signed in to change notification settings - Fork 876
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement date_part for intervals #6071
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -20,6 +20,7 @@ | |||||
use std::sync::Arc; | ||||||
|
||||||
use arrow_array::cast::AsArray; | ||||||
use cast::as_primitive_array; | ||||||
use chrono::{Datelike, NaiveDateTime, Offset, TimeZone, Timelike, Utc}; | ||||||
|
||||||
use arrow_array::temporal_conversions::{ | ||||||
|
@@ -31,7 +32,7 @@ use arrow_array::timezone::Tz; | |||||
use arrow_array::types::*; | ||||||
use arrow_array::*; | ||||||
use arrow_buffer::ArrowNativeType; | ||||||
use arrow_schema::{ArrowError, DataType}; | ||||||
use arrow_schema::{ArrowError, DataType, IntervalUnit}; | ||||||
|
||||||
/// Valid parts to extract from date/time/timestamp arrays. | ||||||
/// | ||||||
|
@@ -111,6 +112,7 @@ where | |||||
/// - Date32/Date64 | ||||||
/// - Time32/Time64 | ||||||
/// - Timestamp | ||||||
/// - Interval | ||||||
/// | ||||||
/// Returns an [`Int32Array`] unless input was a dictionary type, in which case returns | ||||||
/// the dictionary but with this function applied onto its values. | ||||||
|
@@ -137,10 +139,21 @@ pub fn date_part(array: &dyn Array, part: DatePart) -> Result<ArrayRef, ArrowErr | |||||
let array = Arc::new(array) as ArrayRef; | ||||||
Ok(array) | ||||||
} | ||||||
// TODO: support interval | ||||||
// DataType::Interval(_) => { | ||||||
// todo!(); | ||||||
// } | ||||||
DataType::Interval(IntervalUnit::YearMonth) => { | ||||||
let array = as_primitive_array::<IntervalYearMonthType>(array).date_part(part)?; | ||||||
let array = Arc::new(array) as ArrayRef; | ||||||
Ok(array) | ||||||
} | ||||||
DataType::Interval(IntervalUnit::DayTime) => { | ||||||
let array = as_primitive_array::<IntervalDayTimeType>(array).date_part(part)?; | ||||||
let array = Arc::new(array) as ArrayRef; | ||||||
Ok(array) | ||||||
} | ||||||
DataType::Interval(IntervalUnit::MonthDayNano) => { | ||||||
let array = as_primitive_array::<IntervalMonthDayNanoType>(array).date_part(part)?; | ||||||
let array = Arc::new(array) as ArrayRef; | ||||||
Ok(array) | ||||||
} | ||||||
DataType::Dictionary(_, _) => { | ||||||
let array = array.as_any_dictionary(); | ||||||
let values = date_part(array.values(), part)?; | ||||||
|
@@ -387,6 +400,88 @@ impl ExtractDatePartExt for PrimitiveArray<TimestampNanosecondType> { | |||||
} | ||||||
} | ||||||
|
||||||
impl ExtractDatePartExt for PrimitiveArray<IntervalYearMonthType> { | ||||||
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> { | ||||||
match part { | ||||||
DatePart::Year => Ok(self.unary_opt(|d| Some(d / 12))), | ||||||
DatePart::Month => Ok(self.unary_opt(|d| Some(d % 12))), | ||||||
|
||||||
DatePart::Quarter | ||||||
| DatePart::Week | ||||||
| DatePart::Day | ||||||
| DatePart::DayOfWeekSunday0 | ||||||
| DatePart::DayOfWeekMonday0 | ||||||
| DatePart::DayOfYear | ||||||
| DatePart::Hour | ||||||
| DatePart::Minute | ||||||
| DatePart::Second | ||||||
| DatePart::Millisecond | ||||||
| DatePart::Microsecond | ||||||
| DatePart::Nanosecond => { | ||||||
return_compute_error_with!(format!("{part} does not support"), self.data_type()) | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
impl ExtractDatePartExt for PrimitiveArray<IntervalDayTimeType> { | ||||||
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> { | ||||||
match part { | ||||||
DatePart::Week => Ok(self.unary_opt(|d| Some(d.days / 7))), | ||||||
DatePart::Day => Ok(self.unary_opt(|d| Some(d.days))), | ||||||
DatePart::Hour => Ok(self.unary_opt(|d| Some(d.milliseconds / (60 * 60 * 1_000)))), | ||||||
DatePart::Minute => Ok(self.unary_opt(|d| Some(d.milliseconds / (60 * 1_000)))), | ||||||
DatePart::Second => Ok(self.unary_opt(|d| Some(d.milliseconds / 1_000))), | ||||||
DatePart::Millisecond => Ok(self.unary_opt(|d| Some(d.milliseconds))), | ||||||
DatePart::Microsecond => Ok(self.unary_opt(|d| d.milliseconds.checked_mul(1_000))), | ||||||
DatePart::Nanosecond => Ok(self.unary_opt(|d| d.milliseconds.checked_mul(1_000_000))), | ||||||
|
||||||
DatePart::Quarter | ||||||
| DatePart::Year | ||||||
| DatePart::Month | ||||||
| DatePart::DayOfWeekSunday0 | ||||||
| DatePart::DayOfWeekMonday0 | ||||||
| DatePart::DayOfYear => { | ||||||
return_compute_error_with!(format!("{part} does not support"), self.data_type()) | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
impl ExtractDatePartExt for PrimitiveArray<IntervalMonthDayNanoType> { | ||||||
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> { | ||||||
match part { | ||||||
DatePart::Year => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.months / 12))), | ||||||
DatePart::Month => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.months))), | ||||||
DatePart::Week => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.days / 7))), | ||||||
DatePart::Day => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.days))), | ||||||
DatePart::Hour => { | ||||||
Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 60 * 1_000_000_000)).try_into().ok())) | ||||||
} | ||||||
DatePart::Minute => { | ||||||
Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 1_000_000_000)).try_into().ok())) | ||||||
} | ||||||
DatePart::Second => { | ||||||
Ok(self.unary_opt(|d| (d.nanoseconds / 1_000_000_000).try_into().ok())) | ||||||
} | ||||||
DatePart::Millisecond => { | ||||||
Ok(self.unary_opt(|d| (d.nanoseconds / 1_000_000).try_into().ok())) | ||||||
} | ||||||
DatePart::Microsecond => { | ||||||
Ok(self.unary_opt(|d| (d.nanoseconds / 1_000).try_into().ok())) | ||||||
} | ||||||
DatePart::Nanosecond => Ok(self.unary_opt(|d| d.nanoseconds.try_into().ok())), | ||||||
|
||||||
DatePart::Quarter | ||||||
| DatePart::DayOfWeekSunday0 | ||||||
| DatePart::DayOfWeekMonday0 | ||||||
| DatePart::DayOfYear => { | ||||||
return_compute_error_with!(format!("{part} does not support"), self.data_type()) | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
macro_rules! return_compute_error_with { | ||||||
($msg:expr, $param:expr) => { | ||||||
return { Err(ArrowError::ComputeError(format!("{}: {:?}", $msg, $param))) } | ||||||
|
@@ -1500,4 +1595,203 @@ mod tests { | |||||
ensure_returns_error(&Time64MicrosecondArray::from(vec![0])); | ||||||
ensure_returns_error(&Time64NanosecondArray::from(vec![0])); | ||||||
} | ||||||
|
||||||
// IntervalDayTimeType week, day, hour, miute, second, mili, u, nano; invalid month, year; ignores the other part | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these comments seem like they belong with the other tests |
||||||
// IntervalMonthDayNanoType year -> nano; days don't affect months, time doesn't affect days, time doesn't affect months (and vice versa) | ||||||
#[test] | ||||||
fn test_interval_year_month_array() { | ||||||
let input: IntervalYearMonthArray = vec![0, 5, 24].into(); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Year).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(0, actual.value(1)); | ||||||
assert_eq!(2, actual.value(2)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Month).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(5, actual.value(1)); | ||||||
assert_eq!(0, actual.value(2)); | ||||||
|
||||||
assert!(date_part(&input, DatePart::Day).is_err()); | ||||||
assert!(date_part(&input, DatePart::Week).is_err()); | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn test_interval_day_time_array() { | ||||||
let input: IntervalDayTimeArray = vec![ | ||||||
IntervalDayTime::ZERO, | ||||||
IntervalDayTime::new(10, 42), | ||||||
IntervalDayTime::new(10, 1042), | ||||||
IntervalDayTime::new(10, MILLISECONDS_IN_DAY as i32 + 1), | ||||||
] | ||||||
.into(); | ||||||
|
||||||
// Time doesn't affect days. | ||||||
let actual = date_part(&input, DatePart::Day).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(10, actual.value(1)); | ||||||
assert_eq!(10, actual.value(2)); | ||||||
assert_eq!(10, actual.value(3)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Week).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(1, actual.value(1)); | ||||||
assert_eq!(1, actual.value(2)); | ||||||
assert_eq!(1, actual.value(3)); | ||||||
|
||||||
// Days doesn't affect time. | ||||||
let actual = date_part(&input, DatePart::Nanosecond).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(42_000_000, actual.value(1)); | ||||||
assert_eq!(1_042_000_000, actual.value(2)); | ||||||
// Overflow returns zero. | ||||||
assert_eq!(0, actual.value(3)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Microsecond).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(42_000, actual.value(1)); | ||||||
assert_eq!(1_042_000, actual.value(2)); | ||||||
// Overflow returns zero. | ||||||
assert_eq!(0, actual.value(3)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Millisecond).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(42, actual.value(1)); | ||||||
assert_eq!(1042, actual.value(2)); | ||||||
assert_eq!(MILLISECONDS_IN_DAY as i32 + 1, actual.value(3)); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I find it somewhat strange that only for |
||||||
|
||||||
let actual = date_part(&input, DatePart::Second).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(0, actual.value(1)); | ||||||
assert_eq!(1, actual.value(2)); | ||||||
assert_eq!(24 * 60 * 60, actual.value(3)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Minute).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(0, actual.value(1)); | ||||||
assert_eq!(0, actual.value(2)); | ||||||
assert_eq!(24 * 60, actual.value(3)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Hour).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(0, actual.value(1)); | ||||||
assert_eq!(0, actual.value(2)); | ||||||
assert_eq!(24, actual.value(3)); | ||||||
|
||||||
// Month and year are not valid (since days in month varies). | ||||||
assert!(date_part(&input, DatePart::Month).is_err()); | ||||||
assert!(date_part(&input, DatePart::Year).is_err()); | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn test_interval_month_day_nano_array() { | ||||||
let input: IntervalMonthDayNanoArray = vec![ | ||||||
IntervalMonthDayNano::ZERO, | ||||||
IntervalMonthDayNano::new(5, 10, 42), | ||||||
IntervalMonthDayNano::new(16, 35, MILLISECONDS_IN_DAY * 1_000_000 + 1), | ||||||
] | ||||||
.into(); | ||||||
|
||||||
// Year and month follow from month, but are not affected by days or nanos. | ||||||
let actual = date_part(&input, DatePart::Year).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(0, actual.value(1)); | ||||||
assert_eq!(1, actual.value(2)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Month).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(5, actual.value(1)); | ||||||
assert_eq!(16, actual.value(2)); | ||||||
|
||||||
// Week and day follow from day, but are not affected by months or nanos. | ||||||
let actual = date_part(&input, DatePart::Week).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(1, actual.value(1)); | ||||||
assert_eq!(5, actual.value(2)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Day).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(10, actual.value(1)); | ||||||
assert_eq!(35, actual.value(2)); | ||||||
|
||||||
// Times follow from nanos, but are not affected by months or dats. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Proposed to fix in #6140 |
||||||
let actual = date_part(&input, DatePart::Hour).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(0, actual.value(1)); | ||||||
assert_eq!(24, actual.value(2)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Minute).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(0, actual.value(1)); | ||||||
assert_eq!(24 * 60, actual.value(2)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Second).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(0, actual.value(1)); | ||||||
assert_eq!(24 * 60 * 60, actual.value(2)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Millisecond).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(0, actual.value(1)); | ||||||
assert_eq!(24 * 60 * 60 * 1_000, actual.value(2)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Microsecond).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(0, actual.value(1)); | ||||||
// Overflow gives zero. | ||||||
assert_eq!(0, actual.value(2)); | ||||||
|
||||||
let actual = date_part(&input, DatePart::Nanosecond).unwrap(); | ||||||
let actual = actual.as_primitive::<Int32Type>(); | ||||||
assert_eq!(0, actual.value(0)); | ||||||
assert_eq!(42, actual.value(1)); | ||||||
// Overflow gives zero. | ||||||
assert_eq!(0, actual.value(2)); | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn test_interval_array_invalid_parts() { | ||||||
fn ensure_returns_error(array: &dyn Array) { | ||||||
let invalid_parts = [ | ||||||
DatePart::Quarter, | ||||||
DatePart::DayOfWeekSunday0, | ||||||
DatePart::DayOfWeekMonday0, | ||||||
DatePart::DayOfYear, | ||||||
]; | ||||||
|
||||||
for part in invalid_parts { | ||||||
let err = date_part(array, part).unwrap_err(); | ||||||
let expected = format!( | ||||||
"Compute error: {part} does not support: {}", | ||||||
array.data_type() | ||||||
); | ||||||
assert_eq!(expected, err.to_string()); | ||||||
} | ||||||
} | ||||||
|
||||||
ensure_returns_error(&IntervalYearMonthArray::from(vec![0])); | ||||||
ensure_returns_error(&IntervalDayTimeArray::from(vec![IntervalDayTime::ZERO])); | ||||||
ensure_returns_error(&IntervalMonthDayNanoArray::from(vec![ | ||||||
IntervalMonthDayNano::ZERO, | ||||||
])); | ||||||
} | ||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense as IntervalYearMonth stores the number of whole months: https://docs.rs/arrow/latest/arrow/datatypes/enum.IntervalUnit.html
👍