Skip to content

Commit

Permalink
support EXTRACT on intervals and durations (#12514)
Browse files Browse the repository at this point in the history
Signed-off-by: Nick Cameron <nrc@ncameron.org>
  • Loading branch information
nrc committed Sep 20, 2024
1 parent 5f424d3 commit 21ec332
Show file tree
Hide file tree
Showing 2 changed files with 224 additions and 6 deletions.
47 changes: 41 additions & 6 deletions datafusion/functions/src/datetime/date_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ use arrow::array::{Array, ArrayRef, Float64Array};
use arrow::compute::kernels::cast_utils::IntervalUnit;
use arrow::compute::{binary, cast, date_part, DatePart};
use arrow::datatypes::DataType::{
Date32, Date64, Float64, Time32, Time64, Timestamp, Utf8, Utf8View,
Date32, Date64, Duration, Float64, Interval, Time32, Time64, Timestamp, Utf8,
Utf8View,
};
use arrow::datatypes::IntervalUnit::{DayTime, MonthDayNano, YearMonth};
use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
use arrow::datatypes::{DataType, TimeUnit};

Expand Down Expand Up @@ -109,6 +111,20 @@ impl DatePartFunc {
Exact(vec![Utf8View, Time64(Microsecond)]),
Exact(vec![Utf8, Time64(Nanosecond)]),
Exact(vec![Utf8View, Time64(Nanosecond)]),
Exact(vec![Utf8, Interval(YearMonth)]),
Exact(vec![Utf8View, Interval(YearMonth)]),
Exact(vec![Utf8, Interval(DayTime)]),
Exact(vec![Utf8View, Interval(DayTime)]),
Exact(vec![Utf8, Interval(MonthDayNano)]),
Exact(vec![Utf8View, Interval(MonthDayNano)]),
Exact(vec![Utf8, Duration(Second)]),
Exact(vec![Utf8View, Duration(Second)]),
Exact(vec![Utf8, Duration(Millisecond)]),
Exact(vec![Utf8View, Duration(Millisecond)]),
Exact(vec![Utf8, Duration(Microsecond)]),
Exact(vec![Utf8View, Duration(Microsecond)]),
Exact(vec![Utf8, Duration(Nanosecond)]),
Exact(vec![Utf8View, Duration(Nanosecond)]),
],
Volatility::Immutable,
),
Expand Down Expand Up @@ -224,10 +240,28 @@ fn seconds(array: &dyn Array, unit: TimeUnit) -> Result<ArrayRef> {
let subsecs = date_part(array, DatePart::Nanosecond)?;
let subsecs = as_int32_array(subsecs.as_ref())?;

let r: Float64Array = binary(secs, subsecs, |secs, subsecs| {
(secs as f64 + (subsecs as f64 / 1_000_000_000_f64)) * sf
})?;
Ok(Arc::new(r))
// Special case where there are no nulls.
if subsecs.null_count() == 0 {
let r: Float64Array = binary(secs, subsecs, |secs, subsecs| {
(secs as f64 + ((subsecs % 1_000_000_000) as f64 / 1_000_000_000_f64)) * sf
})?;
Ok(Arc::new(r))
} else {
// Nulls in secs are preserved, nulls in subsecs are treated as zero to account for the case
// where the number of nanoseconds overflows.
let r: Float64Array = secs
.iter()
.zip(subsecs)
.map(|(secs, subsecs)| {
secs.map(|secs| {
let subsecs = subsecs.unwrap_or(0);
(secs as f64 + ((subsecs % 1_000_000_000) as f64 / 1_000_000_000_f64))
* sf
})
})
.collect();
Ok(Arc::new(r))
}
}

fn epoch(array: &dyn Array) -> Result<ArrayRef> {
Expand Down Expand Up @@ -256,7 +290,8 @@ fn epoch(array: &dyn Array) -> Result<ArrayRef> {
Time64(Nanosecond) => {
as_time64_nanosecond_array(array)?.unary(|x| x as f64 / 1_000_000_000_f64)
}
d => return exec_err!("Can not convert {d:?} to epoch"),
Interval(_) | Duration(_) => return seconds(array, Second),
d => return exec_err!("Cannot convert {d:?} to epoch"),
};
Ok(Arc::new(f))
}
183 changes: 183 additions & 0 deletions datafusion/sqllogictest/test_files/expr.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1472,6 +1472,189 @@ SELECT extract(epoch from arrow_cast('1969-12-31', 'Date64'))
----
-86400

# test_extract_interval

query R
SELECT extract(year from arrow_cast('10 years', 'Interval(YearMonth)'))
----
10

query R
SELECT extract(month from arrow_cast('10 years', 'Interval(YearMonth)'))
----
0

query R
SELECT extract(year from arrow_cast('10 months', 'Interval(YearMonth)'))
----
0

query R
SELECT extract(month from arrow_cast('10 months', 'Interval(YearMonth)'))
----
10

query R
SELECT extract(year from arrow_cast('20 months', 'Interval(YearMonth)'))
----
1

query R
SELECT extract(month from arrow_cast('20 months', 'Interval(YearMonth)'))
----
8

query error DataFusion error: Arrow error: Compute error: Year does not support: Interval\(DayTime\)
SELECT extract(year from arrow_cast('10 days', 'Interval(DayTime)'))

query error DataFusion error: Arrow error: Compute error: Month does not support: Interval\(DayTime\)
SELECT extract(month from arrow_cast('10 days', 'Interval(DayTime)'))

query R
SELECT extract(day from arrow_cast('10 days', 'Interval(DayTime)'))
----
10

query R
SELECT extract(day from arrow_cast('14400 minutes', 'Interval(DayTime)'))
----
0

query R
SELECT extract(minute from arrow_cast('14400 minutes', 'Interval(DayTime)'))
----
14400

query R
SELECT extract(second from arrow_cast('5.1 seconds', 'Interval(DayTime)'))
----
5

query R
SELECT extract(second from arrow_cast('14400 minutes', 'Interval(DayTime)'))
----
864000

query R
SELECT extract(second from arrow_cast('2 months', 'Interval(MonthDayNano)'))
----
0

query R
SELECT extract(second from arrow_cast('2 days', 'Interval(MonthDayNano)'))
----
0

query R
SELECT extract(second from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
----
2

query R
SELECT extract(seconds from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
----
2

query R
SELECT extract(epoch from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
----
2

query R
SELECT extract(milliseconds from arrow_cast('2 seconds', 'Interval(MonthDayNano)'))
----
2000

query R
SELECT extract(second from arrow_cast('2030 milliseconds', 'Interval(MonthDayNano)'))
----
2.03

query R
SELECT extract(second from arrow_cast(NULL, 'Interval(MonthDayNano)'))
----
NULL

statement ok
create table t (id int, i interval) as values
(0, interval '5 months 1 day 10 nanoseconds'),
(1, interval '1 year 3 months'),
(2, interval '3 days 2 milliseconds'),
(3, interval '2 seconds'),
(4, interval '8 months'),
(5, NULL);

query IRR rowsort
select
id,
extract(second from i),
extract(month from i)
from t
order by id;
----
0 0.00000001 5
1 0 15
2 0.002 0
3 2 0
4 0 8
5 NULL NULL

statement ok
drop table t;

# test_extract_duration

query R
SELECT extract(second from arrow_cast(2, 'Duration(Second)'))
----
2

query R
SELECT extract(seconds from arrow_cast(2, 'Duration(Second)'))
----
2

query R
SELECT extract(epoch from arrow_cast(2, 'Duration(Second)'))
----
2

query R
SELECT extract(millisecond from arrow_cast(2, 'Duration(Second)'))
----
2000

query R
SELECT extract(second from arrow_cast(2, 'Duration(Millisecond)'))
----
0.002

query R
SELECT extract(second from arrow_cast(2002, 'Duration(Millisecond)'))
----
2.002

query R
SELECT extract(millisecond from arrow_cast(2002, 'Duration(Millisecond)'))
----
2002

query R
SELECT extract(day from arrow_cast(864000, 'Duration(Second)'))
----
10

query error DataFusion error: Arrow error: Compute error: Month does not support: Duration\(Second\)
SELECT extract(month from arrow_cast(864000, 'Duration(Second)'))

query error DataFusion error: Arrow error: Compute error: Year does not support: Duration\(Second\)
SELECT extract(year from arrow_cast(864000, 'Duration(Second)'))

query R
SELECT extract(day from arrow_cast(NULL, 'Duration(Second)'))
----
NULL

# test_extract_date_part_func

query B
Expand Down

0 comments on commit 21ec332

Please sign in to comment.