Skip to content

Commit

Permalink
feat: Support use of Duration in to_string, ergonomic/perf improvem…
Browse files Browse the repository at this point in the history
…ent, tz-aware Datetime bugfix (#19697)
  • Loading branch information
alexander-beedie authored Nov 16, 2024
1 parent 4f3e828 commit 5476332
Show file tree
Hide file tree
Showing 17 changed files with 585 additions and 181 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ either = { workspace = true }
hashbrown = { workspace = true }
hashbrown_old_nightly_hack = { workspace = true }
indexmap = { workspace = true }
itoa = { workspace = true }
ndarray = { workspace = true, optional = true }
num-traits = { workspace = true }
once_cell = { workspace = true }
Expand Down
38 changes: 32 additions & 6 deletions crates/polars-core/src/chunked_array/temporal/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ use chrono::*;

use crate::prelude::*;

/// Number of nanoseconds in a day
pub(crate) const NS_IN_DAY: i64 = 86_400_000_000_000;
/// Number of microseconds in a day
pub(crate) const US_IN_DAY: i64 = 86_400_000_000;
/// Number of milliseconds in a day
pub(crate) const MS_IN_DAY: i64 = 86_400_000;
/// Number of seconds in a day
pub(crate) const SECONDS_IN_DAY: i64 = 86_400;

impl From<&AnyValue<'_>> for NaiveDateTime {
Expand Down Expand Up @@ -37,12 +39,10 @@ pub fn datetime_to_timestamp_ns(v: NaiveDateTime) -> i64 {
v.and_utc().timestamp_nanos_opt().unwrap()
}

// Used by lazy for literal conversion
/// Returns the millisecond timestamp of `v`, with `v` interpreted as UTC.
pub fn datetime_to_timestamp_ms(v: NaiveDateTime) -> i64 {
    let as_utc = v.and_utc();
    as_utc.timestamp_millis()
}

// Used by lazy for literal conversion
pub fn datetime_to_timestamp_us(v: NaiveDateTime) -> i64 {
let us = v.and_utc().timestamp() * 1_000_000;
us + v.and_utc().timestamp_subsec_micros() as i64
Expand All @@ -52,6 +52,32 @@ pub(crate) fn naive_datetime_to_date(v: NaiveDateTime) -> i32 {
(datetime_to_timestamp_ms(v) / (MILLISECONDS * SECONDS_IN_DAY)) as i32
}

pub(crate) const NS_IN_DAY: i64 = 86_400_000_000_000;
pub(crate) const US_IN_DAY: i64 = 86_400_000_000;
pub(crate) const MS_IN_DAY: i64 = 86_400_000;
/// Resolve a user-supplied format specifier into a `strftime` pattern.
///
/// Any `fmt` other than the literal `"iso"` is returned unchanged. For
/// `"iso"` the pattern is selected from `dtype`:
/// - `Datetime`: `%F %T` followed by 3, 6 or 9 fractional digits for the
///   millisecond, microsecond or nanosecond time unit, with `%:z` appended
///   when a time zone is set
/// - `Date`: `%F`
/// - `Time`: `%T%.f`
///
/// # Panics
///
/// Panics (via `unimplemented!`) when `fmt` is `"iso"` and `dtype` matches
/// none of the arms above, which includes the case where the corresponding
/// dtype feature is compiled out.
pub fn get_strftime_format(fmt: &str, dtype: &DataType) -> String {
    // Only the "iso" shorthand needs translating; everything else is passed through.
    if fmt != "iso" {
        return fmt.to_string();
    }
    #[allow(unreachable_code)]
    let iso_pattern: &str = match dtype {
        #[cfg(feature = "dtype-datetime")]
        DataType::Datetime(time_unit, time_zone) => {
            if time_zone.is_some() {
                // tz-aware: include the UTC offset
                match time_unit {
                    TimeUnit::Milliseconds => "%F %T%.3f%:z",
                    TimeUnit::Microseconds => "%F %T%.6f%:z",
                    TimeUnit::Nanoseconds => "%F %T%.9f%:z",
                }
            } else {
                match time_unit {
                    TimeUnit::Milliseconds => "%F %T%.3f",
                    TimeUnit::Microseconds => "%F %T%.6f",
                    TimeUnit::Nanoseconds => "%F %T%.9f",
                }
            }
        },
        #[cfg(feature = "dtype-date")]
        DataType::Date => "%F",
        #[cfg(feature = "dtype-time")]
        DataType::Time => "%T%.f",
        _ => unimplemented!(
            "invalid call to `get_strftime_format`; fmt={:?}, dtype={}",
            fmt,
            dtype
        ),
    };
    iso_pattern.to_owned()
}
1 change: 1 addition & 0 deletions crates/polars-core/src/chunked_array/temporal/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ impl DateChunked {
/// Convert from Date into String with the given format.
/// See [chrono strftime/strptime](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html).
pub fn to_string(&self, format: &str) -> PolarsResult<StringChunked> {
let format = if format == "iso" { "%F" } else { format };
let datefmt_f = |ndt: NaiveDate| ndt.format(format);
self.try_apply_into_string_amortized(|val, buf| {
let ndt = date32_to_date(val);
Expand Down
6 changes: 3 additions & 3 deletions crates/polars-core/src/chunked_array/temporal/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ impl DatetimeChunked {
TimeUnit::Microseconds => timestamp_us_to_datetime,
TimeUnit::Milliseconds => timestamp_ms_to_datetime,
};

let format = get_strftime_format(format, self.dtype());
let mut ca: StringChunked = match self.time_zone() {
#[cfg(feature = "timezones")]
Some(time_zone) => {
let parsed_time_zone = time_zone.parse::<Tz>().expect("already validated");
let datefmt_f = |ndt| parsed_time_zone.from_utc_datetime(&ndt).format(format);
let datefmt_f = |ndt| parsed_time_zone.from_utc_datetime(&ndt).format(&format);
self.try_apply_into_string_amortized(|val, buf| {
let ndt = conversion_f(val);
write!(buf, "{}", datefmt_f(ndt))
Expand All @@ -62,7 +62,7 @@ impl DatetimeChunked {
)?
},
_ => {
let datefmt_f = |ndt: NaiveDateTime| ndt.format(format);
let datefmt_f = |ndt: NaiveDateTime| ndt.format(&format);
self.try_apply_into_string_amortized(|val, buf| {
let ndt = conversion_f(val);
write!(buf, "{}", datefmt_f(ndt))
Expand Down
38 changes: 38 additions & 0 deletions crates/polars-core/src/chunked_array/temporal/duration.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::export::chrono::Duration as ChronoDuration;
use crate::fmt::{fmt_duration_string, iso_duration_string};
use crate::prelude::DataType::Duration;
use crate::prelude::*;

Expand Down Expand Up @@ -60,6 +61,43 @@ impl DurationChunked {
self.2 = Some(Duration(tu))
}

/// Render each non-null [`Duration`] value as a String.
///
/// `strftime` format strings are not supported here; `format` must be one
/// of the specifiers 'iso' or 'polars'. Any other value returns an
/// `InvalidOperation` error.
pub fn to_string(&self, format: &str) -> PolarsResult<StringChunked> {
    // scratch buffer shared by every per-value formatting call below
    let mut buf = String::with_capacity(32);
    let formatted: StringChunked = match format {
        "iso" => self
            .0
            .apply_nonnull_values_generic(DataType::String, |value: i64| {
                buf.clear();
                iso_duration_string(&mut buf, value, self.time_unit());
                buf.clone()
            }),
        "polars" => self
            .0
            .apply_nonnull_values_generic(DataType::String, |value: i64| {
                buf.clear();
                fmt_duration_string(&mut buf, value, self.time_unit())
                    .map_err(|e| polars_err!(ComputeError: "{:?}", e))
                    .expect("failed to format duration");
                buf.clone()
            }),
        _ => polars_bail!(
            InvalidOperation: "format {:?} not supported for Duration type (expected one of 'iso' or 'polars')",
            format
        ),
    };
    Ok(formatted)
}

/// Construct a new [`DurationChunked`] from an iterator over [`ChronoDuration`].
pub fn from_duration<I: IntoIterator<Item = ChronoDuration>>(
name: PlSmallStr,
Expand Down
1 change: 1 addition & 0 deletions crates/polars-core/src/chunked_array/temporal/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ impl TimeChunked {
pub fn to_string(&self, format: &str) -> StringChunked {
let mut ca: StringChunked = self.apply_kernel_cast(&|arr| {
let mut buf = String::new();
let format = if format == "iso" { "%T%.9f" } else { format };
let mut mutarr = MutablePlString::with_capacity(arr.len());

for opt in arr.into_iter() {
Expand Down
Loading

0 comments on commit 5476332

Please sign in to comment.