From 6a79b44792c32f4ff95aabd5f685e12e7a164654 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Tue, 25 Jun 2024 14:52:47 +0100 Subject: [PATCH 1/6] add tests for #61 --- tests/main.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/main.rs b/tests/main.rs index f9ecc6c..efa93d3 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -230,7 +230,7 @@ fn date_comparison() { } #[test] -fn date_timestamp() { +fn date_timestamp_exact() { let d = Date::from_timestamp(1_654_560_000, true).unwrap(); assert_eq!(d.to_string(), "2022-06-07"); assert_eq!(d.timestamp(), 1_654_560_000); @@ -239,6 +239,16 @@ fn date_timestamp() { Ok(d) => panic!("unexpectedly valid, {d}"), Err(e) => assert_eq!(e, ParseError::DateNotExact), } + + // milliseconds + let d = Date::from_timestamp(1_654_560_000_000, true).unwrap(); + assert_eq!(d.to_string(), "2022-06-07"); + assert_eq!(d.timestamp(), 1_654_560_000); + + match Date::from_timestamp(1_654_560_000_001, true) { + Ok(d) => panic!("unexpectedly valid, {d}"), + Err(e) => assert_eq!(e, ParseError::DateNotExact), + } } macro_rules! date_from_timestamp { @@ -854,6 +864,9 @@ param_tests! 
{ dt_unix1: ok => "1654646400", "2022-06-08T00:00:00"; dt_unix2: ok => "1654646404", "2022-06-08T00:00:04"; dt_unix_float: ok => "1654646404.5", "2022-06-08T00:00:04.500000"; + dt_unix_float_limit: ok => "1654646404.123456", "2022-06-08T00:00:04.123456"; + dt_unix_float_ms: ok => "1654646404000.5", "2022-06-08T00:00:04.000500"; + dt_unix_float_ms_limit: ok => "1654646404123.456", "2022-06-08T00:00:04.123456"; dt_short_date: err => "xxx", TooShort; dt_short_time: err => "2020-01-01T12:0", TooShort; dt: err => "202x-01-01", InvalidCharYear; From a62425396a46ade724a1b6680f25e41b95f06086 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Tue, 25 Jun 2024 15:02:49 +0100 Subject: [PATCH 2/6] fix millisecond fraction being handled with wrong scale --- src/date.rs | 18 +++++++++++------- src/datetime.rs | 40 +++++++++++++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/date.rs b/src/date.rs index ccd6df7..78ef0f8 100644 --- a/src/date.rs +++ b/src/date.rs @@ -56,7 +56,7 @@ impl FromStr for Date { // 2e10 if greater than this, the number is in ms, if less than or equal, it's in seconds // (in seconds this is 11th October 2603, in ms it's 20th August 1970) -const MS_WATERSHED: i64 = 20_000_000_000; +pub(crate) const MS_WATERSHED: i64 = 20_000_000_000; // 1600-01-01 as a unix timestamp used for from_timestamp below const UNIX_1600: i64 = -11_676_096_000; // 9999-12-31T23:59:59 as a unix timestamp, used as max allowed value below @@ -206,9 +206,13 @@ impl Date { /// assert_eq!(d.to_string(), "2022-06-07"); /// ``` pub fn from_timestamp(timestamp: i64, require_exact: bool) -> Result { - let (timestamp_second, _) = Self::timestamp_watershed(timestamp)?; + let (timestamp_second, millis) = Self::timestamp_watershed(timestamp)?; let d = Self::from_timestamp_calc(timestamp_second)?; if require_exact { + if millis != 0 { + return Err(ParseError::DateNotExact); + } + let time_second = timestamp_second.rem_euclid(86_400); if time_second != 
0 { return Err(ParseError::DateNotExact); @@ -275,11 +279,11 @@ impl Date { pub(crate) fn timestamp_watershed(timestamp: i64) -> Result<(i64, u32), ParseError> { let ts_abs = timestamp.checked_abs().ok_or(ParseError::DateTooSmall)?; - let (mut seconds, mut microseconds) = if ts_abs > MS_WATERSHED { - (timestamp / 1_000, timestamp % 1_000 * 1000) - } else { - (timestamp, 0) - }; + if ts_abs <= MS_WATERSHED { + return Ok((timestamp, 0)); + } + let mut seconds = timestamp / 1_000; + let mut microseconds = ((timestamp % 1_000) * 1000) as i32; if microseconds < 0 { seconds -= 1; microseconds += 1_000_000; diff --git a/src/datetime.rs b/src/datetime.rs index c5de2ba..0c77393 100644 --- a/src/datetime.rs +++ b/src/datetime.rs @@ -1,5 +1,5 @@ -use crate::numbers::{float_parse_bytes, IntFloat}; -use crate::TimeConfigBuilder; +use crate::date::MS_WATERSHED; +use crate::{int_parse_bytes, TimeConfigBuilder}; use crate::{time::TimeConfig, Date, ParseError, Time}; use std::cmp::Ordering; use std::fmt; @@ -339,14 +339,36 @@ impl DateTime { pub fn parse_bytes_with_config(bytes: &[u8], config: &TimeConfig) -> Result { match Self::parse_bytes_rfc3339_with_config(bytes, config) { Ok(d) => Ok(d), - Err(e) => match float_parse_bytes(bytes) { - IntFloat::Int(int) => Self::from_timestamp_with_config(int, 0, config), - IntFloat::Float(float) => { - let micro = (float.fract() * 1_000_000_f64).round() as u32; - Self::from_timestamp_with_config(float.floor() as i64, micro, config) + Err(e) => { + let mut split = bytes.splitn(2, |&b| b == b'.'); + let Some(timestamp) = + int_parse_bytes(split.next().expect("splitn always returns at least one element")) + else { + return Err(e); + }; + let float_fraction = split.next(); + debug_assert!(split.next().is_none()); // at most two elements + match float_fraction { + Some(fract) => { + // fraction is either: + // - up to 3 digits of millisecond fractions, i.e. microseconds + // - or up to 6 digits of second fractions, i.e. 
microseconds + let max_digits = if timestamp > MS_WATERSHED { 3 } else { 6 }; + let Some(fract_integers) = int_parse_bytes(fract) else { + return Err(e); + }; + let multiple = 10f64.powf(max_digits as f64 - fract.len() as f64); + Self::from_timestamp_with_config( + timestamp, + // FIXME should we error if the fraction is too long? + // We have TimeConfig truncate / error option. + (fract_integers as f64 * multiple).round() as u32, + config, + ) + } + None => Self::from_timestamp_with_config(timestamp, 0, config), } - IntFloat::Err => Err(e), - }, + } } } From 26dd249b9a1cc7c76aa2eb6dd8729188398380e1 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Wed, 26 Jun 2024 16:32:16 +0100 Subject: [PATCH 3/6] also raise error if fraction too long --- src/datetime.rs | 19 +++++++++++++++---- src/lib.rs | 2 ++ tests/main.rs | 12 ++++++++++-- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/datetime.rs b/src/datetime.rs index 332d615..3ef07eb 100644 --- a/src/datetime.rs +++ b/src/datetime.rs @@ -1,5 +1,5 @@ use crate::date::MS_WATERSHED; -use crate::{int_parse_bytes, TimeConfigBuilder}; +use crate::{int_parse_bytes, MicrosecondsPrecisionOverflowBehavior, TimeConfigBuilder}; use crate::{time::TimeConfig, Date, ParseError, Time}; use std::cmp::Ordering; use std::fmt; @@ -349,6 +349,9 @@ impl DateTime { let float_fraction = split.next(); debug_assert!(split.next().is_none()); // at most two elements match float_fraction { + // If fraction exists but is empty (i.e. trailing `.`), allow for backwards compatibility; + // TODO might want to reconsider this later? + Some(b"") | None => Self::from_timestamp_with_config(timestamp, 0, config), Some(fract) => { // fraction is either: // - up to 3 digits of millisecond fractions, i.e. 
microseconds @@ -357,16 +360,24 @@ impl DateTime { let Some(fract_integers) = int_parse_bytes(fract) else { return Err(e); }; + if config.microseconds_precision_overflow_behavior + == MicrosecondsPrecisionOverflowBehavior::Error + && fract.len() > max_digits + { + return Err(if timestamp > MS_WATERSHED { + ParseError::MillisecondFractionTooLong + } else { + ParseError::SecondFractionTooLong + }); + } + // Technically this is rounding let multiple = 10f64.powf(max_digits as f64 - fract.len() as f64); Self::from_timestamp_with_config( timestamp, - // FIXME should we error if the fraction is too long? - // We have TimeConfig truncate / error option. (fract_integers as f64 * multiple).round() as u32, config, ) } - None => Self::from_timestamp_with_config(timestamp, 0, config), } } } diff --git a/src/lib.rs b/src/lib.rs index dcbc06a..6036d11 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -118,6 +118,8 @@ pub enum ParseError { SecondFractionTooLong, /// second fraction digits missing after `.` SecondFractionMissing, + /// millisecond fraction value is more than 3 digits long + MillisecondFractionTooLong, /// invalid digit in duration DurationInvalidNumber, /// `t` character repeated in duration diff --git a/tests/main.rs b/tests/main.rs index efa93d3..b32a5d5 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -867,6 +867,8 @@ param_tests! 
{ dt_unix_float_limit: ok => "1654646404.123456", "2022-06-08T00:00:04.123456"; dt_unix_float_ms: ok => "1654646404000.5", "2022-06-08T00:00:04.000500"; dt_unix_float_ms_limit: ok => "1654646404123.456", "2022-06-08T00:00:04.123456"; + dt_unix_float_too_long: err => "1654646404.1234567", SecondFractionTooLong; + dt_unix_float_ms_too_long: err => "1654646404123.4567", MillisecondFractionTooLong; dt_short_date: err => "xxx", TooShort; dt_short_time: err => "2020-01-01T12:0", TooShort; dt: err => "202x-01-01", InvalidCharYear; @@ -1393,7 +1395,10 @@ fn test_datetime_parse_bytes_does_not_add_offset_for_rfc3339() { fn test_datetime_parse_unix_timestamp_from_bytes_with_utc_offset() { let time = DateTime::parse_bytes_with_config( "1689102037.5586429".as_bytes(), - &(TimeConfigBuilder::new().unix_timestamp_offset(Some(0)).build()), + &(TimeConfigBuilder::new() + .unix_timestamp_offset(Some(0)) + .microseconds_precision_overflow_behavior(MicrosecondsPrecisionOverflowBehavior::Truncate) + .build()), ) .unwrap(); assert_eq!(time.to_string(), "2023-07-11T19:00:37.558643Z"); @@ -1403,7 +1408,10 @@ fn test_datetime_parse_unix_timestamp_from_bytes_with_utc_offset() { fn test_datetime_parse_unix_timestamp_from_bytes_as_naive() { let time = DateTime::parse_bytes_with_config( "1689102037.5586429".as_bytes(), - &(TimeConfigBuilder::new().unix_timestamp_offset(None).build()), + &(TimeConfigBuilder::new() + .unix_timestamp_offset(None) + .microseconds_precision_overflow_behavior(MicrosecondsPrecisionOverflowBehavior::Truncate) + .build()), ) .unwrap(); assert_eq!(time.to_string(), "2023-07-11T19:00:37.558643"); From bca5c315d0e534249cae538ccea8f9fcb6d5cae1 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Wed, 26 Jun 2024 16:33:50 +0100 Subject: [PATCH 4/6] additional test cases --- tests/main.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/main.rs b/tests/main.rs index b32a5d5..65caff9 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -867,6 +867,8 @@ param_tests! 
{ dt_unix_float_limit: ok => "1654646404.123456", "2022-06-08T00:00:04.123456"; dt_unix_float_ms: ok => "1654646404000.5", "2022-06-08T00:00:04.000500"; dt_unix_float_ms_limit: ok => "1654646404123.456", "2022-06-08T00:00:04.123456"; + dt_unix_float_empty: ok => "1654646404.", "2022-06-08T00:00:04"; + dt_unix_float_ms_empty: ok => "1654646404000.", "2022-06-08T00:00:04"; dt_unix_float_too_long: err => "1654646404.1234567", SecondFractionTooLong; dt_unix_float_ms_too_long: err => "1654646404123.4567", MillisecondFractionTooLong; dt_short_date: err => "xxx", TooShort; From 6579bdb3d7f377871f38b24f84d1eebb6f895ee2 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Wed, 26 Jun 2024 16:46:27 +0100 Subject: [PATCH 5/6] update comment --- src/datetime.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/datetime.rs b/src/datetime.rs index 3ef07eb..6becf90 100644 --- a/src/datetime.rs +++ b/src/datetime.rs @@ -370,7 +370,10 @@ impl DateTime { ParseError::SecondFractionTooLong }); } - // Technically this is rounding + // TODO: Technically this is rounding, but this is what the existing + // behaviour already did. Probably this is always better than "truncating" + // so we might want to change MicrosecondsPrecisionOverflowBehavior and + // make other uses also round / deprecate truncating. 
let multiple = 10f64.powf(max_digits as f64 - fract.len() as f64); Self::from_timestamp_with_config( timestamp, From e0136423e1980531493e84f8124c24712c82e08b Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Wed, 26 Jun 2024 16:49:46 +0100 Subject: [PATCH 6/6] fix doctest --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 89d9d43..ac3b29e 100644 --- a/README.md +++ b/README.md @@ -66,10 +66,13 @@ assert_eq!(dt.to_string(), "2022-01-01T12:13:14Z"); To control the specifics of time parsing you can use provide a `TimeConfig`: ```rust -use speedate::{DateTime, Date, Time, TimeConfig}; +use speedate::{DateTime, Date, Time, TimeConfig, MicrosecondsPrecisionOverflowBehavior}; let dt = DateTime::parse_bytes_with_config( "1689102037.5586429".as_bytes(), - &TimeConfig::builder().unix_timestamp_offset(Some(0)).build(), + &TimeConfig::builder() + .unix_timestamp_offset(Some(0)) + .microseconds_precision_overflow_behavior(MicrosecondsPrecisionOverflowBehavior::Truncate) + .build(), ).unwrap(); assert_eq!( dt,