From af80d86036189b99702971d84c839db66ef1a508 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 May 2024 11:06:33 +0200 Subject: [PATCH] feat: check if `by` column is sorted, rather than just checking sorted flag, in `group_by_dynamic`, `upsample`, and `rolling` --- crates/polars-core/src/utils/series.rs | 10 ------ crates/polars-ops/src/frame/join/asof/mod.rs | 6 ++-- crates/polars-ops/src/series/ops/various.rs | 5 +++ crates/polars-time/src/group_by/dynamic.rs | 34 +++++-------------- crates/polars-time/src/upsample.rs | 4 +-- py-polars/polars/dataframe/frame.py | 18 +++++++--- py-polars/polars/dataframe/group_by.py | 22 ++++++------ py-polars/polars/expr/expr.py | 4 +-- py-polars/polars/lazyframe/frame.py | 23 ++++++++----- py-polars/src/expr/general.rs | 2 -- py-polars/src/lazyframe/mod.rs | 4 --- py-polars/src/lazyframe/visitor/expr_nodes.rs | 5 --- .../unit/operations/test_group_by_dynamic.py | 31 ++++++++++------- .../tests/unit/operations/test_rolling.py | 27 +++++++-------- 14 files changed, 89 insertions(+), 106 deletions(-) diff --git a/crates/polars-core/src/utils/series.rs b/crates/polars-core/src/utils/series.rs index feeb20ed763d..3312e6a9e109 100644 --- a/crates/polars-core/src/utils/series.rs +++ b/crates/polars-core/src/utils/series.rs @@ -1,6 +1,5 @@ use crate::prelude::*; use crate::series::unstable::UnstableSeries; -use crate::series::IsSorted; /// A utility that allocates an [`UnstableSeries`]. The applied function can then use that /// series container to save heap allocations and swap arrow arrays. @@ -14,15 +13,6 @@ where f(&mut us) } -pub fn ensure_sorted_arg(s: &Series, operation: &str) -> PolarsResult<()> { - polars_ensure!(!matches!(s.is_sorted_flag(), IsSorted::Not), InvalidOperation: "argument in operation '{}' is not explicitly sorted - -- If your data is ALREADY sorted, set the sorted flag with: '.set_sorted()'. 
-- If your data is NOT sorted, sort the 'expr/series/column' first. - ", operation); - Ok(()) -} - pub fn handle_casting_failures(input: &Series, output: &Series) -> PolarsResult<()> { let failure_mask = !input.is_null() & output.is_null(); let failures = input.filter_threaded(&failure_mask, false)?; diff --git a/crates/polars-ops/src/frame/join/asof/mod.rs b/crates/polars-ops/src/frame/join/asof/mod.rs index ed4f3c2b6db7..bb61cedd6d63 100644 --- a/crates/polars-ops/src/frame/join/asof/mod.rs +++ b/crates/polars-ops/src/frame/join/asof/mod.rs @@ -5,7 +5,6 @@ use std::borrow::Cow; use default::*; pub use groups::AsofJoinBy; use polars_core::prelude::*; -use polars_core::utils::ensure_sorted_arg; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use smartstring::alias::String as SmartString; @@ -14,6 +13,7 @@ use smartstring::alias::String as SmartString; use super::_check_categorical_src; use super::{_finish_join, build_tables, prepare_bytes}; use crate::frame::IntoDf; +use crate::series::SeriesMethods; trait AsofJoinState: Default { fn next Option>( @@ -185,8 +185,8 @@ fn check_asof_columns( a.dtype(), b.dtype() ); if check_sorted { - ensure_sorted_arg(a, "asof_join")?; - ensure_sorted_arg(b, "asof_join")?; + a.ensure_sorted_arg("asof_join")?; + b.ensure_sorted_arg("asof_join")?; } Ok(()) } diff --git a/crates/polars-ops/src/series/ops/various.rs b/crates/polars-ops/src/series/ops/various.rs index ffc2b19347d4..327d2b193bb6 100644 --- a/crates/polars-ops/src/series/ops/various.rs +++ b/crates/polars-ops/src/series/ops/various.rs @@ -51,6 +51,11 @@ pub trait SeriesMethods: SeriesSealed { } } + fn ensure_sorted_arg(&self, operation: &str) -> PolarsResult<()> { + polars_ensure!(self.is_sorted(Default::default())?, InvalidOperation: "argument in operation '{}' is not sorted, please sort the 'expr/series/column' first", operation); + Ok(()) + } + /// Checks if a [`Series`] is sorted. Tries to fail fast. 
fn is_sorted(&self, options: SortOptions) -> PolarsResult<bool> { let s = self.as_series(); diff --git a/crates/polars-time/src/group_by/dynamic.rs b/crates/polars-time/src/group_by/dynamic.rs index 30c352b62bb0..e38fc9517a4f 100644 --- a/crates/polars-time/src/group_by/dynamic.rs +++ b/crates/polars-time/src/group_by/dynamic.rs @@ -3,9 +3,9 @@ use arrow::legacy::utils::CustomIterTools; use polars_core::export::rayon::prelude::*; use polars_core::prelude::*; use polars_core::series::IsSorted; -use polars_core::utils::ensure_sorted_arg; use polars_core::utils::flatten::flatten_par; use polars_core::POOL; +use polars_ops::series::SeriesMethods; use polars_utils::idx_vec::IdxVec; use polars_utils::slice::{GetSaferUnchecked, SortedSlice}; #[cfg(feature = "serde")] @@ -34,9 +34,6 @@ pub struct DynamicGroupOptions { pub include_boundaries: bool, pub closed_window: ClosedWindow, pub start_by: StartBy, - /// In cases sortedness cannot be checked by the sorted flag, - /// traverse the data to check sortedness. - pub check_sorted: bool, } impl Default for DynamicGroupOptions { @@ -50,7 +47,6 @@ impl Default for DynamicGroupOptions { include_boundaries: false, closed_window: ClosedWindow::Left, start_by: Default::default(), - check_sorted: true, } } } @@ -64,9 +60,6 @@ pub struct RollingGroupOptions { pub period: Duration, pub offset: Duration, pub closed_window: ClosedWindow, - /// In cases sortedness cannot be checked by the sorted flag, - /// traverse the data to check sortedness.
- pub check_sorted: bool, } impl Default for RollingGroupOptions { @@ -76,7 +69,6 @@ impl Default for RollingGroupOptions { period: Duration::new(1), offset: Duration::new(1), closed_window: ClosedWindow::Left, - check_sorted: true, } } } @@ -133,10 +125,10 @@ impl Wrap<&DataFrame> { "rolling window period should be strictly positive", ); let time = self.0.column(&options.index_column)?.clone(); - if group_by.is_empty() && options.check_sorted { + if group_by.is_empty() { // If by is given, the column must be sorted in the 'by' arg, which we can not check now // this will be checked when the groups are materialized. - ensure_sorted_arg(&time, "rolling")?; + time.ensure_sorted_arg("rolling")?; } let time_type = time.dtype(); @@ -202,10 +194,10 @@ impl Wrap<&DataFrame> { options: &DynamicGroupOptions, ) -> PolarsResult<(Series, Vec, GroupsProxy)> { let time = self.0.column(&options.index_column)?.rechunk(); - if group_by.is_empty() && options.check_sorted { + if group_by.is_empty() { // If by is given, the column must be sorted in the 'by' arg, which we can not check now // this will be checked when the groups are materialized. - ensure_sorted_arg(&time, "group_by_dynamic")?; + time.ensure_sorted_arg("group_by_dynamic")?; } let time_type = time.dtype(); @@ -349,9 +341,7 @@ impl Wrap<&DataFrame> { let dt = unsafe { dt.take_unchecked(base_g.1) }; let vals = dt.downcast_iter().next().unwrap(); let ts = vals.values().as_slice(); - if options.check_sorted - && !matches!(dt.is_sorted_flag(), IsSorted::Ascending) - { + if !matches!(dt.is_sorted_flag(), IsSorted::Ascending) { check_sortedness_slice(ts)? 
} let (sub_groups, lower, upper) = group_by_windows( @@ -428,9 +418,7 @@ impl Wrap<&DataFrame> { let dt = unsafe { dt.take_unchecked(base_g.1) }; let vals = dt.downcast_iter().next().unwrap(); let ts = vals.values().as_slice(); - if options.check_sorted - && !matches!(dt.is_sorted_flag(), IsSorted::Ascending) - { + if !matches!(dt.is_sorted_flag(), IsSorted::Ascending) { check_sortedness_slice(ts)? } let (sub_groups, _, _) = group_by_windows( @@ -573,9 +561,7 @@ impl Wrap<&DataFrame> { let dt = unsafe { dt_local.take_unchecked(base_g.1) }; let vals = dt.downcast_iter().next().unwrap(); let ts = vals.values().as_slice(); - if options.check_sorted - && !matches!(dt.is_sorted_flag(), IsSorted::Ascending) - { + if !matches!(dt.is_sorted_flag(), IsSorted::Ascending) { check_sortedness_slice(ts)? } @@ -716,7 +702,6 @@ mod test { period: Duration::parse("2d"), offset: Duration::parse("-2d"), closed_window: ClosedWindow::Right, - ..Default::default() }, ) .unwrap(); @@ -764,7 +749,6 @@ mod test { period: Duration::parse("2d"), offset: Duration::parse("-2d"), closed_window: ClosedWindow::Right, - ..Default::default() }, ) .unwrap(); @@ -848,7 +832,6 @@ mod test { include_boundaries: true, closed_window: ClosedWindow::Both, start_by: Default::default(), - ..Default::default() }, ) .unwrap(); @@ -969,7 +952,6 @@ mod test { include_boundaries: true, closed_window: ClosedWindow::Both, start_by: Default::default(), - ..Default::default() }, ) .unwrap(); diff --git a/crates/polars-time/src/upsample.rs b/crates/polars-time/src/upsample.rs index 5f50613923af..73e3dc529c31 100644 --- a/crates/polars-time/src/upsample.rs +++ b/crates/polars-time/src/upsample.rs @@ -1,8 +1,8 @@ #[cfg(feature = "timezones")] use polars_core::chunked_array::temporal::parse_time_zone; use polars_core::prelude::*; -use polars_core::utils::ensure_sorted_arg; use polars_ops::prelude::*; +use polars_ops::series::SeriesMethods; use crate::prelude::*; @@ -128,7 +128,7 @@ fn upsample_impl( stable: bool, ) -> 
PolarsResult { let s = source.column(index_column)?; - ensure_sorted_arg(s, "upsample")?; + s.ensure_sorted_arg("upsample")?; let time_type = s.dtype(); if matches!(time_type, DataType::Date) { let mut df = source.clone(); diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index cbb3c5ed23df..7e7a844fe680 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -5472,7 +5472,7 @@ def rolling( offset: str | timedelta | None = None, closed: ClosedInterval = "right", group_by: IntoExpr | Iterable[IntoExpr] | None = None, - check_sorted: bool = True, + check_sorted: bool | None = None, ) -> RollingGroupBy: """ Create rolling groups based on a temporal or integer column. @@ -5547,6 +5547,10 @@ def rolling( data within the groups is sorted, you can set this to `False`. Doing so incorrectly will lead to incorrect output + .. deprecated:: 0.20.31 + Sortedness is now verified in a quick manner, you can safely remove + this argument. + Returns ------- RollingGroupBy @@ -5622,7 +5626,7 @@ def group_by_dynamic( label: Label = "left", group_by: IntoExpr | Iterable[IntoExpr] | None = None, start_by: StartBy = "window", - check_sorted: bool = True, + check_sorted: bool | None = None, ) -> DynamicGroupBy: """ Group based on a time value (or index value of type Int32, Int64). @@ -5707,6 +5711,10 @@ def group_by_dynamic( data within the groups is sorted, you can set this to `False`. Doing so incorrectly will lead to incorrect output + .. deprecated:: 0.20.31 + Sortedness is now verified in a quick manner, you can safely remove + this argument. + Returns ------- DynamicGroupBy @@ -10733,7 +10741,7 @@ def groupby_rolling( offset: str | timedelta | None = None, closed: ClosedInterval = "right", by: IntoExpr | Iterable[IntoExpr] | None = None, - check_sorted: bool = True, + check_sorted: bool | None = None, ) -> RollingGroupBy: """ Create rolling groups based on a time, Int32, or Int64 column. 
@@ -10787,7 +10795,7 @@ def group_by_rolling( offset: str | timedelta | None = None, closed: ClosedInterval = "right", by: IntoExpr | Iterable[IntoExpr] | None = None, - check_sorted: bool = True, + check_sorted: bool | None = None, ) -> RollingGroupBy: """ Create rolling groups based on a time, Int32, or Int64 column. @@ -10845,7 +10853,7 @@ def groupby_dynamic( closed: ClosedInterval = "left", by: IntoExpr | Iterable[IntoExpr] | None = None, start_by: StartBy = "window", - check_sorted: bool = True, + check_sorted: bool | None = None, ) -> DynamicGroupBy: """ Group based on a time value (or index value of type Int32, Int64). diff --git a/py-polars/polars/dataframe/group_by.py b/py-polars/polars/dataframe/group_by.py index 3f5164b010d0..ad67f68d5b15 100644 --- a/py-polars/polars/dataframe/group_by.py +++ b/py-polars/polars/dataframe/group_by.py @@ -806,8 +806,13 @@ def __init__( offset: str | timedelta | None, closed: ClosedInterval, group_by: IntoExpr | Iterable[IntoExpr] | None, - check_sorted: bool, + check_sorted: bool | None = None, ): + if check_sorted is not None: + issue_deprecation_warning( + "`check_sorted` is now deprecated in `rolling`, you can safely remove this argument.", + version="0.20.31", + ) period = parse_as_duration_string(period) offset = parse_as_duration_string(offset) @@ -817,7 +822,6 @@ def __init__( self.offset = offset self.closed = closed self.group_by = group_by - self.check_sorted = check_sorted def __iter__(self) -> Self: temp_col = "__POLARS_GB_GROUP_INDICES" @@ -829,7 +833,6 @@ def __iter__(self) -> Self: offset=self.offset, closed=self.closed, group_by=self.group_by, - check_sorted=self.check_sorted, ) .agg(F.first().agg_groups().alias(temp_col)) .collect(no_optimization=True) @@ -888,7 +891,6 @@ def agg( offset=self.offset, closed=self.closed, group_by=self.group_by, - check_sorted=self.check_sorted, ) .agg(*aggs, **named_aggs) .collect(no_optimization=True) @@ -931,7 +933,6 @@ def map_groups( offset=self.offset, 
closed=self.closed, group_by=self.group_by, - check_sorted=self.check_sorted, ) .map_groups(function, schema) .collect(no_optimization=True) @@ -983,8 +984,13 @@ def __init__( label: Label, group_by: IntoExpr | Iterable[IntoExpr] | None, start_by: StartBy, - check_sorted: bool, + check_sorted: bool | None = None, ): + if check_sorted is not None: + issue_deprecation_warning( + "`check_sorted` is now deprecated in `rolling`, you can safely remove this argument.", + version="0.20.31", + ) every = parse_as_duration_string(every) period = parse_as_duration_string(period) offset = parse_as_duration_string(offset) @@ -1000,7 +1006,6 @@ def __init__( self.closed = closed self.group_by = group_by self.start_by = start_by - self.check_sorted = check_sorted def __iter__(self) -> Self: temp_col = "__POLARS_GB_GROUP_INDICES" @@ -1017,7 +1022,6 @@ def __iter__(self) -> Self: closed=self.closed, group_by=self.group_by, start_by=self.start_by, - check_sorted=self.check_sorted, ) .agg(F.first().agg_groups().alias(temp_col)) .collect(no_optimization=True) @@ -1081,7 +1085,6 @@ def agg( closed=self.closed, group_by=self.group_by, start_by=self.start_by, - check_sorted=self.check_sorted, ) .agg(*aggs, **named_aggs) .collect(no_optimization=True) @@ -1128,7 +1131,6 @@ def map_groups( closed=self.closed, group_by=self.group_by, start_by=self.start_by, - check_sorted=self.check_sorted, ) .map_groups(function, schema) .collect(no_optimization=True) diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index debc5e3ac886..d1bcb879b1b6 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -3771,7 +3771,7 @@ def rolling( period: str | timedelta, offset: str | timedelta | None = None, closed: ClosedInterval = "right", - check_sorted: bool = True, + check_sorted: bool | None = None, ) -> Self: """ Create rolling groups based on a temporal or integer column. 
@@ -3875,7 +3875,7 @@ def rolling( offset = parse_as_duration_string(offset) return self._from_pyexpr( - self._pyexpr.rolling(index_column, period, offset, closed, check_sorted) + self._pyexpr.rolling(index_column, period, offset, closed) ) def is_unique(self) -> Self: diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 560542d6ea86..1e8d54cd512c 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -3168,7 +3168,7 @@ def rolling( offset: str | timedelta | None = None, closed: ClosedInterval = "right", group_by: IntoExpr | Iterable[IntoExpr] | None = None, - check_sorted: bool = True, + check_sorted: bool | None = None, ) -> LazyGroupBy: """ Create rolling groups based on a temporal or integer column. @@ -3243,6 +3243,10 @@ def rolling( data within the groups is sorted, you can set this to `False`. Doing so incorrectly will lead to incorrect output + .. deprecated:: 0.20.31 + Sortedness is now verified in a quick manner, you can safely remove + this argument. + Returns ------- LazyGroupBy @@ -3303,9 +3307,7 @@ def rolling( period = parse_as_duration_string(period) offset = parse_as_duration_string(offset) - lgb = self._ldf.rolling( - index_column, period, offset, closed, pyexprs_by, check_sorted - ) + lgb = self._ldf.rolling(index_column, period, offset, closed, pyexprs_by) return LazyGroupBy(lgb) @deprecate_renamed_parameter("by", "group_by", version="0.20.14") @@ -3322,7 +3324,7 @@ def group_by_dynamic( label: Label = "left", group_by: IntoExpr | Iterable[IntoExpr] | None = None, start_by: StartBy = "window", - check_sorted: bool = True, + check_sorted: bool | None = None, ) -> LazyGroupBy: """ Group based on a time value (or index value of type Int32, Int64). @@ -3407,6 +3409,10 @@ def group_by_dynamic( data within the groups is sorted, you can set this to `False`. Doing so incorrectly will lead to incorrect output + .. 
deprecated:: 0.20.31 + Sortedness is now verified in a quick manner, you can safely remove + this argument. + Returns ------- LazyGroupBy @@ -3671,7 +3677,6 @@ def group_by_dynamic( closed, pyexprs_by, start_by, - check_sorted, ) return LazyGroupBy(lgb) @@ -6266,7 +6271,7 @@ def groupby_rolling( offset: str | timedelta | None = None, closed: ClosedInterval = "right", by: IntoExpr | Iterable[IntoExpr] | None = None, - check_sorted: bool = True, + check_sorted: bool | None = None, ) -> LazyGroupBy: """ Create rolling groups based on a time, Int32, or Int64 column. @@ -6327,7 +6332,7 @@ def group_by_rolling( offset: str | timedelta | None = None, closed: ClosedInterval = "right", by: IntoExpr | Iterable[IntoExpr] | None = None, - check_sorted: bool = True, + check_sorted: bool | None = None, ) -> LazyGroupBy: """ Create rolling groups based on a time, Int32, or Int64 column. @@ -6392,7 +6397,7 @@ def groupby_dynamic( closed: ClosedInterval = "left", by: IntoExpr | Iterable[IntoExpr] | None = None, start_by: StartBy = "window", - check_sorted: bool = True, + check_sorted: bool | None = None, ) -> LazyGroupBy: """ Group based on a time value (or index value of type Int32, Int64). 
diff --git a/py-polars/src/expr/general.rs b/py-polars/src/expr/general.rs index 7a5f40858678..c3aa832d9429 100644 --- a/py-polars/src/expr/general.rs +++ b/py-polars/src/expr/general.rs @@ -679,14 +679,12 @@ impl PyExpr { period: &str, offset: &str, closed: Wrap, - check_sorted: bool, ) -> Self { let options = RollingGroupOptions { index_column: index_column.into(), period: Duration::parse(period), offset: Duration::parse(offset), closed_window: closed.0, - check_sorted, }; self.inner.clone().rolling(options).into() diff --git a/py-polars/src/lazyframe/mod.rs b/py-polars/src/lazyframe/mod.rs index 42d831a315c0..ec5461398c28 100644 --- a/py-polars/src/lazyframe/mod.rs +++ b/py-polars/src/lazyframe/mod.rs @@ -815,7 +815,6 @@ impl PyLazyFrame { offset: &str, closed: Wrap, by: Vec, - check_sorted: bool, ) -> PyLazyGroupBy { let closed_window = closed.0; let ldf = self.ldf.clone(); @@ -831,7 +830,6 @@ impl PyLazyFrame { period: Duration::parse(period), offset: Duration::parse(offset), closed_window, - check_sorted, }, ); @@ -849,7 +847,6 @@ impl PyLazyFrame { closed: Wrap, group_by: Vec, start_by: Wrap, - check_sorted: bool, ) -> PyLazyGroupBy { let closed_window = closed.0; let group_by = group_by @@ -868,7 +865,6 @@ impl PyLazyFrame { include_boundaries, closed_window, start_by: start_by.0, - check_sorted, ..Default::default() }, ); diff --git a/py-polars/src/lazyframe/visitor/expr_nodes.rs b/py-polars/src/lazyframe/visitor/expr_nodes.rs index 2b2d9e087c55..b0867401275c 100644 --- a/py-polars/src/lazyframe/visitor/expr_nodes.rs +++ b/py-polars/src/lazyframe/visitor/expr_nodes.rs @@ -360,11 +360,6 @@ impl PyRollingGroupOptions { }; Ok(result.into_py(py)) } - - #[getter] - fn check_sorted(&self, py: Python<'_>) -> PyResult { - Ok(self.inner.check_sorted.into_py(py)) - } } #[pyclass(name = "GroupbyOptions")] diff --git a/py-polars/tests/unit/operations/test_group_by_dynamic.py b/py-polars/tests/unit/operations/test_group_by_dynamic.py index 72c9dfcbb124..bbcb580fad9e 
100644 --- a/py-polars/tests/unit/operations/test_group_by_dynamic.py +++ b/py-polars/tests/unit/operations/test_group_by_dynamic.py @@ -373,7 +373,7 @@ def test_rolling_dynamic_sortedness_check() -> None: # no `by` argument with pytest.raises( pl.InvalidOperationError, - match=r"argument in operation 'group_by_dynamic' is not explicitly sorted", + match=r"argument in operation 'group_by_dynamic' is not sorted", ): df.group_by_dynamic("idx", every="2i").agg(pl.col("idx").alias("idx1")) @@ -491,17 +491,23 @@ def test_group_by_dynamic_validation() -> None: ) -def test_no_sorted_err() -> None: +def test_no_sorted_no_error() -> None: df = pl.DataFrame( { "dt": [datetime(2001, 1, 1), datetime(2001, 1, 2)], } ) - with pytest.raises( - pl.InvalidOperationError, - match=r"argument in operation 'group_by_dynamic' is not explicitly sorted", - ): - df.group_by_dynamic("dt", every="1h").agg(pl.all().count().name.suffix("_foo")) + result = df.group_by_dynamic("dt", every="1h").agg( + pl.all().count().name.suffix("_foo") + ) + expected = pl.DataFrame( + { + "dt": [datetime(2001, 1, 1), datetime(2001, 1, 2)], + "dt_foo": [1, 1], + }, + schema_overrides={"dt_foo": pl.UInt32}, + ) + assert_frame_equal(result, expected) @pytest.mark.parametrize("tzinfo", [None, ZoneInfo("UTC"), ZoneInfo("Asia/Kathmandu")]) @@ -968,18 +974,17 @@ def test_group_by_dynamic_check_sorted_15225() -> None: "c": [1, 1, 2], } ) - result = df.group_by_dynamic("b", every="2d", check_sorted=False).agg(pl.sum("a")) + with pytest.deprecated_call(match="`check_sorted` is now deprecated"): + result = df.group_by_dynamic("b", every="2d", check_sorted=False).agg( + pl.sum("a") + ) expected = pl.DataFrame({"b": [date(2020, 1, 1), date(2020, 1, 3)], "a": [3, 3]}) assert_frame_equal(result, expected) - result = df.group_by_dynamic("b", every="2d", check_sorted=False, group_by="c").agg( - pl.sum("a") - ) + result = df.group_by_dynamic("b", every="2d", group_by="c").agg(pl.sum("a")) expected = pl.DataFrame( {"c": [1, 2], 
"b": [date(2020, 1, 1), date(2020, 1, 3)], "a": [3, 3]} ) assert_frame_equal(result, expected) - with pytest.raises(pl.InvalidOperationError, match="not explicitly sorted"): - result = df.group_by_dynamic("b", every="2d").agg(pl.sum("a")) @pytest.mark.parametrize("start_by", ["window", "friday"]) diff --git a/py-polars/tests/unit/operations/test_rolling.py b/py-polars/tests/unit/operations/test_rolling.py index e08bdafeb69b..9bdfd76486d2 100644 --- a/py-polars/tests/unit/operations/test_rolling.py +++ b/py-polars/tests/unit/operations/test_rolling.py @@ -229,7 +229,7 @@ def test_rolling_dynamic_sortedness_check() -> None: # no `group_by` argument with pytest.raises( pl.InvalidOperationError, - match="argument in operation 'rolling' is not explicitly sorted", + match="argument in operation 'rolling' is not sorted", ): df.rolling("idx", period="2i").agg(pl.col("idx").alias("idx1")) @@ -261,14 +261,15 @@ def test_rolling_empty_groups_9973() -> None: } ) - out = data.rolling( - index_column="date", - group_by="id", - period="2d", - offset="1d", - closed="left", - check_sorted=True, - ).agg(pl.col("value")) + with pytest.deprecated_call(match="`check_sorted` is now deprecated"): + out = data.rolling( + index_column="date", + group_by="id", + period="2d", + offset="1d", + closed="left", + check_sorted=True, + ).agg(pl.col("value")) assert_frame_equal(out, expected) @@ -300,14 +301,12 @@ def test_rolling_check_sorted_15225() -> None: "c": [1, 1, 2], } ) - result = df.rolling("b", period="2d", check_sorted=False).agg(pl.sum("a")) + result = df.rolling("b", period="2d").agg(pl.sum("a")) expected = pl.DataFrame( {"b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)], "a": [1, 3, 5]} ) assert_frame_equal(result, expected) - result = df.rolling("b", period="2d", group_by="c", check_sorted=False).agg( - pl.sum("a") - ) + result = df.rolling("b", period="2d", group_by="c").agg(pl.sum("a")) expected = pl.DataFrame( { "c": [1, 1, 2], @@ -316,8 +315,6 @@ def 
test_rolling_check_sorted_15225() -> None: } ) assert_frame_equal(result, expected) - with pytest.raises(pl.InvalidOperationError, match="not explicitly sorted"): - result = df.rolling("b", period="2d").agg(pl.sum("a")) def test_multiple_rolling_in_single_expression() -> None: