From 502508c50cf8f87be9924b4536e19432eb2bf04f Mon Sep 17 00:00:00 2001 From: ritchie Date: Fri, 7 Jun 2024 08:30:25 +0200 Subject: [PATCH 1/2] feat(python)!: Only accept a single column in `set_sorted` --- py-polars/polars/dataframe/frame.py | 22 ++++++++++++++-------- py-polars/polars/expr/expr.py | 2 +- py-polars/polars/lazyframe/frame.py | 27 +++++++++++++++++---------- py-polars/tests/unit/test_errors.py | 7 +++++++ 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index fbd80f4e058f..bae550a57562 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -10313,26 +10313,32 @@ def merge_sorted(self, other: DataFrame, key: str) -> DataFrame: def set_sorted( self, - column: str | Iterable[str], - *more_columns: str, + column: str, + *, descending: bool = False, ) -> DataFrame: """ Indicate that one or multiple columns are sorted. + This can speed up future operations. + Parameters ---------- column - Columns that are sorted - more_columns - Additional columns that are sorted, specified as positional arguments. + Column that is sorted descending Whether the columns are sorted in descending order. + + Warnings + -------- + This can lead to incorrect results if the data is NOT sorted!! + Use with care! + """ + # NOTE: Only accepts 1 column on purpose! Users might think they are sorted by + # the combined multicolumn values. 
return ( - self.lazy() - .set_sorted(column, *more_columns, descending=descending) - .collect(_eager=True) + self.lazy().set_sorted(column, descending=descending).collect(_eager=True) ) @unstable() diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 349deb89daca..0c0ec695c555 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -9825,7 +9825,7 @@ def set_sorted(self, *, descending: bool = False) -> Self: Warnings -------- - This can lead to incorrect results if this `Series` is not sorted!! + This can lead to incorrect results if the data is NOT sorted!! Use with care! Examples diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index bbd4cbadc4ff..9921c5219779 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -81,7 +81,7 @@ from polars.slice import LazyPolarsSlice with contextlib.suppress(ImportError): # Module not available when building docs - from polars.polars import PyLazyFrame + from polars.polars import PyLazyFrame, col if TYPE_CHECKING: import sys @@ -5905,27 +5905,34 @@ def merge_sorted(self, other: LazyFrame, key: str) -> Self: def set_sorted( self, - column: str | Iterable[str], - *more_columns: str, + column: str, + *, descending: bool = False, ) -> Self: """ Indicate that one or multiple columns are sorted. + This can speed up future operations. + Parameters ---------- column Columns that are sorted - more_columns - Additional columns that are sorted, specified as positional arguments. descending Whether the columns are sorted in descending order. - """ - columns = parse_as_list_of_expressions(column, *more_columns) - return self.with_columns( - wrap_expr(e).set_sorted(descending=descending) for e in columns - ) + Warnings + -------- + This can lead to incorrect results if the data is NOT sorted!! + Use with care! + + """ + # NOTE: Only accepts 1 column on purpose! 
Users might think they are sorted by + # the combined multicolumn values. + if not isinstance(column, str): + msg = "expected a 'str' for argument 'column' in 'set_sorted'" + raise TypeError(msg) + return self.with_columns(col(column).set_sorted(descending=descending)) @unstable() def update( diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py index 861caa19e4b5..c3dfa804d2d2 100644 --- a/py-polars/tests/unit/test_errors.py +++ b/py-polars/tests/unit/test_errors.py @@ -658,3 +658,10 @@ def test_raise_invalid_arithmetic() -> None: with pytest.raises(pl.InvalidOperationError): df.select(pl.col("a") - pl.col("a")) + + +def test_raise_on_sorted_multi_args() -> None: + with pytest.raises(TypeError): + pl.DataFrame({"a": [1], "b": [1]}).set_sorted( + ["a", "b"] # type: ignore[arg-type] + ) From 7312c1e6a107a6186510efa4a3598e053c1d8d7b Mon Sep 17 00:00:00 2001 From: ritchie Date: Fri, 7 Jun 2024 13:59:04 +0200 Subject: [PATCH 2/2] expr level col --- py-polars/polars/lazyframe/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 9921c5219779..031e8afabd2c 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -81,7 +81,7 @@ from polars.slice import LazyPolarsSlice with contextlib.suppress(ImportError): # Module not available when building docs - from polars.polars import PyLazyFrame, col + from polars.polars import PyLazyFrame if TYPE_CHECKING: import sys @@ -5932,7 +5932,7 @@ def set_sorted( if not isinstance(column, str): msg = "expected a 'str' for argument 'column' in 'set_sorted'" raise TypeError(msg) - return self.with_columns(col(column).set_sorted(descending=descending)) + return self.with_columns(F.col(column).set_sorted(descending=descending)) @unstable() def update(