From 817491ef9b3dbbe8c2f7681b999de78b55003921 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Thu, 27 Jun 2024 10:59:30 +0200 Subject: [PATCH 1/3] fix: top_k directly on dataframes --- crates/polars-core/src/frame/top_k.rs | 12 ------------ crates/polars-lazy/src/frame/mod.rs | 10 +++++++--- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/crates/polars-core/src/frame/top_k.rs b/crates/polars-core/src/frame/top_k.rs index 961f54704b55..af3351d79fba 100644 --- a/crates/polars-core/src/frame/top_k.rs +++ b/crates/polars-core/src/frame/top_k.rs @@ -1,19 +1,7 @@ -use smartstring::alias::String as SmartString; - use super::*; use crate::prelude::sort::arg_bottom_k::_arg_bottom_k; impl DataFrame { - pub fn top_k( - &self, - k: usize, - by_column: impl IntoVec, - sort_options: SortMultipleOptions, - ) -> PolarsResult { - let by_column = self.select_series(by_column)?; - self.bottom_k_impl(k, by_column, sort_options.with_order_reversed()) - } - pub(crate) fn bottom_k_impl( &self, k: usize, diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs index e9ccc4a6b4b4..5083f2836021 100644 --- a/crates/polars-lazy/src/frame/mod.rs +++ b/crates/polars-lazy/src/frame/mod.rs @@ -366,8 +366,11 @@ impl LazyFrame { sort_options: SortMultipleOptions, ) -> Self { // this will optimize to top-k - self.sort_by_exprs(by_exprs, sort_options.with_order_reversed()) - .slice(0, k) + self.sort_by_exprs( + by_exprs, + sort_options.with_order_reversed().with_nulls_last(true), + ) + .slice(0, k) } pub fn bottom_k>( @@ -377,7 +380,8 @@ impl LazyFrame { sort_options: SortMultipleOptions, ) -> Self { // this will optimize to bottom-k - self.sort_by_exprs(by_exprs, sort_options).slice(0, k) + self.sort_by_exprs(by_exprs, sort_options.with_nulls_last(true)) + .slice(0, k) } /// Reverse the `DataFrame` from top to bottom. From 383ddb85954fabf0831a6ca0c1b9f3c7dc5c840a Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Thu, 27 Jun 2024 11:39:35 +0200 Subject: [PATCH 2/3] modify test to test behavior around nulls --- py-polars/tests/unit/operations/test_top_k.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/py-polars/tests/unit/operations/test_top_k.py b/py-polars/tests/unit/operations/test_top_k.py index bcf406112fc6..fdb10474fb54 100644 --- a/py-polars/tests/unit/operations/test_top_k.py +++ b/py-polars/tests/unit/operations/test_top_k.py @@ -68,8 +68,8 @@ def test_top_k() -> None: # dataframe df = pl.DataFrame( { - "a": [1, 2, 3, 4, 2, 2], - "b": [3, 2, 1, 4, 3, 2], + "a": [1, 2, 3, 4, 2, 2, None], + "b": [None, 2, 1, 4, 3, 2, None], } ) @@ -81,7 +81,7 @@ def test_top_k() -> None: assert_frame_equal( df.top_k(3, by=["a", "b"], reverse=True), - pl.DataFrame({"a": [1, 2, 2], "b": [3, 2, 2]}), + pl.DataFrame({"a": [1, 2, 2], "b": [None, 2, 2]}), check_row_order=False, ) assert_frame_equal( From 8b0c24c1ff1e25957e9131f760bfae4dcd880a20 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Thu, 27 Jun 2024 13:14:56 +0200 Subject: [PATCH 3/3] fmt --- py-polars/tests/unit/operations/test_top_k.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/tests/unit/operations/test_top_k.py b/py-polars/tests/unit/operations/test_top_k.py index fdb10474fb54..debb3e729274 100644 --- a/py-polars/tests/unit/operations/test_top_k.py +++ b/py-polars/tests/unit/operations/test_top_k.py @@ -68,7 +68,7 @@ def test_top_k() -> None: # dataframe df = pl.DataFrame( { - "a": [1, 2, 3, 4, 2, 2, None], + "a": [1, 2, 3, 4, 2, 2, None], "b": [None, 2, 1, 4, 3, 2, None], } )