diff --git a/crates/polars-core/src/frame/top_k.rs b/crates/polars-core/src/frame/top_k.rs index 961f54704b55..af3351d79fba 100644 --- a/crates/polars-core/src/frame/top_k.rs +++ b/crates/polars-core/src/frame/top_k.rs @@ -1,19 +1,7 @@ -use smartstring::alias::String as SmartString; - use super::*; use crate::prelude::sort::arg_bottom_k::_arg_bottom_k; impl DataFrame { - pub fn top_k( - &self, - k: usize, - by_column: impl IntoVec, - sort_options: SortMultipleOptions, - ) -> PolarsResult { - let by_column = self.select_series(by_column)?; - self.bottom_k_impl(k, by_column, sort_options.with_order_reversed()) - } - pub(crate) fn bottom_k_impl( &self, k: usize, diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs index e9ccc4a6b4b4..5083f2836021 100644 --- a/crates/polars-lazy/src/frame/mod.rs +++ b/crates/polars-lazy/src/frame/mod.rs @@ -366,8 +366,11 @@ impl LazyFrame { sort_options: SortMultipleOptions, ) -> Self { // this will optimize to top-k - self.sort_by_exprs(by_exprs, sort_options.with_order_reversed()) - .slice(0, k) + self.sort_by_exprs( + by_exprs, + sort_options.with_order_reversed().with_nulls_last(true), + ) + .slice(0, k) } pub fn bottom_k>( @@ -377,7 +380,8 @@ impl LazyFrame { sort_options: SortMultipleOptions, ) -> Self { // this will optimize to bottom-k - self.sort_by_exprs(by_exprs, sort_options).slice(0, k) + self.sort_by_exprs(by_exprs, sort_options.with_nulls_last(true)) + .slice(0, k) } /// Reverse the `DataFrame` from top to bottom. diff --git a/py-polars/tests/unit/operations/test_top_k.py b/py-polars/tests/unit/operations/test_top_k.py index bcf406112fc6..debb3e729274 100644 --- a/py-polars/tests/unit/operations/test_top_k.py +++ b/py-polars/tests/unit/operations/test_top_k.py @@ -68,8 +68,8 @@ def test_top_k() -> None: # dataframe df = pl.DataFrame( { - "a": [1, 2, 3, 4, 2, 2], - "b": [3, 2, 1, 4, 3, 2], + "a": [1, 2, 3, 4, 2, 2, None], + "b": [None, 2, 1, 4, 3, 2, None], } ) @@ -81,7 +81,7 @@ def test_top_k() -> None: assert_frame_equal( df.top_k(3, by=["a", "b"], reverse=True), - pl.DataFrame({"a": [1, 2, 2], "b": [3, 2, 2]}), + pl.DataFrame({"a": [1, 2, 2], "b": [None, 2, 2]}), check_row_order=False, ) assert_frame_equal(