From fea15f0107944d384eec8e8c67691cc167fb3849 Mon Sep 17 00:00:00 2001 From: ritchie Date: Sat, 21 Dec 2024 12:08:53 +0100 Subject: [PATCH] expose and features --- crates/polars-lazy/src/frame/mod.rs | 7 ++++++ crates/polars-plan/src/frame/opt_state.rs | 3 +++ crates/polars-plan/src/plans/optimizer/mod.rs | 4 +++- .../src/plans/optimizer/set_order.rs | 4 ++-- crates/polars-plan/src/plans/options.rs | 24 +++++++++++++++++++ crates/polars-python/src/lazyframe/general.rs | 2 ++ py-polars/polars/lazyframe/frame.py | 7 +++++- 7 files changed, 47 insertions(+), 4 deletions(-) diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs index d0860f3faccd..acfa4b77d8f2 100644 --- a/crates/polars-lazy/src/frame/mod.rs +++ b/crates/polars-lazy/src/frame/mod.rs @@ -153,6 +153,13 @@ impl LazyFrame { self } + /// Check if operations are order dependent and unset maintaining_order if + /// the order would not be observed. + pub fn with_check_order(mut self, toggle: bool) -> Self { + self.opt_state.set(OptFlags::CHECK_ORDER_OBSERVE, toggle); + self + } + /// Toggle predicate pushdown optimization. pub fn with_predicate_pushdown(mut self, toggle: bool) -> Self { self.opt_state.set(OptFlags::PREDICATE_PUSHDOWN, toggle); diff --git a/crates/polars-plan/src/frame/opt_state.rs b/crates/polars-plan/src/frame/opt_state.rs index 699b4a09a0ce..04586e774a04 100644 --- a/crates/polars-plan/src/frame/opt_state.rs +++ b/crates/polars-plan/src/frame/opt_state.rs @@ -35,6 +35,9 @@ bitflags! { const FAST_PROJECTION = 1 << 14; /// Collapse slower joins with filters into faster joins. const COLLAPSE_JOINS = 1 << 15; + /// Check if operations are order dependent and unset maintaining_order if + /// the order would not be observed. + const CHECK_ORDER_OBSERVE = 1 << 16; } } diff --git a/crates/polars-plan/src/plans/optimizer/mod.rs b/crates/polars-plan/src/plans/optimizer/mod.rs index f23a9456a948..49d5076b0634 100644 --- a/crates/polars-plan/src/plans/optimizer/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/mod.rs @@ -210,7 +210,9 @@ pub fn optimize( cache_states::set_cache_states(lp_top, lp_arena, expr_arena, scratch, expr_eval, verbose)?; } - if members.has_group_by | members.has_sort | members.has_distinct { + if opt_state.contains(OptFlags::CHECK_ORDER_OBSERVE) + && members.has_group_by | members.has_sort | members.has_distinct + { set_order_flags(lp_top, lp_arena, expr_arena, scratch); } diff --git a/crates/polars-plan/src/plans/optimizer/set_order.rs b/crates/polars-plan/src/plans/optimizer/set_order.rs index ffcd05f6568b..abe16b69c973 100644 --- a/crates/polars-plan/src/plans/optimizer/set_order.rs +++ b/crates/polars-plan/src/plans/optimizer/set_order.rs @@ -105,8 +105,8 @@ pub(super) fn set_order_flags( if apply.is_some() || *maintain_order - || options.rolling.is_some() - || options.dynamic.is_some() + || options.is_rolling() + || options.is_dynamic() { maintain_order_above = true; continue; diff --git a/crates/polars-plan/src/plans/options.rs b/crates/polars-plan/src/plans/options.rs index 90d7755caa4d..ab4a61639b48 100644 --- a/crates/polars-plan/src/plans/options.rs +++ b/crates/polars-plan/src/plans/options.rs @@ -72,6 +72,30 @@ pub struct GroupbyOptions { pub slice: Option<(i64, usize)>, } +impl GroupbyOptions { + pub(crate) fn is_rolling(&self) -> bool { + #[cfg(feature = "dynamic_group_by")] + { + self.rolling.is_some() + } + #[cfg(not(feature = "dynamic_group_by"))] + { + false + } + } + + pub(crate) fn is_dynamic(&self) -> bool { + #[cfg(feature = "dynamic_group_by")] + { + self.dynamic.is_some() + } + #[cfg(not(feature = "dynamic_group_by"))] + { + false + } + } +} + #[derive(Clone, Debug, Eq, PartialEq, Default, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct DistinctOptionsDSL { diff --git a/crates/polars-python/src/lazyframe/general.rs b/crates/polars-python/src/lazyframe/general.rs index f0c53d0f340a..5e0ce0e2e0a4 100644 --- a/crates/polars-python/src/lazyframe/general.rs +++ b/crates/polars-python/src/lazyframe/general.rs @@ -494,6 +494,7 @@ impl PyLazyFrame { collapse_joins: bool, streaming: bool, _eager: bool, + _check_order: bool, #[allow(unused_variables)] new_streaming: bool, ) -> Self { let ldf = self.ldf.clone(); @@ -504,6 +505,7 @@ impl PyLazyFrame { .with_slice_pushdown(slice_pushdown) .with_cluster_with_columns(cluster_with_columns) .with_collapse_joins(collapse_joins) + .with_check_order(_check_order) ._with_eager(_eager) .with_projection_pushdown(projection_pushdown); diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 3f1125497c5c..751843841c30 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -986,7 +986,7 @@ def skip_minmax(dt: PolarsDataType) -> bool: # reshape wide result n_metrics = len(metrics) column_metrics = [ - df_metrics.row(0)[(n * n_metrics) : (n + 1) * n_metrics] + df_metrics.row(0)[(n * n_metrics): (n + 1) * n_metrics] for n in range(schema.len()) ] summary = dict(zip(schema, column_metrics)) @@ -1768,6 +1768,7 @@ def collect( engine: EngineType = "cpu", background: Literal[True], _eager: bool = False, + _check_order: bool = True, ) -> InProcessQuery: ... @overload @@ -1787,6 +1788,7 @@ def collect( streaming: bool = False, engine: EngineType = "cpu", background: Literal[False] = False, + _check_order: bool = True, _eager: bool = False, ) -> DataFrame: ... @@ -1806,6 +1808,7 @@ def collect( streaming: bool = False, engine: EngineType = "cpu", background: bool = False, + _check_order: bool = True, _eager: bool = False, **_kwargs: Any, ) -> DataFrame | InProcessQuery: @@ -1974,6 +1977,7 @@ def collect( comm_subexpr_elim = False cluster_with_columns = False collapse_joins = False + _check_order = False if streaming: issue_unstable_warning("Streaming mode is considered unstable.") @@ -2005,6 +2009,7 @@ def collect( collapse_joins, streaming, _eager, + _check_order, new_streaming, )