Skip to content

Commit

Permalink
feat: Optimizer; remove double SORT and redundant projections (#15573)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Apr 10, 2024
1 parent 8440457 commit 835d198
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 34 deletions.
1 change: 1 addition & 0 deletions crates/polars-lazy/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ mod optimization_checks;
mod predicate_queries;
mod projection_queries;
mod queries;
mod schema;
#[cfg(feature = "streaming")]
mod streaming;
#[cfg(all(feature = "strings", feature = "cse"))]
Expand Down
38 changes: 38 additions & 0 deletions crates/polars-lazy/src/tests/schema.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
use super::*;

#[test]
fn test_schema_update_after_projection_pd() -> PolarsResult<()> {
let df = df![
"a" => [1],
"b" => [1],
"c" => [1],
]?;

let q = df
.lazy()
.with_column(col("a").implode())
.explode([col("a")])
.select([cols(["a", "b"])]);

// run optimizations
// Get the explode node
let (input, lp_arena, _expr_arena) = q.to_alp_optimized()?;

// assert the schema has been corrected with the projection pushdown run
let lp = lp_arena.get(input);
assert!(matches!(
lp,
IR::MapFunction {
function: FunctionNode::Explode { .. },
..
}
));

let schema = lp.schema(&lp_arena).into_owned();
let mut expected = Schema::new();
expected.with_column("a".into(), DataType::Int32);
expected.with_column("b".into(), DataType::Int32);
assert_eq!(schema.as_ref(), &expected);

Ok(())
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,13 @@ impl OptimizationRule for SimpleProjectionAndCollapse {
None
}
},
// If there are 2 subsequent fast projections, flatten them and only take the last
SimpleProjection {
columns,
input,
duplicate_check,
} if !self.eager => {
match lp_arena.get(*input) {
// If there are 2 subsequent fast projections, flatten them and only take the last
SimpleProjection {
input: prev_input, ..
} => Some(SimpleProjection {
Expand All @@ -90,7 +90,16 @@ impl OptimizationRule for SimpleProjectionAndCollapse {
None
}
},
_ => None,
// If a projection does nothing, remove it.
other => {
let input_schema = other.schema(lp_arena);
// This will fail fast if lengths are not equal
if *input_schema.as_ref() == *columns {
Some(other.clone())
} else {
None
}
},
}
},
// if there are 2 subsequent caches, flatten them and only take the inner
Expand All @@ -115,6 +124,19 @@ impl OptimizationRule for SimpleProjectionAndCollapse {
None
}
},
// Remove double sorts
Sort {
input,
by_column,
args,
} => match lp_arena.get(*input) {
Sort { input: inner, .. } => Some(Sort {
input: *inner,
by_column: by_column.clone(),
args: args.clone(),
}),
_ => None,
},
_ => None,
}
}
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-plan/src/logical_plan/optimizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod cache_states;
mod delay_rechunk;
mod drop_nulls;

mod collapse_and_project;
mod collect_members;
mod count_star;
#[cfg(feature = "cse")]
Expand All @@ -15,21 +16,20 @@ mod flatten_union;
mod fused;
mod predicate_pushdown;
mod projection_pushdown;
mod simple_projection;
mod simplify_expr;
mod simplify_functions;
mod slice_pushdown_expr;
mod slice_pushdown_lp;
mod stack_opt;
mod type_coercion;

use collapse_and_project::SimpleProjectionAndCollapse;
use delay_rechunk::DelayRechunk;
use drop_nulls::ReplaceDropNulls;
use polars_core::config::verbose;
use polars_io::predicates::PhysicalIoExpr;
pub use predicate_pushdown::PredicatePushDown;
pub use projection_pushdown::ProjectionPushDown;
use simple_projection::SimpleProjectionAndCollapse;
pub use simplify_expr::{SimplifyBooleanRule, SimplifyExprRule};
use slice_pushdown_lp::SlicePushDown;
pub use stack_opt::{OptimizationRule, StackOptimizer};
Expand Down
29 changes: 0 additions & 29 deletions crates/polars/tests/it/lazy/schema.rs
Original file line number Diff line number Diff line change
@@ -1,30 +1 @@
use super::*;

#[test]
fn test_schema_update_after_projection_pd() -> PolarsResult<()> {
let df = df![
"a" => [1],
"b" => [1],
"c" => [1],
]?;

let q = df
.lazy()
.with_column(col("a").implode())
.explode([col("a")])
.select([cols(["a", "b"])]);

// run optimizations
let (node, lp_arena, _expr_arena) = q.to_alp_optimized()?;
// get the explode node
let input = lp_arena.get(node).get_inputs()[0];
// assert the schema has been corrected with the projection pushdown run
lp_arena.get(input);
let schema = lp_arena.get(input).schema(&lp_arena).into_owned();
let mut expected = Schema::new();
expected.with_column("a".into(), DataType::Int32);
expected.with_column("b".into(), DataType::Int32);
assert_eq!(schema.as_ref(), &expected);

Ok(())
}
8 changes: 8 additions & 0 deletions py-polars/tests/unit/lazyframe/optimizations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import polars as pl


def test_remove_double_sort() -> None:
assert (
pl.LazyFrame({"a": [1, 2, 3, 3]}).sort("a").sort("a").explain().count("SORT")
== 1
)
2 changes: 1 addition & 1 deletion py-polars/tests/unit/sql/test_table_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_explain_query(test_frame: pl.LazyFrame) -> None:
)
assert (
re.search(
pattern=r'SELECT.+?"x".+?"y".+?"z".+?FROM.+?PROJECT.+?COLUMNS',
pattern=r"PROJECT.+?COLUMNS",
string=plan,
flags=re.IGNORECASE,
)
Expand Down

0 comments on commit 835d198

Please sign in to comment.