Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Optimizer; remove double SORT and redundant projections #15573

Merged
merged 2 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/polars-lazy/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ mod optimization_checks;
mod predicate_queries;
mod projection_queries;
mod queries;
mod schema;
#[cfg(feature = "streaming")]
mod streaming;
#[cfg(all(feature = "strings", feature = "cse"))]
Expand Down
38 changes: 38 additions & 0 deletions crates/polars-lazy/src/tests/schema.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
use super::*;

#[test]
fn test_schema_update_after_projection_pd() -> PolarsResult<()> {
let df = df![
"a" => [1],
"b" => [1],
"c" => [1],
]?;

let q = df
.lazy()
.with_column(col("a").implode())
.explode([col("a")])
.select([cols(["a", "b"])]);

// run optimizations
// Get the explode node
let (input, lp_arena, _expr_arena) = q.to_alp_optimized()?;

// assert the schema has been corrected with the projection pushdown run
let lp = lp_arena.get(input);
assert!(matches!(
lp,
IR::MapFunction {
function: FunctionNode::Explode { .. },
..
}
));

let schema = lp.schema(&lp_arena).into_owned();
let mut expected = Schema::new();
expected.with_column("a".into(), DataType::Int32);
expected.with_column("b".into(), DataType::Int32);
assert_eq!(schema.as_ref(), &expected);

Ok(())
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,13 @@ impl OptimizationRule for SimpleProjectionAndCollapse {
None
}
},
// If there are 2 subsequent fast projections, flatten them and only take the last
SimpleProjection {
columns,
input,
duplicate_check,
} if !self.eager => {
match lp_arena.get(*input) {
// If there are 2 subsequent fast projections, flatten them and only take the last
SimpleProjection {
input: prev_input, ..
} => Some(SimpleProjection {
Expand All @@ -90,7 +90,16 @@ impl OptimizationRule for SimpleProjectionAndCollapse {
None
}
},
_ => None,
// If a projection does nothing, remove it.
other => {
let input_schema = other.schema(lp_arena);
// This will fail fast if lengths are not equal
if *input_schema.as_ref() == *columns {
Some(other.clone())
} else {
None
}
},
}
},
// if there are 2 subsequent caches, flatten them and only take the inner
Expand All @@ -115,6 +124,19 @@ impl OptimizationRule for SimpleProjectionAndCollapse {
None
}
},
// Remove double sorts
Sort {
input,
by_column,
args,
} => match lp_arena.get(*input) {
Sort { input: inner, .. } => Some(Sort {
input: *inner,
by_column: by_column.clone(),
args: args.clone(),
}),
_ => None,
},
_ => None,
}
}
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-plan/src/logical_plan/optimizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod cache_states;
mod delay_rechunk;
mod drop_nulls;

mod collapse_and_project;
mod collect_members;
mod count_star;
#[cfg(feature = "cse")]
Expand All @@ -15,21 +16,20 @@ mod flatten_union;
mod fused;
mod predicate_pushdown;
mod projection_pushdown;
mod simple_projection;
mod simplify_expr;
mod simplify_functions;
mod slice_pushdown_expr;
mod slice_pushdown_lp;
mod stack_opt;
mod type_coercion;

use collapse_and_project::SimpleProjectionAndCollapse;
use delay_rechunk::DelayRechunk;
use drop_nulls::ReplaceDropNulls;
use polars_core::config::verbose;
use polars_io::predicates::PhysicalIoExpr;
pub use predicate_pushdown::PredicatePushDown;
pub use projection_pushdown::ProjectionPushDown;
use simple_projection::SimpleProjectionAndCollapse;
pub use simplify_expr::{SimplifyBooleanRule, SimplifyExprRule};
use slice_pushdown_lp::SlicePushDown;
pub use stack_opt::{OptimizationRule, StackOptimizer};
Expand Down
29 changes: 0 additions & 29 deletions crates/polars/tests/it/lazy/schema.rs
Original file line number Diff line number Diff line change
@@ -1,30 +1 @@
use super::*;

#[test]
fn test_schema_update_after_projection_pd() -> PolarsResult<()> {
let df = df![
"a" => [1],
"b" => [1],
"c" => [1],
]?;

let q = df
.lazy()
.with_column(col("a").implode())
.explode([col("a")])
.select([cols(["a", "b"])]);

// run optimizations
let (node, lp_arena, _expr_arena) = q.to_alp_optimized()?;
// get the explode node
let input = lp_arena.get(node).get_inputs()[0];
// assert the schema has been corrected with the projection pushdown run
lp_arena.get(input);
let schema = lp_arena.get(input).schema(&lp_arena).into_owned();
let mut expected = Schema::new();
expected.with_column("a".into(), DataType::Int32);
expected.with_column("b".into(), DataType::Int32);
assert_eq!(schema.as_ref(), &expected);

Ok(())
}
8 changes: 8 additions & 0 deletions py-polars/tests/unit/lazyframe/optimizations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import polars as pl


def test_remove_double_sort() -> None:
assert (
pl.LazyFrame({"a": [1, 2, 3, 3]}).sort("a").sort("a").explain().count("SORT")
== 1
)
2 changes: 1 addition & 1 deletion py-polars/tests/unit/sql/test_table_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_explain_query(test_frame: pl.LazyFrame) -> None:
)
assert (
re.search(
pattern=r'SELECT.+?"x".+?"y".+?"z".+?FROM.+?PROJECT.+?COLUMNS',
pattern=r"PROJECT.+?COLUMNS",
string=plan,
flags=re.IGNORECASE,
)
Expand Down
Loading