From ad6852552cfd7c452e3d29dcf0937e45bea445f0 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Tue, 5 Nov 2024 17:52:43 +1100 Subject: [PATCH 1/4] c --- .../plans/optimizer/predicate_pushdown/mod.rs | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs index ff5f2f89ff0d..be9156b8c0d9 100644 --- a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs @@ -576,6 +576,28 @@ impl<'a> PredicatePushDown<'a> { expr_arena, )) }, + FunctionIR::Unnest { columns } => { + let exclude = columns.iter().collect::<PlHashSet<_>>(); + + let local_predicates = + transfer_to_local_by_name(expr_arena, &mut acc_predicates, |x| { + exclude.contains(x) + }); + + let lp = self.pushdown_and_continue( + lp, + acc_predicates, + lp_arena, + expr_arena, + false, + )?; + Ok(self.optional_apply_predicate( + lp, + local_predicates, + lp_arena, + expr_arena, + )) + }, _ => self.pushdown_and_continue( lp, acc_predicates, From 83f90a8d467fc488014444dcb78b55b822c6f39b Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Tue, 5 Nov 2024 18:00:51 +1100 Subject: [PATCH 2/4] c --- py-polars/tests/unit/test_predicates.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/py-polars/tests/unit/test_predicates.py b/py-polars/tests/unit/test_predicates.py index 49141fd282de..abd7baf82766 100644 --- a/py-polars/tests/unit/test_predicates.py +++ b/py-polars/tests/unit/test_predicates.py @@ -511,3 +511,17 @@ def test_predicate_push_down_list_gather_17492() -> None: .filter(pl.col("val").list.get(1, null_on_oob=True) == 1) .explain() ) + + +def test_predicate_pushdown_struct_unnest_19632() -> None: + lf = pl.LazyFrame({"a": [1]}).select(pl.col("a").value_counts()).unnest("a") + + q = lf.filter(pl.col("a") == 1) + plan = q.explain() + + assert plan.index("FILTER") < plan.index("UNNEST") + + 
assert_frame_equal( + q.collect(), + pl.DataFrame({"a": 1, "count": 1}, schema={"a": pl.Int64, "count": pl.UInt32}), + ) From 005f89e3f30c96aff02805d738e7f4db6c7d53bf Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Tue, 5 Nov 2024 18:04:52 +1100 Subject: [PATCH 3/4] c --- py-polars/tests/unit/test_predicates.py | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/py-polars/tests/unit/test_predicates.py b/py-polars/tests/unit/test_predicates.py index abd7baf82766..e752bacdf81d 100644 --- a/py-polars/tests/unit/test_predicates.py +++ b/py-polars/tests/unit/test_predicates.py @@ -514,6 +514,34 @@ def test_predicate_push_down_list_gather_17492() -> None: def test_predicate_pushdown_struct_unnest_19632() -> None: + lf = pl.LazyFrame({"a": [{"a": 1, "b": 2}]}).unnest("a") + + q = lf.filter(pl.col("a") == 1) + plan = q.explain() + + assert "FILTER" in plan + assert plan.index("FILTER") < plan.index("UNNEST") + + assert_frame_equal( + q.collect(), + pl.DataFrame({"a": 1, "b": 2}), + ) + + # With `pl.struct()` + lf = pl.LazyFrame({"a": 1, "b": 2}).select(pl.struct(pl.all())).unnest("a") + + q = lf.filter(pl.col("a") == 1) + plan = q.explain() + + assert "FILTER" in plan + assert plan.index("FILTER") < plan.index("UNNEST") + + assert_frame_equal( + q.collect(), + pl.DataFrame({"a": 1, "b": 2}), + ) + + # With `value_counts()` lf = pl.LazyFrame({"a": [1]}).select(pl.col("a").value_counts()).unnest("a") q = lf.filter(pl.col("a") == 1) From cd5655fad9e971d54012a1b60cd62d556c5a1af4 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Tue, 5 Nov 2024 18:14:23 +1100 Subject: [PATCH 4/4] c --- .../polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs index be9156b8c0d9..f9644b1aef0c 100644 --- a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs +++ 
b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs @@ -577,7 +577,7 @@ impl<'a> PredicatePushDown<'a> { )) }, FunctionIR::Unnest { columns } => { - let exclude = columns.iter().collect::<PlHashSet<_>>(); + let exclude = columns.iter().cloned().collect::<PlHashSet<_>>(); let local_predicates = transfer_to_local_by_name(expr_arena, &mut acc_predicates, |x| {