From e6f4bc8bbcb1666cd123fe1aabf470075c9cf51c Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Thu, 19 Jan 2023 08:59:36 +0100 Subject: [PATCH] fix(rust, python): disallow alias in inline join expressions --- .../polars-plan/src/logical_plan/builder.rs | 13 +++++ polars/tests/it/io/json.rs | 3 + polars/tests/it/joins.rs | 8 +-- polars/tests/it/lazy/projection_queries.rs | 1 + py-polars/tests/unit/test_errors.py | 9 +++ py-polars/tests/unit/test_joins.py | 58 ------------------- 6 files changed, 30 insertions(+), 62 deletions(-) diff --git a/polars/polars-lazy/polars-plan/src/logical_plan/builder.rs b/polars/polars-lazy/polars-plan/src/logical_plan/builder.rs index 1c5cbf5a0669..727490ced21d 100644 --- a/polars/polars-lazy/polars-plan/src/logical_plan/builder.rs +++ b/polars/polars-lazy/polars-plan/src/logical_plan/builder.rs @@ -602,6 +602,19 @@ impl LogicalPlanBuilder { right_on: Vec, options: JoinOptions, ) -> Self { + for e in left_on.iter().chain(right_on.iter()) { + if has_expr(e, |e| matches!(e, Expr::Alias(_, _))) { + return LogicalPlan::Error { + input: Box::new(self.0), + err: PolarsError::ComputeError( + "'alias' is not allowed in a join key. Use 'with_columns' first.".into(), + ) + .into(), + } + .into(); + } + } + let schema_left = try_delayed!(self.0.schema(), &self.0, into); let schema_right = try_delayed!(other.schema(), &self.0, into); diff --git a/polars/tests/it/io/json.rs b/polars/tests/it/io/json.rs index 1acbbd42070e..c78ed8876cb0 100644 --- a/polars/tests/it/io/json.rs +++ b/polars/tests/it/io/json.rs @@ -129,6 +129,7 @@ fn read_ndjson_with_trailing_newline() { assert!(expected.frame_equal(&df)); } #[test] +#[cfg(feature = "dtype-struct")] fn test_read_ndjson_iss_5875() { let jsonlines = r#" {"struct": {"int_inner": [1, 2, 3], "float_inner": 5.0, "str_inner": ["a", "b", "c"]}} @@ -158,6 +159,7 @@ fn test_read_ndjson_iss_5875() { } #[test] +#[cfg(feature = "dtype-struct")] fn test_read_ndjson_iss_5875_part2() { let jsonlines = r#" {"struct": {"int_list_inner": [4, 5, 6]}} @@ -188,6 +190,7 @@ fn test_read_ndjson_iss_5875_part2() { assert_eq!(schema, df.unwrap().schema()); } #[test] +#[cfg(feature = "dtype-struct")] fn test_read_ndjson_iss_5875_part3() { let jsonlines = r#" {"key1":"value1", "key2": "value2", "key3": {"k1": 2, "k3": "value5", "k10": 5}} diff --git a/polars/tests/it/joins.rs b/polars/tests/it/joins.rs index ac8f647b76a0..ea286750123f 100644 --- a/polars/tests/it/joins.rs +++ b/polars/tests/it/joins.rs @@ -12,17 +12,17 @@ fn join_nans_outer() -> PolarsResult<()> { .lazy(); let a1 = df1 .clone() - .groupby(vec![col("w").alias("w"), col("t").alias("t")]) + .groupby(vec![col("w").alias("w"), col("t")]) .agg(vec![col("c").sum().alias("c_sum")]); let a2 = df1 - .groupby(vec![col("w").alias("w"), col("t").alias("t")]) + .groupby(vec![col("w").alias("w"), col("t")]) .agg(vec![col("c").max().alias("c_max")]); let res = a1 .join_builder() .with(a2) - .left_on(vec![col("w").alias("w"), col("t").alias("t")]) - .right_on(vec![col("w").alias("w"), col("t").alias("t")]) + .left_on(vec![col("w"), col("t")]) + .right_on(vec![col("w"), col("t")]) .how(JoinType::Outer) .finish() .collect()?; diff --git a/polars/tests/it/lazy/projection_queries.rs b/polars/tests/it/lazy/projection_queries.rs index dd233c356068..ccef259d42a5 100644 --- a/polars/tests/it/lazy/projection_queries.rs +++ b/polars/tests/it/lazy/projection_queries.rs @@ -153,6 +153,7 @@ fn test_projection_5086() -> PolarsResult<()> { } #[test] +#[cfg(feature = "dtype-struct")] fn test_unnest_pushdown() -> PolarsResult<()> { let df = df![ "collection" => Series::full_null("", 1, &DataType::Int32), diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py index 113a728cb06d..1852f9dffac6 100644 --- a/py-polars/tests/unit/test_errors.py +++ b/py-polars/tests/unit/test_errors.py @@ -334,3 +334,12 @@ def test_arr_eval_named_cols() -> None: pl.ComputeError, ): df.select(pl.col("B").arr.eval(pl.element().append(pl.col("A")))) + + +def test_alias_in_join_keys() -> None: + df = pl.DataFrame({"A": ["a", "b"], "B": [["a", "b"], ["c", "d"]]}) + with pytest.raises( + pl.ComputeError, + match=r"'alias' is not allowed in a join key. Use 'with_columns' first", + ): + df.join(df, on=pl.col("A").alias("foo")) diff --git a/py-polars/tests/unit/test_joins.py b/py-polars/tests/unit/test_joins.py index b6c4e225ffb4..88c46655f001 100644 --- a/py-polars/tests/unit/test_joins.py +++ b/py-polars/tests/unit/test_joins.py @@ -475,64 +475,6 @@ def test_join_chunks_alignment_4720() -> None: } -def test_join_inline_alias_4694() -> None: - df = pl.DataFrame( - [ - {"ts": datetime(2021, 2, 1, 9, 20), "a1": 1.04, "a2": 0.9}, - {"ts": datetime(2021, 2, 1, 9, 50), "a1": 1.04, "a2": 0.9}, - {"ts": datetime(2021, 2, 2, 10, 20), "a1": 1.04, "a2": 0.9}, - {"ts": datetime(2021, 2, 2, 11, 20), "a1": 1.08, "a2": 0.9}, - {"ts": datetime(2021, 2, 3, 11, 50), "a1": 1.08, "a2": 0.9}, - {"ts": datetime(2021, 2, 3, 13, 20), "a1": 1.16, "a2": 0.8}, - {"ts": datetime(2021, 2, 4, 13, 50), "a1": 1.18, "a2": 0.8}, - ] - ).lazy() - - join_against = pl.DataFrame( - [ - {"d": datetime(2021, 2, 3, 0, 0), "ets": datetime(2021, 2, 4, 0, 0)}, - {"d": datetime(2021, 2, 3, 0, 0), "ets": datetime(2021, 2, 5, 0, 0)}, - {"d": datetime(2021, 2, 3, 0, 0), "ets": datetime(2021, 2, 6, 0, 0)}, - ] - ).lazy() - - # this adds "dd" column to the lhs followed by a projection - # the projection optimizer must realize that this column is added inline and ensure - # it is not dropped. - assert df.join( - join_against, - left_on=pl.col("ts").dt.truncate("1d").alias("dd"), - right_on=pl.col("d"), - ).select(pl.all()).collect().to_dict(False) == { - "ts": [ - datetime(2021, 2, 3, 11, 50), - datetime(2021, 2, 3, 11, 50), - datetime(2021, 2, 3, 11, 50), - datetime(2021, 2, 3, 13, 20), - datetime(2021, 2, 3, 13, 20), - datetime(2021, 2, 3, 13, 20), - ], - "a1": [1.08, 1.08, 1.08, 1.16, 1.16, 1.16], - "a2": [0.9, 0.9, 0.9, 0.8, 0.8, 0.8], - "dd": [ - datetime(2021, 2, 3, 0, 0), - datetime(2021, 2, 3, 0, 0), - datetime(2021, 2, 3, 0, 0), - datetime(2021, 2, 3, 0, 0), - datetime(2021, 2, 3, 0, 0), - datetime(2021, 2, 3, 0, 0), - ], - "ets": [ - datetime(2021, 2, 4, 0, 0), - datetime(2021, 2, 5, 0, 0), - datetime(2021, 2, 6, 0, 0), - datetime(2021, 2, 4, 0, 0), - datetime(2021, 2, 5, 0, 0), - datetime(2021, 2, 6, 0, 0), - ], - } - - def test_sorted_flag_after_joins() -> None: np.random.seed(1) dfa = pl.DataFrame(