Skip to content

Commit

Permalink
fix(rust, python): disallow alias in inline join expressions
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 19, 2023
1 parent 43b28b3 commit e6f4bc8
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 62 deletions.
13 changes: 13 additions & 0 deletions polars/polars-lazy/polars-plan/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,19 @@ impl LogicalPlanBuilder {
right_on: Vec<Expr>,
options: JoinOptions,
) -> Self {
for e in left_on.iter().chain(right_on.iter()) {
if has_expr(e, |e| matches!(e, Expr::Alias(_, _))) {
return LogicalPlan::Error {
input: Box::new(self.0),
err: PolarsError::ComputeError(
"'alias' is not allowed in a join key. Use 'with_columns' first.".into(),
)
.into(),
}
.into();
}
}

let schema_left = try_delayed!(self.0.schema(), &self.0, into);
let schema_right = try_delayed!(other.schema(), &self.0, into);

Expand Down
3 changes: 3 additions & 0 deletions polars/tests/it/io/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ fn read_ndjson_with_trailing_newline() {
assert!(expected.frame_equal(&df));
}
#[test]
#[cfg(feature = "dtype-struct")]
fn test_read_ndjson_iss_5875() {
let jsonlines = r#"
{"struct": {"int_inner": [1, 2, 3], "float_inner": 5.0, "str_inner": ["a", "b", "c"]}}
Expand Down Expand Up @@ -158,6 +159,7 @@ fn test_read_ndjson_iss_5875() {
}

#[test]
#[cfg(feature = "dtype-struct")]
fn test_read_ndjson_iss_5875_part2() {
let jsonlines = r#"
{"struct": {"int_list_inner": [4, 5, 6]}}
Expand Down Expand Up @@ -188,6 +190,7 @@ fn test_read_ndjson_iss_5875_part2() {
assert_eq!(schema, df.unwrap().schema());
}
#[test]
#[cfg(feature = "dtype-struct")]
fn test_read_ndjson_iss_5875_part3() {
let jsonlines = r#"
{"key1":"value1", "key2": "value2", "key3": {"k1": 2, "k3": "value5", "k10": 5}}
Expand Down
8 changes: 4 additions & 4 deletions polars/tests/it/joins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@ fn join_nans_outer() -> PolarsResult<()> {
.lazy();
let a1 = df1
.clone()
.groupby(vec![col("w").alias("w"), col("t").alias("t")])
.groupby(vec![col("w").alias("w"), col("t")])
.agg(vec![col("c").sum().alias("c_sum")]);
let a2 = df1
.groupby(vec![col("w").alias("w"), col("t").alias("t")])
.groupby(vec![col("w").alias("w"), col("t")])
.agg(vec![col("c").max().alias("c_max")]);

let res = a1
.join_builder()
.with(a2)
.left_on(vec![col("w").alias("w"), col("t").alias("t")])
.right_on(vec![col("w").alias("w"), col("t").alias("t")])
.left_on(vec![col("w"), col("t")])
.right_on(vec![col("w"), col("t")])
.how(JoinType::Outer)
.finish()
.collect()?;
Expand Down
1 change: 1 addition & 0 deletions polars/tests/it/lazy/projection_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ fn test_projection_5086() -> PolarsResult<()> {
}

#[test]
#[cfg(feature = "dtype-struct")]
fn test_unnest_pushdown() -> PolarsResult<()> {
let df = df![
"collection" => Series::full_null("", 1, &DataType::Int32),
Expand Down
9 changes: 9 additions & 0 deletions py-polars/tests/unit/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,12 @@ def test_arr_eval_named_cols() -> None:
pl.ComputeError,
):
df.select(pl.col("B").arr.eval(pl.element().append(pl.col("A"))))


def test_alias_in_join_keys() -> None:
    """An aliased expression passed as a join key must raise ``ComputeError``."""
    df = pl.DataFrame({"A": ["a", "b"], "B": [["a", "b"], ["c", "d"]]})
    # `match` is applied as a regular expression, so the period is escaped to
    # require the literal sentence break rather than any single character.
    with pytest.raises(
        pl.ComputeError,
        match=r"'alias' is not allowed in a join key\. Use 'with_columns' first",
    ):
        df.join(df, on=pl.col("A").alias("foo"))
58 changes: 0 additions & 58 deletions py-polars/tests/unit/test_joins.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,64 +475,6 @@ def test_join_chunks_alignment_4720() -> None:
}


def test_join_inline_alias_4694() -> None:
    """Join on an inline, aliased key expression and check the alias column is kept.

    The left key is ``ts`` truncated to one day and aliased to ``dd``; the
    right key is the plain ``d`` column. (The "4694" in the name presumably
    refers to an issue number.)
    """
    left_rows = [
        {"ts": datetime(2021, 2, 1, 9, 20), "a1": 1.04, "a2": 0.9},
        {"ts": datetime(2021, 2, 1, 9, 50), "a1": 1.04, "a2": 0.9},
        {"ts": datetime(2021, 2, 2, 10, 20), "a1": 1.04, "a2": 0.9},
        {"ts": datetime(2021, 2, 2, 11, 20), "a1": 1.08, "a2": 0.9},
        {"ts": datetime(2021, 2, 3, 11, 50), "a1": 1.08, "a2": 0.9},
        {"ts": datetime(2021, 2, 3, 13, 20), "a1": 1.16, "a2": 0.8},
        {"ts": datetime(2021, 2, 4, 13, 50), "a1": 1.18, "a2": 0.8},
    ]
    right_rows = [
        {"d": datetime(2021, 2, 3, 0, 0), "ets": datetime(2021, 2, 4, 0, 0)},
        {"d": datetime(2021, 2, 3, 0, 0), "ets": datetime(2021, 2, 5, 0, 0)},
        {"d": datetime(2021, 2, 3, 0, 0), "ets": datetime(2021, 2, 6, 0, 0)},
    ]
    df = pl.DataFrame(left_rows).lazy()
    join_against = pl.DataFrame(right_rows).lazy()

    # The join adds a "dd" column to the left-hand side and is followed by a
    # projection. The projection optimizer must realize that this column is
    # added inline and ensure it is not dropped.
    joined = df.join(
        join_against,
        left_on=pl.col("ts").dt.truncate("1d").alias("dd"),
        right_on=pl.col("d"),
    )
    result = joined.select(pl.all()).collect().to_dict(False)

    # Two left rows fall on 2021-02-03 and each matches all three right rows.
    expected = {
        "ts": [datetime(2021, 2, 3, 11, 50)] * 3 + [datetime(2021, 2, 3, 13, 20)] * 3,
        "a1": [1.08] * 3 + [1.16] * 3,
        "a2": [0.9] * 3 + [0.8] * 3,
        "dd": [datetime(2021, 2, 3, 0, 0)] * 6,
        "ets": [
            datetime(2021, 2, 4, 0, 0),
            datetime(2021, 2, 5, 0, 0),
            datetime(2021, 2, 6, 0, 0),
        ]
        * 2,
    }
    assert result == expected


def test_sorted_flag_after_joins() -> None:
np.random.seed(1)
dfa = pl.DataFrame(
Expand Down

0 comments on commit e6f4bc8

Please sign in to comment.