Skip to content

Commit

Permalink
fix(python): Address incorrect align_frames result when the alignment column contains NULL values (#18521)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie authored Sep 4, 2024
1 parent b89e772 commit 6d4b79d
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 3 deletions.
9 changes: 8 additions & 1 deletion py-polars/polars/functions/eager.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,14 @@ def join_func(
idx_y: tuple[int, LazyFrame],
) -> tuple[int, LazyFrame]:
(_, x), (y_idx, y) = idx_x, idx_y
return y_idx, x.join(y, how=how, on=align_on, suffix=f":{y_idx}", coalesce=True)
return y_idx, x.join(
y,
how=how,
on=align_on,
suffix=f":{y_idx}",
join_nulls=True,
coalesce=True,
)

joined = reduce(join_func, idx_frames)[1].sort(by=align_on, descending=descending)
if post_align_collect:
Expand Down
19 changes: 17 additions & 2 deletions py-polars/tests/unit/functions/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def test_align_frames() -> None:
assert_frame_equal(pl_dot, pl.from_pandas(pd_dot))
pd.testing.assert_frame_equal(pd_dot, pl_dot.to_pandas())

# (also: confirm alignment function works with lazyframes)
# confirm alignment function works with lazy frames
lf1, lf2 = pl.align_frames(
pl.from_pandas(pdf1.reset_index()).lazy(),
pl.from_pandas(pdf2.reset_index()).lazy(),
Expand All @@ -264,7 +264,7 @@ def test_align_frames() -> None:
assert_frame_equal(lf1.collect(), pf1)
assert_frame_equal(lf2.collect(), pf2)

# misc
# misc: no frames results in an empty list
assert pl.align_frames(on="date") == []

# expected error condition
Expand All @@ -275,6 +275,8 @@ def test_align_frames() -> None:
on="date",
)


def test_align_frames_misc() -> None:
# descending result
df1 = pl.DataFrame([[3, 5, 6], [5, 8, 9]], orient="row")
df2 = pl.DataFrame([[2, 5, 6], [3, 8, 9], [4, 2, 0]], orient="row")
Expand All @@ -290,6 +292,19 @@ def test_align_frames() -> None:
assert pf.rows() == [(5, None, None), (4, 2, 0), (3, 8, 9), (2, 5, 6)]


def test_align_frames_with_nulls() -> None:
    # both input frames carry a null in the alignment column ("key"); the
    # aligned outputs are expected to share a single null-keyed row
    left = pl.DataFrame({"key": ["x", "y", None], "value": [1, 2, 0]})
    right = pl.DataFrame({"key": ["x", None, "z", "y"], "value": [4, 3, 6, 5]})

    aligned_left, aligned_right = pl.align_frames(left, right, on="key")

    # the null key comes first in the expected output; "z" is absent from
    # `left`, so its aligned value there is null
    expected_keys = [None, "x", "y", "z"]
    expected = (
        {"key": expected_keys, "value": [0, 1, 2, None]},
        {"key": expected_keys, "value": [3, 4, 5, 6]},
    )
    actual = tuple(
        frame.to_dict(as_series=False) for frame in (aligned_left, aligned_right)
    )
    assert actual == expected


def test_align_frames_duplicate_key() -> None:
# setup some test frames with duplicate key/alignment values
df1 = pl.DataFrame({"x": ["a", "a", "a", "e"], "y": [1, 2, 4, 5]})
Expand Down

0 comments on commit 6d4b79d

Please sign in to comment.