Skip to content

Commit

Permalink
Drop support for pandas 1.X
Browse files Browse the repository at this point in the history
  • Loading branch information
hendrikmakait committed Jul 1, 2024
1 parent 147c505 commit 3a1181a
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 42 deletions.
10 changes: 5 additions & 5 deletions distributed/shuffle/tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
pd = pytest.importorskip("pandas")
import dask
import dask.dataframe as dd
from dask.dataframe._compat import PANDAS_GE_200, tm
from dask.dataframe._compat import tm
from dask.dataframe.utils import assert_eq

from distributed import get_client
Expand Down Expand Up @@ -293,7 +293,7 @@ async def test_merge_by_multiple_columns(c, s, a, b, how):
# FIXME: There's a discrepancy with an empty index for
# pandas=2.0 (xref https://github.com/dask/dask/issues/9957).
# Temporarily avoid index check until the discrepancy is fixed.
check_index=not (PANDAS_GE_200 and expected.index.empty),
check_index=not expected.index.empty,
)

expected = pdr.join(pdl, how=how)
Expand All @@ -303,7 +303,7 @@ async def test_merge_by_multiple_columns(c, s, a, b, how):
# FIXME: There's a discrepancy with an empty index for
# pandas=2.0 (xref https://github.com/dask/dask/issues/9957).
# Temporarily avoid index check until the discrepancy is fixed.
check_index=not (PANDAS_GE_200 and expected.index.empty),
check_index=not expected.index.empty,
)

expected = pd.merge(
Expand All @@ -323,7 +323,7 @@ async def test_merge_by_multiple_columns(c, s, a, b, how):
# FIXME: There's a discrepancy with an empty index for
# pandas=2.0 (xref https://github.com/dask/dask/issues/9957).
# Temporarily avoid index check until the discrepancy is fixed.
check_index=not (PANDAS_GE_200 and expected.index.empty),
check_index=not expected.index.empty,
)

expected = pd.merge(
Expand All @@ -343,7 +343,7 @@ async def test_merge_by_multiple_columns(c, s, a, b, how):
# FIXME: There's a discrepancy with an empty index for
# pandas=2.0 (xref https://github.com/dask/dask/issues/9957).
# Temporarily avoid index check until the discrepancy is fixed.
check_index=not (PANDAS_GE_200 and expected.index.empty),
check_index=not expected.index.empty,
)

# hash join
Expand Down
69 changes: 32 additions & 37 deletions distributed/shuffle/tests/test_shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
pd = pytest.importorskip("pandas")

import dask.dataframe as dd
from dask.dataframe._compat import PANDAS_GE_150, PANDAS_GE_200
from dask.typing import Key

from distributed import (
Expand Down Expand Up @@ -1145,41 +1144,38 @@ def __init__(self, value: int) -> None:
}
)

if PANDAS_GE_150:
columns.update(
{
# PyArrow dtypes
f"col{next(counter)}": pd.array(
[True, False] * 50, dtype="bool[pyarrow]"
),
f"col{next(counter)}": pd.array(range(100), dtype="int8[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="int16[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="int32[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="int64[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="uint8[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="uint16[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="uint32[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="uint64[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="float32[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="float64[pyarrow]"),
f"col{next(counter)}": pd.array(
[pd.Timestamp.fromtimestamp(1641034800 + i) for i in range(100)],
dtype=pd.ArrowDtype(pa.timestamp("ms")),
),
f"col{next(counter)}": pd.array(
["lorem ipsum"] * 100,
dtype="string[pyarrow]",
),
f"col{next(counter)}": pd.array(
["lorem ipsum"] * 100,
dtype=pd.StringDtype("pyarrow"),
),
f"col{next(counter)}": pd.array(
["lorem ipsum"] * 100,
dtype="string[python]",
),
}
)
columns.update(
{
# PyArrow dtypes
f"col{next(counter)}": pd.array([True, False] * 50, dtype="bool[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="int8[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="int16[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="int32[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="int64[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="uint8[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="uint16[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="uint32[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="uint64[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="float32[pyarrow]"),
f"col{next(counter)}": pd.array(range(100), dtype="float64[pyarrow]"),
f"col{next(counter)}": pd.array(
[pd.Timestamp.fromtimestamp(1641034800 + i) for i in range(100)],
dtype=pd.ArrowDtype(pa.timestamp("ms")),
),
f"col{next(counter)}": pd.array(
["lorem ipsum"] * 100,
dtype="string[pyarrow]",
),
f"col{next(counter)}": pd.array(
["lorem ipsum"] * 100,
dtype=pd.StringDtype("pyarrow"),
),
f"col{next(counter)}": pd.array(
["lorem ipsum"] * 100,
dtype="string[python]",
),
}
)

df = pd.DataFrame(columns)
df["_partitions"] = df.col4 % npartitions
Expand Down Expand Up @@ -2399,7 +2395,6 @@ async def test_replace_stale_shuffle(c, s, a, b):
await check_scheduler_cleanup(s)


@pytest.mark.skipif(not PANDAS_GE_200, reason="requires pandas >=2.0")
@gen_cluster(client=True)
async def test_handle_null_partitions(c, s, a, b):
data = [
Expand Down

0 comments on commit 3a1181a

Please sign in to comment.