Drop support for pandas 1.X #8741

Merged · 1 commit · Jul 1, 2024
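This PR drops the pandas 1.x code paths from the shuffle test suite: the `PANDAS_GE_150` and `PANDAS_GE_200` gates imported from `dask.dataframe._compat` are removed, since with pandas >= 2.0 as the minimum supported version both flags are unconditionally true. For background, such gates are conventionally defined by comparing the installed version; a minimal sketch, assuming `packaging` is available (the exact spelling in `dask.dataframe._compat` may differ):

```python
from packaging.version import Version

import pandas as pd

PANDAS_VERSION = Version(pd.__version__)

# Once pandas >= 2.0 is the installation floor, both flags are always
# True, so every branch they guard can run unconditionally.
PANDAS_GE_150 = PANDAS_VERSION >= Version("1.5.0")
PANDAS_GE_200 = PANDAS_VERSION >= Version("2.0.0")
```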
distributed/shuffle/tests/test_merge.py (10 changes: 5 additions & 5 deletions)
@@ -17,7 +17,7 @@
 pd = pytest.importorskip("pandas")
 import dask
 import dask.dataframe as dd
-from dask.dataframe._compat import PANDAS_GE_200, tm
+from dask.dataframe._compat import tm
 from dask.dataframe.utils import assert_eq

 from distributed import get_client
@@ -293,7 +293,7 @@ async def test_merge_by_multiple_columns(c, s, a, b, how):
         # FIXME: There's a discrepancy with an empty index for
         # pandas=2.0 (xref https://github.com/dask/dask/issues/9957).
         # Temporarily avoid index check until the discrepancy is fixed.
-        check_index=not (PANDAS_GE_200 and expected.index.empty),
+        check_index=not expected.index.empty,
     )

     expected = pdr.join(pdl, how=how)
@@ -303,7 +303,7 @@ async def test_merge_by_multiple_columns(c, s, a, b, how):
         # FIXME: There's a discrepancy with an empty index for
         # pandas=2.0 (xref https://github.com/dask/dask/issues/9957).
         # Temporarily avoid index check until the discrepancy is fixed.
-        check_index=not (PANDAS_GE_200 and expected.index.empty),
+        check_index=not expected.index.empty,
     )

     expected = pd.merge(
@@ -323,7 +323,7 @@ async def test_merge_by_multiple_columns(c, s, a, b, how):
         # FIXME: There's a discrepancy with an empty index for
         # pandas=2.0 (xref https://github.com/dask/dask/issues/9957).
         # Temporarily avoid index check until the discrepancy is fixed.
-        check_index=not (PANDAS_GE_200 and expected.index.empty),
+        check_index=not expected.index.empty,
     )

     expected = pd.merge(
@@ -343,7 +343,7 @@ async def test_merge_by_multiple_columns(c, s, a, b, how):
         # FIXME: There's a discrepancy with an empty index for
         # pandas=2.0 (xref https://github.com/dask/dask/issues/9957).
         # Temporarily avoid index check until the discrepancy is fixed.
-        check_index=not (PANDAS_GE_200 and expected.index.empty),
+        check_index=not expected.index.empty,
     )

     # hash join
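The `check_index` edits are pure boolean simplification: with `PANDAS_GE_200` always true, `not (PANDAS_GE_200 and expected.index.empty)` reduces to `not expected.index.empty`, so the workaround for dask/dask#9957 is preserved while the dead version check disappears. A small self-contained check of the equivalence (illustrative only, not part of the PR):

```python
import pandas as pd

PANDAS_GE_200 = True  # guaranteed once pandas >= 2.0 is required

for expected in (pd.DataFrame({"a": []}), pd.DataFrame({"a": [1, 2]})):
    old_guard = not (PANDAS_GE_200 and expected.index.empty)
    new_guard = not expected.index.empty
    assert old_guard == new_guard  # identical for empty and non-empty indexes
```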
distributed/shuffle/tests/test_shuffle.py (69 changes: 32 additions & 37 deletions)
@@ -28,7 +28,6 @@
 pd = pytest.importorskip("pandas")

 import dask.dataframe as dd
-from dask.dataframe._compat import PANDAS_GE_150, PANDAS_GE_200
 from dask.typing import Key

 from distributed import (
@@ -1145,41 +1144,38 @@ def __init__(self, value: int) -> None:
         }
     )

-    if PANDAS_GE_150:
-        columns.update(
-            {
-                # PyArrow dtypes
-                f"col{next(counter)}": pd.array(
-                    [True, False] * 50, dtype="bool[pyarrow]"
-                ),
-                f"col{next(counter)}": pd.array(range(100), dtype="int8[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="int16[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="int32[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="int64[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="uint8[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="uint16[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="uint32[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="uint64[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="float32[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="float64[pyarrow]"),
-                f"col{next(counter)}": pd.array(
-                    [pd.Timestamp.fromtimestamp(1641034800 + i) for i in range(100)],
-                    dtype=pd.ArrowDtype(pa.timestamp("ms")),
-                ),
-                f"col{next(counter)}": pd.array(
-                    ["lorem ipsum"] * 100,
-                    dtype="string[pyarrow]",
-                ),
-                f"col{next(counter)}": pd.array(
-                    ["lorem ipsum"] * 100,
-                    dtype=pd.StringDtype("pyarrow"),
-                ),
-                f"col{next(counter)}": pd.array(
-                    ["lorem ipsum"] * 100,
-                    dtype="string[python]",
-                ),
-            }
-        )
+    columns.update(
+        {
+            # PyArrow dtypes
+            f"col{next(counter)}": pd.array([True, False] * 50, dtype="bool[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="int8[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="int16[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="int32[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="int64[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="uint8[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="uint16[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="uint32[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="uint64[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="float32[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="float64[pyarrow]"),
+            f"col{next(counter)}": pd.array(
+                [pd.Timestamp.fromtimestamp(1641034800 + i) for i in range(100)],
+                dtype=pd.ArrowDtype(pa.timestamp("ms")),
+            ),
+            f"col{next(counter)}": pd.array(
+                ["lorem ipsum"] * 100,
+                dtype="string[pyarrow]",
+            ),
+            f"col{next(counter)}": pd.array(
+                ["lorem ipsum"] * 100,
+                dtype=pd.StringDtype("pyarrow"),
+            ),
+            f"col{next(counter)}": pd.array(
+                ["lorem ipsum"] * 100,
+                dtype="string[python]",
+            ),
+        }
+    )

     df = pd.DataFrame(columns)
     df["_partitions"] = df.col4 % npartitions
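The de-indented `columns.update` block above was previously wrapped in `if PANDAS_GE_150:` because `pd.ArrowDtype` and the `"<type>[pyarrow]"` dtype aliases first appeared in pandas 1.5; with pandas >= 2.0 guaranteed, the PyArrow-backed columns can be built unconditionally (which also lets the one-line entries fit within the formatter's line length). A standalone illustration of the two spellings, assuming `pyarrow` is installed:

```python
import pandas as pd
import pyarrow as pa

# PyArrow-backed arrays via the string alias and via pd.ArrowDtype;
# both forms exist on any pandas >= 1.5, hence on pandas >= 2.0.
ints = pd.array(range(5), dtype="int64[pyarrow]")
stamps = pd.array(
    [pd.Timestamp.fromtimestamp(1641034800 + i) for i in range(5)],
    dtype=pd.ArrowDtype(pa.timestamp("ms")),
)
print(ints.dtype, stamps.dtype)  # int64[pyarrow] timestamp[ms][pyarrow]
```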
@@ -2399,7 +2395,6 @@ async def test_replace_stale_shuffle(c, s, a, b):
     await check_scheduler_cleanup(s)


-@pytest.mark.skipif(not PANDAS_GE_200, reason="requires pandas >=2.0")
 @gen_cluster(client=True)
 async def test_handle_null_partitions(c, s, a, b):
     data = [
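With the `skipif` marker removed, `test_handle_null_partitions` runs on every supported pandas, and the only remaining pandas guard in these modules is the `pytest.importorskip("pandas")` call already present at the top of each file, which skips the whole module when pandas is absent rather than version-gating individual tests. A minimal sketch of that pattern (hypothetical test body, real pytest API):

```python
import pytest

# Skips every test in this module when pandas cannot be imported;
# no per-test version markers are needed once pandas >= 2.0 is the floor.
pd = pytest.importorskip("pandas")


def test_sum():
    df = pd.DataFrame({"a": [1, 2, 3]})
    assert df["a"].sum() == 6
```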