From 3a1181a864e0b4b3f6c715aee90ad92b25bef3c3 Mon Sep 17 00:00:00 2001
From: Hendrik Makait <hendrik@makait.com>
Date: Mon, 1 Jul 2024 09:48:00 -0400
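Subject: [PATCH] Drop support for pandas 1.X

Remove the PANDAS_GE_150 and PANDAS_GE_200 compatibility guards from the
shuffle tests. With pandas 1.X no longer supported, these flags are always
true, so the guarded code now runs unconditionally:

- test_merge.py: `check_index` is simplified to `not expected.index.empty`.
- test_shuffle.py: the PyArrow-dtype columns are always added, and
  test_handle_null_partitions is no longer skipped on older pandas.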
---
 distributed/shuffle/tests/test_merge.py   | 10 ++--
 distributed/shuffle/tests/test_shuffle.py | 69 +++++++++++------------
 2 files changed, 37 insertions(+), 42 deletions(-)

diff --git a/distributed/shuffle/tests/test_merge.py b/distributed/shuffle/tests/test_merge.py
index 42d1f443a0..8c196af3c0 100644
--- a/distributed/shuffle/tests/test_merge.py
+++ b/distributed/shuffle/tests/test_merge.py
@@ -17,7 +17,7 @@
 pd = pytest.importorskip("pandas")
 import dask
 import dask.dataframe as dd
-from dask.dataframe._compat import PANDAS_GE_200, tm
+from dask.dataframe._compat import tm
 from dask.dataframe.utils import assert_eq
 
 from distributed import get_client
@@ -293,7 +293,7 @@ async def test_merge_by_multiple_columns(c, s, a, b, how):
                     # FIXME: There's an discrepancy with an empty index for
                     # pandas=2.0 (xref https://github.com/dask/dask/issues/9957).
                     # Temporarily avoid index check until the discrepancy is fixed.
-                    check_index=not (PANDAS_GE_200 and expected.index.empty),
+                    check_index=not expected.index.empty,
                 )
 
                 expected = pdr.join(pdl, how=how)
@@ -303,7 +303,7 @@ async def test_merge_by_multiple_columns(c, s, a, b, how):
                     # FIXME: There's an discrepancy with an empty index for
                     # pandas=2.0 (xref https://github.com/dask/dask/issues/9957).
                     # Temporarily avoid index check until the discrepancy is fixed.
-                    check_index=not (PANDAS_GE_200 and expected.index.empty),
+                    check_index=not expected.index.empty,
                 )
 
                 expected = pd.merge(
@@ -323,7 +323,7 @@ async def test_merge_by_multiple_columns(c, s, a, b, how):
                     # FIXME: There's an discrepancy with an empty index for
                     # pandas=2.0 (xref https://github.com/dask/dask/issues/9957).
                     # Temporarily avoid index check until the discrepancy is fixed.
-                    check_index=not (PANDAS_GE_200 and expected.index.empty),
+                    check_index=not expected.index.empty,
                 )
 
                 expected = pd.merge(
@@ -343,7 +343,7 @@ async def test_merge_by_multiple_columns(c, s, a, b, how):
                     # FIXME: There's an discrepancy with an empty index for
                     # pandas=2.0 (xref https://github.com/dask/dask/issues/9957).
                     # Temporarily avoid index check until the discrepancy is fixed.
-                    check_index=not (PANDAS_GE_200 and expected.index.empty),
+                    check_index=not expected.index.empty,
                 )
 
                 # hash join
diff --git a/distributed/shuffle/tests/test_shuffle.py b/distributed/shuffle/tests/test_shuffle.py
index 3d94ef81be..24e199d1b6 100644
--- a/distributed/shuffle/tests/test_shuffle.py
+++ b/distributed/shuffle/tests/test_shuffle.py
@@ -28,7 +28,6 @@
 pd = pytest.importorskip("pandas")
 
 import dask.dataframe as dd
-from dask.dataframe._compat import PANDAS_GE_150, PANDAS_GE_200
 from dask.typing import Key
 
 from distributed import (
@@ -1145,41 +1144,38 @@ def __init__(self, value: int) -> None:
             }
         )
 
-    if PANDAS_GE_150:
-        columns.update(
-            {
-                # PyArrow dtypes
-                f"col{next(counter)}": pd.array(
-                    [True, False] * 50, dtype="bool[pyarrow]"
-                ),
-                f"col{next(counter)}": pd.array(range(100), dtype="int8[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="int16[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="int32[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="int64[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="uint8[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="uint16[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="uint32[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="uint64[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="float32[pyarrow]"),
-                f"col{next(counter)}": pd.array(range(100), dtype="float64[pyarrow]"),
-                f"col{next(counter)}": pd.array(
-                    [pd.Timestamp.fromtimestamp(1641034800 + i) for i in range(100)],
-                    dtype=pd.ArrowDtype(pa.timestamp("ms")),
-                ),
-                f"col{next(counter)}": pd.array(
-                    ["lorem ipsum"] * 100,
-                    dtype="string[pyarrow]",
-                ),
-                f"col{next(counter)}": pd.array(
-                    ["lorem ipsum"] * 100,
-                    dtype=pd.StringDtype("pyarrow"),
-                ),
-                f"col{next(counter)}": pd.array(
-                    ["lorem ipsum"] * 100,
-                    dtype="string[python]",
-                ),
-            }
-        )
+    columns.update(
+        {
+            # PyArrow dtypes
+            f"col{next(counter)}": pd.array([True, False] * 50, dtype="bool[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="int8[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="int16[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="int32[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="int64[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="uint8[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="uint16[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="uint32[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="uint64[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="float32[pyarrow]"),
+            f"col{next(counter)}": pd.array(range(100), dtype="float64[pyarrow]"),
+            f"col{next(counter)}": pd.array(
+                [pd.Timestamp.fromtimestamp(1641034800 + i) for i in range(100)],
+                dtype=pd.ArrowDtype(pa.timestamp("ms")),
+            ),
+            f"col{next(counter)}": pd.array(
+                ["lorem ipsum"] * 100,
+                dtype="string[pyarrow]",
+            ),
+            f"col{next(counter)}": pd.array(
+                ["lorem ipsum"] * 100,
+                dtype=pd.StringDtype("pyarrow"),
+            ),
+            f"col{next(counter)}": pd.array(
+                ["lorem ipsum"] * 100,
+                dtype="string[python]",
+            ),
+        }
+    )
 
     df = pd.DataFrame(columns)
     df["_partitions"] = df.col4 % npartitions
@@ -2399,7 +2395,6 @@ async def test_replace_stale_shuffle(c, s, a, b):
     await check_scheduler_cleanup(s)
 
 
-@pytest.mark.skipif(not PANDAS_GE_200, reason="requires pandas >=2.0")
 @gen_cluster(client=True)
 async def test_handle_null_partitions(c, s, a, b):
     data = [