[SPARK-45166][PYTHON][FOLLOWUP] Delete unused `pyarrow_version_less_than_minimum` from `pyspark.sql.pandas.utils`

### What changes were proposed in this pull request?
Delete unused `pyarrow_version_less_than_minimum` from `pyspark.sql.pandas.utils`

### Why are the changes needed?
This method was only used to compare the installed PyArrow version against 2.0.0, a check that is no longer needed now that the minimum supported PyArrow version is 4.0.0.

### Does this PR introduce _any_ user-facing change?
No, dev-only

### How was this patch tested?
CI

### Was this patch authored or co-authored using generative AI tooling?
No

Closes #42948 from zhengruifeng/del_pyarrow_version_less_than_minimum.

Authored-by: Ruifeng Zheng <ruifengz@apache.org>
Signed-off-by: Ruifeng Zheng <ruifengz@apache.org>
zhengruifeng committed Sep 16, 2023
1 parent e35c748 commit 3536f33
Showing 3 changed files with 2 additions and 23 deletions.
13 changes: 0 additions & 13 deletions in `python/pyspark/sql/pandas/utils.py`

```diff
@@ -71,16 +71,3 @@ def require_minimum_pyarrow_version() -> None:
             "Arrow legacy IPC format is not supported in PySpark, "
             "please unset ARROW_PRE_0_15_IPC_FORMAT"
         )
-
-
-def pyarrow_version_less_than_minimum(minimum_pyarrow_version: str) -> bool:
-    """Return False if the installed pyarrow version is less than minimum_pyarrow_version
-    or if pyarrow is not installed."""
-    from distutils.version import LooseVersion
-
-    try:
-        import pyarrow
-    except ImportError:
-        return False
-
-    return LooseVersion(pyarrow.__version__) < LooseVersion(minimum_pyarrow_version)
```
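For context, the deleted helper compared versions with `distutils.version.LooseVersion`, which is deprecated (`distutils` was removed from the standard library by PEP 632). A minimal sketch of the same check using the maintained `packaging` library — an illustrative rewrite, not code from this PR:

```python
from packaging.version import Version


def pyarrow_version_less_than(minimum_pyarrow_version: str) -> bool:
    """True if an installed pyarrow is older than the given version;
    False when pyarrow is not installed at all (mirroring the removed
    helper, which left the missing-pyarrow case to separate flags)."""
    try:
        import pyarrow
    except ImportError:
        return False
    return Version(pyarrow.__version__) < Version(minimum_pyarrow_version)
```

`packaging.version.Version` implements PEP 440 ordering, so pre-release strings such as `4.0.0rc1` compare predictably, which `LooseVersion` did not guarantee.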
5 changes: 0 additions & 5 deletions in `python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py`

```diff
@@ -28,7 +28,6 @@
 from pyspark.rdd import PythonEvalType
 from pyspark.sql import Column
 from pyspark.sql.functions import array, col, expr, lit, sum, struct, udf, pandas_udf, PandasUDFType
-from pyspark.sql.pandas.utils import pyarrow_version_less_than_minimum
 from pyspark.sql.types import (
     IntegerType,
     ByteType,
@@ -215,10 +214,6 @@ def test_input_nested_arrays(self):
             Row(res="[array([1, 2, 3], dtype=int32) array([4, 5], dtype=int32)]"),
         )

-    @unittest.skipIf(
-        pyarrow_version_less_than_minimum("2.0.0"),
-        "Pyarrow version must be 2.0.0 or higher",
-    )
     def test_pandas_array_struct(self):
         # SPARK-38098: Support Array of Struct for Pandas UDFs and toPandas
         import numpy as np
```
7 changes: 2 additions & 5 deletions in `python/pyspark/sql/tests/test_dataframe.py`

```diff
@@ -28,7 +28,6 @@

 from pyspark.sql import SparkSession, Row, functions
 from pyspark.sql.functions import col, lit, count, sum, mean, struct
-from pyspark.sql.pandas.utils import pyarrow_version_less_than_minimum
 from pyspark.sql.types import (
     StringType,
     IntegerType,
@@ -1479,10 +1478,8 @@ def check_to_pandas_from_mixed_dataframe(self):
         self.assertTrue(np.all(pdf_with_only_nulls.dtypes == pdf_with_some_nulls.dtypes))

     @unittest.skipIf(
-        not have_pandas or not have_pyarrow or pyarrow_version_less_than_minimum("2.0.0"),
-        pandas_requirement_message
-        or pyarrow_requirement_message
-        or "Pyarrow version must be 2.0.0 or higher",
+        not have_pandas or not have_pyarrow,
+        pandas_requirement_message or pyarrow_requirement_message,
     )
     def test_to_pandas_for_array_of_struct(self):
         for is_arrow_enabled in [True, False]:
```
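The simplified skip condition follows the common pattern of computing availability flags once at import time and reusing them in `@unittest.skipIf` decorators. A self-contained sketch of that pattern — the names `have_pyarrow` and `pyarrow_requirement_message` mirror PySpark's test helpers but are redefined here for illustration:

```python
import unittest

# Availability flag computed once at import time, in the spirit of
# PySpark's have_pyarrow / pyarrow_requirement_message helpers
# (redefined here so the sketch stands alone).
try:
    import pyarrow  # noqa: F401

    have_pyarrow = True
    pyarrow_requirement_message = None
except ImportError:
    have_pyarrow = False
    pyarrow_requirement_message = "pyarrow must be installed"


class ToPandasSketchTests(unittest.TestCase):
    @unittest.skipIf(not have_pyarrow, pyarrow_requirement_message or "")
    def test_needs_pyarrow(self):
        # Runs only when pyarrow is importable; otherwise reported as skipped.
        self.assertTrue(have_pyarrow)
```

Gating on availability flags alone (rather than on a specific version) is what makes the extra `pyarrow_version_less_than_minimum("2.0.0")` clause redundant once the minimum version enforced elsewhere exceeds 2.0.0.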
