From ea5d519358d12cb6f9bb9bcdc0b45b5c091937d8 Mon Sep 17 00:00:00 2001
From: crusaderky
Date: Thu, 21 Sep 2023 13:52:13 +0100
Subject: [PATCH] XFAIL test_dataframe_cow_chain

---
 tests/benchmarks/test_dataframe.py |  6 +++++-
 tests/benchmarks/test_parquet.py   | 13 ++-----------
 tests/utils_test.py                |  8 ++++++++
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/tests/benchmarks/test_dataframe.py b/tests/benchmarks/test_dataframe.py
index 6c3d032708..bdc1cc9409 100644
--- a/tests/benchmarks/test_dataframe.py
+++ b/tests/benchmarks/test_dataframe.py
@@ -1,7 +1,8 @@
+import pytest
 from dask.sizeof import sizeof
 from dask.utils import format_bytes
 
-from ..utils_test import cluster_memory, timeseries_of_size, wait
+from ..utils_test import HAS_PYARROW12, cluster_memory, timeseries_of_size, wait
 
 
 def print_dataframe_info(df):
@@ -69,6 +70,9 @@ def test_filter(small_client):
     wait(result, small_client, 10 * 60)
 
 
+@pytest.mark.xfail(
+    HAS_PYARROW12, reason="https://github.com/coiled/benchmarks/issues/1002"
+)
 def test_dataframe_cow_chain(small_client):
     memory = cluster_memory(small_client)  # 76.66 GiB
diff --git a/tests/benchmarks/test_parquet.py b/tests/benchmarks/test_parquet.py
index 383078caea..cbf57f040a 100644
--- a/tests/benchmarks/test_parquet.py
+++ b/tests/benchmarks/test_parquet.py
@@ -12,17 +12,9 @@
 import pandas
 import pytest
 from coiled import Cluster
-from packaging.version import Version
 
 from ..conftest import dump_cluster_kwargs
-from ..utils_test import run_up_to_nthreads, wait
-
-try:
-    import pyarrow
-
-    HAS_PYARROW12 = Version(pyarrow.__version__) >= Version("12.0.0")
-except ImportError:
-    HAS_PYARROW12 = False
+from ..utils_test import HAS_PYARROW12, run_up_to_nthreads, wait
 
 
 @pytest.fixture(scope="module")
@@ -51,8 +43,7 @@ def parquet_client(parquet_cluster, cluster_kwargs, upload_cluster_dump, benchma
 
 
 @pytest.mark.xfail(
-    HAS_PYARROW12,
-    reason="50x slower than PyArrow 11; https://github.com/coiled/benchmarks/issues/998",
+    HAS_PYARROW12, reason="https://github.com/coiled/benchmarks/issues/998"
 )
 @run_up_to_nthreads("parquet_cluster", 100, reason="fixed dataset")
 def test_read_spark_generated_data(parquet_client):
diff --git a/tests/utils_test.py b/tests/utils_test.py
index 8dcd94a952..2397d8b2c8 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -12,6 +12,14 @@
 from dask.datasets import timeseries
 from dask.sizeof import sizeof
 from dask.utils import format_bytes, parse_bytes
+from packaging.version import Version
+
+try:
+    import pyarrow
+
+    HAS_PYARROW12 = Version(pyarrow.__version__) >= Version("12.0.0")
+except ImportError:
+    HAS_PYARROW12 = False
 
 
 def scaled_array_shape(