From baa9e78721474898b7de4aa56f948b1b450aede7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Fri, 5 Apr 2024 17:28:12 +0200
Subject: [PATCH] Try adding df.optimize()

---
 .github/workflows/test.yaml       |  1 -
 datashader/core.py                | 29 ++++++-----------------------
 datashader/data_libraries/dask.py |  1 +
 datashader/tests/conftest.py      |  8 --------
 4 files changed, 7 insertions(+), 32 deletions(-)
 delete mode 100644 datashader/tests/conftest.py

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index cfe40fd1a..fedd4798a 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -34,7 +34,6 @@ env:
   VECLIB_MAXIMUM_THREADS: 1
   NUMEXPR_NUM_THREADS: 1
   PYDEVD_DISABLE_FILE_VALIDATION: 1
-  DASK_DATAFRAME__QUERY_PLANNING: false
 
 jobs:
   pre_commit:
diff --git a/datashader/core.py b/datashader/core.py
index 7302fa7af..aa7119614 100644
--- a/datashader/core.py
+++ b/datashader/core.py
@@ -3,36 +3,19 @@
 from numbers import Number
 from math import log10
 import warnings
-from importlib.util import find_spec
 
 import numpy as np
 import pandas as pd
-import dask
+import dask.dataframe as dd
 import dask.array as da
 from xarray import DataArray, Dataset
 
-query_planning = dask.config.get("dataframe.query-planning")
-dask.config.set({"dataframe.query-planning": False})
-
-import dask.dataframe as dd  # noqa: E402
-
 from .utils import Dispatcher, ngjit, calc_res, calc_bbox, orient_array, \
-    dshape_from_xarray_dataset  # noqa: E402
-from .utils import get_indices, dshape_from_pandas, dshape_from_dask  # noqa: E402
-from .utils import Expr  # noqa: E402, F401
-from .resampling import resample_2d, resample_2d_distributed  # noqa: E402
-from . import reductions as rd  # noqa: E402
-
-
-# Warn if query planning is enabled and installed
-dask_expr = find_spec("dask_expr")
-if query_planning or (query_planning is None and dask_expr):
-    msg = """\
-Dask query planning has been disabled, as it does not currently work with datashader.
-To remove this warning, you can disable it globally with:
- - Python: `dask.config.set({'dataframe.query-planning': False})`
- - Terminal: `dask config set dataframe.query-planning false`"""
-    warnings.warn(msg)
+    dshape_from_xarray_dataset
+from .utils import get_indices, dshape_from_pandas, dshape_from_dask
+from .utils import Expr # noqa (API import)
+from .resampling import resample_2d, resample_2d_distributed
+from . import reductions as rd
 
 try:
     import cudf
diff --git a/datashader/data_libraries/dask.py b/datashader/data_libraries/dask.py
index 994435750..eb666acb1 100644
--- a/datashader/data_libraries/dask.py
+++ b/datashader/data_libraries/dask.py
@@ -100,6 +100,7 @@ def func(partition: pd.DataFrame, cumulative_lens, partition_info=None):
 
     # Here be dragons
     # Get the dataframe graph
+    df = getattr(df, 'optimize', lambda: df)()  # Work with new dask_expr
     graph = df.__dask_graph__()
 
     # Guess a reasonable output dtype from combination of dataframe dtypes
diff --git a/datashader/tests/conftest.py b/datashader/tests/conftest.py
deleted file mode 100644
index b4c75f34f..000000000
--- a/datashader/tests/conftest.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import contextlib
-
-with contextlib.suppress(Exception):
-    # From Dask 2024.3.0 they now use `dask_expr` by default
-    # https://github.com/dask/dask/issues/10995
-    import dask
-
-    dask.config.set({"dataframe.query-planning": False})