From c6aaa772c512b3ebcab045d247c91b513154afc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Thu, 30 May 2024 18:06:14 +0200 Subject: [PATCH] Add gpu marker and test both Classic/dask-expr Dask DataFrames (#1341) --- .github/workflows/test.yaml | 3 +- datashader/data_libraries/dask.py | 12 +- datashader/tests/benchmarks/test_canvas.py | 7 +- datashader/tests/conftest.py | 13 +- datashader/tests/test_dask.py | 330 +++++++------------- datashader/tests/test_geopandas.py | 6 +- datashader/tests/test_pandas.py | 198 ++++-------- datashader/tests/test_polygons.py | 6 + datashader/tests/test_transfer_functions.py | 109 +++---- datashader/tests/test_xarray.py | 23 +- datashader/tests/utils.py | 29 ++ pyproject.toml | 2 +- 12 files changed, 293 insertions(+), 445 deletions(-) create mode 100644 datashader/tests/utils.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b0386dd43..783a2a339 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -34,7 +34,6 @@ env: VECLIB_MAXIMUM_THREADS: 1 NUMEXPR_NUM_THREADS: 1 PYDEVD_DISABLE_FILE_VALIDATION: 1 - DASK_DATAFRAME__QUERY_PLANNING: false jobs: pre_commit: @@ -174,6 +173,8 @@ jobs: env: NUMBA_DISABLE_JIT: 1 - name: doit test_examples + env: + DASK_DATAFRAME__QUERY_PLANNING: false run: | conda activate test-environment doit test_examples diff --git a/datashader/data_libraries/dask.py b/datashader/data_libraries/dask.py index dc0a9f125..12c4f39c0 100644 --- a/datashader/data_libraries/dask.py +++ b/datashader/data_libraries/dask.py @@ -1,5 +1,7 @@ from __future__ import annotations +from contextlib import suppress + import numpy as np import pandas as pd import dask @@ -30,7 +32,6 @@ def _dask_compat(df): return getattr(df, 'optimize', lambda: df)() -@bypixel.pipeline.register(dd.DataFrame) def dask_pipeline(df, schema, canvas, glyph, summary, *, antialias=False, cuda=False): dsk, name = glyph_dispatch(glyph, df, schema, canvas, summary, 
antialias=antialias, cuda=cuda) @@ -50,6 +51,15 @@ def dask_pipeline(df, schema, canvas, glyph, summary, *, antialias=False, cuda=F return scheduler(dsk, name) +# Classic Dask.DataFrame +bypixel.pipeline.register(dd.core.DataFrame)(dask_pipeline) + +with suppress(ImportError): + import dask_expr + + bypixel.pipeline.register(dask_expr.DataFrame)(dask_pipeline) + + def shape_bounds_st_and_axis(df, canvas, glyph): if not canvas.x_range or not canvas.y_range: x_extents, y_extents = glyph.compute_bounds_dask(df) diff --git a/datashader/tests/benchmarks/test_canvas.py b/datashader/tests/benchmarks/test_canvas.py index 0f34ea8dc..958e2c2f1 100644 --- a/datashader/tests/benchmarks/test_canvas.py +++ b/datashader/tests/benchmarks/test_canvas.py @@ -1,12 +1,9 @@ import pytest -import os import numpy as np import pandas as pd import datashader as ds -test_gpu = bool(int(os.getenv("DATASHADER_TEST_GPU", 0))) - @pytest.fixture def time_series(): @@ -33,7 +30,7 @@ def test_points(benchmark, time_series): benchmark(cvs.points, time_series, 'x', 'y') -@pytest.mark.skipif(not test_gpu, reason="DATASHADER_TEST_GPU not set") +@pytest.mark.gpu @pytest.mark.benchmark(group="canvas") def test_line_gpu(benchmark, time_series): from cudf import from_pandas @@ -42,7 +39,7 @@ def test_line_gpu(benchmark, time_series): benchmark(cvs.line, time_series, 'x', 'y') -@pytest.mark.skipif(not test_gpu, reason="DATASHADER_TEST_GPU not set") +@pytest.mark.gpu @pytest.mark.benchmark(group="canvas") def test_points_gpu(benchmark, time_series): from cudf import from_pandas diff --git a/datashader/tests/conftest.py b/datashader/tests/conftest.py index ed9933742..9ee4b4b43 100644 --- a/datashader/tests/conftest.py +++ b/datashader/tests/conftest.py @@ -1,8 +1,8 @@ -CUSTOM_MARKS = ("benchmark",) +CUSTOM_MARKS = {"benchmark", "gpu"} def pytest_addoption(parser): - for marker in CUSTOM_MARKS: + for marker in sorted(CUSTOM_MARKS): parser.addoption( f"--{marker}", action="store_true", @@ -12,20 +12,21 @@ def 
pytest_addoption(parser): def pytest_configure(config): - for marker in CUSTOM_MARKS: + for marker in sorted(CUSTOM_MARKS): config.addinivalue_line("markers", f"{marker}: {marker} test marker") def pytest_collection_modifyitems(config, items): skipped, selected = [], [] - markers = [m for m in CUSTOM_MARKS if config.getoption(f"--{m}")] + markers = {m for m in CUSTOM_MARKS if config.getoption(f"--{m}")} empty = not markers for item in items: - if empty and any(m in item.keywords for m in CUSTOM_MARKS): + item_marks = set(item.keywords) & CUSTOM_MARKS + if empty and item_marks: skipped.append(item) elif empty: selected.append(item) - elif not empty and any(m in item.keywords for m in markers): + elif not empty and item_marks == markers: selected.append(item) else: skipped.append(item) diff --git a/datashader/tests/test_dask.py b/datashader/tests/test_dask.py index dc4af5c5e..aff9fdd65 100644 --- a/datashader/tests/test_dask.py +++ b/datashader/tests/test_dask.py @@ -1,6 +1,5 @@ from __future__ import annotations -import os import dask.dataframe as dd import numpy as np @@ -15,6 +14,8 @@ import datashader.utils as du import pytest +from datashader.tests.utils import dask_switcher +from datashader.tests.test_pandas import _pandas try: import spatialpandas as sp @@ -28,32 +29,38 @@ config.set(scheduler='synchronous') -test_gpu = bool(int(os.getenv("DATASHADER_TEST_GPU", 0))) - -df_pd = pd.DataFrame({'x': np.array(([0.] * 10 + [1] * 10)), - 'y': np.array(([0.] * 5 + [1] * 5 + [0] * 5 + [1] * 5)), - 'log_x': np.array(([1.] * 10 + [10] * 10)), - 'log_y': np.array(([1.] * 5 + [10] * 5 + [1] * 5 + [10] * 5)), - 'i32': np.arange(20, dtype='i4'), - 'i64': np.arange(20, dtype='i8'), - 'f32': np.arange(20, dtype='f4'), - 'f64': np.arange(20, dtype='f8'), - 'reverse': np.arange(20, 0, -1), - 'plusminus': np.arange(20, dtype='f8')*([1, -1]*10), - 'empty_bin': np.array([0.] 
* 15 + [np.nan] * 5), - 'cat': ['a']*5 + ['b']*5 + ['c']*5 + ['d']*5, - 'cat2': ['a', 'b', 'c', 'd']*5, - 'cat_int': np.array([10]*5 + [11]*5 + [12]*5 + [13]*5)}) -df_pd.cat = df_pd.cat.astype('category') -df_pd.cat2 = df_pd.cat2.astype('category') -df_pd.at[2, 'f32'] = nan -df_pd.at[2, 'f64'] = nan -df_pd.at[6, 'reverse'] = nan -df_pd.at[2, 'plusminus'] = nan - -_ddf = dd.from_pandas(df_pd, npartitions=2) - -def dask_DataFrame(*args, **kwargs): + +@dask_switcher(query=False) +def _dask(): + return dd.from_pandas(_pandas(), npartitions=2) + +@dask_switcher(query=True) +def _dask_expr(): + return dd.from_pandas(_pandas(), npartitions=2) + +@dask_switcher(query=False, extras=["dask_cudf"]) +def _dask_cudf(): + import dask_cudf + _dask = dd.from_pandas(_pandas(), npartitions=2) + return dask_cudf.from_dask_dataframe(_dask) + +_backends = [ + pytest.param(_dask, id="dask"), + pytest.param(_dask_expr, id="dask-expr"), + pytest.param(_dask_cudf, marks=pytest.mark.gpu, id="dask-cudf"), +] + +@pytest.fixture(params=_backends) +def ddf(request): + return request.param() + + +@pytest.fixture(params=[1, 2, 4]) +def npartitions(request): + return request.param + +@dask_switcher(query=False) +def _dask_DataFrame(*args, **kwargs): if kwargs.pop("geo", False): df = sp.GeoDataFrame(*args, **kwargs) else: @@ -61,33 +68,36 @@ def dask_DataFrame(*args, **kwargs): return dd.from_pandas(df, npartitions=2) -try: - import cudf - import cupy - import dask_cudf - - if not test_gpu: - # GPU testing disabled even though cudf/cupy are available - raise ImportError +@dask_switcher(query=True) +def _dask_expr_DataFrame(*args, **kwargs): + if kwargs.pop("geo", False): + pytest.skip("dask-expr currently does not work with spatialpandas") + # df = sp.GeoDataFrame(*args, **kwargs) + else: + df = pd.DataFrame(*args, **kwargs) + return dd.from_pandas(df, npartitions=2) - cudf_ddf = dask_cudf.from_dask_dataframe(_ddf) - ddfs = [_ddf, cudf_ddf] - def dask_cudf_DataFrame(*args, **kwargs): - assert not 
kwargs.pop("geo", False) - cdf = cudf.DataFrame.from_pandas( - pd.DataFrame(*args, **kwargs), nan_as_null=False - ) - return dask_cudf.from_cudf(cdf, npartitions=2) +@dask_switcher(query=False, extras=["dask_cudf"]) +def _dask_cudf_DataFrame(*args, **kwargs): + import cudf + import dask_cudf + if kwargs.pop("geo", False): + pytest.skip("dask-cudf currently does not work with spatialpandas") + cdf = cudf.DataFrame.from_pandas( + pd.DataFrame(*args, **kwargs), nan_as_null=False + ) + return dask_cudf.from_cudf(cdf, npartitions=2) - DataFrames = [dask_DataFrame, dask_cudf_DataFrame] -except ImportError: - cudf = cupy = dask_cudf = None - cudf_ddf = None - ddfs = [_ddf] - DataFrames = [dask_DataFrame] - dask_cudf_DataFrame = None +_backends = [ + pytest.param(_dask_DataFrame, id="dask"), + pytest.param(_dask_expr_DataFrame, id="dask-expr"), + pytest.param(_dask_cudf_DataFrame, marks=pytest.mark.gpu, id="dask-cudf"), +] +@pytest.fixture(params=_backends) +def DataFrame(request): + return request.param c = ds.Canvas(plot_width=2, plot_height=2, x_range=(0, 1), y_range=(0, 1)) c_logx = ds.Canvas(plot_width=2, plot_height=2, x_range=(1, 10), @@ -114,15 +124,21 @@ def floats(n): n = n + np.spacing(n) -def test_gpu_dependencies(): - if test_gpu and cudf is None: - pytest.fail( - "cudf, cupy, and/or dask_cudf not available and DATASHADER_TEST_GPU=1" - ) +@pytest.mark.gpu +def test_check_query_setting(): + import os + from subprocess import check_output + + # dask-cudf does not support query planning as of 24.04. + # So we check that it is not set outside of Python. 
+ assert os.environ.get('DASK_DATAFRAME__QUERY_PLANNING', 'false').lower() != 'true' + + # This also have problem with the global setting so we check + cmd = ['dask', 'config', 'get', 'dataframe.query-planning'] + output = check_output(cmd, text=True).strip().lower() + assert output != 'true' -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_count(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -137,8 +153,6 @@ def test_count(ddf, npartitions): assert_eq_xr(c.points(ddf, 'x', 'y', ds.count('f64')), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_any(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -152,26 +166,22 @@ def test_any(ddf, npartitions): assert_eq_xr(c.points(ddf, 'x', 'y', ds.any('empty_bin')), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_sum(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions out = xr.DataArray( - values(df_pd.i32).reshape((2, 2, 5)).sum(axis=2, dtype='f8').T, + values(_pandas().i32).reshape((2, 2, 5)).sum(axis=2, dtype='f8').T, coords=coords, dims=dims) assert_eq_xr(c.points(ddf, 'x', 'y', ds.sum('i32')), out) assert_eq_xr(c.points(ddf, 'x', 'y', ds.sum('i64')), out) out = xr.DataArray( - np.nansum(values(df_pd.f64).reshape((2, 2, 5)), axis=2).T, + np.nansum(values(_pandas().f64).reshape((2, 2, 5)), axis=2).T, coords=coords, dims=dims) assert_eq_xr(c.points(ddf, 'x', 'y', ds.sum('f32')), out) assert_eq_xr(c.points(ddf, 'x', 'y', ds.sum('f64')), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_first(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -182,8 +192,6 @@ def test_first(ddf, 
npartitions): assert_eq_xr(c.points(ddf, 'x', 'y', ds.first('f64')), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_last(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -194,13 +202,11 @@ def test_last(ddf, npartitions): assert_eq_xr(c.points(ddf, 'x', 'y', ds.last('f64')), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_min(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions out = xr.DataArray( - values(df_pd.i64).reshape((2, 2, 5)).min(axis=2).astype('f8').T, + values(_pandas().i64).reshape((2, 2, 5)).min(axis=2).astype('f8').T, coords=coords, dims=dims) assert_eq_xr(c.points(ddf, 'x', 'y', ds.min('i32')), out) assert_eq_xr(c.points(ddf, 'x', 'y', ds.min('i64')), out) @@ -208,13 +214,11 @@ def test_min(ddf, npartitions): assert_eq_xr(c.points(ddf, 'x', 'y', ds.min('f64')), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_max(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions out = xr.DataArray( - values(df_pd.i64).reshape((2, 2, 5)).max(axis=2).astype('f8').T, + values(_pandas().i64).reshape((2, 2, 5)).max(axis=2).astype('f8').T, coords=coords, dims=dims) assert_eq_xr(c.points(ddf, 'x', 'y', ds.max('i32')), out) assert_eq_xr(c.points(ddf, 'x', 'y', ds.max('i64')), out) @@ -222,8 +226,6 @@ def test_max(ddf, npartitions): assert_eq_xr(c.points(ddf, 'x', 'y', ds.max('f64')), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_min_row_index(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -231,8 +233,6 @@ def test_min_row_index(ddf, npartitions): assert_eq_xr(c.points(ddf, 'x', 'y', ds._min_row_index()), out) 
-@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_max_row_index(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -240,8 +240,6 @@ def test_max_row_index(ddf, npartitions): assert_eq_xr(c.points(ddf, 'x', 'y', ds._max_row_index()), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_min_n(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -255,8 +253,6 @@ def test_min_n(ddf, npartitions): assert_eq_ndarray(agg[:, :, 0].data, c.points(ddf, 'x', 'y', ds.min('plusminus')).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_max_n(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -270,8 +266,6 @@ def test_max_n(ddf, npartitions): assert_eq_ndarray(agg[:, :, 0].data, c.points(ddf, 'x', 'y', ds.max('plusminus')).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_min_n_row_index(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -285,8 +279,6 @@ def test_min_n_row_index(ddf, npartitions): assert_eq_ndarray(agg[:, :, 0].data, c.points(ddf, 'x', 'y', ds._min_row_index()).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_max_n_row_index(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -300,8 +292,6 @@ def test_max_n_row_index(ddf, npartitions): assert_eq_ndarray(agg[:, :, 0].data, c.points(ddf, 'x', 'y', ds._max_row_index()).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_first_n(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert 
ddf.npartitions == npartitions @@ -316,8 +306,6 @@ def test_first_n(ddf, npartitions): c.points(ddf, 'x', 'y', ds.first('plusminus')).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_last_n(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -331,8 +319,6 @@ def test_last_n(ddf, npartitions): assert_eq_ndarray(agg[:, :, 0].data, c.points(ddf, 'x', 'y', ds.last('plusminus')).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_count(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -344,8 +330,6 @@ def test_categorical_count(ddf, npartitions): assert_eq_ndarray(dataset["name"].data, sol) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_min(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -358,8 +342,6 @@ def test_categorical_min(ddf, npartitions): assert_eq_ndarray(c.points(ddf, 'x', 'y', ds.by('cat2', ds.min('f64'))).data, sol_float) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_max(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -372,8 +354,6 @@ def test_categorical_max(ddf, npartitions): assert_eq_ndarray(c.points(ddf, 'x', 'y', ds.by('cat2', ds.max('f64'))).data, sol_float) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_min_n(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -390,8 +370,6 @@ def test_categorical_min_n(ddf, npartitions): c.points(ddf, 'x', 'y', ds.by('cat2', ds.min('f32'))).data) -@pytest.mark.parametrize('ddf', ddfs) 
-@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_max_n(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -408,8 +386,6 @@ def test_categorical_max_n(ddf, npartitions): c.points(ddf, 'x', 'y', ds.by('cat2', ds.max('f32'))).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_min_row_index(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -418,8 +394,6 @@ def test_categorical_min_row_index(ddf, npartitions): assert_eq_ndarray(agg.data, solution) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_max_row_index(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -428,8 +402,6 @@ def test_categorical_max_row_index(ddf, npartitions): assert_eq_ndarray(agg.data, solution) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_min_n_row_index(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -446,8 +418,6 @@ def test_categorical_min_n_row_index(ddf, npartitions): c.points(ddf, 'x', 'y', ds.by('cat2', ds._min_row_index())).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_max_n_row_index(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -464,8 +434,6 @@ def test_categorical_max_n_row_index(ddf, npartitions): c.points(ddf, 'x', 'y', ds.by('cat2', ds._max_row_index())).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_first(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -478,8 +446,6 
@@ def test_categorical_first(ddf, npartitions): assert_eq_ndarray(agg.data, solution) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_last(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -492,8 +458,6 @@ def test_categorical_last(ddf, npartitions): assert_eq_ndarray(agg.data, solution) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_first_n(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -510,8 +474,6 @@ def test_categorical_first_n(ddf, npartitions): c.points(ddf, 'x', 'y', ds.by('cat2', ds.first("plusminus"))).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_last_n(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -528,8 +490,6 @@ def test_categorical_last_n(ddf, npartitions): c.points(ddf, 'x', 'y', ds.by('cat2', ds.last("plusminus"))).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_where_max(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. @@ -549,8 +509,6 @@ def test_where_max(ddf, npartitions): assert_eq_xr(c.points(ddf, 'x', 'y', ds.where(ds.max('f32'))), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_where_min(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. 
@@ -570,8 +528,6 @@ def test_where_min(ddf, npartitions): assert_eq_xr(c.points(ddf, 'x', 'y', ds.where(ds.min('f32'))), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_where_max_n(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. @@ -603,8 +559,6 @@ def test_where_max_n(ddf, npartitions): 'reverse')).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_where_min_n(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. @@ -636,8 +590,6 @@ def test_where_min_n(ddf, npartitions): 'reverse')).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_where_first(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -657,8 +609,6 @@ def test_where_first(ddf, npartitions): assert_eq_xr(c.points(ddf, 'x', 'y', ds.where(ds.first('f32'))), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_where_last(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -678,8 +628,6 @@ def test_where_last(ddf, npartitions): assert_eq_xr(c.points(ddf, 'x', 'y', ds.where(ds.last('f32'))), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_where_first_n(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. 
@@ -711,8 +659,6 @@ def test_where_first_n(ddf, npartitions): 'reverse')).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_where_last_n(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. @@ -743,9 +689,6 @@ def test_where_last_n(ddf, npartitions): c.points(ddf, 'x', 'y', ds.where(ds.last('plusminus'), 'reverse')).data) - -@pytest.mark.parametrize('ddf', [_ddf]) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_summary_by(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -780,8 +723,6 @@ def test_summary_by(ddf, npartitions): assert_eq_xr(agg_summary["by2"], agg_by2) -@pytest.mark.parametrize('ddf', [_ddf]) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_summary_where_n(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. 
@@ -818,59 +759,51 @@ def test_summary_where_n(ddf, npartitions): assert_eq_ndarray(agg['max_n'].data, sol_max_n_reverse) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_mean(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions out = xr.DataArray( - values(df_pd.i32).reshape((2, 2, 5)).mean(axis=2, dtype='f8').T, + values(_pandas().i32).reshape((2, 2, 5)).mean(axis=2, dtype='f8').T, coords=coords, dims=dims) assert_eq_xr(c.points(ddf, 'x', 'y', ds.mean('i32')), out) assert_eq_xr(c.points(ddf, 'x', 'y', ds.mean('i64')), out) out = xr.DataArray( - np.nanmean(values(df_pd.f64).reshape((2, 2, 5)), axis=2).T, + np.nanmean(values(_pandas().f64).reshape((2, 2, 5)), axis=2).T, coords=coords, dims=dims) assert_eq_xr(c.points(ddf, 'x', 'y', ds.mean('f32')), out) assert_eq_xr(c.points(ddf, 'x', 'y', ds.mean('f64')), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_var(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions out = xr.DataArray( - values(df_pd.i32).reshape((2, 2, 5)).var(axis=2, dtype='f8').T, + values(_pandas().i32).reshape((2, 2, 5)).var(axis=2, dtype='f8').T, coords=coords, dims=dims) assert_eq_xr(c.points(ddf, 'x', 'y', ds.var('i32')), out) assert_eq_xr(c.points(ddf, 'x', 'y', ds.var('i64')), out) out = xr.DataArray( - np.nanvar(values(df_pd.f64).reshape((2, 2, 5)), axis=2).T, + np.nanvar(values(_pandas().f64).reshape((2, 2, 5)), axis=2).T, coords=coords, dims=dims) assert_eq_xr(c.points(ddf, 'x', 'y', ds.var('f32')), out) assert_eq_xr(c.points(ddf, 'x', 'y', ds.var('f64')), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_std(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions out = xr.DataArray( - values(df_pd.i32).reshape((2, 2, 
5)).std(axis=2, dtype='f8').T, + values(_pandas().i32).reshape((2, 2, 5)).std(axis=2, dtype='f8').T, coords=coords, dims=dims) assert_eq_xr(c.points(ddf, 'x', 'y', ds.std('i32')), out) assert_eq_xr(c.points(ddf, 'x', 'y', ds.std('i64')), out) out = xr.DataArray( - np.nanstd(values(df_pd.f64).reshape((2, 2, 5)), axis=2).T, + np.nanstd(values(_pandas().f64).reshape((2, 2, 5)), axis=2).T, coords=coords, dims=dims) assert_eq_xr(c.points(ddf, 'x', 'y', ds.std('f32')), out) assert_eq_xr(c.points(ddf, 'x', 'y', ds.std('f64')), out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_count_cat(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -927,8 +860,6 @@ def test_count_cat(ddf, npartitions): assert_eq_ndarray(agg.y_range, (0, 1), close=True) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_sum(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -970,10 +901,8 @@ def test_categorical_sum(ddf, npartitions): assert_eq_xr(agg, out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) -def test_categorical_sum_binning(ddf, npartitions): - if cudf and isinstance(ddf._meta, cudf.DataFrame): +def test_categorical_sum_binning(ddf, npartitions, request): + if "cudf" in request.node.name: pytest.skip( "The categorical binning of 'sum' reduction is yet supported on the GPU" ) @@ -997,8 +926,6 @@ def test_categorical_sum_binning(ddf, npartitions): assert_eq_ndarray(agg.y_range, (0, 1), close=True) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_mean(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -1029,10 +956,8 @@ def test_categorical_mean(ddf, npartitions): assert_eq_xr(agg, out) 
-@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) -def test_categorical_mean_binning(ddf, npartitions): - if cudf and isinstance(ddf._meta, cudf.DataFrame): +def test_categorical_mean_binning(ddf, npartitions, request): + if "cudf" in request.node.name: pytest.skip( "The categorical binning of 'mean' reduction is yet supported on the GPU" ) @@ -1054,8 +979,6 @@ def test_categorical_mean_binning(ddf, npartitions): assert_eq_xr(agg, out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_var(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -1096,8 +1019,6 @@ def test_categorical_var(ddf, npartitions): assert_eq_xr(agg, out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_std(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -1140,8 +1061,6 @@ def test_categorical_std(ddf, npartitions): assert_eq_xr(agg, out) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_multiple_aggregates(ddf, npartitions): ddf = ddf.repartition(npartitions=npartitions) assert ddf.npartitions == npartitions @@ -1153,16 +1072,15 @@ def test_multiple_aggregates(ddf, npartitions): def f(x): return xr.DataArray(x, coords=coords, dims=dims) - assert_eq_xr(agg.f64_std, f(np.nanstd(values(df_pd.f64).reshape((2, 2, 5)), axis=2).T)) - assert_eq_xr(agg.f64_mean, f(np.nanmean(values(df_pd.f64).reshape((2, 2, 5)), axis=2).T)) - assert_eq_xr(agg.i32_sum, f(values(df_pd.i32).reshape((2, 2, 5)).sum(axis=2, dtype='f8').T)) + assert_eq_xr(agg.f64_std, f(np.nanstd(values(_pandas().f64).reshape((2, 2, 5)), axis=2).T)) + assert_eq_xr(agg.f64_mean, f(np.nanmean(values(_pandas().f64).reshape((2, 2, 5)), axis=2).T)) + assert_eq_xr(agg.i32_sum, f(values(_pandas().i32).reshape((2, 2, 
5)).sum(axis=2, dtype='f8').T)) assert_eq_xr(agg.i32_count, f(np.array([[5, 5], [5, 5]], dtype='i4'))) assert_eq_ndarray(agg.x_range, (0, 1), close=True) assert_eq_ndarray(agg.y_range, (0, 1), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) def test_auto_range_points(DataFrame): n = 10 data = np.arange(n, dtype='i4') @@ -1216,7 +1134,6 @@ def test_auto_range_points(DataFrame): assert_eq_ndarray(agg.y_range, (0, 3), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) def test_uniform_points(DataFrame): n = 101 ddf = DataFrame({'time': np.ones(2*n, dtype='i4'), @@ -1232,7 +1149,6 @@ def test_uniform_points(DataFrame): assert_eq_ndarray(agg.y_range, (0, 1), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('high', [9, 10, 99, 100]) @pytest.mark.parametrize('low', [0]) def test_uniform_diagonal_points(DataFrame, low, high): @@ -1257,7 +1173,6 @@ def test_uniform_diagonal_points(DataFrame, low, high): assert_eq_ndarray(agg.y_range, (low, high), close=True) -@pytest.mark.parametrize('ddf', ddfs) def test_log_axis_points(ddf): axis = ds.core.LogAxis() logcoords = axis.compute_index(axis.compute_scale_and_translate((1, 10), 2), 2) @@ -1278,6 +1193,7 @@ def test_log_axis_points(ddf): @pytest.mark.skipif(not sp, reason="spatialpandas not installed") +@dask_switcher(query=False, extras=["spatialpandas.dask"]) def test_points_geometry(): axis = ds.core.LinearAxis() lincoords = axis.compute_index(axis.compute_scale_and_translate((0., 2.), 3), 3) @@ -1298,7 +1214,7 @@ def test_points_geometry(): assert_eq_xr(agg, out) -@pytest.mark.parametrize('DataFrame', DataFrames) +@dask_switcher(query=False, extras=["spatialpandas.dask"]) def test_line(DataFrame): axis = ds.core.LinearAxis() lincoords = axis.compute_index(axis.compute_scale_and_translate((-3., 3.), 7), 7) @@ -1379,10 +1295,11 @@ def test_line(DataFrame): [-4, 0, 0, 4, 4, 0, 4, 0, 0, 0, -4, 0]] }, dtype='Line[int64]'), dict(geometry='geom')) ) 
-@pytest.mark.parametrize('DataFrame', DataFrames[:1]) + +@dask_switcher(query=False, extras=["spatialpandas.dask"]) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', line_manual_range_params[5:7]) -def test_line_manual_range(DataFrame, df_kwargs, cvs_kwargs): - if DataFrame is dask_cudf_DataFrame: +def test_line_manual_range(DataFrame, df_kwargs, cvs_kwargs, request): + if "cudf" in request.node.name: dtype = df_kwargs.get('dtype', '') if dtype.startswith('Ragged') or dtype.startswith('Line'): pytest.skip("Ragged array not supported with cudf") @@ -1491,10 +1408,11 @@ def test_line_manual_range(DataFrame, df_kwargs, cvs_kwargs): [0, -4, 4, 0, 0, 4]] }, dtype='Line[int64]'), dict(geometry='geom')) ) -@pytest.mark.parametrize('DataFrame', DataFrames) + +@dask_switcher(query=False, extras=["spatialpandas.dask"]) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', line_autorange_params) -def test_line_autorange(DataFrame, df_kwargs, cvs_kwargs): - if DataFrame is dask_cudf_DataFrame: +def test_line_autorange(DataFrame, df_kwargs, cvs_kwargs, request): + if "cudf" in request.node.name: dtype = df_kwargs.get('dtype', '') if dtype.startswith('Ragged') or dtype.startswith('Line'): pytest.skip("Ragged array not supported with cudf") @@ -1552,7 +1470,6 @@ def test_line_autorange(DataFrame, df_kwargs, cvs_kwargs): assert_eq_ndarray(agg.y_range, (-4, 4), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) def test_line_x_constant_autorange(DataFrame): # axis1 y constant x = np.array([-4, 0, 4]) @@ -1588,7 +1505,6 @@ def test_line_x_constant_autorange(DataFrame): assert_eq_xr(agg, out) -@pytest.mark.parametrize('ddf', ddfs) def test_log_axis_line(ddf): axis = ds.core.LogAxis() logcoords = axis.compute_index(axis.compute_scale_and_translate((1, 10), 2), 2) @@ -1609,7 +1525,6 @@ def test_log_axis_line(ddf): assert_eq_xr(c_logxy.line(ddf, 'log_x', 'log_y', ds.count('i32')), out) -@pytest.mark.parametrize('DataFrame', DataFrames) def test_auto_range_line(DataFrame): axis = 
ds.core.LinearAxis() lincoords = axis.compute_index(axis.compute_scale_and_translate((-10., 10.), 5), 5) @@ -1631,7 +1546,6 @@ def test_auto_range_line(DataFrame): assert_eq_ndarray(agg.y_range, (-10, 10), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ @@ -1664,7 +1578,7 @@ def test_auto_range_line(DataFrame): }, dtype='Ragged[float32]'), dict(x='x', y='y', axis=1)) ]) def test_area_to_zero_fixedrange(DataFrame, df_kwargs, cvs_kwargs): - if DataFrame is dask_cudf_DataFrame: + if DataFrame is _dask_cudf_DataFrame: if df_kwargs.get('dtype', '').startswith('Ragged'): pytest.skip("Ragged array not supported with cudf") @@ -1708,7 +1622,6 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, cvs_kwargs): assert_eq_ndarray(agg.y_range, (-2.25, 2.25), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ @@ -1757,7 +1670,7 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, cvs_kwargs): }, dtype='Ragged[float32]'), dict(x='x', y='y', axis=1)) ]) def test_area_to_zero_autorange(DataFrame, df_kwargs, cvs_kwargs): - if DataFrame is dask_cudf_DataFrame: + if DataFrame is _dask_cudf_DataFrame: if df_kwargs.get('dtype', '').startswith('Ragged'): pytest.skip("Ragged array not supported with cudf") @@ -1802,7 +1715,6 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, cvs_kwargs): assert_eq_ndarray(agg.y_range, (-4, 0), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ @@ -1835,7 +1747,7 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, cvs_kwargs): }, dtype='Ragged[float32]'), dict(x='x', y='y', axis=1)) ]) def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, cvs_kwargs): - if DataFrame is dask_cudf_DataFrame: + if DataFrame is _dask_cudf_DataFrame: if 
df_kwargs.get('dtype', '').startswith('Ragged'): pytest.skip("Ragged array not supported with cudf") @@ -1879,7 +1791,6 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, cvs_kwargs): assert_eq_xr(agg, out) -@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ @@ -1939,7 +1850,7 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, cvs_kwargs): }, dtype='Ragged[float32]'), dict(x='x', y='y', y_stack='y_stack', axis=1)) ]) def test_area_to_line_autorange(DataFrame, df_kwargs, cvs_kwargs): - if DataFrame is dask_cudf_DataFrame: + if DataFrame is _dask_cudf_DataFrame: if df_kwargs.get('dtype', '').startswith('Ragged'): pytest.skip("Ragged array not supported with cudf") @@ -1984,7 +1895,6 @@ def test_area_to_line_autorange(DataFrame, df_kwargs, cvs_kwargs): assert_eq_ndarray(agg.y_range, (-4, 0), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ @@ -2027,7 +1937,7 @@ def test_area_to_line_autorange(DataFrame, df_kwargs, cvs_kwargs): }, dtype='Ragged[float32]'), dict(x='x', y='y', y_stack='y_stack', axis=1)) ]) def test_area_to_line_autorange_gap(DataFrame, df_kwargs, cvs_kwargs): - if DataFrame is dask_cudf_DataFrame: + if DataFrame is _dask_cudf_DataFrame: if df_kwargs.get('dtype', '').startswith('Ragged'): pytest.skip("Ragged array not supported with cudf") @@ -2151,7 +2061,6 @@ def test_trimesh_dask_partitions(npartitions): assert_eq_ndarray(agg.y_range, (0, 5), close=True) -@pytest.mark.parametrize('ddf', ddfs) @pytest.mark.parametrize('reduction,dtype,aa_dtype', [ (ds.any(), bool, np.float32), (ds.count(), np.uint32, np.float32), @@ -2159,8 +2068,8 @@ def test_trimesh_dask_partitions(npartitions): (ds.min("f64"), np.float64, np.float64), (ds.sum("f64"), np.float64, np.float64), ]) -def test_combine_dtype(ddf, reduction, dtype, aa_dtype): - if dask_cudf and 
isinstance(ddf, dask_cudf.DataFrame): +def test_combine_dtype(ddf, reduction, dtype, aa_dtype, request): + if "cudf" in request.node.name: pytest.skip("antialiased lines not supported with cudf") cvs = ds.Canvas(plot_width=10, plot_height=10) @@ -2174,7 +2083,6 @@ def test_combine_dtype(ddf, reduction, dtype, aa_dtype): assert agg.dtype == aa_dtype -@pytest.mark.parametrize('ddf', ddfs) @pytest.mark.parametrize('canvas', [ ds.Canvas(x_axis_type='log'), ds.Canvas(x_axis_type='log', x_range=(0, 1)), @@ -2360,7 +2268,6 @@ def test_canvas_size(): cvs.points(ddf, "x", "y", ds.mean("z")) -@pytest.mark.parametrize('ddf', ddfs) @pytest.mark.parametrize('npartitions', [1, 2, 3]) def test_dataframe_dtypes(ddf, npartitions): # Issue #1235. @@ -2370,12 +2277,9 @@ def test_dataframe_dtypes(ddf, npartitions): ds.Canvas(2, 2).points(ddf, 'x', 'y', ds.count()) -@pytest.mark.parametrize('on_gpu', [False, True]) +@pytest.mark.parametrize('on_gpu', [False, pytest.param(True, marks=pytest.mark.gpu)]) def test_dask_categorical_counts(on_gpu): # Issue 1202 - if on_gpu and not test_gpu: - pytest.skip('gpu tests not enabled') - df = pd.DataFrame( data=dict( x = [0, 1, 2, 0, 1, 2, 1, 1, 1, 1, 1, 1], @@ -2403,8 +2307,6 @@ def test_dask_categorical_counts(on_gpu): assert all(sum_cat.values == [2, 7, 3]) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_where_max(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. @@ -2425,8 +2327,6 @@ def test_categorical_where_max(ddf, npartitions): assert_eq_xr(agg, sol_reverse) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_where_min(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. 
@@ -2447,8 +2347,6 @@ def test_categorical_where_min(ddf, npartitions): assert_eq_xr(agg, sol_reverse) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_where_first(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. @@ -2469,8 +2367,6 @@ def test_categorical_where_first(ddf, npartitions): assert_eq_xr(agg, sol_reverse) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_where_last(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. @@ -2491,8 +2387,6 @@ def test_categorical_where_last(ddf, npartitions): assert_eq_xr(agg, sol_reverse) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_where_max_n(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. @@ -2529,8 +2423,6 @@ def test_categorical_where_max_n(ddf, npartitions): 'reverse'))).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_where_min_n(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. @@ -2567,8 +2459,6 @@ def test_categorical_where_min_n(ddf, npartitions): 'reverse'))).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_where_first_n(ddf, npartitions): # Important to test with npartitions > 2 to have multiple combination stages. # Identical results to equivalent pandas test. 
@@ -2605,8 +2495,6 @@ def test_categorical_where_first_n(ddf, npartitions): 'reverse'))).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_categorical_where_last_n(ddf, npartitions): sol_rowindex = xr.DataArray( [[[[4, 0, -1], [1, -1, -1], [-1, -1, -1], [3, -1, -1]], @@ -2637,8 +2525,6 @@ def test_categorical_where_last_n(ddf, npartitions): c.points(ddf, 'x', 'y', ds.by('cat2', ds.where(ds.last('plusminus'), 'reverse'))).data) -@pytest.mark.parametrize('ddf', ddfs) -@pytest.mark.parametrize('npartitions', [1, 2, 3, 4]) def test_series_reset_index(ddf, npartitions): # Test for: https://github.com/holoviz/datashader/issues/1331 ser = ddf['i32'].reset_index() diff --git a/datashader/tests/test_geopandas.py b/datashader/tests/test_geopandas.py index f1f4c7df8..6a9126b85 100644 --- a/datashader/tests/test_geopandas.py +++ b/datashader/tests/test_geopandas.py @@ -1,12 +1,16 @@ # Testing GeoPandas and SpatialPandas - import dask.dataframe as dd import datashader as ds from datashader.tests.test_pandas import assert_eq_ndarray import numpy as np from numpy import nan import pytest +from datashader.tests.utils import dask_switcher + +@pytest.fixture(autouse=True) +def _classic_dd(): + with dask_switcher(query=False, extras=["spatialpandas.dask", "dask_geopandas"]): ... try: import dask_geopandas diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py index 2133e5cdd..967750d5a 100644 --- a/datashader/tests/test_pandas.py +++ b/datashader/tests/test_pandas.py @@ -1,5 +1,4 @@ from __future__ import annotations -import os from numpy import nan import numpy as np @@ -12,7 +11,30 @@ from datashader.datatypes import RaggedDtype -df_pd = pd.DataFrame({'x': np.array(([0.] 
* 10 + [1] * 10)), +try: + import spatialpandas as sp + from spatialpandas.geometry import LineDtype +except ImportError: + LineDtype = None + sp = None + +try: + import cudf + import cupy +except ImportError: + cupy, cudf = None, None + +def _pandas(): + """ + x 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 + y 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 1 1 1 1 1 + i32 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 + f32 0 1 nan 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 + reverse 20 19 18 17 16 15 nan 13 12 11 10 9 8 7 6 5 4 3 2 1 + plusminus 0 -1 nan -3 4 -5 6 -7 8 -9 10 -11 12 -13 14 -15 16 -17 18 -19 + cat2 a b c d a b c d a b c d a b c d a b c d + """ + df_pd = pd.DataFrame({'x': np.array(([0.] * 10 + [1] * 10)), 'y': np.array(([0.] * 5 + [1] * 5 + [0] * 5 + [1] * 5)), 'log_x': np.array(([1.] * 10 + [10] * 10)), 'log_y': np.array(([1.] * 5 + [10] * 5 + [1] * 5 + [10] * 5)), @@ -27,60 +49,53 @@ 'cat2': ['a', 'b', 'c', 'd']*5, 'onecat': ['one']*20, 'cat_int': np.array([10]*5 + [11]*5 + [12]*5 + [13]*5)}) -df_pd.cat = df_pd.cat.astype('category') -df_pd.cat2 = df_pd.cat2.astype('category') -df_pd.onecat = df_pd.onecat.astype('category') -df_pd.at[2, 'f32'] = nan -df_pd.at[2, 'f64'] = nan -df_pd.at[6, 'reverse'] = nan -df_pd.at[2, 'plusminus'] = nan -# x 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 -# y 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 1 1 1 1 1 -# i32 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 -# f32 0 1 nan 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 -# reverse 20 19 18 17 16 15 nan 13 12 11 10 9 8 7 6 5 4 3 2 1 -# plusminus 0 -1 nan -3 4 -5 6 -7 8 -9 10 -11 12 -13 14 -15 16 -17 18 -19 -# cat2 a b c d a b c d a b c d a b c d a b c d - -test_gpu = bool(int(os.getenv("DATASHADER_TEST_GPU", 0))) + df_pd.cat = df_pd.cat.astype('category') + df_pd.cat2 = df_pd.cat2.astype('category') + df_pd.onecat = df_pd.onecat.astype('category') + df_pd.at[2, 'f32'] = nan + df_pd.at[2, 'f64'] = nan + df_pd.at[6, 'reverse'] = nan + df_pd.at[2, 'plusminus'] = nan + return df_pd -try: - import 
spatialpandas as sp - from spatialpandas.geometry import LineDtype -except ImportError: - LineDtype = None - sp = None +def _cudf(): + return cudf.DataFrame.from_pandas(_pandas()) -def pd_DataFrame(*args, **kwargs): +_backends = [ + pytest.param(_pandas, id="pandas"), + pytest.param(_cudf, marks=pytest.mark.gpu, id="cudf"), +] + +@pytest.fixture(params=_backends) +def df(request): + return request.param() + + +def _pandas_DataFrame(*args, **kwargs): if kwargs.pop("geo", False): return sp.GeoDataFrame(*args, **kwargs) else: return pd.DataFrame(*args, **kwargs) -try: +def _cudf_DataFrame(*args, **kwargs): import cudf - import cupy - - if not test_gpu: - # GPU testing disabled even though cudf/cupy are available - raise ImportError + if kwargs.pop("geo", False): + pytest.skip("cudf currently does not work with spatialpandas") + return cudf.DataFrame.from_pandas( + pd.DataFrame(*args, **kwargs), nan_as_null=False + ) - def cudf_DataFrame(*args, **kwargs): - assert not kwargs.pop("geo", False) - return cudf.DataFrame.from_pandas( - pd.DataFrame(*args, **kwargs), nan_as_null=False - ) - df_cuda = cudf_DataFrame(df_pd) - dfs = [df_pd, df_cuda] - DataFrames = [pd_DataFrame, cudf_DataFrame] -except ImportError: - cudf = cupy = None - dfs = [df_pd] - DataFrames = [pd_DataFrame] +_backends = [ + pytest.param(_pandas_DataFrame, id="pandas"), + pytest.param(_cudf_DataFrame, marks=pytest.mark.gpu, id="cudf"), +] +@pytest.fixture(params=_backends) +def DataFrame(request): + return request.param c = ds.Canvas(plot_width=2, plot_height=2, x_range=(0, 1), y_range=(0, 1)) c_logx = ds.Canvas(plot_width=2, plot_height=2, x_range=(1, 10), @@ -162,12 +177,7 @@ def values(s): return s.values -def test_gpu_dependencies(): - if test_gpu and cudf is None: - pytest.fail("cudf and/or cupy not available and DATASHADER_TEST_GPU=1") - - -@pytest.mark.skipif(not test_gpu, reason="DATASHADER_TEST_GPU not set") +@pytest.mark.gpu def test_cudf_concat(): # Testing if a newer version of cuDF implements 
the possibility to # concatenate multiple columns with the same name. @@ -181,7 +191,6 @@ def test_cudf_concat(): cudf.concat((dfc["y"], dfc["y"]), axis=1) -@pytest.mark.parametrize('df', dfs) def test_count(df): out = xr.DataArray(np.array([[5, 5], [5, 5]], dtype='i4'), coords=coords, dims=dims) @@ -194,7 +203,6 @@ def test_count(df): assert_eq_xr(c.points(df, 'x', 'y', ds.count('f64')), out) -@pytest.mark.parametrize('df', dfs) def test_any(df): out = xr.DataArray(np.array([[True, True], [True, True]]), coords=coords, dims=dims) @@ -206,7 +214,6 @@ def test_any(df): assert_eq_xr(c.points(df, 'x', 'y', ds.any('empty_bin')), out) -@pytest.mark.parametrize('df', dfs) def test_sum(df): out = xr.DataArray(values(df.i32).reshape((2, 2, 5)).sum(axis=2, dtype='f8').T, coords=coords, dims=dims) @@ -218,7 +225,6 @@ def test_sum(df): assert_eq_xr(c.points(df, 'x', 'y', ds.sum('f64')), out) -@pytest.mark.parametrize('df', dfs) def test_min(df): out = xr.DataArray(values(df.i64).reshape((2, 2, 5)).min(axis=2).astype('f8').T, coords=coords, dims=dims) @@ -228,7 +234,6 @@ def test_min(df): assert_eq_xr(c.points(df, 'x', 'y', ds.min('f64')), out) -@pytest.mark.parametrize('df', dfs) def test_max(df): out = xr.DataArray(values(df.i64).reshape((2, 2, 5)).max(axis=2).astype('f8').T, coords=coords, dims=dims) @@ -238,7 +243,6 @@ def test_max(df): assert_eq_xr(c.points(df, 'x', 'y', ds.max('f64')), out) -@pytest.mark.parametrize('df', dfs) def test_min_n(df): solution = np.array([[[-3, -1, 0, 4, nan, nan], [-13, -11, 10, 12, 14, nan]], [[-9, -7, -5, 6, 8, nan], [-19, -17, -15, 16, 18, nan]]]) @@ -250,7 +254,6 @@ def test_min_n(df): assert_eq_ndarray(agg[:, :, 0].data, c.points(df, 'x', 'y', ds.min('plusminus')).data) -@pytest.mark.parametrize('df', dfs) def test_max_n(df): solution = np.array([[[4, 0, -1, -3, nan, nan], [14, 12, 10, -11, -13, nan]], [[8, 6, -5, -7, -9, nan], [18, 16, -15, -17, -19, nan]]]) @@ -262,7 +265,6 @@ def test_max_n(df): assert_eq_ndarray(agg[:, :, 0].data, 
c.points(df, 'x', 'y', ds.max('plusminus')).data) -@pytest.mark.parametrize('df', dfs) def test_categorical_min(df): sol_int = np.array([[[0, 1, 2, 3], [12, 13, 10, 11]], [[8, 5, 6, 7], [16, 17, 18, 15]]], dtype=np.float64) @@ -273,7 +275,6 @@ def test_categorical_min(df): assert_eq_ndarray(c.points(df, 'x', 'y', ds.by('cat2', ds.min('f64'))).data, sol_float) -@pytest.mark.parametrize('df', dfs) def test_categorical_max(df): sol_int = np.array([[[4, 1, 2, 3], [12, 13, 14, 11]], [[8, 9, 6, 7], [16, 17, 18, 19]]], dtype=np.float64) @@ -284,7 +285,6 @@ def test_categorical_max(df): assert_eq_ndarray(c.points(df, 'x', 'y', ds.by('cat2', ds.max('f64'))).data, sol_float) -@pytest.mark.parametrize('df', dfs) def test_categorical_min_n(df): solution = np.array([[[[0, 4, nan], [1, nan, nan], [nan, nan, nan], [3, nan, nan]], [[12, nan, nan], [13, nan, nan], [10, 14, nan], [11, nan, nan]]], @@ -299,7 +299,6 @@ def test_categorical_min_n(df): c.points(df, 'x', 'y', ds.by('cat2', ds.min('f32'))).data) -@pytest.mark.parametrize('df', dfs) def test_categorical_max_n(df): solution = np.array([[[[4, 0, nan], [1, nan, nan], [nan, nan, nan], [3, nan, nan]], [[12, nan, nan], [13, nan, nan], [14, 10, nan], [11, nan, nan]]], @@ -314,21 +313,18 @@ def test_categorical_max_n(df): c.points(df, 'x', 'y', ds.by('cat2', ds.max('f32'))).data) -@pytest.mark.parametrize('df', dfs) def test_categorical_min_row_index(df): solution = np.array([[[0, 1, 2, 3], [12, 13, 10, 11]], [[8, 5, 6, 7], [16, 17, 18, 15]]]) agg = c.points(df, 'x', 'y', ds.by('cat2', ds._min_row_index())) assert_eq_ndarray(agg.data, solution) -@pytest.mark.parametrize('df', dfs) def test_categorical_max_row_index(df): solution = np.array([[[4, 1, 2, 3], [12, 13, 14, 11]], [[8, 9, 6, 7], [16, 17, 18, 19]]]) agg = c.points(df, 'x', 'y', ds.by('cat2', ds._max_row_index())) assert_eq_ndarray(agg.data, solution) -@pytest.mark.parametrize('df', dfs) def test_categorical_min_n_row_index(df): solution = np.array([[[[0, 4, -1], [1, -1, 
-1], [2, -1, -1], [3, -1, -1]], [[12, -1, -1], [13, -1, -1], [10, 14, -1], [11, -1, -1]]], @@ -343,7 +339,6 @@ def test_categorical_min_n_row_index(df): c.points(df, 'x', 'y', ds.by('cat2', ds._min_row_index())).data) -@pytest.mark.parametrize('df', dfs) def test_categorical_max_n_row_index(df): solution = np.array([[[[4, 0, -1], [1, -1, -1], [2, -1, -1], [3, -1, -1]], [[12, -1, -1], [13, -1, -1], [14, 10, -1], [11, -1, -1]]], @@ -358,7 +353,6 @@ def test_categorical_max_n_row_index(df): c.points(df, 'x', 'y', ds.by('cat2', ds._max_row_index())).data) -@pytest.mark.parametrize('df', dfs) def test_categorical_first(df): solution = np.array([[[0, -1, nan, -3], [12, -13, 10, -11]], @@ -369,7 +363,6 @@ def test_categorical_first(df): assert_eq_ndarray(agg.data, solution) -@pytest.mark.parametrize('df', dfs) def test_categorical_last(df): solution = np.array([[[4, -1, nan, -3], [12, -13, 14, -11]], @@ -380,7 +373,6 @@ def test_categorical_last(df): assert_eq_ndarray(agg.data, solution) -@pytest.mark.parametrize('df', dfs) def test_categorical_first_n(df): solution = np.array([[[[0, 4, nan], [-1, nan, nan], [nan, nan, nan], [-3, nan, nan]], [[12, nan, nan], [-13, nan, nan], [10, 14, nan], [-11, nan, nan]]], @@ -395,7 +387,6 @@ def test_categorical_first_n(df): c.points(df, 'x', 'y', ds.by('cat2', ds.first("plusminus"))).data) -@pytest.mark.parametrize('df', dfs) def test_categorical_last_n(df): solution = np.array([[[[4, 0, nan], [-1, nan, nan], [nan, nan, nan], [-3, nan, nan]], [[12, nan, nan], [-13, nan, nan], [14, 10, nan], [-11, nan, nan]]], @@ -410,19 +401,16 @@ def test_categorical_last_n(df): c.points(df, 'x', 'y', ds.by('cat2', ds.last("plusminus"))).data) -@pytest.mark.parametrize('df', dfs) def test_where_min_row_index(df): out = xr.DataArray([[0, 10], [-5, -15]], coords=coords, dims=dims) assert_eq_xr(c.points(df, 'x', 'y', ds.where(ds._min_row_index(), 'plusminus')), out) -@pytest.mark.parametrize('df', dfs) def test_where_max_row_index(df): out = 
xr.DataArray([[4, 14], [-9, -19]], coords=coords, dims=dims) assert_eq_xr(c.points(df, 'x', 'y', ds.where(ds._max_row_index(), 'plusminus')), out) -@pytest.mark.parametrize('df', dfs) def test_where_min_n_row_index(df): sol = np.array([[[ 0, -1, nan, -3, 4, nan], [ 10, -11, 12, -13, 14, nan]], @@ -440,7 +428,6 @@ def test_where_min_n_row_index(df): ds.where(ds._min_row_index(), 'plusminus')).data) -@pytest.mark.parametrize('df', dfs) def test_where_max_n_row_index(df): sol = np.array([[[ 4, -3, nan, -1, 0, nan], [ 14, -13, 12, -11, 10, nan]], @@ -458,7 +445,6 @@ def test_where_max_n_row_index(df): ds.where(ds._max_row_index(), 'plusminus')).data) -@pytest.mark.parametrize('df', dfs) def test_where_first(df): # Note reductions like ds.where(ds.first('i32'), 'reverse') are supported, # but the same results can be achieved using the simpler ds.first('reverse') @@ -476,7 +462,6 @@ def test_where_first(df): assert_eq_xr(c.points(df, 'x', 'y', ds.where(ds.first('f32'))), out) -@pytest.mark.parametrize('df', dfs) def test_where_last(df): # Note reductions like ds.where(ds.last('i32'), 'reverse') are supported, # but the same results can be achieved using the simpler ds.last('reverse') @@ -494,7 +479,6 @@ def test_where_last(df): assert_eq_xr(c.points(df, 'x', 'y', ds.where(ds.last('f32'))), out) -@pytest.mark.parametrize('df', dfs) def test_where_max(df): out = xr.DataArray([[16, 6], [11, 1]], coords=coords, dims=dims) assert_eq_xr(c.points(df, 'x', 'y', ds.where(ds.max('i32'), 'reverse')), out) @@ -510,7 +494,6 @@ def test_where_max(df): assert_eq_xr(c.points(df, 'x', 'y', ds.where(ds.max('f32'))), out) -@pytest.mark.parametrize('df', dfs) def test_where_min(df): out = xr.DataArray([[20, 10], [15, 5]], coords=coords, dims=dims) assert_eq_xr(c.points(df, 'x', 'y', ds.where(ds.min('i32'), 'reverse')), out) @@ -526,7 +509,6 @@ def test_where_min(df): assert_eq_xr(c.points(df, 'x', 'y', ds.where(ds.min('f32'))), out) -@pytest.mark.parametrize('df', dfs) def 
test_where_first_n(df): sol_rowindex = np.array([[[ 0, 1, 3, 4, -1, -1], [10, 11, 12, 13, 14, -1]], @@ -554,7 +536,6 @@ def test_where_first_n(df): ds.where(ds.first('plusminus'), 'reverse')).data) -@pytest.mark.parametrize('df', dfs) def test_where_last_n(df): sol_rowindex = np.array([[[ 4, 3, 1, 0, -1, -1], [14, 13, 12, 11, 10, -1]], @@ -582,7 +563,6 @@ def test_where_last_n(df): ds.where(ds.last('plusminus'), 'reverse')).data) -@pytest.mark.parametrize('df', dfs) def test_where_max_n(df): sol_rowindex = np.array([[[ 4, 0, 1, 3, -1, -1], [14, 12, 10, 11, 13, -1]], @@ -609,7 +589,6 @@ def test_where_max_n(df): c.points(df, 'x', 'y', ds.where(ds.max('plusminus'), 'reverse')).data) -@pytest.mark.parametrize('df', dfs) def test_where_min_n(df): sol_rowindex = np.array([[[3, 1, 0, 4, -1, -1], [13, 11, 10, 12, 14, -1]], @@ -636,7 +615,6 @@ def test_where_min_n(df): c.points(df, 'x', 'y', ds.where(ds.min('plusminus'), 'reverse')).data) -@pytest.mark.parametrize('df', dfs) def test_summary_by(df): # summary(by) agg_summary = c.points(df, 'x', 'y', ds.summary(by=ds.by("cat"))) @@ -667,7 +645,6 @@ def test_summary_by(df): assert_eq_xr(agg_summary["by2"], agg_by2) -@pytest.mark.parametrize('df', dfs) def test_summary_where_n(df): sol_min_n_rowindex = np.array([[[ 3, 1, 0, 4, -1], [13, 11, 10, 12, 14]], @@ -709,7 +686,6 @@ def test_summary_where_n(df): assert_eq_ndarray(agg['max2'].data, sol_max_n_reverse) -@pytest.mark.parametrize('df', dfs) def test_summary_different_n(df): msg = 'Using multiple FloatingNReductions with different n values is not supported' with pytest.raises(ValueError, match=msg): @@ -719,7 +695,6 @@ def test_summary_different_n(df): )) -@pytest.mark.parametrize('df', dfs) def test_mean(df): out = xr.DataArray(values(df.i32).reshape((2, 2, 5)).mean(axis=2, dtype='f8').T, coords=coords, dims=dims) @@ -731,7 +706,6 @@ def test_mean(df): assert_eq_xr(c.points(df, 'x', 'y', ds.mean('f64')), out) -@pytest.mark.parametrize('df', dfs) def test_var(df): out = 
xr.DataArray(values(df.i32).reshape((2, 2, 5)).var(axis=2, dtype='f8').T, coords=coords, dims=dims) @@ -743,7 +717,6 @@ def test_var(df): assert_eq_xr(c.points(df, 'x', 'y', ds.var('f64')), out) -@pytest.mark.parametrize('df', dfs) def test_std(df): out = xr.DataArray(values(df.i32).reshape((2, 2, 5)).std(axis=2, dtype='f8').T, coords=coords, dims=dims) @@ -755,7 +728,6 @@ def test_std(df): assert_eq_xr(c.points(df, 'x', 'y', ds.std('f64')), out) -@pytest.mark.parametrize('df', dfs) def test_count_cat(df): sol = np.array([[[5, 0, 0, 0], [0, 0, 5, 0]], @@ -768,7 +740,6 @@ def test_count_cat(df): assert_eq_ndarray(agg.y_range, (0, 1), close=True) -@pytest.mark.parametrize('df', dfs) def test_categorical_count(df): sol = np.array([[[5, 0, 0, 0], [0, 0, 5, 0]], @@ -789,7 +760,6 @@ def test_categorical_count(df): assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_one_category(df): # Issue #1142. assert len(df['onecat'].unique()) == 1 @@ -800,7 +770,6 @@ def test_one_category(df): assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_categorical_count_binning(df): sol = np.array([[[5, 0, 0, 0], [0, 0, 5, 0]], @@ -828,7 +797,6 @@ def test_categorical_count_binning(df): assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_categorical_sum(df): sol = np.array([[[ 10, nan, nan, nan], [nan, nan, 60, nan]], @@ -864,7 +832,6 @@ def test_categorical_sum(df): assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_categorical_sum_binning(df): sol = np.array([[[8.0, nan, nan, nan], [nan, nan, 60.0, nan]], @@ -881,7 +848,6 @@ def test_categorical_sum_binning(df): assert_eq_ndarray(agg.y_range, (0, 1), close=True) -@pytest.mark.parametrize('df', dfs) def test_categorical_max2(df): sol = np.array([[[ 4, nan, nan, nan], [nan, nan, 14, nan]], @@ -903,7 +869,6 @@ def test_categorical_max2(df): assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_categorical_max_binning(df): sol = np.array([[[ 4, nan, nan, nan], 
[nan, nan, 14, nan]], @@ -918,7 +883,6 @@ def test_categorical_max_binning(df): assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_categorical_mean(df): sol = np.array([[[ 2, nan, nan, nan], [nan, nan, 12, nan]], @@ -944,7 +908,6 @@ def test_categorical_mean(df): assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_categorical_mean_binning(df): sol = np.array([[[ 2, nan, nan, nan], [nan, nan, 12, nan]], @@ -959,7 +922,6 @@ def test_categorical_mean_binning(df): assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_categorical_var(df): sol = np.array([[[ 2.5, nan, nan, nan], [ nan, nan, 2., nan]], @@ -992,7 +954,6 @@ def test_categorical_var(df): assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_categorical_std(df): sol = np.sqrt(np.array([ [[ 2.5, nan, nan, nan], @@ -1027,7 +988,6 @@ def test_categorical_std(df): assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_first(df): out = xr.DataArray([[0, 10], [5, 15]], coords=coords, dims=dims) assert_eq_xr(c.points(df, 'x', 'y', ds.first('i32')), out) @@ -1036,7 +996,6 @@ def test_first(df): assert_eq_xr(c.points(df, 'x', 'y', ds.first('f64')), out) -@pytest.mark.parametrize('df', dfs) def test_last(df): out = xr.DataArray([[4, 14], [9, 19]], coords=coords, dims=dims) assert_eq_xr(c.points(df, 'x', 'y', ds.last('i32')), out) @@ -1045,7 +1004,6 @@ def test_last(df): assert_eq_xr(c.points(df, 'x', 'y', ds.last('f64')), out) -@pytest.mark.parametrize('df', dfs) def test_first_n(df): solution = np.array([[[0, -1, -3, 4, nan, nan], [10, -11, 12, -13, 14, nan]], [[-5, 6, -7, 8, -9, nan], [-15, 16, -17, 18, -19, nan]]]) @@ -1057,7 +1015,6 @@ def test_first_n(df): assert_eq_ndarray(agg[:, :, 0].data, c.points(df, 'x', 'y', ds.first('plusminus')).data) -@pytest.mark.parametrize('df', dfs) def test_last_n(df): solution = np.array([[[4, -3, -1, 0, nan, nan], [14, -13, 12, -11, 10, nan]], [[-9, 8, -7, 6, -5, nan], [-19, 18, -17, 16, -15, nan]]]) @@ 
-1069,7 +1026,6 @@ def test_last_n(df): assert_eq_ndarray(agg[:, :, 0].data, c.points(df, 'x', 'y', ds.last('plusminus')).data) -@pytest.mark.parametrize('df', dfs) def test_min_row_index(df): out = xr.DataArray([[0, 10], [5, 15]], coords=coords, dims=dims) agg = c.points(df, 'x', 'y', ds._min_row_index()) @@ -1077,7 +1033,6 @@ def test_min_row_index(df): assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_max_row_index(df): out = xr.DataArray([[4, 14], [9, 19]], coords=coords, dims=dims) agg = c.points(df, 'x', 'y', ds._max_row_index()) @@ -1085,7 +1040,6 @@ def test_max_row_index(df): assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_min_n_row_index(df): solution = np.array([[[0, 1, 2, 3, 4, -1], [10, 11, 12, 13, 14, -1]], [[5, 6, 7, 8, 9, -1], [15, 16, 17, 18, 19, -1]]]) @@ -1098,7 +1052,6 @@ def test_min_n_row_index(df): assert_eq_ndarray(agg[:, :, 0].data, c.points(df, 'x', 'y', ds._min_row_index()).data) -@pytest.mark.parametrize('df', dfs) def test_max_n_row_index(df): solution = np.array([[[4, 3, 2, 1, 0, -1], [14, 13, 12, 11, 10, -1]], [[9, 8, 7, 6, 5, -1], [19, 18, 17, 16, 15, -1]]]) @@ -1111,7 +1064,6 @@ def test_max_n_row_index(df): assert_eq_ndarray(agg[:, :, 0].data, c.points(df, 'x', 'y', ds._max_row_index()).data) -@pytest.mark.parametrize('df', dfs) def test_multiple_aggregates(df): agg = c.points(df, 'x', 'y', ds.summary(f64_mean=ds.mean('f64'), @@ -1125,7 +1077,6 @@ def f(x): assert_eq_xr(agg.i32_count, f(np.array([[5, 5], [5, 5]], dtype='i4'))) -@pytest.mark.parametrize('DataFrame', DataFrames) def test_auto_range_points(DataFrame): n = 10 data = np.arange(n, dtype='i4') @@ -1217,7 +1168,6 @@ def test_uniform_diagonal_points(low, high): assert_eq_ndarray(agg.y_range, (low, high), close=True) -@pytest.mark.parametrize('df', dfs) def test_log_axis_points(df): axis = ds.core.LogAxis() logcoords = axis.compute_index(axis.compute_scale_and_translate((1, 10), 2), 2) @@ -1366,7 +1316,6 @@ def test_lines_on_edge(): 
assert_eq_xr(agg, out) -@pytest.mark.parametrize('df', dfs) def test_log_axis_line(df): axis = ds.core.LogAxis() logcoords = axis.compute_index(axis.compute_scale_and_translate((1, 10), 2), 2) @@ -1766,10 +1715,9 @@ def test_bug_570(): ), }], dict(geometry='geom')) ) -@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_args,cvs_kwargs', line_manual_range_params) def test_line_manual_range(DataFrame, df_args, cvs_kwargs): - if cudf and DataFrame is cudf_DataFrame: + if cudf and DataFrame is _cudf_DataFrame: if (isinstance(getattr(df_args[0].get('x', []), 'dtype', ''), RaggedDtype) or sp and isinstance( getattr(df_args[0].get('geom', []), 'dtype', ''), LineDtype @@ -1858,11 +1806,10 @@ def test_line_manual_range(DataFrame, df_args, cvs_kwargs): ), }], dict(geometry='geom')) ) -@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_args,cvs_kwargs', line_autorange_params) @pytest.mark.parametrize('line_width', [0, 1]) def test_line_autorange(DataFrame, df_args, cvs_kwargs, line_width): - if cudf and DataFrame is cudf_DataFrame: + if cudf and DataFrame is _cudf_DataFrame: if (isinstance(getattr(df_args[0].get('x', []), 'dtype', ''), RaggedDtype) or sp and isinstance( getattr(df_args[0].get('geom', []), 'dtype', ''), LineDtype @@ -1913,7 +1860,6 @@ def test_line_autorange(DataFrame, df_args, cvs_kwargs, line_width): assert_eq_ndarray(agg.y_range, (-4, 4), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) def test_line_autorange_axis1_x_constant(DataFrame): axis = ds.core.LinearAxis() lincoords = axis.compute_index( @@ -1950,7 +1896,6 @@ def test_line_autorange_axis1_x_constant(DataFrame): # Sum aggregate -@pytest.mark.parametrize('DataFrame', DataFrames) def test_line_agg_sum_axis1_none_constant(DataFrame): axis = ds.core.LinearAxis() lincoords = axis.compute_index(axis.compute_scale_and_translate((-3., 3.), 7), 7) @@ -2022,7 +1967,6 @@ def test_line_autorange_axis1_ragged(): assert_eq_ndarray(agg.y_range, 
(-4, 4), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ @@ -2055,7 +1999,7 @@ def test_line_autorange_axis1_ragged(): }), dict(x='x', y='y', axis=1)) ]) def test_area_to_zero_fixedrange(DataFrame, df_kwargs, cvs_kwargs): - if cudf and DataFrame is cudf_DataFrame: + if cudf and DataFrame is _cudf_DataFrame: if isinstance(getattr(df_kwargs['data'].get('x', []), 'dtype', ''), RaggedDtype): pytest.skip("cudf DataFrames do not support extension types") @@ -2087,7 +2031,6 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, cvs_kwargs): assert_eq_ndarray(agg.y_range, (-2.25, 2.25), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ @@ -2135,7 +2078,7 @@ def test_area_to_zero_fixedrange(DataFrame, df_kwargs, cvs_kwargs): }), dict(x='x', y='y', axis=1)) ]) def test_area_to_zero_autorange(DataFrame, df_kwargs, cvs_kwargs): - if cudf and DataFrame is cudf_DataFrame: + if cudf and DataFrame is _cudf_DataFrame: if isinstance(getattr(df_kwargs['data'].get('x', []), 'dtype', ''), RaggedDtype): pytest.skip("cudf DataFrames do not support extension types") @@ -2167,7 +2110,6 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, cvs_kwargs): assert_eq_ndarray(agg.y_range, (-4, 0), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ @@ -2202,7 +2144,7 @@ def test_area_to_zero_autorange(DataFrame, df_kwargs, cvs_kwargs): }), dict(x='x', y='y', axis=1)) ]) def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, cvs_kwargs): - if cudf and DataFrame is cudf_DataFrame: + if cudf and DataFrame is _cudf_DataFrame: if isinstance(getattr(df_kwargs['data'].get('x', []), 'dtype', ''), RaggedDtype): pytest.skip("cudf DataFrames do not support extension types") @@ -2234,7 +2176,6 @@ def 
test_area_to_zero_autorange_gap(DataFrame, df_kwargs, cvs_kwargs): assert_eq_ndarray(agg.y_range, (-4, 4), close=True) -@pytest.mark.parametrize('DataFrame', DataFrames) @pytest.mark.parametrize('df_kwargs,cvs_kwargs', [ # axis1 none constant (dict(data={ @@ -2294,7 +2235,7 @@ def test_area_to_zero_autorange_gap(DataFrame, df_kwargs, cvs_kwargs): }), dict(x='x', y='y', y_stack='y_stack', axis=1)) ]) def test_area_to_line_autorange(DataFrame, df_kwargs, cvs_kwargs): - if cudf and DataFrame is cudf_DataFrame: + if cudf and DataFrame is _cudf_DataFrame: if isinstance(getattr(df_kwargs['data'].get('x', []), 'dtype', ''), RaggedDtype): pytest.skip("cudf DataFrames do not support extension types") @@ -3180,7 +3121,6 @@ def test_reduction_dtype(reduction, dtype, aa_dtype): assert agg.dtype == aa_dtype -@pytest.mark.parametrize('df', dfs) @pytest.mark.parametrize('canvas', [ ds.Canvas(x_axis_type='log'), ds.Canvas(x_axis_type='log', x_range=(0, 1)), @@ -3254,7 +3194,6 @@ def test_canvas_size(): cvs.points(df, "x", "y", ds.mean("z")) -@pytest.mark.parametrize('df', dfs) def test_categorical_where_max(df): sol_rowindex = xr.DataArray([[[4, 1, -1, 3], [12, 13, 14, 11]], [[8, 5, 6, 7], [16, 17, 18, 15]]], @@ -3271,7 +3210,6 @@ def test_categorical_where_max(df): assert_eq_xr(agg, sol_reverse) -@pytest.mark.parametrize('df', dfs) def test_categorical_where_min(df): sol_rowindex = xr.DataArray([[[0, 1, -1, 3], [12, 13, 10, 11]], [[8, 9, 6, 7], [16, 17, 18, 19]]], @@ -3288,7 +3226,6 @@ def test_categorical_where_min(df): assert_eq_xr(agg, sol_reverse) -@pytest.mark.parametrize('df', dfs) def test_categorical_where_first(df): sol_rowindex = xr.DataArray([[[0, 1, -1, 3], [12, 13, 10, 11]], [[8, 5, 6, 7], [16, 17, 18, 15]]], @@ -3305,7 +3242,6 @@ def test_categorical_where_first(df): assert_eq_xr(agg, sol_reverse) -@pytest.mark.parametrize('df', dfs) def test_categorical_where_last(df): sol_rowindex = xr.DataArray([[[4, 1, -1, 3], [12, 13, 14, 11]], [[8, 9, 6, 7], [16, 17, 18, 
19]]], @@ -3322,7 +3258,6 @@ def test_categorical_where_last(df): assert_eq_xr(agg, sol_reverse) -@pytest.mark.parametrize('df', dfs) def test_categorical_where_max_n(df): sol_rowindex = xr.DataArray( [[[[4, 0, -1], [1, -1, -1], [-1, -1, -1], [3, -1, -1]], @@ -3354,7 +3289,6 @@ def test_categorical_where_max_n(df): 'reverse'))).data) -@pytest.mark.parametrize('df', dfs) def test_categorical_where_min_n(df): sol_rowindex = xr.DataArray( [[[[0, 4, -1], [1, -1, -1], [-1, -1, -1], [3, -1, -1]], @@ -3386,7 +3320,6 @@ def test_categorical_where_min_n(df): 'reverse'))).data) -@pytest.mark.parametrize('df', dfs) def test_categorical_where_first_n(df): sol_rowindex = xr.DataArray( [[[[0, 4, -1], [1, -1, -1], [-1, -1, -1], [3, -1, -1]], @@ -3418,7 +3351,6 @@ def test_categorical_where_first_n(df): 'reverse'))).data) -@pytest.mark.parametrize('df', dfs) def test_categorical_where_last_n(df): sol_rowindex = xr.DataArray( [[[[4, 0, -1], [1, -1, -1], [-1, -1, -1], [3, -1, -1]], diff --git a/datashader/tests/test_polygons.py b/datashader/tests/test_polygons.py index 178e8990f..fa8593971 100644 --- a/datashader/tests/test_polygons.py +++ b/datashader/tests/test_polygons.py @@ -5,6 +5,12 @@ import datashader as ds from datashader.tests.test_pandas import assert_eq_ndarray, assert_eq_xr import dask.dataframe as dd +from datashader.tests.utils import dask_switcher + + +@pytest.fixture(autouse=True) +def _classic_dd(): + with dask_switcher(query=False, extras=["spatialpandas.dask"]): ... 
try: # Import to register extension arrays diff --git a/datashader/tests/test_transfer_functions.py b/datashader/tests/test_transfer_functions.py index 574e73e36..d26d51495 100644 --- a/datashader/tests/test_transfer_functions.py +++ b/datashader/tests/test_transfer_functions.py @@ -1,6 +1,5 @@ from __future__ import annotations -import os from io import BytesIO import numpy as np @@ -14,8 +13,6 @@ coords = dict([('x_axis', [3, 4, 5]), ('y_axis', [0, 1, 2])]) dims = ['y_axis', 'x_axis'] -test_gpu = bool(int(os.getenv("DATASHADER_TEST_GPU", 0))) - # CPU def build_agg(array_module=np): a = array_module.arange(10, 19, dtype='u4').reshape((3, 3)) @@ -40,21 +37,41 @@ def build_agg_dask(): return build_agg(np).chunk({d: 1 for d in dims}) -def create_dask_array_np(*args, **kwargs): +def build_agg_cupy(): + import cupy + return build_agg(cupy) + + +_backends = [ + pytest.param(build_agg, id="numpy"), + pytest.param(build_agg_dask, id="dask"), + pytest.param(build_agg_cupy, marks=pytest.mark.gpu, id="cupy"), +] + +@pytest.fixture(params=_backends) +def agg(request): + return request.param() + + +def create_dask_array(*args, **kwargs): """Create a dask array wrapping around a numpy array.""" return da.from_array(np.array(*args, **kwargs)) -if test_gpu: +def create_cupy_array(*args, **kwargs): import cupy - aggs = [build_agg(np), build_agg(cupy), build_agg_dask()] - arrays = [np.array, cupy.array, create_dask_array_np] - array_modules = [np, cupy] -else: - cupy = None - aggs = [build_agg(np), build_agg_dask()] - arrays = [np.array, create_dask_array_np] - array_modules = [np] + return cupy.array(*args, **kwargs) + + +_backends = [ + pytest.param(np.array, id="numpy"), + pytest.param(create_dask_array, id="dask"), + pytest.param(create_cupy_array, marks=pytest.mark.gpu, id="cupy"), +] + +@pytest.fixture(params=_backends) +def array(request): + return request.param int_span = [11, 17] float_span = [11.0, 17.0] @@ -143,7 +160,6 @@ def check_span(x, cmap, how, sol): 
assert_eq_xr(img, sol) -@pytest.mark.parametrize('agg', aggs) @pytest.mark.parametrize('attr', ['a', 'b', 'c']) @pytest.mark.parametrize('span', [None, int_span, float_span]) def test_shade(agg, attr, span): @@ -184,7 +200,6 @@ def test_shade(agg, attr, span): assert_eq_xr(img, sol) -@pytest.mark.parametrize('agg', aggs) @pytest.mark.parametrize('attr', ['a', 'b', 'c']) @pytest.mark.parametrize('how', ['linear', 'log', 'cbrt']) @pytest.mark.parametrize('cmap', [['pink', 'red'], ('#FFC0CB', '#FF0000')]) @@ -199,7 +214,6 @@ def test_span_cmap_list(agg, attr, how, cmap): check_span(x, cmap, how, sol) -@pytest.mark.parametrize('agg', aggs) @pytest.mark.parametrize('cmap', ['black', (0, 0, 0), '#000000']) def test_span_cmap_single(agg, cmap): # Get input @@ -215,7 +229,6 @@ def test_span_cmap_single(agg, cmap): check_span(x, cmap, 'log', sol) -@pytest.mark.parametrize('agg', aggs) def test_span_cmap_mpl(agg): # Get inputs x = agg.a @@ -249,7 +262,6 @@ def test_shade_bool(): assert_eq_xr(img, sol) -@pytest.mark.parametrize('agg', aggs) def test_shade_cmap(agg): cmap = ['red', (0, 255, 0), '#0000FF'] img = tf.shade(agg.a, how='log', cmap=cmap) @@ -260,7 +272,6 @@ def test_shade_cmap(agg): assert_eq_xr(img, sol) -@pytest.mark.parametrize('agg', aggs) @pytest.mark.parametrize('cmap', ['black', (0, 0, 0), '#000000']) def test_shade_cmap_non_categorical_alpha(agg, cmap): img = tf.shade(agg.a, how='log', cmap=cmap) @@ -271,7 +282,6 @@ def test_shade_cmap_non_categorical_alpha(agg, cmap): assert_eq_xr(img, sol) -@pytest.mark.parametrize('agg', aggs) def test_shade_cmap_errors(agg): with pytest.raises(ValueError): tf.shade(agg.a, cmap='foo') @@ -280,7 +290,6 @@ def test_shade_cmap_errors(agg): tf.shade(agg.a, cmap=[]) -@pytest.mark.parametrize('agg', aggs) def test_shade_mpl_cmap(agg): cm = pytest.importorskip('matplotlib.cm') img = tf.shade(agg.a, how='log', cmap=cm.viridis) @@ -291,7 +300,6 @@ def test_shade_mpl_cmap(agg): assert_eq_xr(img, sol) 
-@pytest.mark.parametrize('array', arrays) def test_shade_category(array): coords = [np.array([0, 1]), np.array([2, 5])] cat_agg = tf.Image(array([[(0, 12, 0), (3, 0, 3)], [(12, 12, 12), (24, 0, 0)]], dtype='u4'), @@ -478,7 +486,6 @@ def test_shade_category(array): assert ((img.data[1,1] >> 24) & 0xFF) == 20 # min alpha -@pytest.mark.parametrize('array', arrays) def test_shade_zeros(array): coords = [np.array([0, 1]), np.array([2, 5])] cat_agg = tf.Image(array([[(0, 0, 0), (0, 0, 0)], @@ -495,7 +502,6 @@ def test_shade_zeros(array): assert_eq_xr(img, sol) -@pytest.mark.parametrize('agg', aggs) @pytest.mark.parametrize('attr', ['d']) @pytest.mark.parametrize('rescale', [False, True]) def test_shade_rescale_discrete_levels(agg, attr, rescale): @@ -514,9 +520,8 @@ def test_shade_rescale_discrete_levels(agg, attr, rescale): assert_eq_xr(img, sol) -@pytest.mark.parametrize('array_module', array_modules) -def test_shade_rescale_discrete_levels_categorical(array_module): - arr = array_module.array([[[1, 2], [0, 1]], +def test_shade_rescale_discrete_levels_categorical(array): + arr = array([[[1, 2], [0, 1]], [[0, 0], [0, 0]], [[1, 0], [3, 0]], [[1, 0], [2, 1]]], dtype='u4') @@ -530,20 +535,18 @@ def test_shade_rescale_discrete_levels_categorical(array_module): assert_eq_ndarray(img.data, sol) -empty_arrays = [ +@pytest.mark.parametrize('empty_array', [ np.zeros((2, 2, 2), dtype=np.uint32), np.full((2, 2, 2), np.nan, dtype=np.float64), -] -if cupy is not None: - empty_arrays += [ - cupy.zeros((2, 2, 2), dtype=cupy.uint32), - cupy.full((2, 2, 2), cupy.nan, dtype=cupy.float64), - ] -@pytest.mark.parametrize('empty_array', empty_arrays) -def test_shade_all_masked(empty_array): +]) +@pytest.mark.parametrize('on_gpu', [False, pytest.param(True, marks=pytest.mark.gpu)]) +def test_shade_all_masked(empty_array, on_gpu): # Issue #1166, return early with array of all nans if all of data is masked out. 
# Before the fix this test results in: # IndexError: index -1 is out of bounds for axis 0 with size 0 + if on_gpu: + import cupy + empty_array = cupy.array(empty_array) agg = xr.DataArray( data=empty_array, coords=dict(y=[0, 1], x=[0, 1], cat=['a', 'b']), @@ -1141,42 +1144,28 @@ def test_shade_should_handle_zeros_array(): assert img is not None -def test_shade_with_discrete_color_key(): - data = np.array([[0, 0, 0, 0, 0], +def test_shade_with_discrete_color_key(array): + data = array([[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 2, 2, 2, 0], [0, 3, 3, 3, 0], [0, 0, 0, 0, 0]], dtype='uint32') color_key = {1: 'white', 2: 'purple', 3: 'yellow'} - result = np.array([[0, 0, 0, 0, 0], + result = array([[0, 0, 0, 0, 0], [0, 4294967295, 4294967295, 4294967295, 0], [0, 4286578816, 4286578816, 4286578816, 0], [0, 4278255615, 4278255615, 4278255615, 0], [0, 0, 0, 0, 0]], dtype='uint32') - # numpy case - arr_numpy = tf.Image(data, dims=['x', 'y']) - result_numpy = tf.shade(arr_numpy, color_key=color_key) - assert (result_numpy.data == result).all() - - # dask with numpy backed case - arr_dask = tf.Image(da.from_array(data, chunks=(2, 2)), dims=['x', 'y']) - result_dask = tf.shade(arr_dask, color_key=color_key) - assert (result_dask.data == result).all() - - # cupy case - try: - import cupy - arr_cupy = tf.Image(cupy.asarray(data), dims=['x', 'y']) - result_cupy = tf.shade(arr_cupy, color_key=color_key) - assert (result_cupy.data == result).all() - except ImportError: - cupy = None + arr = tf.Image(data, dims=['x', 'y']) + img = tf.shade(arr, color_key=color_key) + assert (img.data == result).all() -@pytest.mark.parametrize('array_module', array_modules) -def test_interpolate_alpha_discrete_levels_None(array_module): - data = array_module.array([[0.0, 1.0], [1.0, 0.0]]) +def test_interpolate_alpha_discrete_levels_None(array, request): + if "dask" in request.node.name: + pytest.skip("This test is not compatible with dask arrays") + data = array([[0.0, 1.0], [1.0, 0.0]]) # Issue
#1084: this raises a ValueError. tf._interpolate_alpha(data, data, None, "eq_hist", 0.5, None, 0.4, True) diff --git a/datashader/tests/test_xarray.py b/datashader/tests/test_xarray.py index 8397d1bc2..71f3f1f40 100644 --- a/datashader/tests/test_xarray.py +++ b/datashader/tests/test_xarray.py @@ -1,7 +1,7 @@ from __future__ import annotations +from copy import deepcopy import numpy as np from numpy import nan -import os import xarray as xr import datashader as ds @@ -14,9 +14,6 @@ except ImportError: cupy = None -test_gpu = bool(int(os.getenv("DATASHADER_TEST_GPU", 0))) - - xda = xr.DataArray(data=np.array(([1.] * 10 + [10] * 10)), dims=('record'), coords={'x': xr.DataArray(np.array(([0.]*10 + [1]*10)), dims=('record')), @@ -46,9 +43,7 @@ def assert_eq(agg, b): assert agg.equals(b) -@pytest.mark.parametrize("source", [ - (xda), (xdda), (xds), (xdds), -]) +@pytest.mark.parametrize("source", [xda, xdda, xds, xdds]) def test_count(source): out = xr.DataArray(np.array([[5, 5], [5, 5]], dtype='i4'), coords=coords, dims=dims) @@ -97,7 +92,7 @@ def test_count(source): @pytest.mark.parametrize("ds2d", ds2ds) -@pytest.mark.parametrize("cuda", [False, True]) +@pytest.mark.parametrize('on_gpu', [False, pytest.param(True, marks=pytest.mark.gpu)]) @pytest.mark.parametrize("chunksizes", [ None, dict(x=10, channel=10), @@ -105,12 +100,10 @@ def test_count(source): dict(x=3, channel=10), dict(x=3, channel=1), ]) -def test_lines_xarray_common_x(ds2d, cuda, chunksizes): - source = ds2d.copy() - if cuda: - if not (cupy and test_gpu): - pytest.skip("CUDA tests not requested") - elif chunksizes is not None: +def test_lines_xarray_common_x(ds2d, on_gpu, chunksizes): + source = deepcopy(ds2d) + if on_gpu: + if chunksizes is not None: pytest.skip("CUDA-dask for LinesXarrayCommonX not implemented") # CPU -> GPU @@ -162,7 +155,7 @@ def test_lines_xarray_common_x(ds2d, cuda, chunksizes): assert_eq_ndarray(agg.x_range, (0, 4), close=True) assert_eq_ndarray(agg.y_range, (0, 2), close=True) 
assert_eq_ndarray(agg.data, sol_count) - assert isinstance(agg.data, cupy.ndarray if cuda else np.ndarray) + assert isinstance(agg.data, cupy.ndarray if on_gpu else np.ndarray) # any agg = canvas.line(source, x="x", y="name", agg=ds.any()) diff --git a/datashader/tests/utils.py b/datashader/tests/utils.py new file mode 100644 index 000000000..bbae5695c --- /dev/null +++ b/datashader/tests/utils.py @@ -0,0 +1,29 @@ +import sys +from contextlib import contextmanager +from importlib import reload +from importlib.util import find_spec + +import dask +import pytest + +__all__ = ("dask_switcher",) + +EXPR_UNAVAILABLE = find_spec("dask_expr") is None + + +@contextmanager +def dask_switcher(*, query=False, extras=None): + """ + Context manager to switch on/off dask-expr query planning. + + Written as a context manager so it can double as a decorator. The config + change is not reverted on exit; it stays in effect after the block. + """ + if query and EXPR_UNAVAILABLE: + pytest.skip("dask-expr is not available") + + dask.config.set(**{"dataframe.query-planning": query}) + for module in ("dask.dataframe", *(extras or ())): + if module in sys.modules: + reload(sys.modules[module]) + yield diff --git a/pyproject.toml b/pyproject.toml index 6be63c8da..84ab15dd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ line-length = 100 target-version = "py39" -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "test_mpl_ext.py" = ["E402"] # Module level import not at top of file [tool.pytest.ini_options]