diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index e3fb262c437..d7737a8403e 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -9,6 +9,7 @@ dependencies: - cartopy - cfgrib - dask-core>=2022.1 + - hypothesis>=6.75.8 - h5netcdf>=0.13 - ipykernel - ipywidgets # silence nbsphinx warning diff --git a/doc/api.rst b/doc/api.rst index 24c3aee7d47..f41eaa12038 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1069,6 +1069,27 @@ Testing testing.assert_allclose testing.assert_chunks_equal +Hypothesis Testing Strategies +============================= + +.. currentmodule:: xarray + +See the :ref:`documentation page on testing ` for a guide on how to use these strategies. + +.. warning:: + These strategies should be considered highly experimental, and liable to change at any time. + +.. autosummary:: + :toctree: generated/ + + testing.strategies.supported_dtypes + testing.strategies.names + testing.strategies.dimension_names + testing.strategies.dimension_sizes + testing.strategies.attrs + testing.strategies.variables + testing.strategies.unique_subset_of + Exceptions ========== diff --git a/doc/conf.py b/doc/conf.py index 501ab9f9ec4..4bbceddba3d 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -326,6 +326,7 @@ "dask": ("https://docs.dask.org/en/latest", None), "cftime": ("https://unidata.github.io/cftime", None), "sparse": ("https://sparse.pydata.org/en/latest/", None), + "hypothesis": ("https://hypothesis.readthedocs.io/en/latest/", None), "cubed": ("https://tom-e-white.com/cubed/", None), "datatree": ("https://xarray-datatree.readthedocs.io/en/latest/", None), "xarray-tutorial": ("https://tutorial.xarray.dev/", None), diff --git a/doc/internals/duck-arrays-integration.rst b/doc/internals/duck-arrays-integration.rst index a674acb04fe..43b17be8bb8 100644 --- a/doc/internals/duck-arrays-integration.rst +++ b/doc/internals/duck-arrays-integration.rst @@ -31,6 +31,8 @@ property needs to obey `numpy's broadcasting rules `_ of these same rules). +.. 
_internals.duckarrays.array_api_standard: + Python Array API standard support ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/user-guide/index.rst b/doc/user-guide/index.rst index 0ac25d68930..45f0ce352de 100644 --- a/doc/user-guide/index.rst +++ b/doc/user-guide/index.rst @@ -25,4 +25,5 @@ examples that describe many common tasks that you can accomplish with xarray. dask plotting options + testing duckarrays diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst new file mode 100644 index 00000000000..13279eccb0b --- /dev/null +++ b/doc/user-guide/testing.rst @@ -0,0 +1,303 @@ +.. _testing: + +Testing your code +================= + +.. ipython:: python + :suppress: + + import numpy as np + import pandas as pd + import xarray as xr + + np.random.seed(123456) + +.. _testing.hypothesis: + +Hypothesis testing +------------------ + +.. note:: + + Testing with hypothesis is a fairly advanced topic. Before reading this section it is recommended that you take a look + at our guide to xarray's :ref:`data structures`, are familiar with conventional unit testing in + `pytest `_, and have seen the + `hypothesis library documentation `_. + +`The hypothesis library `_ is a powerful tool for property-based testing. +Instead of writing tests for one example at a time, it allows you to write tests parameterized by a source of many +dynamically generated examples. For example you might have written a test which you wish to be parameterized by the set +of all possible integers via :py:func:`hypothesis.strategies.integers()`. + +Property-based testing is extremely powerful, because (unlike more conventional example-based testing) it can find bugs +that you did not even think to look for! + +Strategies +~~~~~~~~~~ + +Each source of examples is called a "strategy", and xarray provides a range of custom strategies which produce xarray +data structures containing arbitrary data. 
You can use these to efficiently test downstream code, +quickly ensuring that your code can handle xarray objects of all possible structures and contents. + +These strategies are accessible in the :py:mod:`xarray.testing.strategies` module, which provides + +.. currentmodule:: xarray + +.. autosummary:: + + testing.strategies.supported_dtypes + testing.strategies.names + testing.strategies.dimension_names + testing.strategies.dimension_sizes + testing.strategies.attrs + testing.strategies.variables + testing.strategies.unique_subset_of + +These build upon the numpy and array API strategies offered in :py:mod:`hypothesis.extra.numpy` and :py:mod:`hypothesis.extra.array_api`: + +.. ipython:: python + + import hypothesis.extra.numpy as npst + +Generating Examples +~~~~~~~~~~~~~~~~~~~ + +To see an example of what each of these strategies might produce, you can call one followed by the ``.example()`` method, +which is a general hypothesis method valid for all strategies. + +.. ipython:: python + + import xarray.testing.strategies as xrst + + xrst.variables().example() + xrst.variables().example() + xrst.variables().example() + +You can see that calling ``.example()`` multiple times will generate different examples, giving you an idea of the wide +range of data that the xarray strategies can generate. + +In your tests however you should not use ``.example()`` - instead you should parameterize your tests with the +:py:func:`hypothesis.given` decorator: + +.. ipython:: python + + from hypothesis import given + +.. ipython:: python + + @given(xrst.variables()) + def test_function_that_acts_on_variables(var): + assert func(var) == ... + + +Chaining Strategies +~~~~~~~~~~~~~~~~~~~ + +Xarray's strategies can accept other strategies as arguments, allowing you to customise the contents of the generated +examples. + +.. 
ipython:: python + + # generate a Variable containing an array with a complex number dtype, but all other details still arbitrary + from hypothesis.extra.numpy import complex_number_dtypes + + xrst.variables(dtype=complex_number_dtypes()).example() + +This also works with custom strategies, or strategies defined in other packages. +For example you could imagine creating a ``chunks`` strategy to specify particular chunking patterns for a dask-backed array. + +Fixing Arguments +~~~~~~~~~~~~~~~~ + +If you want to fix one aspect of the data structure, whilst allowing variation in the generated examples +over all other aspects, then use :py:func:`hypothesis.strategies.just()`. + +.. ipython:: python + + import hypothesis.strategies as st + + # Generates only variable objects with dimensions ["x", "y"] + xrst.variables(dims=st.just(["x", "y"])).example() + +(This is technically another example of chaining strategies - :py:func:`hypothesis.strategies.just()` is simply a +special strategy that just contains a single example.) + +To fix the length of dimensions you can instead pass ``dims`` as a mapping of dimension names to lengths +(i.e. following xarray objects' ``.sizes`` property), e.g. + +.. ipython:: python + + # Generates only variables with dimensions ["x", "y"], of lengths 2 & 3 respectively + xrst.variables(dims=st.just({"x": 2, "y": 3})).example() + +You can also use this to specify that you want examples which are missing some part of the data structure, for instance + +.. ipython:: python + + # Generates a Variable with no attributes + xrst.variables(attrs=st.just({})).example() + +Through a combination of chaining strategies and fixing arguments, you can specify quite complicated requirements on the +objects your chained strategy will generate. + +.. 
ipython:: python + + fixed_x_variable_y_maybe_z = st.fixed_dictionaries( + {"x": st.just(2), "y": st.integers(3, 4)}, optional={"z": st.just(2)} + ) + fixed_x_variable_y_maybe_z.example() + + special_variables = xrst.variables(dims=fixed_x_variable_y_maybe_z) + + special_variables.example() + special_variables.example() + +Here we have used one of hypothesis' built-in strategies :py:func:`hypothesis.strategies.fixed_dictionaries` to create a +strategy which generates mappings of dimension names to lengths (i.e. the ``sizes`` of the xarray object we want). +This particular strategy will always generate an ``x`` dimension of length 2, and a ``y`` dimension of +length either 3 or 4, and will sometimes also generate a ``z`` dimension of length 2. +By feeding this strategy for dictionaries into the ``dims`` argument of xarray's :py:func:`~testing.strategies.variables` strategy, +we can generate arbitrary :py:class:`~xarray.Variable` objects whose dimensions will always match these specifications. + +Generating Duck-type Arrays +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array type which presents the same API as a +numpy array (so-called "duck array wrapping", see :ref:`wrapping numpy-like arrays `). + +Imagine we want to write a strategy which generates arbitrary ``Variable`` objects, each of which wraps a +:py:class:`sparse.COO` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: + +1. Create an xarray object with numpy data and use hypothesis' ``.map()`` method to convert the underlying array to a +different type: + +.. ipython:: python + + import sparse + +.. ipython:: python + + def convert_to_sparse(var): + return var.copy(data=sparse.COO.from_numpy(var.to_numpy())) + +.. ipython:: python + + sparse_variables = xrst.variables(dims=xrst.dimension_names(min_dims=1)).map( + convert_to_sparse + ) + + sparse_variables.example() + sparse_variables.example() + +2. 
Pass a function which returns a strategy which generates the duck-typed arrays directly to the ``array_strategy_fn`` argument of the xarray strategies: + +.. ipython:: python + + def sparse_random_arrays(shape: tuple[int]) -> sparse._coo.core.COO: + """Strategy which generates random sparse.COO arrays""" + if shape is None: + shape = npst.array_shapes() + else: + shape = st.just(shape) + density = st.integers(min_value=0, max_value=1) + # note sparse.random does not accept a dtype kwarg + return st.builds(sparse.random, shape=shape, density=density) + + + def sparse_random_arrays_fn( + *, shape: tuple[int, ...], dtype: np.dtype + ) -> st.SearchStrategy[sparse._coo.core.COO]: + return sparse_random_arrays(shape=shape) + + +.. ipython:: python + + sparse_random_variables = xrst.variables( + array_strategy_fn=sparse_random_arrays_fn, dtype=st.just(np.dtype("float64")) + ) + sparse_random_variables.example() + +Either approach is fine, but one may be more convenient than the other depending on the type of the duck array which you +want to wrap. + +Compatibility with the Python Array API Standard +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Xarray aims to be compatible with any duck-array type that conforms to the `Python Array API Standard `_ +(see our :ref:`docs on Array API Standard support `). + +.. warning:: + + The strategies defined in :py:mod:`testing.strategies` are **not** guaranteed to use array API standard-compliant + dtypes by default. + For example arrays with the dtype ``np.dtype('float16')`` may be generated by :py:func:`testing.strategies.variables` + (assuming the ``dtype`` kwarg was not explicitly passed), despite ``np.dtype('float16')`` not being in the + array API standard. + +If the array type you want to generate has an array API-compliant top-level namespace +(e.g. that which is conventionally imported as ``xp`` or similar), +you can use this neat trick: + +.. 
ipython:: python + :okwarning: + + from numpy import array_api as xp # available in numpy 1.26.0 + + from hypothesis.extra.array_api import make_strategies_namespace + + xps = make_strategies_namespace(xp) + + xp_variables = xrst.variables( + array_strategy_fn=xps.arrays, + dtype=xps.scalar_dtypes(), + ) + xp_variables.example() + +Another array API-compliant duck array library would replace the import, e.g. ``import cupy as cp`` instead. + +Testing over Subsets of Dimensions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A common task when testing xarray user code is checking that your function works for all valid input dimensions. +We can chain strategies to achieve this, for which the helper strategy :py:func:`~testing.strategies.unique_subset_of` +is useful. + +It works for lists of dimension names + +.. ipython:: python + + dims = ["x", "y", "z"] + xrst.unique_subset_of(dims).example() + xrst.unique_subset_of(dims).example() + +as well as for mappings of dimension names to sizes + +.. ipython:: python + + dim_sizes = {"x": 2, "y": 3, "z": 4} + xrst.unique_subset_of(dim_sizes).example() + xrst.unique_subset_of(dim_sizes).example() + +This is useful because operations like reductions can be performed over any subset of the xarray object's dimensions. +For example we can write a pytest test that tests that a reduction gives the expected result when applying that reduction +along any possible valid subset of the Variable's dimensions. + +.. 
code-block:: python + + import numpy.testing as npt + + + @given(st.data(), xrst.variables(dims=xrst.dimension_names(min_dims=1))) + def test_mean(data, var): + """Test that the mean of an xarray Variable is always equal to the mean of the underlying array.""" + + # specify arbitrary reduction along at least one dimension + reduction_dims = data.draw(xrst.unique_subset_of(var.dims, min_size=1)) + + # create expected result (using nanmean because arrays with Nans will be generated) + reduction_axes = tuple(var.get_axis_num(dim) for dim in reduction_dims) + expected = np.nanmean(var.data, axis=reduction_axes) + + # assert property is always satisfied + result = var.mean(dim=reduction_dims).data + npt.assert_equal(expected, result) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 35a93af301e..cda6d6f1d74 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,6 +23,10 @@ v2023.11.1 (unreleased) New Features ~~~~~~~~~~~~ +- Added hypothesis strategies for generating :py:class:`xarray.Variable` objects containing arbitrary data, useful for parametrizing downstream tests. + Accessible under :py:mod:`testing.strategies`, and documented in a new page on testing in the User Guide. + (:issue:`6911`, :pull:`8404`) + By `Tom Nicholas `_. - :py:meth:`rolling` uses numbagg `_ for most of its computations by default. Numbagg is up to 5x faster than bottleneck where parallelization is possible. Where parallelization isn't possible — for diff --git a/xarray/core/types.py b/xarray/core/types.py index 90f0f94e679..06ad65679d8 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -173,7 +173,8 @@ def copy( # Temporary placeholder for indicating an array api compliant type. 
# hopefully in the future we can narrow this down more: -T_DuckArray = TypeVar("T_DuckArray", bound=Any) +T_DuckArray = TypeVar("T_DuckArray", bound=Any, covariant=True) + ScalarOrArray = Union["ArrayLike", np.generic, np.ndarray, "DaskArray"] VarCompatible = Union["Variable", "ScalarOrArray"] diff --git a/xarray/testing/__init__.py b/xarray/testing/__init__.py new file mode 100644 index 00000000000..ab2f8ba4357 --- /dev/null +++ b/xarray/testing/__init__.py @@ -0,0 +1,23 @@ +from xarray.testing.assertions import ( # noqa: F401 + _assert_dataarray_invariants, + _assert_dataset_invariants, + _assert_indexes_invariants_checks, + _assert_internal_invariants, + _assert_variable_invariants, + _data_allclose_or_equiv, + assert_allclose, + assert_chunks_equal, + assert_duckarray_allclose, + assert_duckarray_equal, + assert_equal, + assert_identical, +) + +__all__ = [ + "assert_allclose", + "assert_chunks_equal", + "assert_duckarray_equal", + "assert_duckarray_allclose", + "assert_equal", + "assert_identical", +] diff --git a/xarray/testing.py b/xarray/testing/assertions.py similarity index 98% rename from xarray/testing.py rename to xarray/testing/assertions.py index 0837b562668..faa595a64b6 100644 --- a/xarray/testing.py +++ b/xarray/testing/assertions.py @@ -14,15 +14,6 @@ from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex, default_indexes from xarray.core.variable import IndexVariable, Variable -__all__ = ( - "assert_allclose", - "assert_chunks_equal", - "assert_duckarray_equal", - "assert_duckarray_allclose", - "assert_equal", - "assert_identical", -) - def ensure_warnings(func): # sometimes tests elevate warnings to errors diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py new file mode 100644 index 00000000000..d08cbc0b584 --- /dev/null +++ b/xarray/testing/strategies.py @@ -0,0 +1,447 @@ +from collections.abc import Hashable, Iterable, Mapping, Sequence +from typing import TYPE_CHECKING, Any, Protocol, Union, overload + +try: 
+ import hypothesis.strategies as st +except ImportError as e: + raise ImportError( + "`xarray.testing.strategies` requires `hypothesis` to be installed." + ) from e + +import hypothesis.extra.numpy as npst +import numpy as np +from hypothesis.errors import InvalidArgument + +import xarray as xr +from xarray.core.types import T_DuckArray + +if TYPE_CHECKING: + from xarray.core.types import _DTypeLikeNested, _ShapeLike + + +__all__ = [ + "supported_dtypes", + "names", + "dimension_names", + "dimension_sizes", + "attrs", + "variables", + "unique_subset_of", +] + + +class ArrayStrategyFn(Protocol[T_DuckArray]): + def __call__( + self, + *, + shape: "_ShapeLike", + dtype: "_DTypeLikeNested", + ) -> st.SearchStrategy[T_DuckArray]: + ... + + +def supported_dtypes() -> st.SearchStrategy[np.dtype]: + """ + Generates only those numpy dtypes which xarray can handle. + + Use instead of hypothesis.extra.numpy.scalar_dtypes in order to exclude weirder dtypes such as unicode, byte_string, array, or nested dtypes. + Also excludes datetimes, which dodges bugs with pandas non-nanosecond datetime overflows. + + Requires the hypothesis package to be installed. + + See Also + -------- + :ref:`testing.hypothesis`_ + """ + # TODO should this be exposed publicly? + # We should at least decide what the set of numpy dtypes that xarray officially supports is. + return ( + npst.integer_dtypes() + | npst.unsigned_integer_dtypes() + | npst.floating_dtypes() + | npst.complex_number_dtypes() + ) + + +# TODO Generalize to all valid unicode characters once formatting bugs in xarray's reprs are fixed + docs can handle it. +_readable_characters = st.characters( + categories=["L", "N"], max_codepoint=0x017F +) # only use characters within the "Latin Extended-A" subset of unicode + + +def names() -> st.SearchStrategy[str]: + """ + Generates arbitrary string names for dimensions / variables. + + Requires the hypothesis package to be installed. 
+ + See Also + -------- + :ref:`testing.hypothesis`_ + """ + return st.text( + _readable_characters, + min_size=1, + max_size=5, + ) + + +def dimension_names( + *, + min_dims: int = 0, + max_dims: int = 3, +) -> st.SearchStrategy[list[Hashable]]: + """ + Generates an arbitrary list of valid dimension names. + + Requires the hypothesis package to be installed. + + Parameters + ---------- + min_dims + Minimum number of dimensions in generated list. + max_dims + Maximum number of dimensions in generated list. + """ + + return st.lists( + elements=names(), + min_size=min_dims, + max_size=max_dims, + unique=True, + ) + + +def dimension_sizes( + *, + dim_names: st.SearchStrategy[Hashable] = names(), + min_dims: int = 0, + max_dims: int = 3, + min_side: int = 1, + max_side: Union[int, None] = None, +) -> st.SearchStrategy[Mapping[Hashable, int]]: + """ + Generates an arbitrary mapping from dimension names to lengths. + + Requires the hypothesis package to be installed. + + Parameters + ---------- + dim_names: strategy generating strings, optional + Strategy for generating dimension names. + Defaults to the `names` strategy. + min_dims: int, optional + Minimum number of dimensions in generated list. + Default is 0. + max_dims: int, optional + Maximum number of dimensions in generated list. + Default is 3. + min_side: int, optional + Minimum size of a dimension. + Default is 1. + max_side: int, optional + Maximum size of a dimension. + Default is `min_side` + 3. 
+ + See Also + -------- + :ref:`testing.hypothesis`_ + """ + + if max_side is None: + max_side = min_side + 3 + + return st.dictionaries( + keys=dim_names, + values=st.integers(min_value=min_side, max_value=max_side), + min_size=min_dims, + max_size=max_dims, + ) + + +_readable_strings = st.text( + _readable_characters, + max_size=5, +) +_attr_keys = _readable_strings +_small_arrays = npst.arrays( + shape=npst.array_shapes( + max_side=2, + max_dims=2, + ), + dtype=npst.scalar_dtypes(), +) +_attr_values = st.none() | st.booleans() | _readable_strings | _small_arrays + + +def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: + """ + Generates arbitrary valid attributes dictionaries for xarray objects. + + The generated dictionaries can potentially be recursive. + + Requires the hypothesis package to be installed. + + See Also + -------- + :ref:`testing.hypothesis`_ + """ + return st.recursive( + st.dictionaries(_attr_keys, _attr_values), + lambda children: st.dictionaries(_attr_keys, children), + max_leaves=3, + ) + + +@st.composite +def variables( + draw: st.DrawFn, + *, + array_strategy_fn: Union[ArrayStrategyFn, None] = None, + dims: Union[ + st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]], + None, + ] = None, + dtype: st.SearchStrategy[np.dtype] = supported_dtypes(), + attrs: st.SearchStrategy[Mapping] = attrs(), +) -> xr.Variable: + """ + Generates arbitrary xarray.Variable objects. + + Follows the basic signature of the xarray.Variable constructor, but allows passing alternative strategies to + generate either numpy-like array data or dimensions. Also allows specifying the shape or dtype of the wrapped array + up front. + + Passing nothing will generate a completely arbitrary Variable (containing a numpy array). + + Requires the hypothesis package to be installed. 
+ + Parameters + ---------- + array_strategy_fn: Callable which returns a strategy generating array-likes, optional + Callable must only accept shape and dtype kwargs, and must generate results consistent with its input. + If not passed the default is to generate a small numpy array with one of the supported_dtypes. + dims: Strategy for generating the dimensions, optional + Can either be a strategy for generating a sequence of string dimension names, + or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. + If provided as a mapping the array shape will be passed to array_strategy_fn. + Default is to generate arbitrary dimension names for each axis in data. + dtype: Strategy which generates np.dtype objects, optional + Will be passed in to array_strategy_fn. + Default is to generate any scalar dtype using supported_dtypes. + Be aware that this default set of dtypes includes some not strictly allowed by the array API standard. + attrs: Strategy which generates dicts, optional + Default is to generate a nested attributes dictionary containing arbitrary strings, booleans, integers, Nones, + and numpy arrays. + + Returns + ------- + variable_strategy + Strategy for generating xarray.Variable objects. + + Raises + ------ + ValueError + If a custom array_strategy_fn returns a strategy which generates an example array inconsistent with the shape + & dtype input passed to it. 
+ + Examples + -------- + Generate completely arbitrary Variable objects backed by a numpy array: + + >>> variables().example() # doctest: +SKIP + + array([43506, -16, -151], dtype=int32) + >>> variables().example() # doctest: +SKIP + + array([[[-10000000., -10000000.], + [-10000000., -10000000.]], + [[-10000000., -10000000.], + [ 0., -10000000.]], + [[ 0., -10000000.], + [-10000000., inf]], + [[ -0., -10000000.], + [-10000000., -0.]]], dtype=float32) + Attributes: + śřĴ: {'ĉ': {'iĥf': array([-30117, -1740], dtype=int16)}} + + Generate only Variable objects with certain dimension names: + + >>> variables(dims=st.just(["a", "b"])).example() # doctest: +SKIP + + array([[ 248, 4294967295, 4294967295], + [2412855555, 3514117556, 4294967295], + [ 111, 4294967295, 4294967295], + [4294967295, 1084434988, 51688], + [ 47714, 252, 11207]], dtype=uint32) + + Generate only Variable objects with certain dimension names and lengths: + + >>> variables(dims=st.just({"a": 2, "b": 1})).example() # doctest: +SKIP + + array([[-1.00000000e+007+3.40282347e+038j], + [-2.75034266e-225+2.22507386e-311j]]) + + See Also + -------- + :ref:`testing.hypothesis`_ + """ + + if not isinstance(dims, st.SearchStrategy) and dims is not None: + raise InvalidArgument( + f"dims must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(dims)}. " + "To specify fixed contents, use hypothesis.strategies.just()." + ) + if not isinstance(dtype, st.SearchStrategy) and dtype is not None: + raise InvalidArgument( + f"dtype must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(dtype)}. " + "To specify fixed contents, use hypothesis.strategies.just()." + ) + if not isinstance(attrs, st.SearchStrategy) and attrs is not None: + raise InvalidArgument( + f"attrs must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(attrs)}. " + "To specify fixed contents, use hypothesis.strategies.just()." 
+ ) + + _array_strategy_fn: ArrayStrategyFn + if array_strategy_fn is None: + # For some reason if I move the default value to the function signature definition mypy incorrectly says the ignore is no longer necessary, making it impossible to satisfy mypy + _array_strategy_fn = npst.arrays # type: ignore[assignment] # npst.arrays has extra kwargs that we aren't using later + elif not callable(array_strategy_fn): + raise InvalidArgument( + "array_strategy_fn must be a Callable that accepts the kwargs dtype and shape and returns a hypothesis " + "strategy which generates corresponding array-like objects." + ) + else: + _array_strategy_fn = ( + array_strategy_fn # satisfy mypy that this new variable cannot be None + ) + + _dtype = draw(dtype) + + if dims is not None: + # generate dims first then draw data to match + _dims = draw(dims) + if isinstance(_dims, Sequence): + dim_names = list(_dims) + valid_shapes = npst.array_shapes(min_dims=len(_dims), max_dims=len(_dims)) + _shape = draw(valid_shapes) + array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) + elif isinstance(_dims, (Mapping, dict)): + # should be a mapping of form {dim_names: lengths} + dim_names, _shape = list(_dims.keys()), tuple(_dims.values()) + array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) + else: + raise InvalidArgument( + f"Invalid type returned by dims strategy - drew an object of type {type(dims)}" + ) + else: + # nothing provided, so generate everything consistently + # We still generate the shape first here just so that we always pass shape to array_strategy_fn + _shape = draw(npst.array_shapes()) + array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) + dim_names = draw(dimension_names(min_dims=len(_shape), max_dims=len(_shape))) + + _data = draw(array_strategy) + + if _data.shape != _shape: + raise ValueError( + "array_strategy_fn returned an array object with a different shape than it was passed." + f"Passed {_shape}, but returned {_data.shape}." 
+ "Please either specify a consistent shape via the dims kwarg or ensure the array_strategy_fn callable " + "obeys the shape argument passed to it." + ) + if _data.dtype != _dtype: + raise ValueError( + "array_strategy_fn returned an array object with a different dtype than it was passed." + f"Passed {_dtype}, but returned {_data.dtype}" + "Please either specify a consistent dtype via the dtype kwarg or ensure the array_strategy_fn callable " + "obeys the dtype argument passed to it." + ) + + return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs)) + + +@overload +def unique_subset_of( + objs: Sequence[Hashable], + *, + min_size: int = 0, + max_size: Union[int, None] = None, +) -> st.SearchStrategy[Sequence[Hashable]]: + ... + + +@overload +def unique_subset_of( + objs: Mapping[Hashable, Any], + *, + min_size: int = 0, + max_size: Union[int, None] = None, +) -> st.SearchStrategy[Mapping[Hashable, Any]]: + ... + + +@st.composite +def unique_subset_of( + draw: st.DrawFn, + objs: Union[Sequence[Hashable], Mapping[Hashable, Any]], + *, + min_size: int = 0, + max_size: Union[int, None] = None, +) -> Union[Sequence[Hashable], Mapping[Hashable, Any]]: + """ + Return a strategy which generates a unique subset of the given objects. + + Each entry in the output subset will be unique (if input was a sequence) or have a unique key (if it was a mapping). + + Requires the hypothesis package to be installed. + + Parameters + ---------- + objs: Union[Sequence[Hashable], Mapping[Hashable, Any]] + Objects from which to sample to produce the subset. + min_size: int, optional + Minimum size of the returned subset. Default is 0. + max_size: int, optional + Maximum size of the returned subset. Default is the full length of the input. + If set to 0 the result will be an empty mapping. + + Returns + ------- + unique_subset_strategy + Strategy generating subset of the input. 
+ + Examples + -------- + >>> unique_subset_of({"x": 2, "y": 3}).example() # doctest: +SKIP + {'y': 3} + >>> unique_subset_of(["x", "y"]).example() # doctest: +SKIP + ['x'] + + See Also + -------- + :ref:`testing.hypothesis`_ + """ + if not isinstance(objs, Iterable): + raise TypeError( + f"Object to sample from must be an Iterable or a Mapping, but received type {type(objs)}" + ) + + if len(objs) == 0: + raise ValueError("Can't sample from a length-zero object.") + + keys = list(objs.keys()) if isinstance(objs, Mapping) else objs + + subset_keys = draw( + st.lists( + st.sampled_from(keys), + unique=True, + min_size=min_size, + max_size=max_size, + ) + ) + + return ( + {k: objs[k] for k in subset_keys} if isinstance(objs, Mapping) else subset_keys + ) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index f7f8f823d78..ffcae0fc664 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -106,6 +106,7 @@ def _importorskip( requires_pandas_version_two = pytest.mark.skipif( not has_pandas_version_two, reason="requires pandas 2.0.0" ) +has_numpy_array_api, requires_numpy_array_api = _importorskip("numpy", "1.26.0") has_h5netcdf_ros3 = _importorskip("h5netcdf", "1.3.0") requires_h5netcdf_ros3 = pytest.mark.skipif( not has_h5netcdf_ros3[0], reason="requires h5netcdf 1.3.0" diff --git a/xarray/tests/test_testing.py b/xarray/tests/test_assertions.py similarity index 100% rename from xarray/tests/test_testing.py rename to xarray/tests/test_assertions.py diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py new file mode 100644 index 00000000000..44f0d56cde8 --- /dev/null +++ b/xarray/tests/test_strategies.py @@ -0,0 +1,271 @@ +import numpy as np +import numpy.testing as npt +import pytest + +pytest.importorskip("hypothesis") +# isort: split + +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st +from hypothesis import given +from hypothesis.extra.array_api import make_strategies_namespace + +from 
from xarray.core.variable import Variable
from xarray.testing.strategies import (
    attrs,
    dimension_names,
    dimension_sizes,
    supported_dtypes,
    unique_subset_of,
    variables,
)
from xarray.tests import requires_numpy_array_api

# Leaf types accepted by ``check_dict_values`` when validating drawn attrs.
# ``None`` and nested dicts are handled separately inside that helper.
ALLOWED_ATTRS_VALUES_TYPES = (int, bool, str, np.ndarray)


class TestDimensionNamesStrategy:
    """Checks on the values drawn from the ``dimension_names`` strategy."""

    @given(dimension_names())
    def test_types(self, dims):
        """A draw is a list whose elements are all strings."""
        assert isinstance(dims, list)
        for d in dims:
            assert isinstance(d, str)

    @given(dimension_names())
    def test_unique(self, dims):
        """A draw never contains the same name twice."""
        assert len(set(dims)) == len(dims)

    @given(st.data(), st.tuples(st.integers(0, 10), st.integers(0, 10)).map(sorted))
    def test_number_of_dims(self, data, ndims):
        """The number of names drawn respects ``min_dims`` and ``max_dims``."""
        # ``ndims`` is a sorted pair, so min_dims <= max_dims always holds.
        min_dims, max_dims = ndims
        dim_names = data.draw(dimension_names(min_dims=min_dims, max_dims=max_dims))
        assert isinstance(dim_names, list)
        assert min_dims <= len(dim_names) <= max_dims


class TestDimensionSizesStrategy:
    """Checks on the values drawn from the ``dimension_sizes`` strategy."""

    @given(dimension_sizes())
    def test_types(self, dims):
        """A draw is a dict mapping non-empty string names to non-negative ints."""
        assert isinstance(dims, dict)
        for d, n in dims.items():
            assert isinstance(d, str)
            assert len(d) >= 1

            assert isinstance(n, int)
            assert n >= 0

    @given(st.data(), st.tuples(st.integers(0, 10), st.integers(0, 10)).map(sorted))
    def test_number_of_dims(self, data, ndims):
        """The number of dimensions drawn respects ``min_dims`` and ``max_dims``."""
        # ``ndims`` is a sorted pair, so min_dims <= max_dims always holds.
        min_dims, max_dims = ndims
        dim_sizes = data.draw(dimension_sizes(min_dims=min_dims, max_dims=max_dims))
        assert isinstance(dim_sizes, dict)
        assert min_dims <= len(dim_sizes) <= max_dims

    @given(st.data())
    def test_restrict_names(self, data):
        """Names come from the user-supplied ``dim_names`` strategy."""
        capitalized_names = st.text(st.characters(), min_size=1).map(str.upper)
        dim_sizes = data.draw(dimension_sizes(dim_names=capitalized_names))
        for dim in dim_sizes:
            assert dim.upper() == dim


def check_dict_values(dictionary: dict, allowed_attrs_values_types) -> bool:
    """
    Report whether every value in a (possibly nested) dict has an allowed type.

    Parameters
    ----------
    dictionary : dict
        Mapping whose values are checked. Values that are themselves dicts are checked recursively.
    allowed_attrs_values_types
        A type or tuple of types, passed straight to ``isinstance``.

    Returns
    -------
    bool
        True if each value is ``None``, an instance of one of the allowed types, or a dict
        whose own values pass the same check; False otherwise.

    Notes
    -----
    This function never raises on a bad value, so callers must assert on the returned bool.
    """
    for value in dictionary.values():
        if value is None or isinstance(value, allowed_attrs_values_types):
            continue
        if isinstance(value, dict) and check_dict_values(
            value, allowed_attrs_values_types
        ):
            continue
        # Neither None, an allowed type, nor a valid nested dict.
        return False
    return True


class TestAttrsStrategy:
    """Checks on the values drawn from the ``attrs`` strategy."""

    @given(attrs())
    def test_type(self, attrs):
        """A draw is a dict whose (nested) values all have an allowed type."""
        assert isinstance(attrs, dict)
        # check_dict_values only returns a bool, so the result has to be asserted
        # for this test to actually verify anything about the values.
        assert check_dict_values(attrs, ALLOWED_ATTRS_VALUES_TYPES)


class TestVariablesStrategy:
    """Checks on the ``variables`` strategy and its argument validation."""

    @given(variables())
    def test_given_nothing(self, var):
        """With no arguments the strategy yields a ``Variable``."""
        assert isinstance(var, Variable)

    @given(st.data())
    def test_given_incorrect_types(self, data):
        """Passing concrete values where the strategy expects something else raises ``TypeError``."""
        with pytest.raises(TypeError, match="dims must be provided as a"):
            data.draw(variables(dims=["x", "y"]))  # type: ignore[arg-type]

        with pytest.raises(TypeError, match="dtype must be provided as a"):
            data.draw(variables(dtype=np.dtype("int32")))  # type: ignore[arg-type]

        with pytest.raises(TypeError, match="attrs must be provided as a"):
            data.draw(variables(attrs=dict()))  # type: ignore[arg-type]

        with pytest.raises(TypeError, match="Callable"):
            data.draw(variables(array_strategy_fn=np.array([0])))  # type: ignore[arg-type]

    @given(st.data(), dimension_names())
    def test_given_fixed_dim_names(self, data, fixed_dim_names):
        """Fixed dimension names are used as-is, in order."""
        var = data.draw(variables(dims=st.just(fixed_dim_names)))

        assert list(var.dims) == fixed_dim_names

    @given(st.data(), dimension_sizes())
    def test_given_fixed_dim_sizes(self, data, dim_sizes):
        """A fixed name-to-size mapping determines both ``dims`` and ``shape``."""
        var = data.draw(variables(dims=st.just(dim_sizes)))

        assert var.dims == tuple(dim_sizes.keys())
        assert var.shape == tuple(dim_sizes.values())

    @given(st.data(), supported_dtypes())
    def test_given_fixed_dtype(self, data, dtype):
        """A fixed dtype is the dtype of the drawn ``Variable``."""
        var = data.draw(variables(dtype=st.just(dtype)))

        assert var.dtype == dtype

    @given(st.data(), npst.arrays(shape=npst.array_shapes(), dtype=supported_dtypes()))
    def test_given_fixed_data_dims_and_dtype(self, data, arr):
        """A fixed array, with matching dims and dtype, is wrapped unchanged."""

        def fixed_array_strategy_fn(*, shape=None, dtype=None):
            """The fact this ignores shape and dtype is only okay because compatible shape & dtype will be passed separately."""
            return st.just(arr)

        # Draw exactly one name per array axis, then pair names with axis lengths.
        dim_names = data.draw(dimension_names(min_dims=arr.ndim, max_dims=arr.ndim))
        dim_sizes = dict(zip(dim_names, arr.shape))

        var = data.draw(
            variables(
                array_strategy_fn=fixed_array_strategy_fn,
                dims=st.just(dim_sizes),
                dtype=st.just(arr.dtype),
            )
        )

        npt.assert_equal(var.data, arr)
        assert var.dtype == arr.dtype

    @given(st.data(), st.integers(0, 3))
    def test_given_array_strat_arbitrary_size_and_arbitrary_data(self, data, ndims):
        """A well-behaved custom array strategy works when only dimension names are fixed."""
        dim_names = data.draw(dimension_names(min_dims=ndims, max_dims=ndims))

        def array_strategy_fn(*, shape=None, dtype=None):
            return npst.arrays(shape=shape, dtype=dtype)

        var = data.draw(
            variables(
                array_strategy_fn=array_strategy_fn,
                dims=st.just(dim_names),
                dtype=supported_dtypes(),
            )
        )

        assert var.ndim == ndims

    @given(st.data())
    def test_catch_unruly_dtype_from_custom_array_strategy_fn(self, data):
        """An array strategy that ignores the requested dtype is rejected with ``ValueError``."""

        def dodgy_array_strategy_fn(*, shape=None, dtype=None):
            """Dodgy function which ignores the dtype it was passed"""
            return npst.arrays(shape=shape, dtype=npst.floating_dtypes())

        with pytest.raises(
            ValueError, match="returned an array object with a different dtype"
        ):
            data.draw(
                variables(
                    array_strategy_fn=dodgy_array_strategy_fn,
                    dtype=st.just(np.dtype("int32")),
                )
            )

    @given(st.data())
    def test_catch_unruly_shape_from_custom_array_strategy_fn(self, data):
        """An array strategy that ignores the requested shape is rejected with ``ValueError``."""

        def dodgy_array_strategy_fn(*, shape=None, dtype=None):
            """Dodgy function which ignores the shape it was passed"""
            return npst.arrays(shape=(3, 2), dtype=dtype)

        with pytest.raises(
            ValueError, match="returned an array object with a different shape"
        ):
            data.draw(
                variables(
                    array_strategy_fn=dodgy_array_strategy_fn,
                    dims=st.just({"a": 2, "b": 1}),
                    dtype=supported_dtypes(),
                )
            )

    @requires_numpy_array_api
    @given(st.data())
    def test_make_strategies_namespace(self, data):
        """
        Test not causing a hypothesis.InvalidArgument by generating a dtype that's not in the array API.

        We still want to generate dtypes not in the array API by default, but this checks we don't accidentally override
        the user's choice of dtypes with non-API-compliant ones.
        """
        from numpy import (
            array_api as np_array_api,  # requires numpy>=1.26.0, and we expect a UserWarning to be raised
        )

        np_array_api_st = make_strategies_namespace(np_array_api)

        data.draw(
            variables(
                array_strategy_fn=np_array_api_st.arrays,
                dtype=np_array_api_st.scalar_dtypes(),
            )
        )


class TestUniqueSubsetOf:
    """Checks on the ``unique_subset_of`` strategy."""

    @given(st.data())
    def test_invalid(self, data):
        """Non-collection input raises ``TypeError``; an empty mapping raises ``ValueError``."""
        with pytest.raises(TypeError, match="must be an Iterable or a Mapping"):
            data.draw(unique_subset_of(0))  # type: ignore[call-overload]

        with pytest.raises(ValueError, match="length-zero object"):
            data.draw(unique_subset_of({}))

    @given(st.data(), dimension_sizes(min_dims=1))
    def test_mapping(self, data, dim_sizes):
        """A subset of a mapping keeps only original keys, each with its original value."""
        subset_of_dim_sizes = data.draw(unique_subset_of(dim_sizes))

        for dim, length in subset_of_dim_sizes.items():
            assert dim in dim_sizes
            assert dim_sizes[dim] == length

    @given(st.data(), dimension_names(min_dims=1))
    def test_iterable(self, data, dim_names):
        """A subset of an iterable contains only elements of the original."""
        subset_of_dim_names = data.draw(unique_subset_of(dim_names))

        for dim in subset_of_dim_names:
            assert dim in dim_names


class TestReduction:
    """
    These tests are for checking that the examples given in the docs page on testing actually work.
    """

    @given(st.data(), variables(dims=dimension_names(min_dims=1)))
    def test_mean(self, data, var):
        """
        Test that given a Variable of at least one dimension,
        the mean of the Variable is always equal to the mean of the underlying array.
        """

        # specify arbitrary reduction along at least one dimension
        reduction_dims = data.draw(unique_subset_of(var.dims, min_size=1))

        # create expected result (using nanmean because arrays with NaNs will be generated)
        reduction_axes = tuple(var.get_axis_num(dim) for dim in reduction_dims)
        expected = np.nanmean(var.data, axis=reduction_axes)

        # assert property is always satisfied
        result = var.mean(dim=reduction_dims).data
        npt.assert_equal(expected, result)