diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml
index e3fb262c437..d7737a8403e 100644
--- a/ci/requirements/doc.yml
+++ b/ci/requirements/doc.yml
@@ -9,6 +9,7 @@ dependencies:
   - cartopy
   - cfgrib
   - dask-core>=2022.1
+  - hypothesis>=6.75.8
   - h5netcdf>=0.13
   - ipykernel
   - ipywidgets  # silence nbsphinx warning
diff --git a/doc/api.rst b/doc/api.rst
index 24c3aee7d47..f41eaa12038 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -1069,6 +1069,27 @@ Testing
    testing.assert_allclose
    testing.assert_chunks_equal
 
+Hypothesis Testing Strategies
+=============================
+
+.. currentmodule:: xarray
+
+See the :ref:`documentation page on testing <testing.hypothesis>` for a guide on how to use these strategies.
+
+.. warning::
+    These strategies should be considered highly experimental, and liable to change at any time.
+
+.. autosummary::
+   :toctree: generated/
+
+   testing.strategies.supported_dtypes
+   testing.strategies.names
+   testing.strategies.dimension_names
+   testing.strategies.dimension_sizes
+   testing.strategies.attrs
+   testing.strategies.variables
+   testing.strategies.unique_subset_of
+
 Exceptions
 ==========
 
diff --git a/doc/conf.py b/doc/conf.py
index 501ab9f9ec4..4bbceddba3d 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -326,6 +326,7 @@
     "dask": ("https://docs.dask.org/en/latest", None),
     "cftime": ("https://unidata.github.io/cftime", None),
     "sparse": ("https://sparse.pydata.org/en/latest/", None),
+    "hypothesis": ("https://hypothesis.readthedocs.io/en/latest/", None),
     "cubed": ("https://tom-e-white.com/cubed/", None),
     "datatree": ("https://xarray-datatree.readthedocs.io/en/latest/", None),
     "xarray-tutorial": ("https://tutorial.xarray.dev/", None),
diff --git a/doc/internals/duck-arrays-integration.rst b/doc/internals/duck-arrays-integration.rst
index a674acb04fe..43b17be8bb8 100644
--- a/doc/internals/duck-arrays-integration.rst
+++ b/doc/internals/duck-arrays-integration.rst
@@ -31,6 +31,8 @@ property needs to obey `numpy's broadcasting rules <https://numpy.org/doc/stable
 (see also the `Python Array API standard's explanation <https://data-apis.org/array-api/latest/API_specification/broadcasting.html>`_
 of these same rules).
 
+.. _internals.duckarrays.array_api_standard:
+
 Python Array API standard support
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/user-guide/index.rst b/doc/user-guide/index.rst
index 0ac25d68930..45f0ce352de 100644
--- a/doc/user-guide/index.rst
+++ b/doc/user-guide/index.rst
@@ -25,4 +25,5 @@ examples that describe many common tasks that you can accomplish with xarray.
    dask
    plotting
    options
+   testing
    duckarrays
diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst
new file mode 100644
index 00000000000..13279eccb0b
--- /dev/null
+++ b/doc/user-guide/testing.rst
@@ -0,0 +1,303 @@
+.. _testing:
+
+Testing your code
+=================
+
+.. ipython:: python
+    :suppress:
+
+    import numpy as np
+    import pandas as pd
+    import xarray as xr
+
+    np.random.seed(123456)
+
+.. _testing.hypothesis:
+
+Hypothesis testing
+------------------
+
+.. note::
+
+  Testing with hypothesis is a fairly advanced topic. Before reading this section it is recommended that you take a look
+  at our guide to xarray's :ref:`data structures`, are familiar with conventional unit testing in
+  `pytest <https://docs.pytest.org/>`_, and have seen the
+  `hypothesis library documentation <https://hypothesis.readthedocs.io/>`_.
+
+`The hypothesis library <https://hypothesis.readthedocs.io/>`_ is a powerful tool for property-based testing.
+Instead of writing tests for one example at a time, it allows you to write tests parameterized by a source of many
+dynamically generated examples. For example you might have written a test which you wish to be parameterized by the set
+of all possible integers via :py:func:`hypothesis.strategies.integers()`.
+
+Property-based testing is extremely powerful, because (unlike more conventional example-based testing) it can find bugs
+that you did not even think to look for!
+
+Strategies
+~~~~~~~~~~
+
+Each source of examples is called a "strategy", and xarray provides a range of custom strategies which produce xarray
+data structures containing arbitrary data. You can use these to efficiently test downstream code,
+quickly ensuring that your code can handle xarray objects of all possible structures and contents.
+
+These strategies are accessible in the :py:mod:`xarray.testing.strategies` module, which provides
+
+.. currentmodule:: xarray
+
+.. autosummary::
+
+   testing.strategies.supported_dtypes
+   testing.strategies.names
+   testing.strategies.dimension_names
+   testing.strategies.dimension_sizes
+   testing.strategies.attrs
+   testing.strategies.variables
+   testing.strategies.unique_subset_of
+
+These build upon the numpy and array API strategies offered in :py:mod:`hypothesis.extra.numpy` and :py:mod:`hypothesis.extra.array_api`:
+
+.. ipython:: python
+
+    import hypothesis.extra.numpy as npst
+
+Generating Examples
+~~~~~~~~~~~~~~~~~~~
+
+To see an example of what each of these strategies might produce, you can call one followed by the ``.example()`` method,
+which is a general hypothesis method valid for all strategies.
+
+.. ipython:: python
+
+    import xarray.testing.strategies as xrst
+
+    xrst.variables().example()
+    xrst.variables().example()
+    xrst.variables().example()
+
+You can see that calling ``.example()`` multiple times will generate different examples, giving you an idea of the wide
+range of data that the xarray strategies can generate.
+
+In your tests however you should not use ``.example()`` - instead you should parameterize your tests with the
+:py:func:`hypothesis.given` decorator:
+
+.. ipython:: python
+
+    from hypothesis import given
+
+.. ipython:: python
+
+    @given(xrst.variables())
+    def test_function_that_acts_on_variables(var):
+        assert func(var) == ...
+
+
+Chaining Strategies
+~~~~~~~~~~~~~~~~~~~
+
+Xarray's strategies can accept other strategies as arguments, allowing you to customise the contents of the generated
+examples.
+
+.. ipython:: python
+
+    # generate a Variable containing an array with a complex number dtype, but all other details still arbitrary
+    from hypothesis.extra.numpy import complex_number_dtypes
+
+    xrst.variables(dtype=complex_number_dtypes()).example()
+
+This also works with custom strategies, or strategies defined in other packages.
+For example you could imagine creating a ``chunks`` strategy to specify particular chunking patterns for a dask-backed array.
+
+Fixing Arguments
+~~~~~~~~~~~~~~~~
+
+If you want to fix one aspect of the data structure, whilst allowing variation in the generated examples
+over all other aspects, then use :py:func:`hypothesis.strategies.just()`.
+
+.. ipython:: python
+
+    import hypothesis.strategies as st
+
+    # Generates only variable objects with dimensions ["x", "y"]
+    xrst.variables(dims=st.just(["x", "y"])).example()
+
+(This is technically another example of chaining strategies - :py:func:`hypothesis.strategies.just()` is simply a
+special strategy that just contains a single example.)
+
+To fix the length of dimensions you can instead pass ``dims`` as a mapping of dimension names to lengths
+(i.e. following xarray objects' ``.sizes`` property), e.g.
+
+.. ipython:: python
+
+    # Generates only variables with dimensions ["x", "y"], of lengths 2 & 3 respectively
+    xrst.variables(dims=st.just({"x": 2, "y": 3})).example()
+
+You can also use this to specify that you want examples which are missing some part of the data structure, for instance
+
+.. ipython:: python
+
+    # Generates a Variable with no attributes
+    xrst.variables(attrs=st.just({})).example()
+
+Through a combination of chaining strategies and fixing arguments, you can specify quite complicated requirements on the
+objects your chained strategy will generate.
+
+.. ipython:: python
+
+    fixed_x_variable_y_maybe_z = st.fixed_dictionaries(
+        {"x": st.just(2), "y": st.integers(3, 4)}, optional={"z": st.just(2)}
+    )
+    fixed_x_variable_y_maybe_z.example()
+
+    special_variables = xrst.variables(dims=fixed_x_variable_y_maybe_z)
+
+    special_variables.example()
+    special_variables.example()
+
+Here we have used one of hypothesis' built-in strategies :py:func:`hypothesis.strategies.fixed_dictionaries` to create a
+strategy which generates mappings of dimension names to lengths (i.e. the ``sizes`` of the xarray object we want).
+This particular strategy will always generate an ``x`` dimension of length 2, and a ``y`` dimension of
+length either 3 or 4, and will sometimes also generate a ``z`` dimension of length 2.
+By feeding this strategy for dictionaries into the ``dims`` argument of xarray's :py:func:`~testing.strategies.variables` strategy,
+we can generate arbitrary :py:class:`~xarray.Variable` objects whose dimensions will always match these specifications.
+
+Generating Duck-type Arrays
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array type which presents the same API as a
+numpy array (so-called "duck array wrapping", see :ref:`wrapping numpy-like arrays <internals.duckarrays>`).
+
+Imagine we want to write a strategy which generates arbitrary ``Variable`` objects, each of which wraps a
+:py:class:`sparse.COO` array instead of a ``numpy.ndarray``. How could we do that? There are two ways:
+
+1. Create an xarray object with numpy data and use hypothesis' ``.map()`` method to convert the underlying array to a
+   different type:
+
+.. ipython:: python
+
+    import sparse
+
+.. ipython:: python
+
+    def convert_to_sparse(var):
+        return var.copy(data=sparse.COO.from_numpy(var.to_numpy()))
+
+.. ipython:: python
+
+    sparse_variables = xrst.variables(dims=xrst.dimension_names(min_dims=1)).map(
+        convert_to_sparse
+    )
+
+    sparse_variables.example()
+    sparse_variables.example()
+
+2. Pass a function which returns a strategy which generates the duck-typed arrays directly to the ``array_strategy_fn`` argument of the xarray strategies:
+
+.. ipython:: python
+
+    def sparse_random_arrays(shape: tuple[int, ...]) -> st.SearchStrategy[sparse._coo.core.COO]:
+        """Strategy which generates random sparse.COO arrays"""
+        if shape is None:
+            shape = npst.array_shapes()
+        else:
+            shape = st.just(shape)
+        density = st.integers(min_value=0, max_value=1)
+        # note sparse.random does not accept a dtype kwarg
+        return st.builds(sparse.random, shape=shape, density=density)
+
+
+    def sparse_random_arrays_fn(
+        *, shape: tuple[int, ...], dtype: np.dtype
+    ) -> st.SearchStrategy[sparse._coo.core.COO]:
+        return sparse_random_arrays(shape=shape)
+
+
+.. ipython:: python
+
+    sparse_random_variables = xrst.variables(
+        array_strategy_fn=sparse_random_arrays_fn, dtype=st.just(np.dtype("float64"))
+    )
+    sparse_random_variables.example()
+
+Either approach is fine, but one may be more convenient than the other depending on the type of the duck array which you
+want to wrap.
+
+Compatibility with the Python Array API Standard
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Xarray aims to be compatible with any duck-array type that conforms to the `Python Array API Standard <https://data-apis.org/array-api/latest/>`_
+(see our :ref:`docs on Array API Standard support <internals.duckarrays.array_api_standard>`).
+
+.. warning::
+
+    The strategies defined in :py:mod:`testing.strategies` are **not** guaranteed to use array API standard-compliant
+    dtypes by default.
+    For example arrays with the dtype ``np.dtype('float16')`` may be generated by :py:func:`testing.strategies.variables`
+    (assuming the ``dtype`` kwarg was not explicitly passed), despite ``np.dtype('float16')`` not being in the
+    array API standard.
+
+If the array type you want to generate has an array API-compliant top-level namespace
+(e.g. that which is conventionally imported as ``xp`` or similar),
+you can use this neat trick:
+
+.. ipython:: python
+    :okwarning:
+
+    from numpy import array_api as xp  # available in numpy 1.26.0
+
+    from hypothesis.extra.array_api import make_strategies_namespace
+
+    xps = make_strategies_namespace(xp)
+
+    xp_variables = xrst.variables(
+        array_strategy_fn=xps.arrays,
+        dtype=xps.scalar_dtypes(),
+    )
+    xp_variables.example()
+
+Another array API-compliant duck array library would replace the import, e.g. ``import cupy as cp`` instead.
+
+Testing over Subsets of Dimensions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A common task when testing xarray user code is checking that your function works for all valid input dimensions.
+We can chain strategies to achieve this, for which the helper strategy :py:func:`~testing.strategies.unique_subset_of`
+is useful.
+
+It works for lists of dimension names
+
+.. ipython:: python
+
+    dims = ["x", "y", "z"]
+    xrst.unique_subset_of(dims).example()
+    xrst.unique_subset_of(dims).example()
+
+as well as for mappings of dimension names to sizes
+
+.. ipython:: python
+
+    dim_sizes = {"x": 2, "y": 3, "z": 4}
+    xrst.unique_subset_of(dim_sizes).example()
+    xrst.unique_subset_of(dim_sizes).example()
+
+This is useful because operations like reductions can be performed over any subset of the xarray object's dimensions.
+For example we can write a pytest test that tests that a reduction gives the expected result when applying that reduction
+along any possible valid subset of the Variable's dimensions.
+
+.. code-block:: python
+
+    import numpy.testing as npt
+
+
+    @given(st.data(), xrst.variables(dims=xrst.dimension_names(min_dims=1)))
+    def test_mean(data, var):
+        """Test that the mean of an xarray Variable is always equal to the mean of the underlying array."""
+
+        # specify arbitrary reduction along at least one dimension
+        reduction_dims = data.draw(xrst.unique_subset_of(var.dims, min_size=1))
+
+        # create expected result (using nanmean because arrays with Nans will be generated)
+        reduction_axes = tuple(var.get_axis_num(dim) for dim in reduction_dims)
+        expected = np.nanmean(var.data, axis=reduction_axes)
+
+        # assert property is always satisfied
+        result = var.mean(dim=reduction_dims).data
+        npt.assert_equal(expected, result)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 35a93af301e..cda6d6f1d74 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -23,6 +23,10 @@ v2023.11.1 (unreleased)
 New Features
 ~~~~~~~~~~~~
 
+- Added hypothesis strategies for generating :py:class:`xarray.Variable` objects containing arbitrary data, useful for parametrizing downstream tests.
+  Accessible under :py:mod:`testing.strategies`, and documented in a new page on testing in the User Guide.
+  (:issue:`6911`, :pull:`8404`)
+  By `Tom Nicholas <https://github.com/TomNicholas>`_.
 - :py:meth:`rolling` uses numbagg <https://github.com/numbagg/numbagg>`_ for
   most of its computations by default. Numbagg is up to 5x faster than bottleneck
   where parallelization is possible. Where parallelization isn't possible — for
diff --git a/xarray/core/types.py b/xarray/core/types.py
index 90f0f94e679..06ad65679d8 100644
--- a/xarray/core/types.py
+++ b/xarray/core/types.py
@@ -173,7 +173,8 @@ def copy(
 
 # Temporary placeholder for indicating an array api compliant type.
 # hopefully in the future we can narrow this down more:
-T_DuckArray = TypeVar("T_DuckArray", bound=Any)
+T_DuckArray = TypeVar("T_DuckArray", bound=Any, covariant=True)
+
 
 ScalarOrArray = Union["ArrayLike", np.generic, np.ndarray, "DaskArray"]
 VarCompatible = Union["Variable", "ScalarOrArray"]
diff --git a/xarray/testing/__init__.py b/xarray/testing/__init__.py
new file mode 100644
index 00000000000..ab2f8ba4357
--- /dev/null
+++ b/xarray/testing/__init__.py
@@ -0,0 +1,23 @@
+from xarray.testing.assertions import (  # noqa: F401
+    _assert_dataarray_invariants,
+    _assert_dataset_invariants,
+    _assert_indexes_invariants_checks,
+    _assert_internal_invariants,
+    _assert_variable_invariants,
+    _data_allclose_or_equiv,
+    assert_allclose,
+    assert_chunks_equal,
+    assert_duckarray_allclose,
+    assert_duckarray_equal,
+    assert_equal,
+    assert_identical,
+)
+
+__all__ = [
+    "assert_allclose",
+    "assert_chunks_equal",
+    "assert_duckarray_equal",
+    "assert_duckarray_allclose",
+    "assert_equal",
+    "assert_identical",
+]
diff --git a/xarray/testing.py b/xarray/testing/assertions.py
similarity index 98%
rename from xarray/testing.py
rename to xarray/testing/assertions.py
index 0837b562668..faa595a64b6 100644
--- a/xarray/testing.py
+++ b/xarray/testing/assertions.py
@@ -14,15 +14,6 @@
 from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex, default_indexes
 from xarray.core.variable import IndexVariable, Variable
 
-__all__ = (
-    "assert_allclose",
-    "assert_chunks_equal",
-    "assert_duckarray_equal",
-    "assert_duckarray_allclose",
-    "assert_equal",
-    "assert_identical",
-)
-
 
 def ensure_warnings(func):
     # sometimes tests elevate warnings to errors
diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py
new file mode 100644
index 00000000000..d08cbc0b584
--- /dev/null
+++ b/xarray/testing/strategies.py
@@ -0,0 +1,447 @@
+from collections.abc import Hashable, Iterable, Mapping, Sequence
+from typing import TYPE_CHECKING, Any, Protocol, Union, overload
+
+try:
+    import hypothesis.strategies as st
+except ImportError as e:
+    raise ImportError(
+        "`xarray.testing.strategies` requires `hypothesis` to be installed."
+    ) from e
+
+import hypothesis.extra.numpy as npst
+import numpy as np
+from hypothesis.errors import InvalidArgument
+
+import xarray as xr
+from xarray.core.types import T_DuckArray
+
+if TYPE_CHECKING:
+    from xarray.core.types import _DTypeLikeNested, _ShapeLike
+
+
+__all__ = [
+    "supported_dtypes",
+    "names",
+    "dimension_names",
+    "dimension_sizes",
+    "attrs",
+    "variables",
+    "unique_subset_of",
+]
+
+
+class ArrayStrategyFn(Protocol[T_DuckArray]):  # call signature expected of ``array_strategy_fn``
+    def __call__(
+        self,
+        *,
+        shape: "_ShapeLike",
+        dtype: "_DTypeLikeNested",
+    ) -> st.SearchStrategy[T_DuckArray]:
+        ...
+
+
+def supported_dtypes() -> st.SearchStrategy[np.dtype]:
+    """
+    Generates only those numpy dtypes which xarray can handle (signed/unsigned integer, floating-point and complex).
+
+    Use instead of hypothesis.extra.numpy.scalar_dtypes in order to exclude weirder dtypes such as unicode, byte_string, array, or nested dtypes.
+    Also excludes datetimes, which dodges bugs with pandas non-nanosecond datetime overflows.
+
+    Requires the hypothesis package to be installed.
+
+    See Also
+    --------
+    :ref:`testing.hypothesis`
+    """
+    # TODO should this be exposed publicly?
+    # We should at least decide what the set of numpy dtypes that xarray officially supports is.
+    return (
+        npst.integer_dtypes()
+        | npst.unsigned_integer_dtypes()
+        | npst.floating_dtypes()
+        | npst.complex_number_dtypes()
+    )
+
+
+# TODO Generalize to all valid unicode characters once formatting bugs in xarray's reprs are fixed + docs can handle it.
+_readable_characters = st.characters(
+    categories=["L", "N"], max_codepoint=0x017F
+)  # only letters and numbers, with codepoints up to the end of the "Latin Extended-A" block of unicode
+
+
+def names() -> st.SearchStrategy[str]:
+    """
+    Generates arbitrary string names (1 to 5 readable characters long) for dimensions / variables.
+
+    Requires the hypothesis package to be installed.
+
+    See Also
+    --------
+    :ref:`testing.hypothesis`
+    """
+    return st.text(
+        _readable_characters,
+        min_size=1,
+        max_size=5,
+    )
+
+
+def dimension_names(
+    *,
+    min_dims: int = 0,
+    max_dims: int = 3,
+) -> st.SearchStrategy[list[Hashable]]:
+    """
+    Generates an arbitrary list of valid dimension names, with no name repeated within a list.
+
+    Requires the hypothesis package to be installed.
+
+    Parameters
+    ----------
+    min_dims
+        Minimum number of dimensions in generated list. Default is 0.
+    max_dims
+        Maximum number of dimensions in generated list. Default is 3.
+    """
+
+    return st.lists(
+        elements=names(),
+        min_size=min_dims,
+        max_size=max_dims,
+        unique=True,
+    )
+
+
+def dimension_sizes(
+    *,
+    dim_names: st.SearchStrategy[Hashable] = names(),
+    min_dims: int = 0,
+    max_dims: int = 3,
+    min_side: int = 1,
+    max_side: Union[int, None] = None,
+) -> st.SearchStrategy[Mapping[Hashable, int]]:
+    """
+    Generates an arbitrary mapping from dimension names to lengths.
+
+    Requires the hypothesis package to be installed.
+
+    Parameters
+    ----------
+    dim_names: strategy generating strings, optional
+        Strategy for generating dimension names.
+        Defaults to the `names` strategy.
+    min_dims: int, optional
+        Minimum number of dimensions in generated mapping.
+        Default is 0.
+    max_dims: int, optional
+        Maximum number of dimensions in generated mapping.
+        Default is 3.
+    min_side: int, optional
+        Minimum size of a dimension.
+        Default is 1.
+    max_side: int, optional
+        Maximum size of a dimension.
+        Default is `min_side` + 3.
+
+    See Also
+    --------
+    :ref:`testing.hypothesis`
+    """
+
+    if max_side is None:
+        max_side = min_side + 3
+
+    return st.dictionaries(
+        keys=dim_names,
+        values=st.integers(min_value=min_side, max_value=max_side),
+        min_size=min_dims,
+        max_size=max_dims,
+    )
+
+
+_readable_strings = st.text(
+    _readable_characters,
+    max_size=5,  # no min_size is given here, unlike in names()
+)
+_attr_keys = _readable_strings  # keys of the generated attrs dictionaries
+_small_arrays = npst.arrays(
+    shape=npst.array_shapes(
+        max_side=2,
+        max_dims=2,
+    ),
+    dtype=npst.scalar_dtypes(),  # not restricted to supported_dtypes()
+)
+_attr_values = st.none() | st.booleans() | _readable_strings | _small_arrays
+
+
+def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]:
+    """
+    Generates arbitrary valid attributes dictionaries for xarray objects.
+
+    The generated dictionaries can potentially be nested (i.e. contain further dictionaries as values).
+
+    Requires the hypothesis package to be installed.
+
+    See Also
+    --------
+    :ref:`testing.hypothesis`
+    """
+    return st.recursive(
+        st.dictionaries(_attr_keys, _attr_values),
+        lambda children: st.dictionaries(_attr_keys, children),
+        max_leaves=3,
+    )
+
+
+@st.composite
+def variables(
+    draw: st.DrawFn,
+    *,
+    array_strategy_fn: Union[ArrayStrategyFn, None] = None,
+    dims: Union[
+        st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]],
+        None,
+    ] = None,
+    dtype: st.SearchStrategy[np.dtype] = supported_dtypes(),
+    attrs: st.SearchStrategy[Mapping] = attrs(),
+) -> xr.Variable:
+    """
+    Generates arbitrary xarray.Variable objects.
+
+    Follows the basic signature of the xarray.Variable constructor, but allows passing alternative strategies to
+    generate either numpy-like array data or dimensions. Also allows specifying the shape or dtype of the wrapped array
+    up front.
+
+    Passing nothing will generate a completely arbitrary Variable (containing a numpy array).
+
+    Requires the hypothesis package to be installed.
+
+    Parameters
+    ----------
+    array_strategy_fn: Callable which returns a strategy generating array-likes, optional
+        Callable must only accept shape and dtype kwargs, and must generate results consistent with its input.
+        If not passed the default is to generate a small numpy array with one of the supported_dtypes.
+    dims: Strategy for generating the dimensions, optional
+        Can either be a strategy for generating a sequence of string dimension names,
+        or a strategy for generating a mapping of string dimension names to integer lengths along each dimension.
+        If provided as a mapping the array shape will be passed to array_strategy_fn.
+        Default is to generate arbitrary dimension names for each axis in data.
+    dtype: Strategy which generates np.dtype objects, optional
+        Will be passed in to array_strategy_fn.
+        Default is to generate any scalar dtype using supported_dtypes.
+        Be aware that this default set of dtypes includes some not strictly allowed by the array API standard.
+    attrs: Strategy which generates dicts, optional
+        Default is to generate a nested attributes dictionary containing arbitrary strings, booleans, Nones,
+        and numpy arrays.
+
+    Returns
+    -------
+    variable_strategy
+        Strategy for generating xarray.Variable objects.
+
+    Raises
+    ------
+    ValueError
+        If a custom array_strategy_fn returns a strategy which generates an example array inconsistent with the shape
+        & dtype input passed to it.
+
+    Examples
+    --------
+    Generate completely arbitrary Variable objects backed by a numpy array:
+
+    >>> variables().example()  # doctest: +SKIP
+    <xarray.Variable (żō: 3)>
+    array([43506,   -16,  -151], dtype=int32)
+    >>> variables().example()  # doctest: +SKIP
+    <xarray.Variable (eD: 4, ğŻżÂĕ: 2, T: 2)>
+    array([[[-10000000., -10000000.],
+            [-10000000., -10000000.]],
+           [[-10000000., -10000000.],
+            [        0., -10000000.]],
+           [[        0., -10000000.],
+            [-10000000.,        inf]],
+           [[       -0., -10000000.],
+            [-10000000.,        -0.]]], dtype=float32)
+    Attributes:
+        śřĴ:      {'ĉ': {'iĥf': array([-30117,  -1740], dtype=int16)}}
+
+    Generate only Variable objects with certain dimension names:
+
+    >>> variables(dims=st.just(["a", "b"])).example()  # doctest: +SKIP
+    <xarray.Variable (a: 5, b: 3)>
+    array([[       248, 4294967295, 4294967295],
+           [2412855555, 3514117556, 4294967295],
+           [       111, 4294967295, 4294967295],
+           [4294967295, 1084434988,      51688],
+           [     47714,        252,      11207]], dtype=uint32)
+
+    Generate only Variable objects with certain dimension names and lengths:
+
+    >>> variables(dims=st.just({"a": 2, "b": 1})).example()  # doctest: +SKIP
+    <xarray.Variable (a: 2, b: 1)>
+    array([[-1.00000000e+007+3.40282347e+038j],
+           [-2.75034266e-225+2.22507386e-311j]])
+
+    See Also
+    --------
+    :ref:`testing.hypothesis`
+    """
+
+    if not isinstance(dims, st.SearchStrategy) and dims is not None:
+        raise InvalidArgument(
+            f"dims must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(dims)}. "
+            "To specify fixed contents, use hypothesis.strategies.just()."
+        )
+    if not isinstance(dtype, st.SearchStrategy):  # None is rejected too: dtype is always drawn from below
+        raise InvalidArgument(
+            f"dtype must be provided as a hypothesis.strategies.SearchStrategy object, but got type {type(dtype)}. "
+            "To specify fixed contents, use hypothesis.strategies.just()."
+        )
+    if not isinstance(attrs, st.SearchStrategy):  # None is rejected too: attrs is always drawn from below
+        raise InvalidArgument(
+            f"attrs must be provided as a hypothesis.strategies.SearchStrategy object, but got type {type(attrs)}. "
+            "To specify fixed contents, use hypothesis.strategies.just()."
+        )
+
+    _array_strategy_fn: ArrayStrategyFn
+    if array_strategy_fn is None:
+        # For some reason if I move the default value to the function signature definition mypy incorrectly says the ignore is no longer necessary, making it impossible to satisfy mypy
+        _array_strategy_fn = npst.arrays  # type: ignore[assignment]  # npst.arrays has extra kwargs that we aren't using later
+    elif not callable(array_strategy_fn):
+        raise InvalidArgument(
+            "array_strategy_fn must be a Callable that accepts the kwargs dtype and shape and returns a hypothesis "
+            "strategy which generates corresponding array-like objects."
+        )
+    else:
+        _array_strategy_fn = (
+            array_strategy_fn  # satisfy mypy that this new variable cannot be None
+        )
+
+    _dtype = draw(dtype)
+
+    if dims is not None:
+        # generate dims first then draw data to match
+        _dims = draw(dims)
+        if isinstance(_dims, Sequence):
+            dim_names = list(_dims)
+            valid_shapes = npst.array_shapes(min_dims=len(_dims), max_dims=len(_dims))
+            _shape = draw(valid_shapes)
+            array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype)
+        elif isinstance(_dims, (Mapping, dict)):
+            # should be a mapping of form {dim_names: lengths}
+            dim_names, _shape = list(_dims.keys()), tuple(_dims.values())
+            array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype)
+        else:
+            raise InvalidArgument(
+                f"Invalid type returned by dims strategy - drew an object of type {type(_dims)}"
+            )
+    else:
+        # nothing provided, so generate everything consistently
+        # We still generate the shape first here just so that we always pass shape to array_strategy_fn
+        _shape = draw(npst.array_shapes())
+        array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype)
+        dim_names = draw(dimension_names(min_dims=len(_shape), max_dims=len(_shape)))
+
+    _data = draw(array_strategy)
+
+    if _data.shape != _shape:
+        raise ValueError(
+            "array_strategy_fn returned an array object with a different shape than it was passed. "
+            f"Passed {_shape}, but returned {_data.shape}. "
+            "Please either specify a consistent shape via the dims kwarg or ensure the array_strategy_fn callable "
+            "obeys the shape argument passed to it."
+        )
+    if _data.dtype != _dtype:
+        raise ValueError(
+            "array_strategy_fn returned an array object with a different dtype than it was passed. "
+            f"Passed {_dtype}, but returned {_data.dtype}. "
+            "Please either specify a consistent dtype via the dtype kwarg or ensure the array_strategy_fn callable "
+            "obeys the dtype argument passed to it."
+        )
+
+    return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs))
+
+
+@overload
+def unique_subset_of(
+    objs: Sequence[Hashable],
+    *,
+    min_size: int = 0,
+    max_size: Union[int, None] = None,
+) -> st.SearchStrategy[Sequence[Hashable]]:
+    ...
+
+
+@overload
+def unique_subset_of(
+    objs: Mapping[Hashable, Any],
+    *,
+    min_size: int = 0,
+    max_size: Union[int, None] = None,
+) -> st.SearchStrategy[Mapping[Hashable, Any]]:
+    ...
+
+
+@st.composite
+def unique_subset_of(
+    draw: st.DrawFn,
+    objs: Union[Sequence[Hashable], Mapping[Hashable, Any]],
+    *,
+    min_size: int = 0,
+    max_size: Union[int, None] = None,
+) -> Union[Sequence[Hashable], Mapping[Hashable, Any]]:
+    """
+    Return a strategy which generates a unique subset of the given objects.
+
+    Each entry in the output subset will be unique (if input was a sequence) or have a unique key (if it was a mapping).
+
+    Requires the hypothesis package to be installed.
+
+    Parameters
+    ----------
+    objs: Union[Sequence[Hashable], Mapping[Hashable, Any]]
+        Objects from which to sample to produce the subset.
+    min_size: int, optional
+        Minimum size of the returned subset. Default is 0.
+    max_size: int, optional
+        Maximum size of the returned subset. Default is the full length of the input.
+        If set to 0 the result will be an empty list or mapping.
+
+    Returns
+    -------
+    unique_subset_strategy
+        Strategy generating subset of the input.
+
+    Examples
+    --------
+    >>> unique_subset_of({"x": 2, "y": 3}).example()  # doctest: +SKIP
+    {'y': 3}
+    >>> unique_subset_of(["x", "y"]).example()  # doctest: +SKIP
+    ['x']
+
+    See Also
+    --------
+    :ref:`testing.hypothesis`
+    """
+    if not isinstance(objs, Iterable):
+        raise TypeError(
+            f"Object to sample from must be an Iterable or a Mapping, but received type {type(objs)}"
+        )
+
+    if len(objs) == 0:
+        raise ValueError("Can't sample from a length-zero object.")
+
+    keys = list(objs.keys()) if isinstance(objs, Mapping) else objs
+
+    subset_keys = draw(
+        st.lists(
+            st.sampled_from(keys),
+            unique=True,
+            min_size=min_size,
+            max_size=max_size,
+        )
+    )
+
+    return (
+        {k: objs[k] for k in subset_keys} if isinstance(objs, Mapping) else subset_keys
+    )
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index f7f8f823d78..ffcae0fc664 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -106,6 +106,7 @@ def _importorskip(
 requires_pandas_version_two = pytest.mark.skipif(
     not has_pandas_version_two, reason="requires pandas 2.0.0"
 )
+has_numpy_array_api, requires_numpy_array_api = _importorskip("numpy", "1.26.0")
 has_h5netcdf_ros3 = _importorskip("h5netcdf", "1.3.0")
 requires_h5netcdf_ros3 = pytest.mark.skipif(
     not has_h5netcdf_ros3[0], reason="requires h5netcdf 1.3.0"
diff --git a/xarray/tests/test_testing.py b/xarray/tests/test_assertions.py
similarity index 100%
rename from xarray/tests/test_testing.py
rename to xarray/tests/test_assertions.py
diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py
new file mode 100644
index 00000000000..44f0d56cde8
--- /dev/null
+++ b/xarray/tests/test_strategies.py
@@ -0,0 +1,271 @@
+import numpy as np
+import numpy.testing as npt
+import pytest
+
+pytest.importorskip("hypothesis")
+# isort: split
+
+import hypothesis.extra.numpy as npst
+import hypothesis.strategies as st
+from hypothesis import given
+from hypothesis.extra.array_api import make_strategies_namespace
+
+from xarray.core.variable import Variable
+from xarray.testing.strategies import (
+    attrs,
+    dimension_names,
+    dimension_sizes,
+    supported_dtypes,
+    unique_subset_of,
+    variables,
+)
+from xarray.tests import requires_numpy_array_api
+
+ALLOWED_ATTRS_VALUES_TYPES = (int, bool, str, np.ndarray)
+
+
+class TestDimensionNamesStrategy:
+    @given(dimension_names())
+    def test_types(self, dims):
+        """Every example is a list that contains only strings."""
+        assert isinstance(dims, list)
+        assert all(isinstance(name, str) for name in dims)
+
+    @given(dimension_names())
+    def test_unique(self, dims):
+        assert len(dims) == len(set(dims))
+
+    @given(st.data(), st.tuples(st.integers(0, 10), st.integers(0, 10)).map(sorted))
+    def test_number_of_dims(self, data, ndims):
+        lower, upper = ndims
+        drawn = data.draw(dimension_names(min_dims=lower, max_dims=upper))
+        assert isinstance(drawn, list)
+        assert lower <= len(drawn) <= upper
+
+
+class TestDimensionSizesStrategy:
+    @given(dimension_sizes())
+    def test_types(self, dims):
+        """Every example maps non-empty string names to non-negative int sizes."""
+        assert isinstance(dims, dict)
+        for name, size in dims.items():
+            assert isinstance(name, str)
+            assert len(name) >= 1
+            # sizes are ints and are allowed to be zero
+            assert isinstance(size, int)
+            assert size >= 0
+
+    @given(st.data(), st.tuples(st.integers(0, 10), st.integers(0, 10)).map(sorted))
+    def test_number_of_dims(self, data, ndims):
+        lower, upper = ndims
+        drawn = data.draw(dimension_sizes(min_dims=lower, max_dims=upper))
+        assert isinstance(drawn, dict)
+        assert lower <= len(drawn) <= upper
+
+    @given(st.data())
+    def test_restrict_names(self, data):
+        upper_case_names = st.text(st.characters(), min_size=1).map(str.upper)
+        drawn = data.draw(dimension_sizes(dim_names=upper_case_names))
+        assert all(name.upper() == name for name in drawn)
+
+
+def check_dict_values(dictionary: dict, allowed_attrs_values_types) -> bool:
+    """Return True if every value in a (possibly nested) dict is None or an allowed type."""
+    for value in dictionary.values():
+        if value is None or isinstance(value, allowed_attrs_values_types):
+            continue
+        # Keys are never inspected; only dicts are recursed into.
+        # Any other value is neither allowed nor a container, so the check fails.
+        if not isinstance(value, dict):
+            return False
+        # A nested dict is valid only if all of its own values are.
+        if not check_dict_values(value, allowed_attrs_values_types):
+            return False
+    return True
+
+
+class TestAttrsStrategy:
+    @given(attrs())
+    def test_type(self, attrs):
+        assert isinstance(attrs, dict)
+        assert check_dict_values(attrs, ALLOWED_ATTRS_VALUES_TYPES)  # returns a bool
+
+
+class TestVariablesStrategy:  # tests for the ``variables`` strategy
+    @given(variables())
+    def test_given_nothing(self, var):
+        assert isinstance(var, Variable)
+
+    @given(st.data())
+    def test_given_incorrect_types(self, data):
+        with pytest.raises(TypeError, match="dims must be provided as a"):
+            data.draw(variables(dims=["x", "y"]))  # type: ignore[arg-type]
+        # a bare np.dtype is rejected (the other tests wrap dtypes in ``st.just``)
+        with pytest.raises(TypeError, match="dtype must be provided as a"):
+            data.draw(variables(dtype=np.dtype("int32")))  # type: ignore[arg-type]
+        # a plain dict passed as attrs is rejected as well
+        with pytest.raises(TypeError, match="attrs must be provided as a"):
+            data.draw(variables(attrs=dict()))  # type: ignore[arg-type]
+        # array_strategy_fn has to be callable, which an array is not
+        with pytest.raises(TypeError, match="Callable"):
+            data.draw(variables(array_strategy_fn=np.array([0])))  # type: ignore[arg-type]
+
+    @given(st.data(), dimension_names())
+    def test_given_fixed_dim_names(self, data, fixed_dim_names):
+        var = data.draw(variables(dims=st.just(fixed_dim_names)))
+        # ``fixed_dim_names`` is a list, hence the conversion of ``var.dims``
+        assert list(var.dims) == fixed_dim_names
+
+    @given(st.data(), dimension_sizes())
+    def test_given_fixed_dim_sizes(self, data, dim_sizes):
+        var = data.draw(variables(dims=st.just(dim_sizes)))
+        # the mapping's order must carry through to both the dims and the shape
+        assert var.dims == tuple(dim_sizes.keys())
+        assert var.shape == tuple(dim_sizes.values())
+
+    @given(st.data(), supported_dtypes())
+    def test_given_fixed_dtype(self, data, dtype):
+        var = data.draw(variables(dtype=st.just(dtype)))
+
+        assert var.dtype == dtype
+
+    @given(st.data(), npst.arrays(shape=npst.array_shapes(), dtype=supported_dtypes()))
+    def test_given_fixed_data_dims_and_dtype(self, data, arr):
+        def fixed_array_strategy_fn(*, shape=None, dtype=None):
+            """The fact this ignores shape and dtype is only okay because compatible shape & dtype will be passed separately."""
+            return st.just(arr)
+
+        dim_names = data.draw(dimension_names(min_dims=arr.ndim, max_dims=arr.ndim))
+        dim_sizes = {name: size for name, size in zip(dim_names, arr.shape)}
+        # pin dims and dtype to ``arr``, because the function above ignores both
+        var = data.draw(
+            variables(
+                array_strategy_fn=fixed_array_strategy_fn,
+                dims=st.just(dim_sizes),
+                dtype=st.just(arr.dtype),
+            )
+        )
+
+        npt.assert_equal(var.data, arr)
+        assert var.dtype == arr.dtype
+
+    @given(st.data(), st.integers(0, 3))
+    def test_given_array_strat_arbitrary_size_and_arbitrary_data(self, data, ndims):
+        dim_names = data.draw(dimension_names(min_dims=ndims, max_dims=ndims))
+
+        def array_strategy_fn(*, shape=None, dtype=None):
+            return npst.arrays(shape=shape, dtype=dtype)
+
+        var = data.draw(
+            variables(
+                array_strategy_fn=array_strategy_fn,
+                dims=st.just(dim_names),
+                dtype=supported_dtypes(),
+            )
+        )
+
+        assert var.ndim == ndims
+
+    @given(st.data())
+    def test_catch_unruly_dtype_from_custom_array_strategy_fn(self, data):
+        def dodgy_array_strategy_fn(*, shape=None, dtype=None):
+            """Dodgy function which ignores the dtype it was passed"""
+            return npst.arrays(shape=shape, dtype=npst.floating_dtypes())
+
+        with pytest.raises(
+            ValueError, match="returned an array object with a different dtype"
+        ):
+            data.draw(
+                variables(
+                    array_strategy_fn=dodgy_array_strategy_fn,
+                    dtype=st.just(np.dtype("int32")),
+                )
+            )
+
+    @given(st.data())
+    def test_catch_unruly_shape_from_custom_array_strategy_fn(self, data):
+        def dodgy_array_strategy_fn(*, shape=None, dtype=None):
+            """Dodgy function which ignores the shape it was passed"""
+            return npst.arrays(shape=(3, 2), dtype=dtype)
+
+        with pytest.raises(
+            ValueError, match="returned an array object with a different shape"
+        ):
+            data.draw(
+                variables(
+                    array_strategy_fn=dodgy_array_strategy_fn,
+                    dims=st.just({"a": 2, "b": 1}),
+                    dtype=supported_dtypes(),
+                )
+            )
+
+    @requires_numpy_array_api
+    @given(st.data())
+    def test_make_strategies_namespace(self, data):
+        """
+        Test not causing a hypothesis.InvalidArgument by generating a dtype that's not in the array API.
+
+        We still want to generate dtypes not in the array API by default, but this checks we don't accidentally override
+        the user's choice of dtypes with non-API-compliant ones.
+        """
+        from numpy import (
+            array_api as np_array_api,  # requires numpy>=1.26.0, and we expect a UserWarning to be raised
+        )
+
+        np_array_api_st = make_strategies_namespace(np_array_api)
+        # nothing to assert: the test passes as long as this draw does not raise
+        data.draw(
+            variables(
+                array_strategy_fn=np_array_api_st.arrays,
+                dtype=np_array_api_st.scalar_dtypes(),
+            )
+        )
+
+
+class TestUniqueSubsetOf:
+    @given(st.data())
+    def test_invalid(self, data):
+        # a non-iterable input is rejected with a TypeError
+        with pytest.raises(TypeError, match="must be an Iterable or a Mapping"):
+            data.draw(unique_subset_of(0))  # type: ignore[call-overload]
+        # an empty input is rejected with a ValueError
+        with pytest.raises(ValueError, match="length-zero object"):
+            data.draw(unique_subset_of({}))
+
+    @given(st.data(), dimension_sizes(min_dims=1))
+    def test_mapping(self, data, dim_sizes):
+        """Each drawn item is a key/value pair taken unchanged from the input mapping."""
+        subset = data.draw(unique_subset_of(dim_sizes))
+        for name, size in subset.items():
+            assert name in dim_sizes
+            assert size == dim_sizes[name]
+
+    @given(st.data(), dimension_names(min_dims=1))
+    def test_iterable(self, data, dim_names):
+        """Each drawn element is a member of the input sequence."""
+        subset = data.draw(unique_subset_of(dim_names))
+        assert all(name in dim_names for name in subset)
+
+
+class TestReduction:
+    """
+    Runs the worked examples from the testing documentation page to confirm that they pass.
+    """
+
+    @given(st.data(), variables(dims=dimension_names(min_dims=1)))
+    def test_mean(self, data, var):
+        """
+        For any Variable with one or more dimensions, reducing with ``mean``
+        gives the same values as reducing the wrapped array directly.
+        """
+
+        # choose a non-empty set of dimensions to reduce over
+        dims_to_reduce = data.draw(unique_subset_of(var.dims, min_size=1))
+
+        # reference value from numpy; nanmean because generated arrays may contain NaNs
+        axes = tuple(var.get_axis_num(d) for d in dims_to_reduce)
+        expected = np.nanmean(var.data, axis=axes)
+
+        # the property under test: Variable.mean agrees with the reference
+        actual = var.mean(dim=dims_to_reduce).data
+        npt.assert_equal(expected, actual)