diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 82842430b53..9fc1b0ba80a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,12 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- Explicitly warn when creating xarray objects with repeated dimension names. + Such objects will also now raise when :py:meth:`DataArray.get_axis_num` is called, + which means many functions will raise. + This latter change is technically a breaking change, but whilst allowed, + this behaviour was never actually supported! (:issue:`3731`, :pull:`8491`) + By `Tom Nicholas `_. Deprecations ~~~~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index cb5b79defc0..cebd8f2a95b 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -21,6 +21,7 @@ emit_user_level_warning, is_scalar, ) +from xarray.namedarray.core import _raise_if_any_duplicate_dimensions try: import cftime @@ -217,6 +218,7 @@ def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, . return self._get_axis_num(dim) def _get_axis_num(self: Any, dim: Hashable) -> int: + _raise_if_any_duplicate_dimensions(self.dims) try: return self.dims.index(dim) except ValueError: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 39a947e6264..d9102dc9e0a 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -46,7 +46,7 @@ is_duck_array, maybe_coerce_to_str, ) -from xarray.namedarray.core import NamedArray +from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, @@ -2876,11 +2876,8 @@ def _unified_dims(variables): all_dims = {} for var in variables: var_dims = var.dims - if len(set(var_dims)) < len(var_dims): - raise ValueError( - "broadcasting cannot handle duplicate " - f"dimensions: {list(var_dims)!r}" - ) + _raise_if_any_duplicate_dimensions(var_dims, err_context="Broadcasting") + for d, s in zip(var_dims, var.shape): if d not in all_dims: all_dims[d] = s diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index d3fcffcfd9e..002afe96358 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -481,6 +481,15 @@ def _parse_dimensions(self, dims: _DimsLike) -> _Dims: f"dimensions {dims} must have the same length as the " f"number of data dimensions, ndim={self.ndim}" ) + if len(set(dims)) < len(dims): + repeated_dims = set([d for d in dims if dims.count(d) > 1]) + warnings.warn( + f"Duplicate dimension names present: dimensions {repeated_dims} appear more than once in dims={dims}. " + "We do not yet support duplicate dimension names, but we do allow initial construction of the object. " + "We recommend you rename the dims immediately to become distinct, as most xarray functionality is likely to fail silently if you do not. " + "To rename the dimensions you will need to set the ``.dims`` attribute of each variable, ``e.g. var.dims=('x0', 'x1')``.", + UserWarning, + ) return dims @property @@ -651,6 +660,7 @@ def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, . return self._get_axis_num(dim) def _get_axis_num(self: Any, dim: Hashable) -> int: + _raise_if_any_duplicate_dimensions(self.dims) try: return self.dims.index(dim) # type: ignore[no-any-return] except ValueError: @@ -846,3 +856,13 @@ def _to_dense(self) -> NamedArray[Any, _DType_co]: _NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]] + + +def _raise_if_any_duplicate_dimensions( + dims: _Dims, err_context: str = "This function" +) -> None: + if len(set(dims)) < len(dims): + repeated_dims = set([d for d in dims if dims.count(d) > 1]) + raise ValueError( + f"{err_context} cannot handle duplicate dimensions, but dimensions {repeated_dims} appear more than once on this object's dims: {dims}" + ) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0704dd835c0..8c5f2f8b98a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3464,6 +3464,7 @@ class TestH5NetCDFDataRos3Driver(TestCommon): "https://www.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc" ) + @pytest.mark.filterwarnings("ignore:Duplicate dimension names") def test_get_variable_list(self) -> None: with open_dataset( self.test_remote_dataset, @@ -3472,6 +3473,7 @@ def test_get_variable_list(self) -> None: ) as actual: assert "Temperature" in list(actual) + @pytest.mark.filterwarnings("ignore:Duplicate dimension names") def test_get_variable_list_empty_driver_kwds(self) -> None: driver_kwds = { "secret_id": b"", diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index fcdf063d106..deeb5ce753a 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -475,3 +475,7 @@ def _new( var_float2: Variable[Any, np.dtype[np.float32]] var_float2 = var_float._replace(("x",), np_val2) assert var_float2.dtype == dtype_float + + def test_warn_on_repeated_dimension_names(self) -> None: + with pytest.warns(UserWarning, match="Duplicate dimension names"): + NamedArray(("x", "x"), np.arange(4).reshape(2, 2))