From cc120f7ca22b9c9a625850923f46d806fa6b64c1 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 29 Nov 2023 14:23:46 -0500 Subject: [PATCH 01/15] basic test --- xarray/tests/test_namedarray.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index fcdf063d106..d5a1292f263 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -475,3 +475,7 @@ def _new( var_float2: Variable[Any, np.dtype[np.float32]] var_float2 = var_float._replace(("x",), np_val2) assert var_float2.dtype == dtype_float + + def test_forbid_repeated_dimension_names(self) -> None: + with pytest.raises(ValueError, match="Repeated dimension names"): + NamedArray(("x", "x"), np.arange(4).reshape(2, 2)) From 27fa620f10207275e62e18a1823c474dd4692ad4 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 29 Nov 2023 14:23:58 -0500 Subject: [PATCH 02/15] implement raising error --- xarray/namedarray/core.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index d3fcffcfd9e..55bdf19453b 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -481,6 +481,11 @@ def _parse_dimensions(self, dims: _DimsLike) -> _Dims: f"dimensions {dims} must have the same length as the " f"number of data dimensions, ndim={self.ndim}" ) + if len(set(dims)) < len(dims): + repeated_dims = set([d for d in dims if dims.count(d) > 1]) + raise ValueError( + f"Repeated dimension names are forbidden, but dimensions {repeated_dims} appear more than once in dims={dims}" + ) return dims @property From 5e063264621687873c8acba5c9ea80aa9598e66d Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 29 Nov 2023 14:31:35 -0500 Subject: [PATCH 03/15] whatsnew --- doc/whats-new.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 82842430b53..c85b793e081 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,10 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- Explicitly forbid creating xarray objects with repeated dimension names. + This is technically a breaking change, but whilst allowed by the constructor, + this behaviour was never actually supported! (:issue:`3731`, :pull:`8491`) + By `Tom Nicholas `_. Deprecations ~~~~~~~~~~~~ From ba3ce65eb419fbbc14b7a2789c17633d5b585ddb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 29 Nov 2023 19:32:45 +0000 Subject: [PATCH 04/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c85b793e081..bf8636b92b7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,8 +35,8 @@ Breaking changes ~~~~~~~~~~~~~~~~ - Explicitly forbid creating xarray objects with repeated dimension names. - This is technically a breaking change, but whilst allowed by the constructor, - this behaviour was never actually supported! (:issue:`3731`, :pull:`8491`) + This is technically a breaking change, but whilst allowed by the constructor, + this behaviour was never actually supported! (:issue:`3731`, :pull:`8491`) By `Tom Nicholas `_. Deprecations From 6bea8ad87dd3be9614e7e08aa918e958a2b63d32 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 29 Nov 2023 14:56:55 -0500 Subject: [PATCH 05/15] repeated -> duplicate --- xarray/namedarray/core.py | 2 +- xarray/tests/test_namedarray.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 55bdf19453b..5568997094f 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -484,7 +484,7 @@ def _parse_dimensions(self, dims: _DimsLike) -> _Dims: if len(set(dims)) < len(dims): repeated_dims = set([d for d in dims if dims.count(d) > 1]) raise ValueError( - f"Repeated dimension names are forbidden, but dimensions {repeated_dims} appear more than once in dims={dims}" + f"Duplicate dimension names are forbidden, but dimensions {repeated_dims} appear more than once in dims={dims}" ) return dims diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index d5a1292f263..7c4a05ddb74 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -477,5 +477,5 @@ def _new( assert var_float2.dtype == dtype_float def test_forbid_repeated_dimension_names(self) -> None: - with pytest.raises(ValueError, match="Repeated dimension names"): + with pytest.raises(ValueError, match="Duplicate dimension names"): NamedArray(("x", "x"), np.arange(4).reshape(2, 2)) From 636bbb0b5939638ad6e2c24e9664cf87b0aa3bdf Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 29 Nov 2023 14:57:21 -0500 Subject: [PATCH 06/15] ensure broadcasting test passes --- xarray/tests/test_variable.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 0bea3f63673..b85bf7c5da8 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1708,10 +1708,10 @@ def test_broadcasting_math(self): def test_broadcasting_failures(self): a = Variable(["x"], np.arange(10)) b = Variable(["x"], np.arange(5)) - c = Variable(["x", "x"], np.arange(100).reshape(10, 10)) with pytest.raises(ValueError, match=r"mismatched lengths"): a + b - with pytest.raises(ValueError, match=r"duplicate dimensions"): + with pytest.raises(ValueError, match=r"Duplicate dimension"): + c = Variable(["x", "x"], np.arange(100).reshape(10, 10)) a + c def test_inplace_math(self): From aeaef53c47ca11a4c465f3f2caf9a39109fd62f9 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 30 Nov 2023 16:28:40 -0500 Subject: [PATCH 07/15] warn instead of raising in constructor --- xarray/namedarray/core.py | 7 +++++-- xarray/tests/test_namedarray.py | 4 ++-- xarray/tests/test_variable.py | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 5568997094f..995e0505b31 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -483,8 +483,11 @@ def _parse_dimensions(self, dims: _DimsLike) -> _Dims: ) if len(set(dims)) < len(dims): repeated_dims = set([d for d in dims if dims.count(d) > 1]) - raise ValueError( - f"Duplicate dimension names are forbidden, but dimensions {repeated_dims} appear more than once in dims={dims}" + warnings.warn( + f"Duplicate dimension names present: dimensions {repeated_dims} appear more than once in dims={dims}. " + "We do not yet support duplicate dimension names, but we do allow initial construction of the object. " + "We recommend you rename the dims immediately to become distinct, as most xarray functionality is likely to fail silently if you do not.", + UserWarning, ) return dims diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 7c4a05ddb74..deeb5ce753a 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -476,6 +476,6 @@ def _new( var_float2 = var_float._replace(("x",), np_val2) assert var_float2.dtype == dtype_float - def test_forbid_repeated_dimension_names(self) -> None: - with pytest.raises(ValueError, match="Duplicate dimension names"): + def test_warn_on_repeated_dimension_names(self) -> None: + with pytest.warns(UserWarning, match="Duplicate dimension names"): NamedArray(("x", "x"), np.arange(4).reshape(2, 2)) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index b85bf7c5da8..0bea3f63673 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1708,10 +1708,10 @@ def test_broadcasting_math(self): def test_broadcasting_failures(self): a = Variable(["x"], np.arange(10)) b = Variable(["x"], np.arange(5)) + c = Variable(["x", "x"], np.arange(100).reshape(10, 10)) with pytest.raises(ValueError, match=r"mismatched lengths"): a + b - with pytest.raises(ValueError, match=r"Duplicate dimension"): - c = Variable(["x", "x"], np.arange(100).reshape(10, 10)) + with pytest.raises(ValueError, match=r"duplicate dimensions"): a + c def test_inplace_math(self): From 50866506227df38d6dde9700c23564dc21132f0b Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 30 Nov 2023 16:29:24 -0500 Subject: [PATCH 08/15] raise in get_axis_num --- xarray/core/common.py | 3 +++ xarray/core/variable.py | 9 +++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index cb5b79defc0..87de7c662a9 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -21,6 +21,8 @@ emit_user_level_warning, is_scalar, ) +from xarray.namedarray.core import _raise_if_any_duplicate_dimensions + try: import cftime @@ -217,6 +219,7 @@ def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, . return self._get_axis_num(dim) def _get_axis_num(self: Any, dim: Hashable) -> int: + _raise_if_any_duplicate_dimensions(self.dims, dim) try: return self.dims.index(dim) except ValueError: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 39a947e6264..d9102dc9e0a 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -46,7 +46,7 @@ is_duck_array, maybe_coerce_to_str, ) -from xarray.namedarray.core import NamedArray +from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, @@ -2876,11 +2876,8 @@ def _unified_dims(variables): all_dims = {} for var in variables: var_dims = var.dims - if len(set(var_dims)) < len(var_dims): - raise ValueError( - "broadcasting cannot handle duplicate " - f"dimensions: {list(var_dims)!r}" - ) + _raise_if_any_duplicate_dimensions(var_dims, err_context="Broadcasting") + for d, s in zip(var_dims, var.shape): if d not in all_dims: all_dims[d] = s From 99b34605de0f21a8a64af7a6afc1617f515fea25 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 30 Nov 2023 21:33:35 +0000 Subject: [PATCH 09/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 87de7c662a9..6f47b352ba2 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -23,7 +23,6 @@ ) from xarray.namedarray.core import _raise_if_any_duplicate_dimensions - try: import cftime except ImportError: From 4dde70ea8fc36bea982d2e79c6fdc66059da7643 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 30 Nov 2023 16:37:49 -0500 Subject: [PATCH 10/15] add _raise_if_any_duplicate_dims to namedarray.core --- xarray/namedarray/core.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 995e0505b31..9a99ef551a9 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -659,6 +659,7 @@ def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, . return self._get_axis_num(dim) def _get_axis_num(self: Any, dim: Hashable) -> int: + _raise_if_any_duplicate_dimensions(self.dims) try: return self.dims.index(dim) # type: ignore[no-any-return] except ValueError: @@ -854,3 +855,11 @@ def _to_dense(self) -> NamedArray[Any, _DType_co]: _NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]] + + +def _raise_if_any_duplicate_dimensions(dims: _Dims, err_context: str = "This function"): + if len(set(dims)) < len(dims): + repeated_dims = set([d for d in dims if dims.count(d) > 1]) + raise ValueError( + f"{err_context} cannot handle duplicate dimensions, but dimensions {repeated_dims} appear more than once on this object's dims: {dims}" + ) From 04367d3c1f5c82c57894404ae806afe02a86d764 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 30 Nov 2023 16:43:52 -0500 Subject: [PATCH 11/15] whatsnew --- doc/whats-new.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bf8636b92b7..2c396f8fee1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,8 +34,10 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ -- Explicitly forbid creating xarray objects with repeated dimension names. - This is technically a breaking change, but whilst allowed by the constructor, +- Explicitly warn when creating xarray objects with repeated dimension names. + Such objects will also now raise when :py:meth:`DataArray.get_axis_num` is called, + which means many functions will raise. + This latter change is technically a breaking change, but whilst allowed, this behaviour was never actually supported! (:issue:`3731`, :pull:`8491`) By `Tom Nicholas `_. From 376253bf1894b993139de2c6abc258f304aeb9bb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 30 Nov 2023 21:45:06 +0000 Subject: [PATCH 12/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2c396f8fee1..9fc1b0ba80a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,7 +35,7 @@ Breaking changes ~~~~~~~~~~~~~~~~ - Explicitly warn when creating xarray objects with repeated dimension names. - Such objects will also now raise when :py:meth:`DataArray.get_axis_num` is called, + Such objects will also now raise when :py:meth:`DataArray.get_axis_num` is called, which means many functions will raise. This latter change is technically a breaking change, but whilst allowed, this behaviour was never actually supported! (:issue:`3731`, :pull:`8491`) From 472f97b381b98ca6edfdacb53e7370ad535a8bc4 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 30 Nov 2023 16:53:54 -0500 Subject: [PATCH 13/15] filter warning raised by example data which has duplicate dimension names --- xarray/tests/test_backends.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0704dd835c0..8c5f2f8b98a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3464,6 +3464,7 @@ class TestH5NetCDFDataRos3Driver(TestCommon): "https://www.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc" ) + @pytest.mark.filterwarnings("ignore:Duplicate dimension names") def test_get_variable_list(self) -> None: with open_dataset( self.test_remote_dataset, @@ -3472,6 +3473,7 @@ def test_get_variable_list(self) -> None: ) as actual: assert "Temperature" in list(actual) + @pytest.mark.filterwarnings("ignore:Duplicate dimension names") def test_get_variable_list_empty_driver_kwds(self) -> None: driver_kwds = { "secret_id": b"", From a0bc00829d9b305e79a53bdf1d915da7cbaf05e5 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 30 Nov 2023 16:54:14 -0500 Subject: [PATCH 14/15] linting --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2c396f8fee1..9fc1b0ba80a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,7 +35,7 @@ Breaking changes ~~~~~~~~~~~~~~~~ - Explicitly warn when creating xarray objects with repeated dimension names. - Such objects will also now raise when :py:meth:`DataArray.get_axis_num` is called, + Such objects will also now raise when :py:meth:`DataArray.get_axis_num` is called, which means many functions will raise. This latter change is technically a breaking change, but whilst allowed, this behaviour was never actually supported! (:issue:`3731`, :pull:`8491`) From 2bd10afea413202dfba1058420f831b8e3a8abe2 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 30 Nov 2023 17:36:45 -0500 Subject: [PATCH 15/15] fix mypy errors --- xarray/core/common.py | 2 +- xarray/namedarray/core.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 6f47b352ba2..cebd8f2a95b 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -218,7 +218,7 @@ def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, . return self._get_axis_num(dim) def _get_axis_num(self: Any, dim: Hashable) -> int: - _raise_if_any_duplicate_dimensions(self.dims, dim) + _raise_if_any_duplicate_dimensions(self.dims) try: return self.dims.index(dim) except ValueError: diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 9a99ef551a9..002afe96358 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -486,7 +486,8 @@ def _parse_dimensions(self, dims: _DimsLike) -> _Dims: warnings.warn( f"Duplicate dimension names present: dimensions {repeated_dims} appear more than once in dims={dims}. " "We do not yet support duplicate dimension names, but we do allow initial construction of the object. " - "We recommend you rename the dims immediately to become distinct, as most xarray functionality is likely to fail silently if you do not.", + "We recommend you rename the dims immediately to become distinct, as most xarray functionality is likely to fail silently if you do not. " + "To rename the dimensions you will need to set the ``.dims`` attribute of each variable, ``e.g. var.dims=('x0', 'x1')``.", UserWarning, ) return dims @@ -857,7 +858,9 @@ def _to_dense(self) -> NamedArray[Any, _DType_co]: _NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]] -def _raise_if_any_duplicate_dimensions(dims: _Dims, err_context: str = "This function"): +def _raise_if_any_duplicate_dimensions( + dims: _Dims, err_context: str = "This function" +) -> None: if len(set(dims)) < len(dims): repeated_dims = set([d for d in dims if dims.count(d) > 1]) raise ValueError(