Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Warn on repeated dimension names during construction #8491

Merged
merged 18 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ New Features
Breaking changes
~~~~~~~~~~~~~~~~

- Explicitly warn when creating xarray objects with repeated dimension names.
Such objects will also now raise when :py:meth:`DataArray.get_axis_num` is called,
which means many functions will raise.
This latter change is technically a breaking change, but whilst allowed,
this behaviour was never actually supported! (:issue:`3731`, :pull:`8491`)
By `Tom Nicholas <https://github.com/TomNicholas>`_.

Deprecations
~~~~~~~~~~~~
Expand Down
2 changes: 2 additions & 0 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
emit_user_level_warning,
is_scalar,
)
from xarray.namedarray.core import _raise_if_any_duplicate_dimensions

try:
import cftime
Expand Down Expand Up @@ -217,6 +218,7 @@ def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, .
return self._get_axis_num(dim)

def _get_axis_num(self: Any, dim: Hashable) -> int:
_raise_if_any_duplicate_dimensions(self.dims)
try:
return self.dims.index(dim)
except ValueError:
Expand Down
9 changes: 3 additions & 6 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
is_duck_array,
maybe_coerce_to_str,
)
from xarray.namedarray.core import NamedArray
from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions

NON_NUMPY_SUPPORTED_ARRAY_TYPES = (
indexing.ExplicitlyIndexed,
Expand Down Expand Up @@ -2876,11 +2876,8 @@ def _unified_dims(variables):
all_dims = {}
for var in variables:
var_dims = var.dims
if len(set(var_dims)) < len(var_dims):
raise ValueError(
"broadcasting cannot handle duplicate "
f"dimensions: {list(var_dims)!r}"
)
_raise_if_any_duplicate_dimensions(var_dims, err_context="Broadcasting")

for d, s in zip(var_dims, var.shape):
if d not in all_dims:
all_dims[d] = s
Expand Down
20 changes: 20 additions & 0 deletions xarray/namedarray/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,15 @@ def _parse_dimensions(self, dims: _DimsLike) -> _Dims:
f"dimensions {dims} must have the same length as the "
f"number of data dimensions, ndim={self.ndim}"
)
if len(set(dims)) < len(dims):
repeated_dims = set([d for d in dims if dims.count(d) > 1])
warnings.warn(
f"Duplicate dimension names present: dimensions {repeated_dims} appear more than once in dims={dims}. "
"We do not yet support duplicate dimension names, but we do allow initial construction of the object. "
"We recommend you rename the dims immediately to become distinct, as most xarray functionality is likely to fail silently if you do not. "
"To rename the dimensions you will need to set the ``.dims`` attribute of each variable, ``e.g. var.dims=('x0', 'x1')``.",
UserWarning,
)
return dims

@property
Expand Down Expand Up @@ -651,6 +660,7 @@ def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, .
return self._get_axis_num(dim)

def _get_axis_num(self: Any, dim: Hashable) -> int:
_raise_if_any_duplicate_dimensions(self.dims)
try:
return self.dims.index(dim) # type: ignore[no-any-return]
except ValueError:
Expand Down Expand Up @@ -846,3 +856,13 @@ def _to_dense(self) -> NamedArray[Any, _DType_co]:


_NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]]


def _raise_if_any_duplicate_dimensions(
dims: _Dims, err_context: str = "This function"
) -> None:
if len(set(dims)) < len(dims):
repeated_dims = set([d for d in dims if dims.count(d) > 1])
raise ValueError(
f"{err_context} cannot handle duplicate dimensions, but dimensions {repeated_dims} appear more than once on this object's dims: {dims}"
)
2 changes: 2 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -3464,6 +3464,7 @@ class TestH5NetCDFDataRos3Driver(TestCommon):
"https://www.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc"
)

@pytest.mark.filterwarnings("ignore:Duplicate dimension names")
def test_get_variable_list(self) -> None:
with open_dataset(
self.test_remote_dataset,
Expand All @@ -3472,6 +3473,7 @@ def test_get_variable_list(self) -> None:
) as actual:
assert "Temperature" in list(actual)

@pytest.mark.filterwarnings("ignore:Duplicate dimension names")
def test_get_variable_list_empty_driver_kwds(self) -> None:
driver_kwds = {
"secret_id": b"",
Expand Down
4 changes: 4 additions & 0 deletions xarray/tests/test_namedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,3 +475,7 @@ def _new(
var_float2: Variable[Any, np.dtype[np.float32]]
var_float2 = var_float._replace(("x",), np_val2)
assert var_float2.dtype == dtype_float

def test_warn_on_repeated_dimension_names(self) -> None:
with pytest.warns(UserWarning, match="Duplicate dimension names"):
NamedArray(("x", "x"), np.arange(4).reshape(2, 2))
Loading