Skip to content

Commit

Permalink
Raise UserWarning when rename creates a new dimension coord (#6999)
Browse files Browse the repository at this point in the history
* warn when rename creates a new dimension coord

UseWarning: no index is created anymore.

* update what's new
  • Loading branch information
benbovy authored Sep 27, 2022
1 parent 87596de commit 45c0a11
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 14 deletions.
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ Bug fixes
By `András Gunyhó <https://github.com/mgunyho>`_.
- Avoid use of random numbers in `test_weighted.test_weighted_operations_nonequal_coords` (:issue:`6504`, :pull:`6961`).
By `Luke Conibear <https://github.com/lukeconibear>`_.
- Raise a ``UserWarning`` when renaming a coordinate or a dimension creates a
non-indexed dimension coordinate, and suggest the user creating an index
either with ``swap_dims`` or ``set_index`` (:issue:`6607`, :pull:`6999`). By
`Benoît Bovy <https://github.com/benbovy>`_.
- Use ``keep_attrs=True`` in grouping and resampling operations by default (:issue:`7012`).
This means :py:attr:`Dataset.attrs` and :py:attr:`DataArray.attrs` are now preserved by default.
By `Deepak Cherian <https://github.com/dcherian>`_.
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2063,11 +2063,11 @@ def rename(
if utils.is_dict_like(new_name_or_name_dict) or new_name_or_name_dict is None:
# change dims/coords
name_dict = either_dict_or_kwargs(new_name_or_name_dict, names, "rename")
dataset = self._to_temp_dataset().rename(name_dict)
dataset = self._to_temp_dataset()._rename(name_dict)
return self._from_temp_dataset(dataset)
if utils.hashable(new_name_or_name_dict) and names:
# change name + dims/coords
dataset = self._to_temp_dataset().rename(names)
dataset = self._to_temp_dataset()._rename(names)
dataarray = self._from_temp_dataset(dataset)
return dataarray._replace(name=new_name_or_name_dict)
# only change name
Expand Down
55 changes: 43 additions & 12 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3560,6 +3560,48 @@ def _rename_all(

return variables, coord_names, dims, indexes

def _rename(
self: T_Dataset,
name_dict: Mapping[Any, Hashable] | None = None,
**names: Hashable,
) -> T_Dataset:
"""Also used internally by DataArray so that the warning (if any)
is raised at the right stack level.
"""
name_dict = either_dict_or_kwargs(name_dict, names, "rename")
for k in name_dict.keys():
if k not in self and k not in self.dims:
raise ValueError(
f"cannot rename {k!r} because it is not a "
"variable or dimension in this dataset"
)

create_dim_coord = False
new_k = name_dict[k]

if k in self.dims and new_k in self._coord_names:
coord_dims = self._variables[name_dict[k]].dims
if coord_dims == (k,):
create_dim_coord = True
elif k in self._coord_names and new_k in self.dims:
coord_dims = self._variables[k].dims
if coord_dims == (new_k,):
create_dim_coord = True

if create_dim_coord:
warnings.warn(
f"rename {k!r} to {name_dict[k]!r} does not create an index "
"anymore. Try using swap_dims instead or use set_index "
"after rename to create an indexed coordinate.",
UserWarning,
stacklevel=3,
)

variables, coord_names, dims, indexes = self._rename_all(
name_dict=name_dict, dims_dict=name_dict
)
return self._replace(variables, coord_names, dims=dims, indexes=indexes)

def rename(
self: T_Dataset,
name_dict: Mapping[Any, Hashable] | None = None,
Expand Down Expand Up @@ -3588,18 +3630,7 @@ def rename(
Dataset.rename_dims
DataArray.rename
"""
name_dict = either_dict_or_kwargs(name_dict, names, "rename")
for k in name_dict.keys():
if k not in self and k not in self.dims:
raise ValueError(
f"cannot rename {k!r} because it is not a "
"variable or dimension in this dataset"
)

variables, coord_names, dims, indexes = self._rename_all(
name_dict=name_dict, dims_dict=name_dict
)
return self._replace(variables, coord_names, dims=dims, indexes=indexes)
return self._rename(name_dict=name_dict, **names)

def rename_dims(
self: T_Dataset,
Expand Down
17 changes: 17 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1742,6 +1742,23 @@ def test_rename(self) -> None:
)
assert_identical(renamed_all, expected_all)

def test_rename_dimension_coord_warnings(self) -> None:
# create a dimension coordinate by renaming a dimension or coordinate
# should raise a warning (no index created)
da = DataArray([0, 0], coords={"x": ("y", [0, 1])}, dims="y")

with pytest.warns(
UserWarning, match="rename 'x' to 'y' does not create an index.*"
):
da.rename(x="y")

da = xr.DataArray([0, 0], coords={"y": ("x", [0, 1])}, dims="x")

with pytest.warns(
UserWarning, match="rename 'x' to 'y' does not create an index.*"
):
da.rename(x="y")

def test_init_value(self) -> None:
expected = DataArray(
np.full((3, 4), 3), dims=["x", "y"], coords=[range(3), range(4)]
Expand Down
17 changes: 17 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2892,6 +2892,23 @@ def test_rename_dimension_coord(self) -> None:
actual_2 = original.rename_dims({"x": "x_new"})
assert "x" in actual_2.xindexes

def test_rename_dimension_coord_warnings(self) -> None:
# create a dimension coordinate by renaming a dimension or coordinate
# should raise a warning (no index created)
ds = Dataset(coords={"x": ("y", [0, 1])})

with pytest.warns(
UserWarning, match="rename 'x' to 'y' does not create an index.*"
):
ds.rename(x="y")

ds = Dataset(coords={"y": ("x", [0, 1])})

with pytest.warns(
UserWarning, match="rename 'x' to 'y' does not create an index.*"
):
ds.rename(x="y")

def test_rename_multiindex(self) -> None:
mindex = pd.MultiIndex.from_tuples([([1, 2]), ([3, 4])], names=["a", "b"])
original = Dataset({}, {"x": mindex})
Expand Down

0 comments on commit 45c0a11

Please sign in to comment.