diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 26c975c859e..3f81678b8d5 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -42,6 +42,10 @@ Documentation
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
 
+- Explicit indexes refactor: add an ``xarray.Index`` base class and
+  ``Dataset.xindexes`` / ``DataArray.xindexes`` properties. Also rename
+  ``PandasIndexAdapter`` to ``PandasIndex``, which now inherits from
+  ``xarray.Index`` (:pull:`5102`). By `Benoit Bovy <https://github.com/benbovy>`_.
 
 .. _whats-new.0.18.0:
@@ -268,7 +272,6 @@ Internal Changes
   (:pull:`5188`), (:pull:`5191`).
   By `Maximilian Roos <https://github.com/max-sixty>`_.
 
-
 .. _whats-new.0.17.0:
 
 v0.17.0 (24 Feb 2021)
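The entry above is easiest to see in code. A minimal sketch of the new API (illustrative, not part of the patch; assumes this PR is applied): `.indexes` keeps returning `pandas.Index` objects for backwards compatibility, while the new `.xindexes` property returns xarray `Index` objects.

```python
import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset({"a": ("x", np.arange(3))}, coords={"x": [10, 20, 30]})

# .indexes still maps coordinate names to plain pandas.Index objects
assert isinstance(ds.indexes["x"], pd.Index)

# .xindexes maps the same names to xarray Index objects (PandasIndex here);
# to_pandas_index() recovers the wrapped pandas.Index
assert ds.xindexes["x"].to_pandas_index().equals(ds.indexes["x"])
```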
diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py
index 98cbadcb25c..f6e026c0109 100644
--- a/xarray/core/alignment.py
+++ b/xarray/core/alignment.py
@@ -17,9 +17,10 @@
 import numpy as np
 import pandas as pd
 
-from . import dtypes, utils
+from . import dtypes
+from .indexes import Index, PandasIndex
 from .indexing import get_indexer_nd
-from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str
+from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str, safe_cast_to_index
 from .variable import IndexVariable, Variable
 
 if TYPE_CHECKING:
@@ -30,11 +31,11 @@
 DataAlignable = TypeVar("DataAlignable", bound=DataWithCoords)
 
 
-def _get_joiner(join):
+def _get_joiner(join, index_cls):
     if join == "outer":
-        return functools.partial(functools.reduce, pd.Index.union)
+        return functools.partial(functools.reduce, index_cls.union)
     elif join == "inner":
-        return functools.partial(functools.reduce, pd.Index.intersection)
+        return functools.partial(functools.reduce, index_cls.intersection)
     elif join == "left":
         return operator.itemgetter(0)
     elif join == "right":
@@ -63,7 +64,7 @@ def _override_indexes(objects, all_indexes, exclude):
     objects = list(objects)
     for idx, obj in enumerate(objects[1:]):
         new_indexes = {}
-        for dim in obj.indexes:
+        for dim in obj.xindexes:
             if dim not in exclude:
                 new_indexes[dim] = all_indexes[dim][0]
         objects[idx + 1] = obj._overwrite_indexes(new_indexes)
@@ -284,7 +285,7 @@ def align(
             if dim not in exclude:
                 all_coords[dim].append(obj.coords[dim])
                 try:
-                    index = obj.indexes[dim]
+                    index = obj.xindexes[dim]
                 except KeyError:
                     unlabeled_dim_sizes[dim].add(obj.sizes[dim])
                 else:
@@ -298,16 +299,19 @@ def align(
    # - It ensures it's possible to do operations that don't require alignment
    #   on indexes with duplicate values (which cannot be reindexed with
    #   pandas). This is useful, e.g., for overwriting such duplicate indexes.
-    joiner = _get_joiner(join)
     joined_indexes = {}
     for dim, matching_indexes in all_indexes.items():
         if dim in indexes:
-            index = utils.safe_cast_to_index(indexes[dim])
+            # TODO: benbovy - flexible indexes. maybe move this logic in util func
+            if isinstance(indexes[dim], Index):
+                index = indexes[dim]
+            else:
+                index = PandasIndex(safe_cast_to_index(indexes[dim]))
             if (
                 any(not index.equals(other) for other in matching_indexes)
                 or dim in unlabeled_dim_sizes
             ):
-                joined_indexes[dim] = indexes[dim]
+                joined_indexes[dim] = index
         else:
             if (
                 any(
@@ -318,6 +322,7 @@ def align(
             ):
                 if join == "exact":
                     raise ValueError(f"indexes along dimension {dim!r} are not equal")
+                joiner = _get_joiner(join, type(matching_indexes[0]))
                 index = joiner(matching_indexes)
                 # make sure str coords are not cast to object
                 index = maybe_coerce_to_str(index, all_coords[dim])
@@ -327,6 +332,9 @@ def align(
 
         if dim in unlabeled_dim_sizes:
             unlabeled_sizes = unlabeled_dim_sizes[dim]
+            # TODO: benbovy - flexible indexes: expose a size property for xarray.Index?
+            # Some indexes may not have a defined size (e.g., built from multiple coords of
+            # different sizes)
             labeled_size = index.size
             if len(unlabeled_sizes | {labeled_size}) > 1:
                 raise ValueError(
@@ -469,7 +477,7 @@ def reindex_like_indexers(
     ValueError
         If any dimensions without labels have different sizes.
     """
-    indexers = {k: v for k, v in other.indexes.items() if k in target.dims}
+    indexers = {k: v for k, v in other.xindexes.items() if k in target.dims}
 
     for dim in other.dims:
         if dim not in indexers and dim in target.dims:
@@ -487,14 +495,14 @@
 def reindex_variables(
     variables: Mapping[Any, Variable],
     sizes: Mapping[Any, int],
-    indexes: Mapping[Any, pd.Index],
+    indexes: Mapping[Any, Index],
     indexers: Mapping,
     method: Optional[str] = None,
     tolerance: Any = None,
     copy: bool = True,
     fill_value: Optional[Any] = dtypes.NA,
     sparse: bool = False,
-) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]:
+) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]:
     """Conform a dictionary of aligned variables onto a new set of
     variables, filling in missing values with NaN.
@@ -559,10 +567,11 @@ def reindex_variables(
                 "from that to be indexed along {:s}".format(str(indexer.dims), dim)
             )
 
-        target = new_indexes[dim] = utils.safe_cast_to_index(indexers[dim])
+        target = new_indexes[dim] = PandasIndex(safe_cast_to_index(indexers[dim]))
 
         if dim in indexes:
-            index = indexes[dim]
+            # TODO (benbovy - flexible indexes): support other indexes than pd.Index?
+            index = indexes[dim].to_pandas_index()
 
             if not index.is_unique:
                 raise ValueError(
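Hedged illustration of the `_get_joiner` change above (not in the patch; assumes the module layout this PR introduces): with `join="outer"`, alignment now reduces the matching indexes with the index class's own `union`, so for the default wrappers it is `PandasIndex.union` rather than `pd.Index.union` that combines labels.

```python
import functools
import pandas as pd

from xarray.core.indexes import PandasIndex

a = PandasIndex(pd.Index([0, 1, 2]))
b = PandasIndex(pd.Index([1, 2, 3]))

# what _get_joiner("outer", PandasIndex) effectively builds
joiner = functools.partial(functools.reduce, PandasIndex.union)
joined = joiner([a, b])

assert isinstance(joined, PandasIndex)
assert list(joined.to_pandas_index()) == [0, 1, 2, 3]
```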
diff --git a/xarray/core/combine.py b/xarray/core/combine.py
index e907fc32c07..105e0a5a66c 100644
--- a/xarray/core/combine.py
+++ b/xarray/core/combine.py
@@ -69,13 +69,17 @@ def _infer_concat_order_from_coords(datasets):
 
             if dim in ds0:
                 # Need to read coordinate values to do ordering
-                indexes = [ds.indexes.get(dim) for ds in datasets]
+                indexes = [ds.xindexes.get(dim) for ds in datasets]
                 if any(index is None for index in indexes):
                     raise ValueError(
                         "Every dimension needs a coordinate for "
                         "inferring concatenation order"
                     )
 
+                # TODO (benbovy, flexible indexes): all indexes should be pandas.Index
+                # get pd.Index objects from Index objects
+                indexes = [index.array for index in indexes]
+
                 # If dimension coordinate values are same on every dataset then
                 # should be leaving this dimension alone (it's just a "bystander")
                 if not all(index.equals(indexes[0]) for index in indexes[1:]):
@@ -801,9 +805,13 @@ def combine_by_coords(
         )
 
         # Check the overall coordinates are monotonically increasing
+        # TODO (benbovy - flexible indexes): only with pandas.Index?
         for dim in concat_dims:
-            indexes = concatenated.indexes.get(dim)
-            if not (indexes.is_monotonic_increasing or indexes.is_monotonic_decreasing):
+            indexes = concatenated.xindexes.get(dim)
+            if not (
+                indexes.array.is_monotonic_increasing
+                or indexes.array.is_monotonic_decreasing
+            ):
                 raise ValueError(
                     "Resulting object does not have monotonic"
                     " global indexes along dimension {}".format(dim)
diff --git a/xarray/core/common.py b/xarray/core/common.py
index c9386c4e15f..e4a5264d8e6 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -406,7 +406,7 @@ def get_index(self, key: Hashable) -> pd.Index:
             raise KeyError(key)
 
         try:
-            return self.indexes[key]
+            return self.xindexes[key].to_pandas_index()
         except KeyError:
             return pd.Index(range(self.sizes[key]), name=key)
@@ -1162,7 +1162,8 @@ def resample(
                 category=FutureWarning,
             )
 
-        if isinstance(self.indexes[dim_name], CFTimeIndex):
+        # TODO (benbovy - flexible indexes): update when CFTimeIndex is an xarray Index subclass
+        if isinstance(self.xindexes[dim_name].to_pandas_index(), CFTimeIndex):
             from .resample_cftime import CFTimeGrouper
 
             grouper = CFTimeGrouper(freq, closed, label, base, loffset)
diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py
index 16eecef6efe..50be8a7f677 100644
--- a/xarray/core/coordinates.py
+++ b/xarray/core/coordinates.py
@@ -17,7 +17,7 @@
 import pandas as pd
 
 from . import formatting, indexing
-from .indexes import Indexes
+from .indexes import Index, Indexes
 from .merge import merge_coordinates_without_align, merge_coords
 from .utils import Frozen, ReprObject, either_dict_or_kwargs
 from .variable import Variable
@@ -52,6 +52,10 @@ def dims(self) -> Union[Mapping[Hashable, int], Tuple[Hashable, ...]]:
     def indexes(self) -> Indexes:
         return self._data.indexes  # type: ignore[attr-defined]
 
+    @property
+    def xindexes(self) -> Indexes:
+        return self._data.xindexes  # type: ignore[attr-defined]
+
     @property
     def variables(self):
         raise NotImplementedError()
@@ -157,7 +161,7 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index:
     def update(self, other: Mapping[Hashable, Any]) -> None:
         other_vars = getattr(other, "variables", other)
         coords, indexes = merge_coords(
-            [self.variables, other_vars], priority_arg=1, indexes=self.indexes
+            [self.variables, other_vars], priority_arg=1, indexes=self.xindexes
         )
         self._update_coords(coords, indexes)
@@ -165,7 +169,7 @@ def _merge_raw(self, other, reflexive):
         """For use with binary arithmetic."""
         if other is None:
             variables = dict(self.variables)
-            indexes = dict(self.indexes)
+            indexes = dict(self.xindexes)
         else:
             coord_list = [self, other] if not reflexive else [other, self]
             variables, indexes = merge_coordinates_without_align(coord_list)
@@ -180,7 +184,9 @@ def _merge_inplace(self, other):
         # don't include indexes in prioritized, because we didn't align
         # first and we want indexes to be checked
         prioritized = {
-            k: (v, None) for k, v in self.variables.items() if k not in self.indexes
+            k: (v, None)
+            for k, v in self.variables.items()
+            if k not in self.xindexes
         }
         variables, indexes = merge_coordinates_without_align(
             [self, other], prioritized
         )
@@ -265,7 +271,7 @@ def to_dataset(self) -> "Dataset":
         return self._data._copy_listed(names)
 
     def _update_coords(
-        self, coords: Dict[Hashable, Variable], indexes: Mapping[Hashable, pd.Index]
+        self, coords: Dict[Hashable, Variable], indexes: Mapping[Hashable, Index]
     ) -> None:
         from .dataset import calculate_dimensions
@@ -285,7 +291,7 @@ def _update_coords(
 
         # TODO(shoyer): once ._indexes is always populated by a dict, modify
         # it to update inplace instead.
-        original_indexes = dict(self._data.indexes)
+        original_indexes = dict(self._data.xindexes)
         original_indexes.update(indexes)
         self._data._indexes = original_indexes
@@ -328,7 +334,7 @@ def __getitem__(self, key: Hashable) -> "DataArray":
         return self._data._getitem_coord(key)
 
     def _update_coords(
-        self, coords: Dict[Hashable, Variable], indexes: Mapping[Hashable, pd.Index]
+        self, coords: Dict[Hashable, Variable], indexes: Mapping[Hashable, Index]
     ) -> None:
         from .dataset import calculate_dimensions
@@ -343,7 +349,7 @@ def _update_coords(
 
         # TODO(shoyer): once ._indexes is always populated by a dict, modify
         # it to update inplace instead.
-        original_indexes = dict(self._data.indexes)
+        original_indexes = dict(self._data.xindexes)
         original_indexes.update(indexes)
         self._data._indexes = original_indexes
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 740493b863c..21daed1cec1 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -51,7 +51,7 @@
 )
 from .dataset import Dataset, split_indexes
 from .formatting import format_item
-from .indexes import Indexes, default_indexes, propagate_indexes
+from .indexes import Index, Indexes, PandasIndex, default_indexes, propagate_indexes
 from .indexing import is_fancy_indexer
 from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords
 from .options import OPTIONS, _get_keep_attrs
@@ -345,7 +345,7 @@ class DataArray(AbstractArray, DataWithCoords, DataArrayArithmetic):
     _cache: Dict[str, Any]
     _coords: Dict[Any, Variable]
     _close: Optional[Callable[[], None]]
-    _indexes: Optional[Dict[Hashable, pd.Index]]
+    _indexes: Optional[Dict[Hashable, Index]]
     _name: Optional[Hashable]
     _variable: Variable
@@ -478,7 +478,9 @@ def _overwrite_indexes(self, indexes: Mapping[Hashable, Any]) -> "DataArray":
         # switch from dimension to level names, if necessary
         dim_names: Dict[Any, str] = {}
         for dim, idx in indexes.items():
-            if not isinstance(idx, pd.MultiIndex) and idx.name != dim:
+            # TODO: benbovy - flexible indexes: update when MultiIndex has its own class
+            pd_idx = idx.array
+            if not isinstance(pd_idx, pd.MultiIndex) and pd_idx.name != dim:
                 dim_names[dim] = idx.name
         if dim_names:
             obj = obj.rename(dim_names)
@@ -772,7 +774,21 @@ def encoding(self, value: Mapping[Hashable, Any]) -> None:
 
     @property
     def indexes(self) -> Indexes:
-        """Mapping of pandas.Index objects used for label based indexing"""
+        """Mapping of pandas.Index objects used for label based indexing.
+
+        Raises an error if this DataArray has indexes that cannot be coerced
+        to pandas.Index objects.
+
+        See Also
+        --------
+        DataArray.xindexes
+
+        """
+        return Indexes({k: idx.to_pandas_index() for k, idx in self.xindexes.items()})
+
+    @property
+    def xindexes(self) -> Indexes:
+        """Mapping of xarray Index objects used for label based indexing."""
         if self._indexes is None:
             self._indexes = default_indexes(self._coords, self.dims)
         return Indexes(self._indexes)
@@ -990,7 +1006,12 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray":
         if self._indexes is None:
             indexes = self._indexes
         else:
-            indexes = {k: v.copy(deep=deep) for k, v in self._indexes.items()}
+            # TODO: benbovy: flexible indexes: support all xarray indexes (not just pandas.Index)
+            # xarray Index needs a copy method.
+            indexes = {
+                k: PandasIndex(v.to_pandas_index().copy(deep=deep))
+                for k, v in self._indexes.items()
+            }
         return self._replace(variable, coords, indexes=indexes)
 
     def __copy__(self) -> "DataArray":
@@ -2169,7 +2190,9 @@ def to_unstacked_dataset(self, dim, level=0):
         Dataset.to_stacked_array
         """
 
-        idx = self.indexes[dim]
+        # TODO: benbovy - flexible indexes: update when MultiIndex has its own
+        # class inheriting from xarray.Index
+        idx = self.xindexes[dim].to_pandas_index()
         if not isinstance(idx, pd.MultiIndex):
             raise ValueError(f"'{dim}' is not a stacked coordinate")
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 6b9f297dee1..706ccbde8c4 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -61,7 +61,9 @@
 )
 from .duck_array_ops import datetime_to_numeric
 from .indexes import (
+    Index,
     Indexes,
+    PandasIndex,
     default_indexes,
     isel_variable_and_index,
     propagate_indexes,
@@ -692,7 +694,7 @@ class Dataset(DataWithCoords, DatasetArithmetic, Mapping):
     _dims: Dict[Hashable, int]
     _encoding: Optional[Dict[Hashable, Any]]
     _close: Optional[Callable[[], None]]
-    _indexes: Optional[Dict[Hashable, pd.Index]]
+    _indexes: Optional[Dict[Hashable, Index]]
     _variables: Dict[Hashable, Variable]
 
     __slots__ = (
@@ -1087,7 +1089,7 @@ def _replace(
         coord_names: Set[Hashable] = None,
         dims: Dict[Any, int] = None,
         attrs: Union[Dict[Hashable, Any], None, Default] = _default,
-        indexes: Union[Dict[Any, pd.Index], None, Default] = _default,
+        indexes: Union[Dict[Any, Index], None, Default] = _default,
         encoding: Union[dict, None, Default] = _default,
         inplace: bool = False,
     ) -> "Dataset":
@@ -1136,7 +1138,7 @@ def _replace_with_new_dims(
         variables: Dict[Hashable, Variable],
         coord_names: set = None,
         attrs: Union[Dict[Hashable, Any], None, Default] = _default,
-        indexes: Union[Dict[Hashable, pd.Index], None, Default] = _default,
+        indexes: Union[Dict[Hashable, Index], None, Default] = _default,
         inplace: bool = False,
     ) -> "Dataset":
         """Replace variables with recalculated dimensions."""
@@ -1164,12 +1166,12 @@ def _replace_vars_and_dims(
             variables, coord_names, dims, attrs, indexes=None, inplace=inplace
         )
 
-    def _overwrite_indexes(self, indexes: Mapping[Any, pd.Index]) -> "Dataset":
+    def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset":
         if not indexes:
             return self
 
         variables = self._variables.copy()
-        new_indexes = dict(self.indexes)
+        new_indexes = dict(self.xindexes)
         for name, idx in indexes.items():
             variables[name] = IndexVariable(name, idx)
             new_indexes[name] = idx
@@ -1178,8 +1180,9 @@ def _overwrite_indexes(self, indexes: Mapping[Any, pd.Index]) -> "Dataset":
         # switch from dimension to level names, if necessary
         dim_names: Dict[Hashable, str] = {}
         for dim, idx in indexes.items():
-            if not isinstance(idx, pd.MultiIndex) and idx.name != dim:
-                dim_names[dim] = idx.name
+            pd_idx = idx.to_pandas_index()
+            if not isinstance(pd_idx, pd.MultiIndex) and pd_idx.name != dim:
+                dim_names[dim] = pd_idx.name
         if dim_names:
             obj = obj.rename(dim_names)
         return obj
@@ -1315,9 +1318,11 @@ def _level_coords(self) -> Dict[str, Hashable]:
             coordinate name.
         """
         level_coords: Dict[str, Hashable] = {}
-        for name, index in self.indexes.items():
-            if isinstance(index, pd.MultiIndex):
-                level_names = index.names
+        for name, index in self.xindexes.items():
+            # TODO: benbovy - flexible indexes: update when MultiIndex has its own xarray class.
+            pd_index = index.to_pandas_index()
+            if isinstance(pd_index, pd.MultiIndex):
+                level_names = pd_index.names
                 (dim,) = self.variables[name].dims
                 level_coords.update({lname: dim for lname in level_names})
         return level_coords
@@ -1328,7 +1333,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset":
         """
         variables: Dict[Hashable, Variable] = {}
         coord_names = set()
-        indexes: Dict[Hashable, pd.Index] = {}
+        indexes: Dict[Hashable, Index] = {}
 
         for name in names:
             try:
@@ -1341,7 +1346,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset":
                 if ref_name in self._coord_names or ref_name in self.dims:
                     coord_names.add(var_name)
                     if (var_name,) == var.dims:
-                        indexes[var_name] = var.to_index()
+                        indexes[var_name] = var._to_xindex()
 
         needed_dims: Set[Hashable] = set()
         for v in variables.values():
@@ -1357,8 +1362,8 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset":
             if set(self.variables[k].dims) <= needed_dims:
                 variables[k] = self._variables[k]
                 coord_names.add(k)
-                if k in self.indexes:
-                    indexes[k] = self.indexes[k]
+                if k in self.xindexes:
+                    indexes[k] = self.xindexes[k]
 
         return self._replace(variables, coord_names, dims, indexes=indexes)
@@ -1527,7 +1532,7 @@ def __delitem__(self, key: Hashable) -> None:
         """Remove a variable from this dataset."""
         del self._variables[key]
         self._coord_names.discard(key)
-        if key in self.indexes:
+        if key in self.xindexes:
             assert self._indexes is not None
             del self._indexes[key]
         self._dims = calculate_dimensions(self._variables)
@@ -1604,7 +1609,21 @@ def identical(self, other: "Dataset") -> bool:
 
     @property
     def indexes(self) -> Indexes:
-        """Mapping of pandas.Index objects used for label based indexing"""
+        """Mapping of pandas.Index objects used for label based indexing.
+
+        Raises an error if this Dataset has indexes that cannot be coerced
+        to pandas.Index objects.
+
+        See Also
+        --------
+        Dataset.xindexes
+
+        """
+        return Indexes({k: idx.to_pandas_index() for k, idx in self.xindexes.items()})
+
+    @property
+    def xindexes(self) -> Indexes:
+        """Mapping of xarray Index objects used for label based indexing."""
         if self._indexes is None:
             self._indexes = default_indexes(self._variables, self._dims)
         return Indexes(self._indexes)
@@ -2069,7 +2088,9 @@ def _validate_indexers(
                 v = np.asarray(v)
 
                 if v.dtype.kind in "US":
-                    index = self.indexes[k]
+                    # TODO: benbovy - flexible indexes
+                    # update when CFTimeIndex has its own xarray index class
+                    index = self.xindexes[k].to_pandas_index()
                     if isinstance(index, pd.DatetimeIndex):
                         v = v.astype("datetime64[ns]")
                     elif isinstance(index, xr.CFTimeIndex):
@@ -2218,7 +2239,7 @@ def isel(
                 continue
             if indexes and var_name in indexes:
                 if var_value.ndim == 1:
-                    indexes[var_name] = var_value.to_index()
+                    indexes[var_name] = var_value._to_xindex()
                 else:
                     del indexes[var_name]
             variables[var_name] = var_value
@@ -2246,16 +2267,16 @@ def _isel_fancy(
         indexers_list = list(self._validate_indexers(indexers, missing_dims))
 
         variables: Dict[Hashable, Variable] = {}
-        indexes: Dict[Hashable, pd.Index] = {}
+        indexes: Dict[Hashable, Index] = {}
 
         for name, var in self.variables.items():
             var_indexers = {k: v for k, v in indexers_list if k in var.dims}
             if drop and name in var_indexers:
                 continue  # drop this variable
 
-            if name in self.indexes:
+            if name in self.xindexes:
                 new_var, new_index = isel_variable_and_index(
-                    name, var, self.indexes[name], var_indexers
+                    name, var, self.xindexes[name], var_indexers
                 )
                 if new_index is not None:
                     indexes[name] = new_index
@@ -2814,7 +2835,7 @@ def _reindex(
         variables, indexes = alignment.reindex_variables(
             self.variables,
             self.sizes,
-            self.indexes,
+            self.xindexes,
             indexers,
             method,
             tolerance,
@@ -3030,7 +3051,7 @@ def _validate_interp_indexer(x, new_x):
                 variables[name] = var
 
         coord_names = obj._coord_names & variables.keys()
-        indexes = {k: v for k, v in obj.indexes.items() if k not in indexers}
+        indexes = {k: v for k, v in obj.xindexes.items() if k not in indexers}
         selected = self._replace_with_new_dims(
             variables.copy(), coord_names, indexes=indexes
         )
@@ -3040,7 +3061,7 @@ def _validate_interp_indexer(x, new_x):
         for k, v in indexers.items():
             assert isinstance(v, Variable)
             if v.dims == (k,):
-                indexes[k] = v.to_index()
+                indexes[k] = v._to_xindex()
 
         # Extract coordinates from indexers
         coord_vars, new_indexes = selected._get_indexers_coords_and_indexes(coords)
@@ -3136,16 +3157,18 @@ def _rename_indexes(self, name_dict, dims_set):
         if self._indexes is None:
             return None
         indexes = {}
-        for k, v in self.indexes.items():
+        for k, v in self.xindexes.items():
+            # TODO: benbovy - flexible indexes: make it compatible with any xarray Index
+            index = v.to_pandas_index()
             new_name = name_dict.get(k, k)
             if new_name not in dims_set:
                 continue
-            if isinstance(v, pd.MultiIndex):
-                new_names = [name_dict.get(k, k) for k in v.names]
-                index = v.rename(names=new_names)
+            if isinstance(index, pd.MultiIndex):
+                new_names = [name_dict.get(k, k) for k in index.names]
+                new_index = index.rename(names=new_names)
             else:
-                index = v.rename(new_name)
-            indexes[new_name] = index
+                new_index = index.rename(new_name)
+            indexes[new_name] = PandasIndex(new_index)
         return indexes
@@ -3362,19 +3385,19 @@ def swap_dims(
         coord_names.update({dim for dim in dims_dict.values() if dim in self.variables})
 
         variables: Dict[Hashable, Variable] = {}
-        indexes: Dict[Hashable, pd.Index] = {}
+        indexes: Dict[Hashable, Index] = {}
         for k, v in self.variables.items():
             dims = tuple(dims_dict.get(dim, dim) for dim in v.dims)
             if k in result_dims:
                 var = v.to_index_variable()
-                if k in self.indexes:
-                    indexes[k] = self.indexes[k]
+                if k in self.xindexes:
+                    indexes[k] = self.xindexes[k]
                 else:
                     new_index = var.to_index()
                     if new_index.nlevels == 1:
                         # make sure index name matches dimension name
                         new_index = new_index.rename(k)
-                    indexes[k] = new_index
+                    indexes[k] = PandasIndex(new_index)
             else:
                 var = v.to_base_variable()
             var.dims = dims
@@ -3637,15 +3660,17 @@ def reorder_levels(
         """
         dim_order = either_dict_or_kwargs(dim_order, dim_order_kwargs, "reorder_levels")
         variables = self._variables.copy()
-        indexes = dict(self.indexes)
+        indexes = dict(self.xindexes)
         for dim, order in dim_order.items():
             coord = self._variables[dim]
-            index = self.indexes[dim]
+            # TODO: benbovy - flexible indexes: update when MultiIndex
+            # has its own class inherited from xarray.Index
+            index = self.xindexes[dim].to_pandas_index()
             if not isinstance(index, pd.MultiIndex):
                 raise ValueError(f"coordinate {dim} has no MultiIndex")
             new_index = index.reorder_levels(order)
             variables[dim] = IndexVariable(coord.dims, new_index)
-            indexes[dim] = new_index
+            indexes[dim] = PandasIndex(new_index)
 
         return self._replace(variables, indexes=indexes)
@@ -3672,8 +3697,8 @@ def _stack_once(self, dims, new_dim):
 
         coord_names = set(self._coord_names) - set(dims) | {new_dim}
 
-        indexes = {k: v for k, v in self.indexes.items() if k not in dims}
-        indexes[new_dim] = idx
+        indexes = {k: v for k, v in self.xindexes.items() if k not in dims}
+        indexes[new_dim] = PandasIndex(idx)
 
         return self._replace_with_new_dims(
             variables, coord_names=coord_names, indexes=indexes
@@ -3825,7 +3850,9 @@ def ensure_stackable(val):
         # coerce the levels of the MultiIndex to have the same type as the
         # input dimensions. This code is messy, so it might be better to just
         # input a dummy value for the singleton dimension.
-        idx = data_array.indexes[new_dim]
+        # TODO: benbovy - flexible indexes: update when MultiIndex has its own
+        # class inheriting from xarray.Index
+        idx = data_array.xindexes[new_dim].to_pandas_index()
         levels = [idx.levels[0]] + [
             level.astype(self[level.name].dtype) for level in idx.levels[1:]
         ]
@@ -3842,7 +3869,7 @@ def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset":
         index = remove_unused_levels_categories(index)
 
         variables: Dict[Hashable, Variable] = {}
-        indexes = {k: v for k, v in self.indexes.items() if k != dim}
+        indexes = {k: v for k, v in self.xindexes.items() if k != dim}
 
         for name, var in self.variables.items():
             if name != dim:
@@ -3860,7 +3887,7 @@ def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset":
 
         for name, lev in zip(index.names, index.levels):
             variables[name] = IndexVariable(name, lev)
-            indexes[name] = lev
+            indexes[name] = PandasIndex(lev)
 
         coord_names = set(self._coord_names) - {dim} | set(index.names)
@@ -3887,7 +3914,7 @@ def _unstack_full_reindex(
         new_dim_sizes = [lev.size for lev in index.levels]
 
         variables: Dict[Hashable, Variable] = {}
-        indexes = {k: v for k, v in self.indexes.items() if k != dim}
+        indexes = {k: v for k, v in self.xindexes.items() if k != dim}
 
         for name, var in obj.variables.items():
             if name != dim:
@@ -3899,7 +3926,7 @@ def _unstack_full_reindex(
 
         for name, lev in zip(new_dim_names, index.levels):
             variables[name] = IndexVariable(name, lev)
-            indexes[name] = lev
+            indexes[name] = PandasIndex(lev)
 
         coord_names = set(self._coord_names) - {dim} | set(new_dim_names)
@@ -4161,7 +4188,7 @@ def drop_vars(
 
         variables = {k: v for k, v in self._variables.items() if k not in names}
         coord_names = {k for k in self._coord_names if k in variables}
-        indexes = {k: v for k, v in self.indexes.items() if k not in names}
+        indexes = {k: v for k, v in self.xindexes.items() if k not in names}
         return self._replace_with_new_dims(
             variables, coord_names=coord_names, indexes=indexes
         )
@@ -4871,7 +4898,7 @@ def reduce(
                     )
 
         coord_names = {k for k in self.coords if k in variables}
-        indexes = {k: v for k, v in self.indexes.items() if k in variables}
+        indexes = {k: v for k, v in self.xindexes.items() if k in variables}
         attrs = self.attrs if keep_attrs else None
         return self._replace_with_new_dims(
             variables, coord_names=coord_names, attrs=attrs, indexes=indexes
        )
@@ -5660,9 +5687,12 @@ def diff(self, dim, n=1, label="upper"):
             else:
                 variables[name] = var
 
-        indexes = dict(self.indexes)
+        indexes = dict(self.xindexes)
         if dim in indexes:
-            indexes[dim] = indexes[dim][kwargs_new[dim]]
+            # TODO: benbovy - flexible indexes: check slicing of xarray indexes?
+            # or only allow this for pandas indexes?
+            index = indexes[dim].to_pandas_index()
+            indexes[dim] = PandasIndex(index[kwargs_new[dim]])
 
         difference = self._replace_with_new_dims(variables, indexes=indexes)
@@ -5799,14 +5829,14 @@ def roll(self, shifts=None, roll_coords=None, **shifts_kwargs):
 
         if roll_coords:
             indexes = {}
-            for k, v in self.indexes.items():
+            for k, v in self.xindexes.items():
                 (dim,) = self.variables[k].dims
                 if dim in shifts:
                     indexes[k] = roll_index(v, shifts[dim])
                 else:
                     indexes[k] = v
         else:
-            indexes = dict(self.indexes)
+            indexes = dict(self.xindexes)
 
         return self._replace(variables, indexes=indexes)
@@ -5999,7 +6029,7 @@ def quantile(
 
         # construct the new dataset
         coord_names = {k for k in self.coords if k in variables}
-        indexes = {k: v for k, v in self.indexes.items() if k in variables}
+        indexes = {k: v for k, v in self.xindexes.items() if k in variables}
         if keep_attrs is None:
             keep_attrs = _get_keep_attrs(default=False)
         attrs = self.attrs if keep_attrs else None
@@ -6223,7 +6253,7 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False):
                 variables[k] = Variable(v_dims, integ)
             else:
                 variables[k] = v
-        indexes = {k: v for k, v in self.indexes.items() if k in variables}
+        indexes = {k: v for k, v in self.xindexes.items() if k in variables}
         return self._replace_with_new_dims(
             variables, coord_names=coord_names, indexes=indexes
         )
+ + """ + raise TypeError(f"{type(self)} cannot be cast to a pandas.Index object.") + + def equals(self, other): # pragma: no cover + raise NotImplementedError() + + def union(self, other): # pragma: no cover + raise NotImplementedError() + + def intersection(self, other): # pragma: no cover + raise NotImplementedError() + + +class PandasIndex(Index, ExplicitlyIndexedNDArrayMixin): + """Wrap a pandas.Index to preserve dtypes and handle explicit indexing.""" + + __slots__ = ("array", "_dtype") + + def __init__( + self, array: Any, dtype: DTypeLike = None, coord_name: Optional[Hashable] = None + ): + if coord_name is None: + coord_name = tuple() + super().__init__(coord_name) + + self.array = utils.safe_cast_to_index(array) + + if dtype is None: + if isinstance(array, pd.PeriodIndex): + dtype_ = np.dtype("O") + elif hasattr(array, "categories"): + # category isn't a real numpy dtype + dtype_ = array.categories.dtype + elif not utils.is_valid_numpy_dtype(array.dtype): + dtype_ = np.dtype("O") + else: + dtype_ = array.dtype + else: + dtype_ = np.dtype(dtype) + self._dtype = dtype_ + + @classmethod + def from_variables(cls, variables: Dict[Hashable, "Variable"], **kwargs): + if len(variables) > 1: + raise ValueError("Cannot set a pandas.Index from more than one variable") + + varname, var = list(variables.items())[0] + return cls(var.data, dtype=var.dtype, coord_name=varname) + + def to_pandas_index(self) -> pd.Index: + return self.array + + @property + def dtype(self) -> np.dtype: + return self._dtype + + def __array__(self, dtype: DTypeLike = None) -> np.ndarray: + if dtype is None: + dtype = self.dtype + array = self.array + if isinstance(array, pd.PeriodIndex): + with suppress(AttributeError): + # this might not be public API + array = array.astype("object") + return np.asarray(array.values, dtype=dtype) + + @property + def shape(self) -> Tuple[int]: + return (len(self.array),) + + def equals(self, other): + if isinstance(other, pd.Index): + other = PandasIndex(other) + return isinstance(other, PandasIndex) and self.array.equals(other.array) + + def union(self, other): + if isinstance(other, pd.Index): + other = PandasIndex(other) + return PandasIndex(self.array.union(other.array)) + + def intersection(self, other): + if isinstance(other, pd.Index): + other = PandasIndex(other) + return PandasIndex(self.array.intersection(other.array)) + + def __getitem__( + self, indexer + ) -> Union[ + "PandasIndex", + NumpyIndexingAdapter, + np.ndarray, + np.datetime64, + np.timedelta64, + ]: + key = indexer.tuple + if isinstance(key, tuple) and len(key) == 1: + # unpack key so it can index a pandas.Index object (pandas.Index + # objects don't like tuples) + (key,) = key + + if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional + return NumpyIndexingAdapter(self.array.values)[indexer] + + result = self.array[key] + + if isinstance(result, pd.Index): + result = PandasIndex(result, dtype=self.dtype) + else: + # result is a scalar + if result is pd.NaT: + # work around the impossibility of casting NaT with asarray + # note: it probably would be better in general to return + # pd.Timestamp rather np.than datetime64 but this is easier + # (for now) + result = np.datetime64("NaT", "ns") + elif isinstance(result, timedelta): + result = np.timedelta64(getattr(result, "value", result), "ns") + elif isinstance(result, pd.Timestamp): + # Work around for GH: pydata/xarray#1932 and numpy/numpy#10668 + # numpy fails to convert pd.Timestamp to np.datetime64[ns] + result = 
+                result = np.asarray(result.to_datetime64())
+            elif self.dtype != object:
+                result = np.asarray(result, dtype=self.dtype)
+
+            # as for numpy.ndarray indexing, we always want the result to be
+            # a NumPy array.
+            result = utils.to_0d_array(result)
+
+        return result
+
+    def transpose(self, order) -> pd.Index:
+        return self.array  # self.array should be always one-dimensional
+
+    def __repr__(self) -> str:
+        return f"{type(self).__name__}(array={self.array!r}, dtype={self.dtype!r})"
+
+    def copy(self, deep: bool = True) -> "PandasIndex":
+        # Not the same as just writing `self.array.copy(deep=deep)`, as
+        # shallow copies of the underlying numpy.ndarrays become deep ones
+        # upon pickling
+        # >>> len(pickle.dumps((self.array, self.array)))
+        # 4000281
+        # >>> len(pickle.dumps((self.array, self.array.copy(deep=False))))
+        # 8000341
+        array = self.array.copy(deep=True) if deep else self.array
+        return PandasIndex(array, self._dtype)
 
 
 def remove_unused_levels_categories(index: pd.Index) -> pd.Index:
@@ -68,8 +255,8 @@ def __repr__(self):
 
 
 def default_indexes(
-    coords: Mapping[Any, Variable], dims: Iterable
-) -> Dict[Hashable, pd.Index]:
+    coords: Mapping[Any, "Variable"], dims: Iterable
+) -> Dict[Hashable, Index]:
     """Default indexes for a Dataset/DataArray.
 
     Parameters
@@ -84,16 +271,18 @@ def default_indexes(
         Mapping from indexing keys (levels/dimension names) to indexes used for
         indexing along that dimension.
     """
-    return {key: coords[key].to_index() for key in dims if key in coords}
+    return {key: coords[key]._to_xindex() for key in dims if key in coords}
 
 
 def isel_variable_and_index(
     name: Hashable,
-    variable: Variable,
-    index: pd.Index,
-    indexers: Mapping[Hashable, Union[int, slice, np.ndarray, Variable]],
-) -> Tuple[Variable, Optional[pd.Index]]:
+    variable: "Variable",
+    index: Index,
+    indexers: Mapping[Hashable, Union[int, slice, np.ndarray, "Variable"]],
+) -> Tuple["Variable", Optional[Index]]:
     """Index a Variable and pandas.Index together."""
+    from .variable import Variable
+
     if not indexers:
         # nothing to index
         return variable.copy(deep=False), index
@@ -114,22 +303,25 @@ def isel_variable_and_index(
     indexer = indexers[dim]
     if isinstance(indexer, Variable):
         indexer = indexer.data
-    new_index = index[indexer]
+    pd_index = index.to_pandas_index()
+    new_index = PandasIndex(pd_index[indexer])
     return new_variable, new_index
 
 
-def roll_index(index: pd.Index, count: int, axis: int = 0) -> pd.Index:
+def roll_index(index: PandasIndex, count: int, axis: int = 0) -> PandasIndex:
     """Roll an pandas.Index."""
-    count %= index.shape[0]
+    pd_index = index.to_pandas_index()
+    count %= pd_index.shape[0]
     if count != 0:
-        return index[-count:].append(index[:-count])
+        new_idx = pd_index[-count:].append(pd_index[:-count])
     else:
-        return index[:]
+        new_idx = pd_index[:]
+    return PandasIndex(new_idx)
 
 
 def propagate_indexes(
-    indexes: Optional[Dict[Hashable, pd.Index]], exclude: Optional[Any] = None
-) -> Optional[Dict[Hashable, pd.Index]]:
+    indexes: Optional[Dict[Hashable, Index]], exclude: Optional[Any] = None
+) -> Optional[Dict[Hashable, Index]]:
     """Creates new indexes dict from existing dict optionally excluding some dimensions."""
     if exclude is None:
         exclude = ()
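A rough sketch of the extension point the base class is aiming at (hypothetical `RangeIndex`, not part of this PR; assumes the module layout introduced above): a subclass only overrides the small `Index` API, and anything that cannot be expressed as a `pandas.Index` keeps the default `to_pandas_index`, which raises `TypeError`.

```python
import numpy as np

from xarray.core.indexes import Index


class RangeIndex(Index):
    """Hypothetical index that stores only start/stop/step."""

    def __init__(self, coord_name, start, stop, step):
        super().__init__(coord_name)
        self.start, self.stop, self.step = start, stop, step

    @classmethod
    def from_variables(cls, variables, **kwargs):
        # only meaningful for a single evenly-spaced 1-d coordinate
        ((name, var),) = variables.items()
        values = np.asarray(var.data)
        step = values[1] - values[0]
        return cls(name, values[0], values[-1] + step, step)

    def equals(self, other):
        # assumes `other` is another RangeIndex (sketch only)
        return (self.start, self.stop, self.step) == (
            other.start,
            other.stop,
            other.step,
        )
```

Most call sites in this PR still coerce via `to_pandas_index()` right away, which is what the recurring `TODO (benbovy - flexible indexes)` comments track: each one marks a coercion to remove before an index like this could work end to end.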
diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index 82e4530f428..76a0c6888b2 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -2,8 +2,6 @@
 import functools
 import operator
 from collections import defaultdict
-from contextlib import suppress
-from datetime import timedelta
 from distutils.version import LooseVersion
 from typing import Any, Callable, Iterable, List, Sequence, Tuple, Union
@@ -18,7 +16,6 @@
     DASK_VERSION = LooseVersion("0")
 
 from . import duck_array_ops, nputils, utils
-from .npcompat import DTypeLike
 from .pycompat import (
     dask_array_type,
     integer_types,
@@ -119,6 +116,8 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No
     dimension. If `index` is a pandas.MultiIndex and depending on `label`,
     return a new pandas.Index or pandas.MultiIndex (otherwise return None).
     """
+    from .indexes import PandasIndex
+
     new_index = None
 
     if isinstance(label, slice):
@@ -208,6 +207,10 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No
         indexer = get_indexer_nd(index, label, method, tolerance)
         if np.any(indexer < 0):
             raise KeyError(f"not all values found in index {index_name!r}")
+
+    if new_index is not None:
+        new_index = PandasIndex(new_index)
+
     return indexer, new_index
@@ -262,7 +265,7 @@ def remap_label_indexers(data_obj, indexers, method=None, tolerance=None):
     dim_indexers = get_dim_indexers(data_obj, indexers)
     for dim, label in dim_indexers.items():
         try:
-            index = data_obj.indexes[dim]
+            index = data_obj.xindexes[dim].to_pandas_index()
         except KeyError:
             # no index for this dimension: reuse the provided labels
             if method is not None or tolerance is not None:
@@ -726,7 +729,9 @@ def as_indexable(array):
     if isinstance(array, np.ndarray):
         return NumpyIndexingAdapter(array)
     if isinstance(array, pd.Index):
-        return PandasIndexAdapter(array)
+        from .indexes import PandasIndex
+
+        return PandasIndex(array)
     if isinstance(array, dask_array_type):
         return DaskIndexingAdapter(array)
     if hasattr(array, "__array_function__"):
@@ -1414,101 +1419,3 @@ def __setitem__(self, key, value):
 
     def transpose(self, order):
         return self.array.transpose(order)
-
-
-class PandasIndexAdapter(ExplicitlyIndexedNDArrayMixin):
-    """Wrap a pandas.Index to preserve dtypes and handle explicit indexing."""
-
-    __slots__ = ("array", "_dtype")
-
-    def __init__(self, array: Any, dtype: DTypeLike = None):
-        self.array = utils.safe_cast_to_index(array)
-        if dtype is None:
-            if isinstance(array, pd.PeriodIndex):
-                dtype_ = np.dtype("O")
-            elif hasattr(array, "categories"):
-                # category isn't a real numpy dtype
-                dtype_ = array.categories.dtype
-            elif not utils.is_valid_numpy_dtype(array.dtype):
-                dtype_ = np.dtype("O")
-            else:
-                dtype_ = array.dtype
-        else:
-            dtype_ = np.dtype(dtype)
-        self._dtype = dtype_
-
-    @property
-    def dtype(self) -> np.dtype:
-        return self._dtype
-
-    def __array__(self, dtype: DTypeLike = None) -> np.ndarray:
-        if dtype is None:
-            dtype = self.dtype
-        array = self.array
-        if isinstance(array, pd.PeriodIndex):
-            with suppress(AttributeError):
-                # this might not be public API
-                array = array.astype("object")
-        return np.asarray(array.values, dtype=dtype)
-
-    @property
-    def shape(self) -> Tuple[int]:
-        return (len(self.array),)
-
-    def __getitem__(
-        self, indexer
-    ) -> Union[NumpyIndexingAdapter, np.ndarray, np.datetime64, np.timedelta64]:
-        key = indexer.tuple
-        if isinstance(key, tuple) and len(key) == 1:
-            # unpack key so it can index a pandas.Index object (pandas.Index
-            # objects don't like tuples)
-            (key,) = key
-
-        if getattr(key, "ndim", 0) > 1:  # Return np-array if multidimensional
-            return NumpyIndexingAdapter(self.array.values)[indexer]
-
-        result = self.array[key]
-
-        if isinstance(result, pd.Index):
-            result = PandasIndexAdapter(result, dtype=self.dtype)
-        else:
-            # result is a scalar
-            if result is pd.NaT:
-                # work around the impossibility of casting NaT with asarray
-                # note: it probably would be better in general to return
-                # pd.Timestamp rather np.than datetime64 but this is easier
-                # (for now)
-                result = np.datetime64("NaT", "ns")
-            elif isinstance(result, timedelta):
-                result = np.timedelta64(getattr(result, "value", result), "ns")
-            elif isinstance(result, pd.Timestamp):
-                # Work around for GH: pydata/xarray#1932 and numpy/numpy#10668
-                # numpy fails to convert pd.Timestamp to np.datetime64[ns]
-                result = np.asarray(result.to_datetime64())
-            elif self.dtype != object:
-                result = np.asarray(result, dtype=self.dtype)
-
-            # as for numpy.ndarray indexing, we always want the result to be
-            # a NumPy array.
-            result = utils.to_0d_array(result)
-
-        return result
-
-    def transpose(self, order) -> pd.Index:
-        return self.array  # self.array should be always one-dimensional
-
-    def __repr__(self) -> str:
-        return "{}(array={!r}, dtype={!r})".format(
-            type(self).__name__, self.array, self.dtype
-        )
-
-    def copy(self, deep: bool = True) -> "PandasIndexAdapter":
-        # Not the same as just writing `self.array.copy(deep=deep)`, as
-        # shallow copies of the underlying numpy.ndarrays become deep ones
-        # upon pickling
-        # >>> len(pickle.dumps((self.array, self.array)))
-        # 4000281
-        # >>> len(pickle.dumps((self.array, self.array.copy(deep=False))))
-        # 8000341
-        array = self.array.copy(deep=True) if deep else self.array
-        return PandasIndexAdapter(array, self._dtype)
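A small check of the dispatch above (illustrative, not in the patch): `as_indexable` now wraps a `pandas.Index` in `indexes.PandasIndex` where it previously produced `indexing.PandasIndexAdapter`; the adapter behavior (dtype preservation, explicit indexing) is unchanged.

```python
import pandas as pd

from xarray.core import indexes, indexing

wrapped = indexing.as_indexable(pd.Index([1, 2, 3]))

assert isinstance(wrapped, indexes.PandasIndex)
assert wrapped.shape == (3,)
assert wrapped.dtype == pd.Index([1, 2, 3]).dtype
```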
diff --git a/xarray/core/merge.py b/xarray/core/merge.py
index 4d83855a15d..6747957ca75 100644
--- a/xarray/core/merge.py
+++ b/xarray/core/merge.py
@@ -20,6 +20,7 @@
 from . import dtypes, pdcompat
 from .alignment import deep_align
 from .duck_array_ops import lazy_array_equiv
+from .indexes import Index, PandasIndex
 from .utils import Frozen, compat_dict_union, dict_equiv, equivalent
 from .variable import Variable, as_variable, assert_unique_multiindex_level_names
@@ -157,7 +158,7 @@ def _assert_compat_valid(compat):
     )
 
 
-MergeElement = Tuple[Variable, Optional[pd.Index]]
+MergeElement = Tuple[Variable, Optional[Index]]
 
 
 def merge_collected(
@@ -165,7 +166,7 @@ def merge_collected(
     prioritized: Mapping[Hashable, MergeElement] = None,
     compat: str = "minimal",
     combine_attrs="override",
-) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]:
+) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]:
     """Merge dicts of variables, while resolving conflicts appropriately.
 
     Parameters
@@ -187,7 +188,7 @@
     _assert_compat_valid(compat)
 
     merged_vars: Dict[Hashable, Variable] = {}
-    merged_indexes: Dict[Hashable, pd.Index] = {}
+    merged_indexes: Dict[Hashable, Index] = {}
 
     for name, elements_list in grouped.items():
         if name in prioritized:
@@ -261,7 +262,7 @@ def collect_variables_and_indexes(
     from .dataarray import DataArray
     from .dataset import Dataset
 
-    grouped: Dict[Hashable, List[Tuple[Variable, pd.Index]]] = {}
+    grouped: Dict[Hashable, List[Tuple[Variable, Optional[Index]]]] = {}
 
     def append(name, variable, index):
         values = grouped.setdefault(name, [])
@@ -273,13 +274,13 @@ def append_all(variables, indexes):
 
     for mapping in list_of_mappings:
         if isinstance(mapping, Dataset):
-            append_all(mapping.variables, mapping.indexes)
+            append_all(mapping.variables, mapping.xindexes)
             continue
 
        for name, variable in mapping.items():
            if isinstance(variable, DataArray):
                coords = variable._coords.copy()  # use private API for speed
-                indexes = dict(variable.indexes)
+                indexes = dict(variable.xindexes)
                # explicitly overwritten variables should take precedence
                coords.pop(name, None)
                indexes.pop(name, None)
@@ -288,7 +289,7 @@ def append_all(variables, indexes):
             variable = as_variable(variable, name=name)
             if variable.dims == (name,):
                 variable = variable.to_index_variable()
-                index = variable.to_index()
+                index = variable._to_xindex()
             else:
                 index = None
             append(name, variable, index)
@@ -300,11 +301,11 @@ def collect_from_coordinates(
     list_of_coords: "List[Coordinates]",
 ) -> Dict[Hashable, List[MergeElement]]:
     """Collect variables and indexes to be merged from Coordinate objects."""
-    grouped: Dict[Hashable, List[Tuple[Variable, pd.Index]]] = {}
+    grouped: Dict[Hashable, List[Tuple[Variable, Optional[Index]]]] = {}
 
     for coords in list_of_coords:
         variables = coords.variables
-        indexes = coords.indexes
+        indexes = coords.xindexes
         for name, variable in variables.items():
             value = grouped.setdefault(name, [])
             value.append((variable, indexes.get(name)))
@@ -315,7 +316,7 @@ def merge_coordinates_without_align(
     objects: "List[Coordinates]",
     prioritized: Mapping[Hashable, MergeElement] = None,
     exclude_dims: AbstractSet = frozenset(),
-) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]:
+) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]:
     """Merge variables/indexes from coordinates without automatic alignments.
 
     This function is used for merging coordinate from pre-existing xarray
@@ -448,9 +449,9 @@ def merge_coords(
     compat: str = "minimal",
     join: str = "outer",
     priority_arg: Optional[int] = None,
-    indexes: Optional[Mapping[Hashable, pd.Index]] = None,
+    indexes: Optional[Mapping[Hashable, Index]] = None,
     fill_value: object = dtypes.NA,
-) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]:
+) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]:
     """Merge coordinate variables.
 
     See merge_core below for argument descriptions. This works similarly to
@@ -484,7 +485,7 @@ def _extract_indexes_from_coords(coords):
     for name, variable in coords.items():
         variable = as_variable(variable, name=name)
         if variable.dims == (name,):
-            yield name, variable.to_index()
+            yield name, variable._to_xindex()
 
 
 def assert_valid_explicit_coords(variables, dims, explicit_coords):
@@ -569,7 +570,7 @@ def merge_core(
     combine_attrs: Optional[str] = "override",
     priority_arg: Optional[int] = None,
     explicit_coords: Optional[Sequence] = None,
-    indexes: Optional[Mapping[Hashable, pd.Index]] = None,
+    indexes: Optional[Mapping[Hashable, Index]] = None,
     fill_value: object = dtypes.NA,
 ) -> _MergeResult:
     """Core logic for merging labeled objects.
@@ -970,10 +971,11 @@ def dataset_update_method(
                     other[key] = value.drop_vars(coord_names)
 
     # use ds.coords and not ds.indexes, else str coords are cast to object
-    indexes = {key: dataset.coords[key] for key in dataset.indexes.keys()}
+    # TODO: benbovy - flexible indexes: fix this (it only works with pandas indexes)
+    indexes = {key: PandasIndex(dataset.coords[key]) for key in dataset.xindexes.keys()}
     return merge_core(
         [dataset, other],
         priority_arg=1,
-        indexes=indexes,
+        indexes=indexes,  # type: ignore
         combine_attrs="override",
     )
diff --git a/xarray/core/missing.py b/xarray/core/missing.py
index d12ccc65ca6..41205242cce 100644
--- a/xarray/core/missing.py
+++ b/xarray/core/missing.py
@@ -317,9 +317,13 @@ def interp_na(
         if not is_scalar(max_gap):
             raise ValueError("max_gap must be a scalar.")
 
+        # TODO: benbovy - flexible indexes: update when CFTimeIndex (and DatetimeIndex?)
+        # has its own class inheriting from xarray.Index
         if (
-            dim in self.indexes
-            and isinstance(self.indexes[dim], (pd.DatetimeIndex, CFTimeIndex))
+            dim in self.xindexes
+            and isinstance(
+                self.xindexes[dim].to_pandas_index(), (pd.DatetimeIndex, CFTimeIndex)
+            )
             and use_coordinate
         ):
             # Convert to float
diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
index 895e939c505..e1d32b7de43 100644
--- a/xarray/core/parallel.py
+++ b/xarray/core/parallel.py
@@ -27,6 +27,8 @@
 
 import numpy as np
 
+from xarray.core.indexes import PandasIndex
+
 from .alignment import align
 from .dataarray import DataArray
 from .dataset import Dataset
@@ -291,7 +293,7 @@ def _wrapper(
         )
 
         # check that index lengths and values are as expected
-        for name, index in result.indexes.items():
+        for name, index in result.xindexes.items():
             if name in expected["shapes"]:
                 if len(index) != expected["shapes"][name]:
                     raise ValueError(
@@ -357,27 +359,27 @@ def _wrapper(
 
     # check that chunk sizes are compatible
     input_chunks = dict(npargs[0].chunks)
-    input_indexes = dict(npargs[0].indexes)
+    input_indexes = dict(npargs[0].xindexes)
     for arg in xarray_objs[1:]:
         assert_chunks_compatible(npargs[0], arg)
         input_chunks.update(arg.chunks)
-        input_indexes.update(arg.indexes)
+        input_indexes.update(arg.xindexes)
 
     if template is None:
         # infer template by providing zero-shaped arrays
         template = infer_template(func, aligned[0], *args, **kwargs)
-        template_indexes = set(template.indexes)
+        template_indexes = set(template.xindexes)
         preserved_indexes = template_indexes & set(input_indexes)
         new_indexes = template_indexes - set(input_indexes)
         indexes = {dim: input_indexes[dim] for dim in preserved_indexes}
-        indexes.update({k: template.indexes[k] for k in new_indexes})
+        indexes.update({k: template.xindexes[k] for k in new_indexes})
         output_chunks = {
             dim: input_chunks[dim] for dim in template.dims if dim in input_chunks
         }
 
     else:
         # template xarray object has been provided with proper sizes and chunk shapes
-        indexes = dict(template.indexes)
+        indexes = dict(template.xindexes)
         if isinstance(template, DataArray):
             output_chunks = dict(
                 zip(template.dims, template.chunks)  # type: ignore[arg-type]
             )
@@ -501,10 +503,16 @@ def subset_dataset_to_block(
        }
        expected["data_vars"] = set(template.data_vars.keys())  # type: ignore[assignment]
        expected["coords"] = set(template.coords.keys())  # type: ignore[assignment]
-        expected["indexes"] = {
-            dim: indexes[dim][_get_chunk_slicer(dim, chunk_index, output_chunk_bounds)]
-            for dim in indexes
-        }
+        # TODO: benbovy - flexible indexes: clean this up
+        # for now assumes pandas index (thus can be indexed) but it won't be the case for
+        # all indexes
+        expected_indexes = {}
+        for dim in indexes:
+            idx = indexes[dim].to_pandas_index()[
+                _get_chunk_slicer(dim, chunk_index, output_chunk_bounds)
+            ]
+            expected_indexes[dim] = PandasIndex(idx)
+        expected["indexes"] = expected_indexes
 
        from_wrapper = (gname,) + chunk_tuple
        graph[from_wrapper] = (_wrapper, func, blocked_args, kwargs, is_array, expected)
@@ -550,7 +558,7 @@ def subset_dataset_to_block(
    )
 
    result = Dataset(coords=indexes, attrs=template.attrs)
-    for index in result.indexes:
+    for index in result.xindexes:
        result[index].attrs = template[index].attrs
        result[index].encoding = template[index].encoding
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 6f828a5128c..cffaf2c3146 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -26,13 +26,8 @@
 from . import common, dtypes, duck_array_ops, indexing, nputils, ops, utils
 from .arithmetic import VariableArithmetic
 from .common import AbstractArray
-from .indexing import (
-    BasicIndexer,
-    OuterIndexer,
-    PandasIndexAdapter,
-    VectorizedIndexer,
-    as_indexable,
-)
+from .indexes import PandasIndex
+from .indexing import BasicIndexer, OuterIndexer, VectorizedIndexer, as_indexable
 from .options import _get_keep_attrs
 from .pycompat import (
     cupy_array_type,
@@ -180,11 +175,11 @@ def _maybe_wrap_data(data):
     Put pandas.Index and numpy.ndarray arguments in adapter objects to ensure
     they can be indexed properly.
 
-    NumpyArrayAdapter, PandasIndexAdapter and LazilyIndexedArray should
+    NumpyArrayAdapter, PandasIndex and LazilyIndexedArray should
     all pass through unmodified.
""" if isinstance(data, pd.Index): - return PandasIndexAdapter(data) + return PandasIndex(data) return data @@ -351,7 +346,7 @@ def nbytes(self): @property def _in_memory(self): - return isinstance(self._data, (np.ndarray, np.number, PandasIndexAdapter)) or ( + return isinstance(self._data, (np.ndarray, np.number, PandasIndex)) or ( isinstance(self._data, indexing.MemoryCachedArray) and isinstance(self._data.array, indexing.NumpyIndexingAdapter) ) @@ -556,6 +551,11 @@ def to_index_variable(self): to_coord = utils.alias(to_index_variable, "to_coord") + def _to_xindex(self): + # temporary function used internally as a replacement of to_index() + # returns an xarray Index instance instead of a pd.Index instance + return PandasIndex(self.to_index()) + def to_index(self): """Convert this variable to a pandas.Index""" return self.to_index_variable().to_index() @@ -2553,8 +2553,8 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): raise ValueError("%s objects must be 1-dimensional" % type(self).__name__) # Unlike in Variable, always eagerly load values into memory - if not isinstance(self._data, PandasIndexAdapter): - self._data = PandasIndexAdapter(self._data) + if not isinstance(self._data, PandasIndex): + self._data = PandasIndex(self._data) def __dask_tokenize__(self): from dask.base import normalize_token @@ -2890,7 +2890,7 @@ def assert_unique_multiindex_level_names(variables): level_names = defaultdict(list) all_level_names = set() for var_name, var in variables.items(): - if isinstance(var._data, PandasIndexAdapter): + if isinstance(var._data, PandasIndex): idx_level_names = var.to_index_variable().level_names if idx_level_names is not None: for n in idx_level_names: diff --git a/xarray/testing.py b/xarray/testing.py index 365b81edc40..40ca12852b9 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -4,12 +4,11 @@ from typing import Hashable, Set, Union import numpy as np -import pandas as pd from xarray.core import duck_array_ops, formatting, utils from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset -from xarray.core.indexes import default_indexes +from xarray.core.indexes import Index, default_indexes from xarray.core.variable import IndexVariable, Variable __all__ = ( @@ -254,7 +253,7 @@ def assert_chunks_equal(a, b): def _assert_indexes_invariants_checks(indexes, possible_coord_variables, dims): assert isinstance(indexes, dict), indexes - assert all(isinstance(v, pd.Index) for v in indexes.values()), { + assert all(isinstance(v, Index) for v in indexes.values()), { k: type(v) for k, v in indexes.items() } diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index feab45b1f00..3e3d6e8b8d0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -35,7 +35,7 @@ from xarray.backends.pydap_ import PydapDataStore from xarray.coding.variables import SerializationWarning from xarray.conventions import encode_dataset_coordinates -from xarray.core import indexing +from xarray.core import indexes, indexing from xarray.core.options import set_options from xarray.core.pycompat import dask_array_type from xarray.tests import LooseVersion, mock @@ -735,7 +735,7 @@ def find_and_validate_array(obj): elif isinstance(obj.array, dask_array_type): assert isinstance(obj, indexing.DaskIndexingAdapter) elif isinstance(obj.array, pd.Index): - assert isinstance(obj, indexing.PandasIndexAdapter) + assert isinstance(obj, indexes.PandasIndex) else: raise TypeError( "{} is wrapped by 
{}".format(type(obj.array), type(obj)) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 8dee364a08a..725b5efee75 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -696,7 +696,7 @@ def test_concat_cftimeindex(date_type): ) da = xr.concat([da1, da2], dim="time") - assert isinstance(da.indexes["time"], CFTimeIndex) + assert isinstance(da.xindexes["time"].to_pandas_index(), CFTimeIndex) @requires_cftime diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index c4f32795b59..526f3fc30c1 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -99,7 +99,10 @@ def test_resample(freqs, closed, label, base): ) .mean() ) - da_cftime["time"] = da_cftime.indexes["time"].to_datetimeindex() + # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass + da_cftime["time"] = ( + da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex() + ) xr.testing.assert_identical(da_cftime, da_datetime) @@ -145,5 +148,6 @@ def test_calendars(calendar): .resample(time=freq, closed=closed, label=label, base=base, loffset=loffset) .mean() ) - da_cftime["time"] = da_cftime.indexes["time"].to_datetimeindex() + # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass + da_cftime["time"] = da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex() xr.testing.assert_identical(da_cftime, da_datetime) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 9cfc134e4fe..42232f7df57 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -521,7 +521,7 @@ def test_concat(self): stacked = concat(grouped, ds["x"]) assert_identical(foo, stacked) # with an index as the 'dim' argument - stacked = concat(grouped, ds.indexes["x"]) + stacked = concat(grouped, pd.Index(ds["x"], name="x")) assert_identical(foo, stacked) actual = concat([foo[0], foo[1]], pd.Index([0, 1])).reset_coords(drop=True) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 3608a53f747..cd8e3419231 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -280,7 +280,7 @@ def test_decode_cf_with_dask(self): assert all( isinstance(var.data, da.Array) for name, var in decoded.variables.items() - if name not in decoded.indexes + if name not in decoded.xindexes ) assert_identical(decoded, conventions.decode_cf(original).compute()) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ff098ced161..e6c479896e9 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -24,7 +24,7 @@ from xarray.convert import from_cdms2 from xarray.core import dtypes from xarray.core.common import full_like -from xarray.core.indexes import propagate_indexes +from xarray.core.indexes import Index, PandasIndex, propagate_indexes from xarray.core.utils import is_scalar from xarray.tests import ( LooseVersion, @@ -147,10 +147,15 @@ def test_data_property(self): def test_indexes(self): array = DataArray(np.zeros((2, 3)), [("x", [0, 1]), ("y", ["a", "b", "c"])]) - expected = {"x": pd.Index([0, 1]), "y": pd.Index(["a", "b", "c"])} - assert array.indexes.keys() == expected.keys() - for k in expected: - assert array.indexes[k].equals(expected[k]) + expected_indexes = {"x": pd.Index([0, 1]), "y": pd.Index(["a", "b", "c"])} + expected_xindexes = {k: PandasIndex(idx) for k, idx in expected_indexes.items()} + 
assert array.xindexes.keys() == expected_xindexes.keys() + assert array.indexes.keys() == expected_indexes.keys() + assert all([isinstance(idx, pd.Index) for idx in array.indexes.values()]) + assert all([isinstance(idx, Index) for idx in array.xindexes.values()]) + for k in expected_indexes: + assert array.xindexes[k].equals(expected_xindexes[k]) + assert array.indexes[k].equals(expected_indexes[k]) def test_get_index(self): array = DataArray(np.zeros((2, 3)), coords={"x": ["a", "b"]}, dims=["x", "y"]) @@ -1459,7 +1464,7 @@ def test_coords_alignment(self): def test_set_coords_update_index(self): actual = DataArray([1, 2, 3], [("x", [1, 2, 3])]) actual.coords["x"] = ["a", "b", "c"] - assert actual.indexes["x"].equals(pd.Index(["a", "b", "c"])) + assert actual.xindexes["x"].equals(pd.Index(["a", "b", "c"])) def test_coords_replacement_alignment(self): # regression test for GH725 @@ -1479,7 +1484,7 @@ def test_coords_delitem_delete_indexes(self): # regression test for GH3746 arr = DataArray(np.ones((2,)), dims="x", coords={"x": [0, 1]}) del arr.coords["x"] - assert "x" not in arr.indexes + assert "x" not in arr.xindexes def test_broadcast_like(self): arr1 = DataArray( @@ -1627,18 +1632,19 @@ def test_swap_dims(self): expected = DataArray(array.values, {"y": list("abc")}, dims="y") actual = array.swap_dims({"x": "y"}) assert_identical(expected, actual) - for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): + for dim_name in set().union(expected.xindexes.keys(), actual.xindexes.keys()): pd.testing.assert_index_equal( - expected.indexes[dim_name], actual.indexes[dim_name] + expected.xindexes[dim_name].array, actual.xindexes[dim_name].array ) array = DataArray(np.random.randn(3), {"x": list("abc")}, "x") expected = DataArray(array.values, {"x": ("y", list("abc"))}, dims="y") actual = array.swap_dims({"x": "y"}) assert_identical(expected, actual) - for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): + for dim_name in set().union(expected.xindexes.keys(), actual.xindexes.keys()): pd.testing.assert_index_equal( - expected.indexes[dim_name], actual.indexes[dim_name] + expected.xindexes[dim_name].to_pandas_index(), + actual.xindexes[dim_name].to_pandas_index(), ) # as kwargs @@ -1646,9 +1652,10 @@ def test_swap_dims(self): expected = DataArray(array.values, {"x": ("y", list("abc"))}, dims="y") actual = array.swap_dims(x="y") assert_identical(expected, actual) - for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): + for dim_name in set().union(expected.xindexes.keys(), actual.xindexes.keys()): pd.testing.assert_index_equal( - expected.indexes[dim_name], actual.indexes[dim_name] + expected.xindexes[dim_name].to_pandas_index(), + actual.xindexes[dim_name].to_pandas_index(), ) # multiindex case @@ -1657,9 +1664,10 @@ def test_swap_dims(self): expected = DataArray(array.values, {"y": idx}, "y") actual = array.swap_dims({"x": "y"}) assert_identical(expected, actual) - for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): + for dim_name in set().union(expected.xindexes.keys(), actual.xindexes.keys()): pd.testing.assert_index_equal( - expected.indexes[dim_name], actual.indexes[dim_name] + expected.xindexes[dim_name].to_pandas_index(), + actual.xindexes[dim_name].to_pandas_index(), ) def test_expand_dims_error(self): @@ -4334,12 +4342,12 @@ def test_matmul_align_coords(self): def test_binary_op_propagate_indexes(self): # regression test for GH2227 self.dv["x"] = np.arange(self.dv.sizes["x"]) - expected = 
self.dv.indexes["x"] + expected = self.dv.xindexes["x"] - actual = (self.dv * 10).indexes["x"] + actual = (self.dv * 10).xindexes["x"] assert expected is actual - actual = (self.dv > 10).indexes["x"] + actual = (self.dv > 10).xindexes["x"] assert expected is actual def test_binary_op_join_setting(self): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 33b5d16fbac..b8e1cd4b03b 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -28,6 +28,7 @@ from xarray.coding.cftimeindex import CFTimeIndex from xarray.core import dtypes, indexing, utils from xarray.core.common import duck_array_ops, full_like +from xarray.core.indexes import Index from xarray.core.pycompat import integer_types from xarray.core.utils import is_scalar @@ -582,9 +583,15 @@ def test_properties(self): assert "numbers" not in ds.data_vars assert len(ds.data_vars) == 3 + assert set(ds.xindexes) == {"dim2", "dim3", "time"} + assert len(ds.xindexes) == 3 + assert "dim2" in repr(ds.xindexes) + assert all([isinstance(idx, Index) for idx in ds.xindexes.values()]) + assert set(ds.indexes) == {"dim2", "dim3", "time"} assert len(ds.indexes) == 3 assert "dim2" in repr(ds.indexes) + assert all([isinstance(idx, pd.Index) for idx in ds.indexes.values()]) assert list(ds.coords) == ["time", "dim2", "dim3", "numbers"] assert "dim2" in ds.coords @@ -747,12 +754,12 @@ def test_coords_modify(self): # regression test for GH3746 del actual.coords["x"] - assert "x" not in actual.indexes + assert "x" not in actual.xindexes def test_update_index(self): actual = Dataset(coords={"x": [1, 2, 3]}) actual["x"] = ["a", "b", "c"] - assert actual.indexes["x"].equals(pd.Index(["a", "b", "c"])) + assert actual.xindexes["x"].equals(pd.Index(["a", "b", "c"])) def test_coords_setitem_with_new_dimension(self): actual = Dataset() @@ -1044,19 +1051,19 @@ def test_isel(self): assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) - assert set(data.indexes) == set(ret.indexes) + assert set(data.xindexes) == set(ret.xindexes) ret = data.isel(time=slice(2), dim1=0, dim2=slice(5)) assert {"time": 2, "dim2": 5, "dim3": 10} == ret.dims assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) - assert set(data.indexes) == set(ret.indexes) + assert set(data.xindexes) == set(ret.xindexes) ret = data.isel(time=0, dim1=0, dim2=slice(5)) assert {"dim2": 5, "dim3": 10} == ret.dims assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) - assert set(data.indexes) == set(list(ret.indexes) + ["time"]) + assert set(data.xindexes) == set(list(ret.xindexes) + ["time"]) def test_isel_fancy(self): # isel with fancy indexing. 
@@ -1392,13 +1399,13 @@ def test_sel_dataarray_mindex(self):
         )
 
         actual_isel = mds.isel(x=xr.DataArray(np.arange(3), dims="x"))
-        actual_sel = mds.sel(x=DataArray(mds.indexes["x"][:3], dims="x"))
+        actual_sel = mds.sel(x=DataArray(midx[:3], dims="x"))
         assert actual_isel["x"].dims == ("x",)
         assert actual_sel["x"].dims == ("x",)
         assert_identical(actual_isel, actual_sel)
 
         actual_isel = mds.isel(x=xr.DataArray(np.arange(3), dims="z"))
-        actual_sel = mds.sel(x=Variable("z", mds.indexes["x"][:3]))
+        actual_sel = mds.sel(x=Variable("z", midx[:3]))
         assert actual_isel["x"].dims == ("z",)
         assert actual_sel["x"].dims == ("z",)
         assert_identical(actual_isel, actual_sel)
@@ -1408,7 +1415,7 @@ def test_sel_dataarray_mindex(self):
             x=xr.DataArray(np.arange(3), dims="z", coords={"z": [0, 1, 2]})
         )
         actual_sel = mds.sel(
-            x=xr.DataArray(mds.indexes["x"][:3], dims="z", coords={"z": [0, 1, 2]})
+            x=xr.DataArray(midx[:3], dims="z", coords={"z": [0, 1, 2]})
         )
         assert actual_isel["x"].dims == ("z",)
         assert actual_sel["x"].dims == ("z",)
@@ -2421,7 +2428,7 @@ def test_drop_labels_by_keyword(self):
         with pytest.warns(FutureWarning):
             data.drop(arr.coords)
         with pytest.warns(FutureWarning):
-            data.drop(arr.indexes)
+            data.drop(arr.xindexes)
 
         assert_array_equal(ds1.coords["x"], ["b"])
         assert_array_equal(ds2.coords["x"], ["b"])
@@ -2711,21 +2718,23 @@ def test_rename_does_not_change_CFTimeIndex_type(self):
         orig = Dataset(coords={"time": time})
 
         renamed = orig.rename(time="time_new")
-        assert "time_new" in renamed.indexes
-        assert isinstance(renamed.indexes["time_new"], CFTimeIndex)
-        assert renamed.indexes["time_new"].name == "time_new"
+        assert "time_new" in renamed.xindexes
+        # TODO: benbovy - flexible indexes: update when CFTimeIndex
+        # inherits from xarray.Index
+        assert isinstance(renamed.xindexes["time_new"].to_pandas_index(), CFTimeIndex)
+        assert renamed.xindexes["time_new"].to_pandas_index().name == "time_new"
 
         # check original has not changed
-        assert "time" in orig.indexes
-        assert isinstance(orig.indexes["time"], CFTimeIndex)
-        assert orig.indexes["time"].name == "time"
+        assert "time" in orig.xindexes
+        assert isinstance(orig.xindexes["time"].to_pandas_index(), CFTimeIndex)
+        assert orig.xindexes["time"].to_pandas_index().name == "time"
 
         # note: rename_dims(time="time_new") drops "ds.indexes"
         renamed = orig.rename_dims()
-        assert isinstance(renamed.indexes["time"], CFTimeIndex)
+        assert isinstance(renamed.xindexes["time"].to_pandas_index(), CFTimeIndex)
 
         renamed = orig.rename_vars()
-        assert isinstance(renamed.indexes["time"], CFTimeIndex)
+        assert isinstance(renamed.xindexes["time"].to_pandas_index(), CFTimeIndex)
 
     def test_rename_does_not_change_DatetimeIndex_type(self):
         # make sure DatetimeIndex is conserved on rename
@@ -2734,21 +2743,23 @@ def test_rename_does_not_change_DatetimeIndex_type(self):
         time = pd.date_range(start="2000", periods=6, freq="2MS")
         orig = Dataset(coords={"time": time})
 
         renamed = orig.rename(time="time_new")
-        assert "time_new" in renamed.indexes
-        assert isinstance(renamed.indexes["time_new"], DatetimeIndex)
-        assert renamed.indexes["time_new"].name == "time_new"
+        assert "time_new" in renamed.xindexes
+        # TODO: benbovy - flexible indexes: update when DatetimeIndex
+        # inherits from xarray.Index?
+        assert isinstance(renamed.xindexes["time_new"].to_pandas_index(), DatetimeIndex)
+        assert renamed.xindexes["time_new"].to_pandas_index().name == "time_new"
 
         # check original has not changed
-        assert "time" in orig.indexes
-        assert isinstance(orig.indexes["time"], DatetimeIndex)
-        assert orig.indexes["time"].name == "time"
+        assert "time" in orig.xindexes
+        assert isinstance(orig.xindexes["time"].to_pandas_index(), DatetimeIndex)
+        assert orig.xindexes["time"].to_pandas_index().name == "time"
 
         # note: rename_dims(time="time_new") drops "ds.indexes"
         renamed = orig.rename_dims()
-        assert isinstance(renamed.indexes["time"], DatetimeIndex)
+        assert isinstance(renamed.xindexes["time"].to_pandas_index(), DatetimeIndex)
 
         renamed = orig.rename_vars()
-        assert isinstance(renamed.indexes["time"], DatetimeIndex)
+        assert isinstance(renamed.xindexes["time"].to_pandas_index(), DatetimeIndex)
 
     def test_swap_dims(self):
         original = Dataset({"x": [1, 2, 3], "y": ("x", list("abc")), "z": 42})
@@ -2757,7 +2768,10 @@ def test_swap_dims(self):
         assert_identical(expected, actual)
         assert isinstance(actual.variables["y"], IndexVariable)
         assert isinstance(actual.variables["x"], Variable)
-        pd.testing.assert_index_equal(actual.indexes["y"], expected.indexes["y"])
+        pd.testing.assert_index_equal(
+            actual.xindexes["y"].to_pandas_index(),
+            expected.xindexes["y"].to_pandas_index(),
+        )
 
         roundtripped = actual.swap_dims({"y": "x"})
         assert_identical(original.set_coords("y"), roundtripped)
@@ -2788,7 +2802,10 @@ def test_swap_dims(self):
         assert_identical(expected, actual)
         assert isinstance(actual.variables["y"], IndexVariable)
         assert isinstance(actual.variables["x"], Variable)
-        pd.testing.assert_index_equal(actual.indexes["y"], expected.indexes["y"])
+        pd.testing.assert_index_equal(
+            actual.xindexes["y"].to_pandas_index(),
+            expected.xindexes["y"].to_pandas_index(),
+        )
 
     def test_expand_dims_error(self):
         original = Dataset(
@@ -3165,7 +3182,9 @@ def test_to_stacked_array_dtype_dims(self):
         D = xr.Dataset({"a": a, "b": b})
         sample_dims = ["x"]
         y = D.to_stacked_array("features", sample_dims)
-        assert y.indexes["features"].levels[1].dtype == D.y.dtype
+        # TODO: benbovy - flexible indexes: update when MultiIndex has its own class
+        # inherited from xarray.Index
+        assert y.xindexes["features"].to_pandas_index().levels[1].dtype == D.y.dtype
         assert y.dims == ("x", "features")
 
     def test_to_stacked_array_to_unstacked_dataset(self):
@@ -5565,8 +5584,8 @@ def test_binary_op_propagate_indexes(self):
         ds = Dataset(
             {"d1": DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]})}
         )
-        expected = ds.indexes["x"]
-        actual = (ds * 2).indexes["x"]
+        expected = ds.xindexes["x"]
+        actual = (ds * 2).xindexes["x"]
         assert expected is actual
 
     def test_binary_op_join_setting(self):
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index b3334e92c4a..1e0dff45dd2 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -11,6 +11,7 @@
 from xarray import Coordinate, DataArray, Dataset, IndexVariable, Variable, set_options
 from xarray.core import dtypes, duck_array_ops, indexing
 from xarray.core.common import full_like, ones_like, zeros_like
+from xarray.core.indexes import PandasIndex
 from xarray.core.indexing import (
     BasicIndexer,
     CopyOnWriteArray,
@@ -19,7 +20,6 @@
     MemoryCachedArray,
     NumpyIndexingAdapter,
     OuterIndexer,
-    PandasIndexAdapter,
     VectorizedIndexer,
 )
 from xarray.core.pycompat import dask_array_type
@@ -535,7 +535,7 @@ def test_copy_index(self):
         v = self.cls("x", midx)
         for deep in [True, False]:
             w = v.copy(deep=deep)
-            assert isinstance(w._data, PandasIndexAdapter)
+            assert isinstance(w._data, PandasIndex)
             assert isinstance(w.to_index(), pd.MultiIndex)
             assert_array_equal(v._data.array, w._data.array)
@@ -2145,7 +2145,7 @@ def test_multiindex_default_level_names(self):
 
     def test_data(self):
         x = IndexVariable("x", np.arange(3.0))
-        assert isinstance(x._data, PandasIndexAdapter)
+        assert isinstance(x._data, PandasIndex)
         assert isinstance(x.data, np.ndarray)
         assert float == x.dtype
         assert_array_equal(np.arange(3), x)
@@ -2287,7 +2287,7 @@ def test_coarsen_2d(self):
 
 class TestAsCompatibleData:
     def test_unchanged_types(self):
-        types = (np.asarray, PandasIndexAdapter, LazilyIndexedArray)
+        types = (np.asarray, PandasIndex, LazilyIndexedArray)
         for t in types:
             for data in [
                 np.arange(3),
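# ---------------------------------------------------------------------------
# Editor's note (not part of the diff; the diff above is truncated mid-hunk):
# the test_variable.py changes are a pure rename, ``PandasIndexAdapter`` ->
# ``PandasIndex``. A small sketch of what the renamed class still does,
# assuming this PR's behavior: it keeps doubling as the duck-array wrapper
# backing an ``IndexVariable``'s data (``_data`` and ``.array`` below are the
# private/internal attributes these tests poke at).
import numpy as np
import pandas as pd
import xarray as xr
from xarray.core.indexes import PandasIndex

x = xr.IndexVariable("x", np.arange(3.0))
assert isinstance(x._data, PandasIndex)  # the wrapper backs the variable's data
assert isinstance(x._data.array, pd.Index)  # it holds a real pandas.Index
assert isinstance(x.data, np.ndarray)  # .data still yields a numpy array
# ---------------------------------------------------------------------------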