diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml
index 78ead40d5a2..2468ec6267e 100644
--- a/ci/requirements/environment-windows.yml
+++ b/ci/requirements/environment-windows.yml
@@ -39,6 +39,7 @@ dependencies:
- setuptools
- sparse
- toolz
+ - typing_extensions
- zarr
- pip:
- numbagg
diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml
index f64ca3677cc..162faa7b74d 100644
--- a/ci/requirements/environment.yml
+++ b/ci/requirements/environment.yml
@@ -43,6 +43,7 @@ dependencies:
- setuptools
- sparse
- toolz
+ - typing_extensions
- zarr
- pip:
- numbagg
diff --git a/ci/requirements/py37-bare-minimum.yml b/ci/requirements/py37-bare-minimum.yml
index 408cf76fdd6..0cecf885436 100644
--- a/ci/requirements/py37-bare-minimum.yml
+++ b/ci/requirements/py37-bare-minimum.yml
@@ -13,3 +13,4 @@ dependencies:
- numpy=1.17
- pandas=1.0
- setuptools=40.4
+ - typing_extensions=3.7
diff --git a/ci/requirements/py37-min-all-deps.yml b/ci/requirements/py37-min-all-deps.yml
index 7c3230f87b0..c73c5327d3b 100644
--- a/ci/requirements/py37-min-all-deps.yml
+++ b/ci/requirements/py37-min-all-deps.yml
@@ -47,6 +47,7 @@ dependencies:
- setuptools=40.4
- sparse=0.8
- toolz=0.10
+ - typing_extensions=3.7
- zarr=2.4
- pip:
- numbagg==0.1
diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml
index 3f82990f3b5..688dfb7a2bc 100644
--- a/ci/requirements/py38-all-but-dask.yml
+++ b/ci/requirements/py38-all-but-dask.yml
@@ -39,6 +39,7 @@ dependencies:
- setuptools
- sparse
- toolz
+ - typing_extensions
- zarr
- pip:
- numbagg
diff --git a/doc/api.rst b/doc/api.rst
index 6c54da01d08..9433ecfa56d 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -66,6 +66,7 @@ Attributes
Dataset.encoding
Dataset.indexes
Dataset.chunks
+ Dataset.chunksizes
Dataset.nbytes
Dictionary interface
@@ -273,6 +274,7 @@ Attributes
DataArray.attrs
DataArray.encoding
DataArray.indexes
+ DataArray.chunksizes
ndarray attributes
------------------
diff --git a/doc/ecosystem.rst b/doc/ecosystem.rst
index 1c919b3040b..460541e91d7 100644
--- a/doc/ecosystem.rst
+++ b/doc/ecosystem.rst
@@ -37,6 +37,7 @@ Geosciences
- `Spyfit `_: FTIR spectroscopy of the atmosphere
- `windspharm `_: Spherical
harmonic wind analysis in Python.
+- `wradlib `_: An Open Source Library for Weather Radar Data Processing.
- `wrf-python `_: A collection of diagnostic and interpolation routines for use with output of the Weather Research and Forecasting (WRF-ARW) Model.
- `xarray-simlab `_: xarray extension for computer model simulations.
- `xarray-spatial `_: Numba-accelerated raster-based spatial processing tools (NDVI, curvature, zonal-statistics, proximity, hillshading, viewshed, etc.)
diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst
index 93738da9d9b..c6bc84e6ddb 100644
--- a/doc/getting-started-guide/installing.rst
+++ b/doc/getting-started-guide/installing.rst
@@ -8,6 +8,7 @@ Required dependencies
- Python (3.7 or later)
- setuptools (40.4 or later)
+- ``typing_extensions`` (3.7 or later)
- `numpy `__ (1.17 or later)
- `pandas `__ (1.0 or later)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index bcc42c25d5c..3533e616509 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -38,6 +38,10 @@ New Features
`Nathan Lis `_.
- Histogram plots are set with a title displaying the scalar coords if any, similarly to the other plots (:issue:`5791`, :pull:`5792`).
By `Maxime Liquet `_.
+- Added new :py:attr:`Dataset.chunksizes`, :py:attr:`DataArray.chunksizes`, and :py:attr:`Variable.chunksizes`
+ properties, which will always return a mapping from dimension names to the chunking pattern along that dimension,
+ regardless of whether the object is a Dataset, DataArray, or Variable. (:issue:`5846`, :pull:`5900`)
+ By `Tom Nicholas `_.
Breaking changes
~~~~~~~~~~~~~~~~
@@ -76,17 +80,23 @@ Bug fixes
- Fixed performance bug where ``cftime`` import attempted within various core operations if ``cftime`` not
installed (:pull:`5640`).
By `Luke Sewell `_
+- Fixed bug when combining named DataArrays using :py:func:`combine_by_coords`. (:pull:`5834`).
+ By `Tom Nicholas `_.
- When a custom engine was used in :py:func:`~xarray.open_dataset` the engine
wasn't initialized properly, causing missing argument errors or inconsistent
method signatures. (:pull:`5684`)
By `Jimmy Westling `_.
- Numbers are properly formatted in a plot's title (:issue:`5788`, :pull:`5789`).
By `Maxime Liquet `_.
+- Faceted plots will no longer raise a ``pint.UnitStrippedWarning`` when a ``pint.Quantity`` array is plotted,
+ and will correctly display the units of the data in the colorbar (if there is one) (:pull:`5886`).
+ By `Tom Nicholas `_.
- With backends, check for path-like objects rather than ``pathlib.Path``
type, use ``os.fspath`` (:pull:`5879`).
By `Mike Taves `_.
- ``open_mfdataset()`` now accepts a single ``pathlib.Path`` object (:issue: `5881`).
By `Panos Mavrogiorgos `_.
+- Improved performance of :py:meth:`Dataset.unstack` (:pull:`5906`). By `Tom Augspurger `_.
Documentation
~~~~~~~~~~~~~
diff --git a/requirements.txt b/requirements.txt
index 732d40cde18..0fa83c8ccc1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,4 @@
numpy >= 1.17
pandas >= 1.0
setuptools >= 40.4
-typing-extensions >= 3.10
+typing-extensions >= 3.7
diff --git a/setup.cfg b/setup.cfg
index aa8ca8df0ff..2dc1b7ffeca 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -78,6 +78,7 @@ python_requires = >=3.7
install_requires =
numpy >= 1.17
pandas >= 1.0
+ typing_extensions >= 3.7
setuptools >= 40.4 # For pkg_resources
[options.extras_require]
diff --git a/xarray/core/combine.py b/xarray/core/combine.py
index 56956a57e02..081b53391ba 100644
--- a/xarray/core/combine.py
+++ b/xarray/core/combine.py
@@ -673,7 +673,7 @@ def combine_by_coords(
Attempt to auto-magically combine the given datasets (or data arrays)
into one by using dimension coordinates.
- This method attempts to combine a group of datasets along any number of
+ This function attempts to combine a group of datasets along any number of
dimensions into a single entity by inspecting coords and metadata and using
a combination of concat and merge.
@@ -765,6 +765,8 @@ def combine_by_coords(
Returns
-------
combined : xarray.Dataset or xarray.DataArray
+ Will return a Dataset unless all the inputs are unnamed DataArrays, in which case a
+ DataArray will be returned.
See also
--------
@@ -870,6 +872,50 @@ def combine_by_coords(
Data variables:
temperature (y, x) float64 10.98 14.3 12.06 nan ... 18.89 10.44 8.293
precipitation (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176
+
+ You can also combine DataArray objects, but the behaviour will differ depending on
+ whether or not the DataArrays are named. If all DataArrays are named then they will
+ be promoted to Datasets before combining, and then the resultant Dataset will be
+ returned, e.g.
+
+ >>> named_da1 = xr.DataArray(
+ ... name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x"
+ ... )
+ >>> named_da1
+
+ array([1., 2.])
+ Coordinates:
+ * x (x) int64 0 1
+
+ >>> named_da2 = xr.DataArray(
+ ... name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x"
+ ... )
+ >>> named_da2
+
+ array([3., 4.])
+ Coordinates:
+ * x (x) int64 2 3
+
+ >>> xr.combine_by_coords([named_da1, named_da2])
+
+ Dimensions: (x: 4)
+ Coordinates:
+ * x (x) int64 0 1 2 3
+ Data variables:
+ a (x) float64 1.0 2.0 3.0 4.0
+
+ If all the DataArrays are unnamed, a single DataArray will be returned, e.g.
+
+ >>> unnamed_da1 = xr.DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
+ >>> unnamed_da2 = xr.DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")
+ >>> xr.combine_by_coords([unnamed_da1, unnamed_da2])
+
+ array([1., 2., 3., 4.])
+ Coordinates:
+ * x (x) int64 0 1 2 3
+
+ Finally, if you attempt to combine a mix of unnamed DataArrays with either named
+ DataArrays or Datasets, a ValueError will be raised (as this is an ambiguous operation).
"""
# TODO remove after version 0.21, see PR4696
@@ -883,33 +929,41 @@ def combine_by_coords(
if not data_objects:
return Dataset()
- mixed_arrays_and_datasets = any(
+ objs_are_unnamed_dataarrays = [
isinstance(data_object, DataArray) and data_object.name is None
for data_object in data_objects
- ) and any(isinstance(data_object, Dataset) for data_object in data_objects)
- if mixed_arrays_and_datasets:
- raise ValueError("Can't automatically combine datasets with unnamed arrays.")
-
- all_unnamed_data_arrays = all(
- isinstance(data_object, DataArray) and data_object.name is None
- for data_object in data_objects
- )
- if all_unnamed_data_arrays:
- unnamed_arrays = data_objects
- temp_datasets = [data_array._to_temp_dataset() for data_array in unnamed_arrays]
-
- combined_temp_dataset = _combine_single_variable_hypercube(
- temp_datasets,
- fill_value=fill_value,
- data_vars=data_vars,
- coords=coords,
- compat=compat,
- join=join,
- combine_attrs=combine_attrs,
- )
- return DataArray()._from_temp_dataset(combined_temp_dataset)
-
+ ]
+ if any(objs_are_unnamed_dataarrays):
+ if all(objs_are_unnamed_dataarrays):
+ # Combine into a single larger DataArray
+ temp_datasets = [
+ unnamed_dataarray._to_temp_dataset()
+ for unnamed_dataarray in data_objects
+ ]
+
+ combined_temp_dataset = _combine_single_variable_hypercube(
+ temp_datasets,
+ fill_value=fill_value,
+ data_vars=data_vars,
+ coords=coords,
+ compat=compat,
+ join=join,
+ combine_attrs=combine_attrs,
+ )
+ return DataArray()._from_temp_dataset(combined_temp_dataset)
+ else:
+ # Must be a mix of unnamed dataarrays with either named dataarrays or with datasets
+ # Can't combine these as we wouldn't know whether to merge or concatenate the arrays
+ raise ValueError(
+ "Can't automatically combine unnamed DataArrays with either named DataArrays or Datasets."
+ )
else:
+ # Promote any named DataArrays to single-variable Datasets to simplify combining
+ data_objects = [
+ obj.to_dataset() if isinstance(obj, DataArray) else obj
+ for obj in data_objects
+ ]
+
# Group by data vars
sorted_datasets = sorted(data_objects, key=vars_as_keys)
grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
diff --git a/xarray/core/common.py b/xarray/core/common.py
index 2c5d7900ef8..b5dc3bf0e20 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -1813,6 +1813,23 @@ def ones_like(other, dtype: DTypeLike = None):
return full_like(other, 1, dtype)
+def get_chunksizes(
+ variables: Iterable[Variable],
+) -> Mapping[Any, Tuple[int, ...]]:
+
+ chunks: Dict[Any, Tuple[int, ...]] = {}
+ for v in variables:
+ if hasattr(v.data, "chunks"):
+ for dim, c in v.chunksizes.items():
+ if dim in chunks and c != chunks[dim]:
+ raise ValueError(
+ f"Object has inconsistent chunks along dimension {dim}. "
+ "This can be fixed by calling unify_chunks()."
+ )
+ chunks[dim] = c
+ return Frozen(chunks)
+
+
def is_np_datetime_like(dtype: DTypeLike) -> bool:
"""Check if a dtype is a subclass of the numpy datetime types"""
return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index ed8b393628d..c6f534796d1 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -43,7 +43,7 @@
reindex_like_indexers,
)
from .arithmetic import DataArrayArithmetic
-from .common import AbstractArray, DataWithCoords
+from .common import AbstractArray, DataWithCoords, get_chunksizes
from .computation import unify_chunks
from .coordinates import (
DataArrayCoordinates,
@@ -1058,11 +1058,37 @@ def __deepcopy__(self, memo=None) -> "DataArray":
@property
def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
- """Block dimensions for this array's data or None if it's not a dask
- array.
+ """
+ Tuple of block lengths for this dataarray's data, in order of dimensions, or None if
+ the underlying data is not a dask array.
+
+ See Also
+ --------
+ DataArray.chunk
+ DataArray.chunksizes
+ xarray.unify_chunks
"""
return self.variable.chunks
+ @property
+ def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
+ """
+ Mapping from dimension names to block lengths for this dataarray's data. If the
+ underlying data is not a dask array, an empty mapping is returned.
+ Cannot be modified directly, but can be modified by calling .chunk().
+
+ Differs from DataArray.chunks because it returns a mapping of dimensions to chunk shapes
+ instead of a tuple of chunk shapes.
+
+ See Also
+ --------
+ DataArray.chunk
+ DataArray.chunks
+ xarray.unify_chunks
+ """
+ all_variables = [self.variable] + [c.variable for c in self.coords.values()]
+ return get_chunksizes(all_variables)
+
def chunk(
self,
chunks: Union[
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 3055e50aaf5..e882495dce5 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -52,7 +52,7 @@
)
from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align
from .arithmetic import DatasetArithmetic
-from .common import DataWithCoords, _contains_datetime_like_objects
+from .common import DataWithCoords, _contains_datetime_like_objects, get_chunksizes
from .computation import unify_chunks
from .coordinates import (
DatasetCoordinates,
@@ -2095,20 +2095,37 @@ def info(self, buf=None) -> None:
@property
def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
- """Block dimensions for this dataset's data or None if it's not a dask
- array.
"""
- chunks: Dict[Hashable, Tuple[int, ...]] = {}
- for v in self.variables.values():
- if v.chunks is not None:
- for dim, c in zip(v.dims, v.chunks):
- if dim in chunks and c != chunks[dim]:
- raise ValueError(
- f"Object has inconsistent chunks along dimension {dim}. "
- "This can be fixed by calling unify_chunks()."
- )
- chunks[dim] = c
- return Frozen(chunks)
+ Mapping from dimension names to block lengths for this dataset's data. If the
+ dataset contains no dask arrays, an empty mapping is returned.
+ Cannot be modified directly, but can be modified by calling .chunk().
+
+ Same as Dataset.chunksizes, but maintained for backwards compatibility.
+
+ See Also
+ --------
+ Dataset.chunk
+ Dataset.chunksizes
+ xarray.unify_chunks
+ """
+ return get_chunksizes(self.variables.values())
+
+ @property
+ def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
+ """
+ Mapping from dimension names to block lengths for this dataset's data. If the
+ dataset contains no dask arrays, an empty mapping is returned.
+ Cannot be modified directly, but can be modified by calling .chunk().
+
+ Same as Dataset.chunks.
+
+ See Also
+ --------
+ Dataset.chunk
+ Dataset.chunks
+ xarray.unify_chunks
+ """
+ return get_chunksizes(self.variables.values())
def chunk(
self,
@@ -2147,6 +2164,12 @@ def chunk(
Returns
-------
chunked : xarray.Dataset
+
+ See Also
+ --------
+ Dataset.chunks
+ Dataset.chunksizes
+ xarray.unify_chunks
"""
if chunks is None:
warnings.warn(
@@ -4153,34 +4176,34 @@ def unstack(
)
result = self.copy(deep=False)
- for dim in dims:
- if (
- # Dask arrays don't support assignment by index, which the fast unstack
- # function requires.
- # https://github.com/pydata/xarray/pull/4746#issuecomment-753282125
- any(is_duck_dask_array(v.data) for v in self.variables.values())
- # Sparse doesn't currently support (though we could special-case
- # it)
- # https://github.com/pydata/sparse/issues/422
- or any(
- isinstance(v.data, sparse_array_type)
- for v in self.variables.values()
- )
- or sparse
- # Until https://github.com/pydata/xarray/pull/4751 is resolved,
- # we check explicitly whether it's a numpy array. Once that is
- # resolved, explicitly exclude pint arrays.
- # # pint doesn't implement `np.full_like` in a way that's
- # # currently compatible.
- # # https://github.com/pydata/xarray/pull/4746#issuecomment-753425173
- # # or any(
- # # isinstance(v.data, pint_array_type) for v in self.variables.values()
- # # )
- or any(
- not isinstance(v.data, np.ndarray) for v in self.variables.values()
- )
- ):
+ # we want to avoid allocating an object-dtype ndarray for a MultiIndex,
+ # so we can't just access self.variables[v].data for every variable.
+ # We only check the non-index variables.
+ # https://github.com/pydata/xarray/issues/5902
+ nonindexes = [
+ self.variables[k] for k in set(self.variables) - set(self.xindexes)
+ ]
+ # Notes for each of these cases:
+ # 1. Dask arrays don't support assignment by index, which the fast unstack
+ # function requires.
+ # https://github.com/pydata/xarray/pull/4746#issuecomment-753282125
+ # 2. Sparse doesn't currently support (though we could special-case it)
+ # https://github.com/pydata/sparse/issues/422
+ # 3. pint requires checking if it's a NumPy array until
+ # https://github.com/pydata/xarray/pull/4751 is resolved,
+ # Once that is resolved, explicitly exclude pint arrays.
+ # pint doesn't implement `np.full_like` in a way that's
+ # currently compatible.
+ needs_full_reindex = sparse or any(
+ is_duck_dask_array(v.data)
+ or isinstance(v.data, sparse_array_type)
+ or not isinstance(v.data, np.ndarray)
+ for v in nonindexes
+ )
+
+ for dim in dims:
+ if needs_full_reindex:
result = result._unstack_full_reindex(dim, fill_value, sparse)
else:
result = result._unstack_once(dim, fill_value)
diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py
index 6e22c8cf0a4..09f78c5971c 100644
--- a/xarray/core/npcompat.py
+++ b/xarray/core/npcompat.py
@@ -41,17 +41,10 @@
# fall back for numpy < 1.20, ArrayLike adapted from numpy.typing._array_like
if sys.version_info >= (3, 8):
from typing import Protocol
-
- HAVE_PROTOCOL = True
else:
- try:
- from typing_extensions import Protocol
- except ImportError:
- HAVE_PROTOCOL = False
- else:
- HAVE_PROTOCOL = True
+ from typing_extensions import Protocol
- if TYPE_CHECKING or HAVE_PROTOCOL:
+ if TYPE_CHECKING:
class _SupportsArray(Protocol):
def __array__(self) -> np.ndarray:
diff --git a/xarray/core/options.py b/xarray/core/options.py
index 14f77306316..c9e037e6fd6 100644
--- a/xarray/core/options.py
+++ b/xarray/core/options.py
@@ -6,70 +6,35 @@
# TODO: Remove this check once python 3.7 is not supported:
if sys.version_info >= (3, 8):
from typing import TYPE_CHECKING, Literal, TypedDict, Union
+else:
+ from typing import TYPE_CHECKING, Union
- if TYPE_CHECKING:
- try:
- from matplotlib.colors import Colormap
- except ImportError:
- Colormap = str
-
- class T_Options(TypedDict):
- arithmetic_join: Literal["inner", "outer", "left", "right", "exact"]
- cmap_divergent: Union[str, "Colormap"]
- cmap_sequential: Union[str, "Colormap"]
- display_max_rows: int
- display_style: Literal["text", "html"]
- display_width: int
- display_expand_attrs: Literal["default", True, False]
- display_expand_coords: Literal["default", True, False]
- display_expand_data_vars: Literal["default", True, False]
- display_expand_data: Literal["default", True, False]
- enable_cftimeindex: bool
- file_cache_maxsize: int
- keep_attrs: Literal["default", True, False]
- warn_for_unclosed_files: bool
- use_bottleneck: bool
+ from typing_extensions import Literal, TypedDict
-else:
- # See GH5624, this is a convoluted way to allow type-checking to use
- # `TypedDict` and `Literal` without requiring typing_extensions as a
- # required dependency to _run_ the code (it is required to type-check).
+if TYPE_CHECKING:
try:
- from typing import TYPE_CHECKING, Union
-
- from typing_extensions import Literal, TypedDict
-
- if TYPE_CHECKING:
- try:
- from matplotlib.colors import Colormap
- except ImportError:
- Colormap = str
-
- class T_Options(TypedDict):
- arithmetic_join: Literal["inner", "outer", "left", "right", "exact"]
- cmap_divergent: Union[str, "Colormap"]
- cmap_sequential: Union[str, "Colormap"]
- display_max_rows: int
- display_style: Literal["text", "html"]
- display_width: int
- display_expand_attrs: Literal["default", True, False]
- display_expand_coords: Literal["default", True, False]
- display_expand_data_vars: Literal["default", True, False]
- display_expand_data: Literal["default", True, False]
- enable_cftimeindex: bool
- file_cache_maxsize: int
- keep_attrs: Literal["default", True, False]
- warn_for_unclosed_files: bool
- use_bottleneck: bool
-
+ from matplotlib.colors import Colormap
except ImportError:
- from typing import TYPE_CHECKING, Any, Dict, Hashable
-
- if TYPE_CHECKING:
- raise
- else:
- T_Options = Dict[Hashable, Any]
+ Colormap = str
+
+
+class T_Options(TypedDict):
+ arithmetic_join: Literal["inner", "outer", "left", "right", "exact"]
+ cmap_divergent: Union[str, "Colormap"]
+ cmap_sequential: Union[str, "Colormap"]
+ display_max_rows: int
+ display_style: Literal["text", "html"]
+ display_width: int
+ display_expand_attrs: Literal["default", True, False]
+ display_expand_coords: Literal["default", True, False]
+ display_expand_data_vars: Literal["default", True, False]
+ display_expand_data: Literal["default", True, False]
+ enable_cftimeindex: bool
+ file_cache_maxsize: int
+ keep_attrs: Literal["default", True, False]
+ warn_for_unclosed_files: bool
+ use_bottleneck: bool
OPTIONS: T_Options = {
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 191bb4059f5..a96adb31e64 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -45,6 +45,7 @@
sparse_array_type,
)
from .utils import (
+ Frozen,
NdimSizeLenMixin,
OrderedSet,
_default,
@@ -996,16 +997,44 @@ def __deepcopy__(self, memo=None):
__hash__ = None # type: ignore[assignment]
@property
- def chunks(self):
- """Block dimensions for this array's data or None if it's not a dask
- array.
+ def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
+ """
+ Tuple of block lengths for this variable's data, in order of dimensions, or None if
+ the underlying data is not a dask array.
+
+ See Also
+ --------
+ Variable.chunk
+ Variable.chunksizes
+ xarray.unify_chunks
"""
return getattr(self._data, "chunks", None)
+ @property
+ def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
+ """
+ Mapping from dimension names to block lengths for this variable's data. If the
+ underlying data is not a dask array, an empty mapping is returned.
+ Cannot be modified directly, but can be modified by calling .chunk().
+
+ Differs from Variable.chunks because it returns a mapping of dimensions to chunk shapes
+ instead of a tuple of chunk shapes.
+
+ See Also
+ --------
+ Variable.chunk
+ Variable.chunks
+ xarray.unify_chunks
+ """
+ if hasattr(self._data, "chunks"):
+ return Frozen({dim: c for dim, c in zip(self.dims, self.data.chunks)})
+ else:
+ return {}
+
_array_counter = itertools.count()
def chunk(self, chunks={}, name=None, lock=False):
- """Coerce this array's data into a dask arrays with the given chunks.
+ """Coerce this array's data into a dask array with the given chunks.
If this variable is a non-dask array, it will be converted to dask
array. If it's a dask array, it will be rechunked to the given chunk
diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py
index b384dea0571..a518a78dbf6 100644
--- a/xarray/plot/facetgrid.py
+++ b/xarray/plot/facetgrid.py
@@ -173,11 +173,11 @@ def __init__(
)
# Set up the lists of names for the row and column facet variables
- col_names = list(data[col].values) if col else []
- row_names = list(data[row].values) if row else []
+ col_names = list(data[col].to_numpy()) if col else []
+ row_names = list(data[row].to_numpy()) if row else []
if single_group:
- full = [{single_group: x} for x in data[single_group].values]
+ full = [{single_group: x} for x in data[single_group].to_numpy()]
empty = [None for x in range(nrow * ncol - len(full))]
name_dicts = full + empty
else:
@@ -251,7 +251,7 @@ def map_dataarray(self, func, x, y, **kwargs):
raise ValueError("cbar_ax not supported by FacetGrid.")
cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs(
- func, self.data.values, **kwargs
+ func, self.data.to_numpy(), **kwargs
)
self._cmap_extend = cmap_params.get("extend")
@@ -347,7 +347,7 @@ def map_dataset(
if hue and meta_data["hue_style"] == "continuous":
cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs(
- func, self.data[hue].values, **kwargs
+ func, self.data[hue].to_numpy(), **kwargs
)
kwargs["meta_data"]["cmap_params"] = cmap_params
kwargs["meta_data"]["cbar_kwargs"] = cbar_kwargs
@@ -423,7 +423,7 @@ def _adjust_fig_for_guide(self, guide):
def add_legend(self, **kwargs):
self.figlegend = self.fig.legend(
handles=self._mappables[-1],
- labels=list(self._hue_var.values),
+ labels=list(self._hue_var.to_numpy()),
title=self._hue_label,
loc="center right",
**kwargs,
@@ -619,7 +619,7 @@ def map(self, func, *args, **kwargs):
if namedict is not None:
data = self.data.loc[namedict]
plt.sca(ax)
- innerargs = [data[a].values for a in args]
+ innerargs = [data[a].to_numpy() for a in args]
maybe_mappable = func(*innerargs, **kwargs)
# TODO: better way to verify that an artist is mappable?
# https://stackoverflow.com/questions/33023036/is-it-possible-to-detect-if-a-matplotlib-artist-is-a-mappable-suitable-for-use-w#33023522
diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py
index 1e1e59e2f71..60f132d07e1 100644
--- a/xarray/plot/plot.py
+++ b/xarray/plot/plot.py
@@ -1075,7 +1075,7 @@ def newplotfunc(
# Matplotlib does not support normalising RGB data, so do it here.
# See eg. https://github.com/matplotlib/matplotlib/pull/10220
if robust or vmax is not None or vmin is not None:
- darray = _rescale_imshow_rgb(darray, vmin, vmax, robust)
+ darray = _rescale_imshow_rgb(darray.as_numpy(), vmin, vmax, robust)
vmin, vmax, robust = None, None, False
if subplot_kws is None:
@@ -1146,10 +1146,6 @@ def newplotfunc(
else:
dims = (yval.dims[0], xval.dims[0])
- # better to pass the ndarrays directly to plotting functions
- xval = xval.to_numpy()
- yval = yval.to_numpy()
-
# May need to transpose for correct x, y labels
# xlab may be the name of a coord, we have to check for dim names
if imshow_rgb:
@@ -1162,6 +1158,10 @@ def newplotfunc(
if dims != darray.dims:
darray = darray.transpose(*dims, transpose_coords=True)
+ # better to pass the ndarrays directly to plotting functions
+ xval = xval.to_numpy()
+ yval = yval.to_numpy()
+
# Pass the data as a masked ndarray too
zval = darray.to_masked_array(copy=False)
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
index cbe09aab815..8d0c09eacec 100644
--- a/xarray/tests/test_combine.py
+++ b/xarray/tests/test_combine.py
@@ -12,6 +12,7 @@
combine_by_coords,
combine_nested,
concat,
+ merge,
)
from xarray.core import dtypes
from xarray.core.combine import (
@@ -688,7 +689,7 @@ def test_nested_combine_mixed_datasets_arrays(self):
combine_nested(objs, "x")
-class TestCombineAuto:
+class TestCombineDatasetsbyCoords:
def test_combine_by_coords(self):
objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
actual = combine_by_coords(objs)
@@ -730,17 +731,6 @@ def test_combine_by_coords(self):
def test_empty_input(self):
assert_identical(Dataset(), combine_by_coords([]))
- def test_combine_coords_mixed_datasets_arrays(self):
- objs = [
- DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})),
- Dataset({"x": [2, 3]}),
- ]
- with pytest.raises(
- ValueError,
- match=r"Can't automatically combine datasets with unnamed arrays.",
- ):
- combine_by_coords(objs)
-
@pytest.mark.parametrize(
"join, expected",
[
@@ -1044,7 +1034,35 @@ def test_combine_by_coords_incomplete_hypercube(self):
with pytest.raises(ValueError):
combine_by_coords([x1, x2, x3], fill_value=None)
- def test_combine_by_coords_unnamed_arrays(self):
+
+class TestCombineMixedObjectsbyCoords:
+ def test_combine_by_coords_mixed_unnamed_dataarrays(self):
+ named_da = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
+ unnamed_da = DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")
+
+ with pytest.raises(
+ ValueError, match="Can't automatically combine unnamed DataArrays with"
+ ):
+ combine_by_coords([named_da, unnamed_da])
+
+ da = DataArray([0, 1], dims="x", coords=({"x": [0, 1]}))
+ ds = Dataset({"x": [2, 3]})
+ with pytest.raises(
+ ValueError,
+ match="Can't automatically combine unnamed DataArrays with",
+ ):
+ combine_by_coords([da, ds])
+
+ def test_combine_coords_mixed_datasets_named_dataarrays(self):
+ da = DataArray(name="a", data=[4, 5], dims="x", coords=({"x": [0, 1]}))
+ ds = Dataset({"b": ("x", [2, 3])})
+ actual = combine_by_coords([da, ds])
+ expected = Dataset(
+ {"a": ("x", [4, 5]), "b": ("x", [2, 3])}, coords={"x": ("x", [0, 1])}
+ )
+ assert_identical(expected, actual)
+
+ def test_combine_by_coords_all_unnamed_dataarrays(self):
unnamed_array = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
actual = combine_by_coords([unnamed_array])
@@ -1060,6 +1078,33 @@ def test_combine_by_coords_unnamed_arrays(self):
)
assert_identical(expected, actual)
+ def test_combine_by_coords_all_named_dataarrays(self):
+ named_da = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
+
+ actual = combine_by_coords([named_da])
+ expected = named_da.to_dataset()
+ assert_identical(expected, actual)
+
+ named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
+ named_da2 = DataArray(name="b", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")
+
+ actual = combine_by_coords([named_da1, named_da2])
+ expected = Dataset(
+ {
+ "a": DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x"),
+ "b": DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x"),
+ }
+ )
+ assert_identical(expected, actual)
+
+ def test_combine_by_coords_all_dataarrays_with_the_same_name(self):
+ named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
+ named_da2 = DataArray(name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")
+
+ actual = combine_by_coords([named_da1, named_da2])
+ expected = merge([named_da1, named_da2])
+ assert_identical(expected, actual)
+
@requires_cftime
def test_combine_by_coords_distant_cftime_dates():
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index d5d460056aa..9eda6ccdf19 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -104,6 +104,11 @@ def test_chunk(self):
assert rechunked.chunks == expected
self.assertLazyAndIdentical(self.eager_var, rechunked)
+ expected_chunksizes = {
+ dim: chunks for dim, chunks in zip(self.lazy_var.dims, expected)
+ }
+ assert rechunked.chunksizes == expected_chunksizes
+
def test_indexing(self):
u = self.eager_var
v = self.lazy_var
@@ -255,13 +260,13 @@ def test_missing_methods(self):
except NotImplementedError as err:
assert "dask" in str(err)
- @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+ @pytest.mark.filterwarnings("ignore::FutureWarning")
def test_univariate_ufunc(self):
u = self.eager_var
v = self.lazy_var
self.assertLazyAndAllClose(np.sin(u), xu.sin(v))
- @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+ @pytest.mark.filterwarnings("ignore::FutureWarning")
def test_bivariate_ufunc(self):
u = self.eager_var
v = self.lazy_var
@@ -330,6 +335,38 @@ def setUp(self):
self.data, coords={"x": range(4)}, dims=("x", "y"), name="foo"
)
+ def test_chunk(self):
+ for chunks, expected in [
+ ({}, ((2, 2), (2, 2, 2))),
+ (3, ((3, 1), (3, 3))),
+ ({"x": 3, "y": 3}, ((3, 1), (3, 3))),
+ ({"x": 3}, ((3, 1), (2, 2, 2))),
+ ({"x": (3, 1)}, ((3, 1), (2, 2, 2))),
+ ]:
+ # Test DataArray
+ rechunked = self.lazy_array.chunk(chunks)
+ assert rechunked.chunks == expected
+ self.assertLazyAndIdentical(self.eager_array, rechunked)
+
+ expected_chunksizes = {
+ dim: chunks for dim, chunks in zip(self.lazy_array.dims, expected)
+ }
+ assert rechunked.chunksizes == expected_chunksizes
+
+ # Test Dataset
+ lazy_dataset = self.lazy_array.to_dataset()
+ eager_dataset = self.eager_array.to_dataset()
+ expected_chunksizes = {
+ dim: chunks for dim, chunks in zip(lazy_dataset.dims, expected)
+ }
+ rechunked = lazy_dataset.chunk(chunks)
+
+ # Dataset.chunks has a different return type to DataArray.chunks - see issue #5843
+ assert rechunked.chunks == expected_chunksizes
+ self.assertLazyAndIdentical(eager_dataset, rechunked)
+
+ assert rechunked.chunksizes == expected_chunksizes
+
def test_rechunk(self):
chunked = self.eager_array.chunk({"x": 2}).chunk({"y": 2})
assert chunked.chunks == ((2,) * 2, (2,) * 3)
@@ -563,7 +600,7 @@ def duplicate_and_merge(array):
actual = duplicate_and_merge(self.lazy_array)
self.assertLazyAndEqual(expected, actual)
- @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+ @pytest.mark.filterwarnings("ignore::FutureWarning")
def test_ufuncs(self):
u = self.eager_array
v = self.lazy_array
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index 6d49e20909d..c032a781e47 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -258,6 +258,11 @@ def from_series_or_scalar(se):
def series_reduce(da, func, dim, **kwargs):
"""convert DataArray to pd.Series, apply pd.func, then convert back to
a DataArray. Multiple dims cannot be specified."""
+
+ # pd no longer accepts skipna=None https://github.com/pandas-dev/pandas/issues/44178
+ if kwargs.get("skipna", True) is None:
+ kwargs["skipna"] = True
+
if dim is None or da.ndim == 1:
se = da.to_series()
return from_series_or_scalar(getattr(se, func)(**kwargs))
diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py
index 3d57d3dc961..ad0aafff15e 100644
--- a/xarray/tests/test_sparse.py
+++ b/xarray/tests/test_sparse.py
@@ -276,11 +276,11 @@ def test_unary_op(self):
assert_sparse_equal(abs(self.var).data, abs(self.data))
assert_sparse_equal(self.var.round().data, self.data.round())
- @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+ @pytest.mark.filterwarnings("ignore::FutureWarning")
def test_univariate_ufunc(self):
assert_sparse_equal(np.sin(self.data), xu.sin(self.var).data)
- @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+ @pytest.mark.filterwarnings("ignore::FutureWarning")
def test_bivariate_ufunc(self):
assert_sparse_equal(np.maximum(self.data, 0), xu.maximum(self.var, 0).data)
assert_sparse_equal(np.maximum(self.data, 0), xu.maximum(0, self.var).data)
@@ -664,7 +664,7 @@ def test_stack(self):
roundtripped = stacked.unstack()
assert_identical(arr, roundtripped)
- @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+ @pytest.mark.filterwarnings("ignore::FutureWarning")
def test_ufuncs(self):
x = self.sp_xr
assert_equal(np.sin(x), xu.sin(x))
diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
index 7bde6ce8b9f..8be20c5f81c 100644
--- a/xarray/tests/test_units.py
+++ b/xarray/tests/test_units.py
@@ -5614,7 +5614,7 @@ def test_units_in_line_plot_labels(self):
assert ax.get_ylabel() == "pressure [pascal]"
assert ax.get_xlabel() == "x [meters]"
- def test_units_in_2d_plot_labels(self):
+ def test_units_in_2d_plot_colorbar_label(self):
arr = np.ones((2, 3)) * unit_registry.Pa
da = xr.DataArray(data=arr, dims=["x", "y"], name="pressure")
@@ -5622,3 +5622,27 @@ def test_units_in_2d_plot_labels(self):
ax = da.plot.contourf(ax=ax, cbar_ax=cax, add_colorbar=True)
assert cax.get_ylabel() == "pressure [pascal]"
+
+ def test_units_facetgrid_plot_labels(self):
+ arr = np.ones((2, 3)) * unit_registry.Pa
+ da = xr.DataArray(data=arr, dims=["x", "y"], name="pressure")
+
+ fig, (ax, cax) = plt.subplots(1, 2)
+ fgrid = da.plot.line(x="x", col="y")
+
+ assert fgrid.axes[0, 0].get_ylabel() == "pressure [pascal]"
+
+ def test_units_facetgrid_2d_imshow_plot_colorbar_labels(self):
+ arr = np.ones((2, 3, 4, 5)) * unit_registry.Pa
+ da = xr.DataArray(data=arr, dims=["x", "y", "z", "w"], name="pressure")
+
+ da.plot.imshow(x="x", y="y", col="w") # no colorbar to check labels of
+
+ def test_units_facetgrid_2d_contourf_plot_colorbar_labels(self):
+ arr = np.ones((2, 3, 4)) * unit_registry.Pa
+ da = xr.DataArray(data=arr, dims=["x", "y", "z"], name="pressure")
+
+ fig, (ax1, ax2, ax3, cax) = plt.subplots(1, 4)
+ fgrid = da.plot.contourf(x="x", y="y", col="z")
+
+ assert fgrid.cbar.ax.get_ylabel() == "pressure [pascal]"