
Commit

Merge branch 'main' into api-reducitons
dcherian authored Oct 29, 2021
2 parents 99d599a + 867646f commit cc6a004
Showing 25 changed files with 412 additions and 174 deletions.
1 change: 1 addition & 0 deletions ci/requirements/environment-windows.yml
@@ -39,6 +39,7 @@ dependencies:
- setuptools
- sparse
- toolz
- typing_extensions
- zarr
- pip:
- numbagg
1 change: 1 addition & 0 deletions ci/requirements/environment.yml
@@ -43,6 +43,7 @@ dependencies:
- setuptools
- sparse
- toolz
- typing_extensions
- zarr
- pip:
- numbagg
1 change: 1 addition & 0 deletions ci/requirements/py37-bare-minimum.yml
@@ -13,3 +13,4 @@ dependencies:
- numpy=1.17
- pandas=1.0
- setuptools=40.4
- typing_extensions=3.7
1 change: 1 addition & 0 deletions ci/requirements/py37-min-all-deps.yml
@@ -47,6 +47,7 @@ dependencies:
- setuptools=40.4
- sparse=0.8
- toolz=0.10
- typing_extensions=3.7
- zarr=2.4
- pip:
- numbagg==0.1
1 change: 1 addition & 0 deletions ci/requirements/py38-all-but-dask.yml
@@ -39,6 +39,7 @@ dependencies:
- setuptools
- sparse
- toolz
- typing_extensions
- zarr
- pip:
- numbagg
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -66,6 +66,7 @@ Attributes
Dataset.encoding
Dataset.indexes
Dataset.chunks
Dataset.chunksizes
Dataset.nbytes

Dictionary interface
@@ -273,6 +274,7 @@ Attributes
DataArray.attrs
DataArray.encoding
DataArray.indexes
DataArray.chunksizes

ndarray attributes
------------------
1 change: 1 addition & 0 deletions doc/ecosystem.rst
@@ -37,6 +37,7 @@ Geosciences
- `Spyfit <https://spyfit.readthedocs.io/en/master/>`_: FTIR spectroscopy of the atmosphere
- `windspharm <https://ajdawson.github.io/windspharm/index.html>`_: Spherical
harmonic wind analysis in Python.
- `wradlib <https://wradlib.org/>`_: An Open Source Library for Weather Radar Data Processing.
- `wrf-python <https://wrf-python.readthedocs.io/>`_: A collection of diagnostic and interpolation routines for use with output of the Weather Research and Forecasting (WRF-ARW) Model.
- `xarray-simlab <https://xarray-simlab.readthedocs.io>`_: xarray extension for computer model simulations.
- `xarray-spatial <https://makepath.github.io/xarray-spatial>`_: Numba-accelerated raster-based spatial processing tools (NDVI, curvature, zonal-statistics, proximity, hillshading, viewshed, etc.)
1 change: 1 addition & 0 deletions doc/getting-started-guide/installing.rst
@@ -8,6 +8,7 @@ Required dependencies

- Python (3.7 or later)
- setuptools (40.4 or later)
- ``typing_extensions`` (3.7 or later)
- `numpy <http://www.numpy.org/>`__ (1.17 or later)
- `pandas <http://pandas.pydata.org/>`__ (1.0 or later)

10 changes: 10 additions & 0 deletions doc/whats-new.rst
@@ -38,6 +38,10 @@ New Features
`Nathan Lis <https://github.com/wxman22>`_.
- Histogram plots are set with a title displaying the scalar coords if any, similarly to the other plots (:issue:`5791`, :pull:`5792`).
By `Maxime Liquet <https://github.com/maximlt>`_.
- Added new :py:attr:`Dataset.chunksizes`, :py:attr:`DataArray.chunksizes`, and :py:attr:`Variable.chunksizes`
  properties, which always return a mapping from dimension names to the chunking pattern along that dimension,
  regardless of whether the object is a Dataset, DataArray, or Variable. (:issue:`5846`, :pull:`5900`)
  By `Tom Nicholas <https://github.com/TomNicholas>`_.
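A minimal sketch of the behaviour this entry describes, assuming ``dask`` is installed (names below are illustrative):

```python
import numpy as np
import xarray as xr

# Two variables sharing dimension "x"; .chunk() requires dask to be installed
ds = xr.Dataset(
    {
        "a": (("x", "y"), np.zeros((4, 6))),
        "b": ("x", np.zeros(4)),
    }
).chunk({"x": 2, "y": 3})

# One mapping covering every dimension, whichever variable carries it
print(ds.chunksizes)  # e.g. {'x': (2, 2), 'y': (3, 3)}
```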

Breaking changes
~~~~~~~~~~~~~~~~
@@ -76,17 +80,23 @@ Bug fixes
- Fixed a performance bug where a ``cftime`` import was attempted within various core operations even when ``cftime``
  was not installed (:pull:`5640`).
  By `Luke Sewell <https://github.com/lusewell>`_.
- Fixed bug when combining named DataArrays using :py:func:`combine_by_coords`. (:pull:`5834`).
By `Tom Nicholas <https://github.com/TomNicholas>`_.
- When a custom engine was used in :py:func:`~xarray.open_dataset` the engine
wasn't initialized properly, causing missing argument errors or inconsistent
method signatures. (:pull:`5684`)
By `Jimmy Westling <https://github.com/illviljan>`_.
- Numbers are properly formatted in a plot's title (:issue:`5788`, :pull:`5789`).
By `Maxime Liquet <https://github.com/maximlt>`_.
- Faceted plots will no longer raise a ``pint.UnitStrippedWarning`` when a ``pint.Quantity`` array is plotted,
  and will correctly display the units of the data in the colorbar (if there is one) (:pull:`5886`).
By `Tom Nicholas <https://github.com/TomNicholas>`_.
- In backends, check for path-like objects rather than the ``pathlib.Path``
  type, using ``os.fspath`` (:pull:`5879`).
  By `Mike Taves <https://github.com/mwtoews>`_.
- ``open_mfdataset()`` now accepts a single ``pathlib.Path`` object (:issue:`5881`).
  By `Panos Mavrogiorgos <https://github.com/pmav99>`_.
- Improved performance of :py:meth:`Dataset.unstack` (:pull:`5906`). By `Tom Augspurger <https://github.com/TomAugspurger>`_.

Documentation
~~~~~~~~~~~~~
2 changes: 1 addition & 1 deletion requirements.txt
@@ -5,4 +5,4 @@
 numpy >= 1.17
 pandas >= 1.0
 setuptools >= 40.4
-typing-extensions >= 3.10
+typing-extensions >= 3.7
1 change: 1 addition & 0 deletions setup.cfg
@@ -78,6 +78,7 @@ python_requires = >=3.7
install_requires =
numpy >= 1.17
pandas >= 1.0
typing_extensions >= 3.7
setuptools >= 40.4 # For pkg_resources

[options.extras_require]
104 changes: 79 additions & 25 deletions xarray/core/combine.py
@@ -673,7 +673,7 @@ def combine_by_coords(
     Attempt to auto-magically combine the given datasets (or data arrays)
     into one by using dimension coordinates.
 
-    This method attempts to combine a group of datasets along any number of
+    This function attempts to combine a group of datasets along any number of
     dimensions into a single entity by inspecting coords and metadata and using
     a combination of concat and merge.
@@ -765,6 +765,8 @@ def combine_by_coords(
Returns
-------
combined : xarray.Dataset or xarray.DataArray
Will return a Dataset unless all the inputs are unnamed DataArrays, in which case a
DataArray will be returned.
See also
--------
@@ -870,6 +872,50 @@ def combine_by_coords(
Data variables:
temperature (y, x) float64 10.98 14.3 12.06 nan ... 18.89 10.44 8.293
precipitation (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176
You can also combine DataArray objects, but the behaviour will differ depending on
whether or not the DataArrays are named. If all DataArrays are named then they will
be promoted to Datasets before combining, and then the resultant Dataset will be
returned, e.g.
>>> named_da1 = xr.DataArray(
... name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x"
... )
>>> named_da1
<xarray.DataArray 'a' (x: 2)>
array([1., 2.])
Coordinates:
* x (x) int64 0 1
>>> named_da2 = xr.DataArray(
... name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x"
... )
>>> named_da2
<xarray.DataArray 'a' (x: 2)>
array([3., 4.])
Coordinates:
* x (x) int64 2 3
>>> xr.combine_by_coords([named_da1, named_da2])
<xarray.Dataset>
Dimensions: (x: 4)
Coordinates:
* x (x) int64 0 1 2 3
Data variables:
a (x) float64 1.0 2.0 3.0 4.0
If all the DataArrays are unnamed, a single DataArray will be returned, e.g.
>>> unnamed_da1 = xr.DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
>>> unnamed_da2 = xr.DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")
>>> xr.combine_by_coords([unnamed_da1, unnamed_da2])
<xarray.DataArray (x: 4)>
array([1., 2., 3., 4.])
Coordinates:
* x (x) int64 0 1 2 3
Finally, if you attempt to combine a mix of unnamed DataArrays with either named
DataArrays or Datasets, a ValueError will be raised (as this is an ambiguous operation).
"""

# TODO remove after version 0.21, see PR4696
@@ -883,33 +929,41 @@ def combine_by_coords(
if not data_objects:
return Dataset()

-    mixed_arrays_and_datasets = any(
-        isinstance(data_object, DataArray) and data_object.name is None
-        for data_object in data_objects
-    ) and any(isinstance(data_object, Dataset) for data_object in data_objects)
-    if mixed_arrays_and_datasets:
-        raise ValueError("Can't automatically combine datasets with unnamed arrays.")
-
-    all_unnamed_data_arrays = all(
-        isinstance(data_object, DataArray) and data_object.name is None
-        for data_object in data_objects
-    )
-    if all_unnamed_data_arrays:
-        unnamed_arrays = data_objects
-        temp_datasets = [data_array._to_temp_dataset() for data_array in unnamed_arrays]
-
-        combined_temp_dataset = _combine_single_variable_hypercube(
-            temp_datasets,
-            fill_value=fill_value,
-            data_vars=data_vars,
-            coords=coords,
-            compat=compat,
-            join=join,
-            combine_attrs=combine_attrs,
-        )
-        return DataArray()._from_temp_dataset(combined_temp_dataset)
-
+    objs_are_unnamed_dataarrays = [
+        isinstance(data_object, DataArray) and data_object.name is None
+        for data_object in data_objects
+    ]
+    if any(objs_are_unnamed_dataarrays):
+        if all(objs_are_unnamed_dataarrays):
+            # Combine into a single larger DataArray
+            temp_datasets = [
+                unnamed_dataarray._to_temp_dataset()
+                for unnamed_dataarray in data_objects
+            ]
+
+            combined_temp_dataset = _combine_single_variable_hypercube(
+                temp_datasets,
+                fill_value=fill_value,
+                data_vars=data_vars,
+                coords=coords,
+                compat=compat,
+                join=join,
+                combine_attrs=combine_attrs,
+            )
+            return DataArray()._from_temp_dataset(combined_temp_dataset)
+        else:
+            # Must be a mix of unnamed dataarrays with either named dataarrays or with datasets
+            # Can't combine these as we wouldn't know whether to merge or concatenate the arrays
+            raise ValueError(
+                "Can't automatically combine unnamed DataArrays with either named DataArrays or Datasets."
+            )
+    else:
+        # Promote any named DataArrays to single-variable Datasets to simplify combining
+        data_objects = [
+            obj.to_dataset() if isinstance(obj, DataArray) else obj
+            for obj in data_objects
+        ]

# Group by data vars
sorted_datasets = sorted(data_objects, key=vars_as_keys)
grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
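A minimal illustration of the branch added above: mixing an unnamed DataArray with a named one is rejected, since there is no way to know whether to merge or concatenate (variable names below are illustrative):

```python
import xarray as xr

named = xr.DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
unnamed = xr.DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")

try:
    xr.combine_by_coords([named, unnamed])
except ValueError as err:
    # Can't automatically combine unnamed DataArrays with either named
    # DataArrays or Datasets.
    print(err)
```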
17 changes: 17 additions & 0 deletions xarray/core/common.py
@@ -1813,6 +1813,23 @@ def ones_like(other, dtype: DTypeLike = None):
return full_like(other, 1, dtype)


def get_chunksizes(
variables: Iterable[Variable],
) -> Mapping[Any, Tuple[int, ...]]:

chunks: Dict[Any, Tuple[int, ...]] = {}
for v in variables:
if hasattr(v.data, "chunks"):
for dim, c in v.chunksizes.items():
if dim in chunks and c != chunks[dim]:
raise ValueError(
f"Object has inconsistent chunks along dimension {dim}. "
"This can be fixed by calling unify_chunks()."
)
chunks[dim] = c
return Frozen(chunks)


def is_np_datetime_like(dtype: DTypeLike) -> bool:
"""Check if a dtype is a subclass of the numpy datetime types"""
return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)
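To see the consistency check in ``get_chunksizes`` fire, give two variables different chunk sizes along a shared dimension; a sketch, assuming ``dask`` is installed (``unify_chunks`` is the fix the error message suggests):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({"a": ("x", np.arange(8)), "b": ("x", np.arange(8))})
ds["a"] = ds["a"].chunk({"x": 4})  # chunks of 4 along "x"
ds["b"] = ds["b"].chunk({"x": 2})  # chunks of 2 along the same dimension

try:
    ds.chunksizes  # walks every variable, so the mismatch is detected
except ValueError as err:
    print(err)  # "Object has inconsistent chunks along dimension x. ..."

# unify_chunks() rechunks everything to a common pattern along "x"
print(ds.unify_chunks().chunksizes)
```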
32 changes: 29 additions & 3 deletions xarray/core/dataarray.py
@@ -43,7 +43,7 @@
     reindex_like_indexers,
 )
 from .arithmetic import DataArrayArithmetic
-from .common import AbstractArray, DataWithCoords
+from .common import AbstractArray, DataWithCoords, get_chunksizes
 from .computation import unify_chunks
 from .coordinates import (
     DataArrayCoordinates,
@@ -1058,11 +1058,37 @@ def __deepcopy__(self, memo=None) -> "DataArray":
 
     @property
     def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
-        """Block dimensions for this array's data or None if it's not a dask
-        array.
-        """
+        """
+        Tuple of block lengths for this dataarray's data, in order of dimensions, or None if
+        the underlying data is not a dask array.
+
+        See Also
+        --------
+        DataArray.chunk
+        DataArray.chunksizes
+        xarray.unify_chunks
+        """
         return self.variable.chunks
 
+    @property
+    def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
+        """
+        Mapping from dimension names to block lengths for this dataarray's data, or None if
+        the underlying data is not a dask array.
+
+        Cannot be modified directly, but can be modified by calling .chunk().
+
+        Differs from DataArray.chunks because it returns a mapping of dimensions to chunk shapes
+        instead of a tuple of chunk shapes.
+
+        See Also
+        --------
+        DataArray.chunk
+        DataArray.chunks
+        xarray.unify_chunks
+        """
+        all_variables = [self.variable] + [c.variable for c in self.coords.values()]
+        return get_chunksizes(all_variables)
+
     def chunk(
         self,
         chunks: Union[
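A quick sketch of the difference between the two properties on a DataArray, assuming ``dask`` is installed:

```python
import numpy as np
import xarray as xr

# .chunk() swaps the underlying numpy array for a dask array
da = xr.DataArray(np.zeros((4, 6)), dims=["x", "y"]).chunk({"x": 2, "y": 3})

print(da.chunks)      # ((2, 2), (3, 3))           -- positional tuple, as before
print(da.chunksizes)  # {'x': (2, 2), 'y': (3, 3)} -- keyed by dimension name
```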
(diff truncated: 11 of the 25 changed files are not shown)
