Merge remote-tracking branch 'upstream/main' into docstring

* upstream/main:
  - Add typing_extensions as a required dependency (pydata#5911)
  - pydata#5740 follow up: suppress xr.ufunc warnings in tests (pydata#5914)
  - Avoid accessing slow .data in unstack (pydata#5906)
  - Add wradlib to ecosystem in docs (pydata#5915)
  - Use .to_numpy() for quantified facetgrids (pydata#5886)
  - [test-upstream] fix pd skipna=None (pydata#5899)
  - Add var and std to weighted computations (pydata#5870)
dcherian · Oct 29, 2021 · 43adc03 · 43adc03
2 parents 7b3fb24 + bcb96ce
commit 43adc03
Show file tree

Hide file tree

Showing 23 changed files with 404 additions and 123 deletions.
diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml
@@ -39,6 +39,7 @@ dependencies:
   - setuptools
   - sparse
   - toolz
+  - typing_extensions
   - zarr
   - pip:
     - numbagg
diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml
@@ -43,6 +43,7 @@ dependencies:
   - setuptools
   - sparse
   - toolz
+  - typing_extensions
   - zarr
   - pip:
     - numbagg
diff --git a/ci/requirements/py37-bare-minimum.yml b/ci/requirements/py37-bare-minimum.yml
@@ -13,3 +13,4 @@ dependencies:
   - numpy=1.17
   - pandas=1.0
   - setuptools=40.4
+  - typing_extensions=3.7
diff --git a/ci/requirements/py37-min-all-deps.yml b/ci/requirements/py37-min-all-deps.yml
@@ -47,6 +47,7 @@ dependencies:
   - setuptools=40.4
   - sparse=0.8
   - toolz=0.10
+  - typing_extensions=3.7
   - zarr=2.4
   - pip:
     - numbagg==0.1
diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml
@@ -39,6 +39,7 @@ dependencies:
   - setuptools
   - sparse
   - toolz
+  - typing_extensions
   - zarr
   - pip:
     - numbagg
diff --git a/doc/api.rst b/doc/api.rst
@@ -779,12 +779,18 @@ Weighted objects
 
    core.weighted.DataArrayWeighted
    core.weighted.DataArrayWeighted.mean
+   core.weighted.DataArrayWeighted.std
    core.weighted.DataArrayWeighted.sum
+   core.weighted.DataArrayWeighted.sum_of_squares
    core.weighted.DataArrayWeighted.sum_of_weights
+   core.weighted.DataArrayWeighted.var
    core.weighted.DatasetWeighted
    core.weighted.DatasetWeighted.mean
+   core.weighted.DatasetWeighted.std
    core.weighted.DatasetWeighted.sum
+   core.weighted.DatasetWeighted.sum_of_squares
    core.weighted.DatasetWeighted.sum_of_weights
+   core.weighted.DatasetWeighted.var
 
 
 Coarsen objects

diff --git a/doc/ecosystem.rst b/doc/ecosystem.rst
@@ -37,6 +37,7 @@ Geosciences
 - `Spyfit <https://spyfit.readthedocs.io/en/master/>`_: FTIR spectroscopy of the atmosphere
 - `windspharm <https://ajdawson.github.io/windspharm/index.html>`_: Spherical
   harmonic wind analysis in Python.
+- `wradlib <https://wradlib.org/>`_: An Open Source Library for Weather Radar Data Processing.
 - `wrf-python <https://wrf-python.readthedocs.io/>`_: A collection of diagnostic and interpolation routines for use with output of the Weather Research and Forecasting (WRF-ARW) Model.
 - `xarray-simlab <https://xarray-simlab.readthedocs.io>`_: xarray extension for computer model simulations.
 - `xarray-spatial <https://makepath.github.io/xarray-spatial>`_: Numba-accelerated raster-based spatial processing tools (NDVI, curvature, zonal-statistics, proximity, hillshading, viewshed, etc.)

diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst
@@ -8,6 +8,7 @@ Required dependencies
 
 - Python (3.7 or later)
 - setuptools (40.4 or later)
+- ``typing_extensions`` (3.7 or later)
 - `numpy <http://www.numpy.org/>`__ (1.17 or later)
 - `pandas <http://pandas.pydata.org/>`__ (1.0 or later)
 

diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst
@@ -263,7 +263,7 @@ Weighted array reductions
 
 :py:class:`DataArray` and :py:class:`Dataset` objects include :py:meth:`DataArray.weighted`
 and :py:meth:`Dataset.weighted` array reduction methods. They currently
-support weighted ``sum`` and weighted ``mean``.
+support weighted ``sum``, ``mean``, ``std`` and ``var``.
 
 .. ipython:: python
 
@@ -298,13 +298,27 @@ The weighted sum corresponds to:
     weighted_sum = (prec * weights).sum()
     weighted_sum
 
-and the weighted mean to:
+the weighted mean to:
 
 .. ipython:: python
 
     weighted_mean = weighted_sum / weights.sum()
     weighted_mean
 
+the weighted variance to:
+
+.. ipython:: python
+
+    weighted_var = weighted_prec.sum_of_squares() / weights.sum()
+    weighted_var
+
+and the weighted standard deviation to:
+
+.. ipython:: python
+
+    weighted_std = np.sqrt(weighted_var)
+    weighted_std
+
 However, the functions also take missing values in the data into account:
 
 .. ipython:: python
@@ -327,7 +341,7 @@ If the weights add up to 0, ``sum`` returns 0:
 
     data.weighted(weights).sum()
 
-and ``mean`` returns ``NaN``:
+and ``mean``, ``std`` and ``var`` return ``NaN``:
 
 .. ipython:: python
 

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -23,6 +23,8 @@ v0.19.1 (unreleased)
 
 New Features
 ~~~~~~~~~~~~
+- Add :py:meth:`var`, :py:meth:`std` and :py:meth:`sum_of_squares` to :py:meth:`Dataset.weighted` and :py:meth:`DataArray.weighted`.
+  By `Christian Jauvin <https://github.com/cjauvin>`_.
 - Added a :py:func:`get_options` method to xarray's root namespace (:issue:`5698`, :pull:`5716`)
   By `Pushkar Kopparla <https://github.com/pkopparla>`_.
 - Xarray now does a better job rendering variable names that are long LaTeX sequences when plotting (:issue:`5681`, :pull:`5682`).
@@ -80,11 +82,15 @@ Bug fixes
   By `Jimmy Westling <https://github.com/illviljan>`_.
 - Numbers are properly formatted in a plot's title (:issue:`5788`, :pull:`5789`).
   By `Maxime Liquet <https://github.com/maximlt>`_.
+- Faceted plots will no longer raise a ``pint.UnitStrippedWarning`` when a ``pint.Quantity`` array is plotted,
+  and will correctly display the units of the data in the colorbar (if there is one) (:pull:`5886`).
+  By `Tom Nicholas <https://github.com/TomNicholas>`_.
 - With backends, check for path-like objects rather than ``pathlib.Path``
   type, use ``os.fspath`` (:pull:`5879`).
   By `Mike Taves <https://github.com/mwtoews>`_.
 - ``open_mfdataset()`` now accepts a single ``pathlib.Path`` object (:issue:`5881`).
   By `Panos Mavrogiorgos <https://github.com/pmav99>`_.
+- Improved performance of :py:meth:`Dataset.unstack` (:pull:`5906`). By `Tom Augspurger <https://github.com/TomAugspurger>`_.
 
 Documentation
 ~~~~~~~~~~~~~

diff --git a/requirements.txt b/requirements.txt
@@ -5,4 +5,4 @@
 numpy >= 1.17
 pandas >= 1.0
 setuptools >= 40.4
-typing-extensions >= 3.10
+typing-extensions >= 3.7
diff --git a/setup.cfg b/setup.cfg
@@ -78,6 +78,7 @@ python_requires = >=3.7
 install_requires =
     numpy >= 1.17
     pandas >= 1.0
+    typing_extensions >= 3.7
     setuptools >= 40.4  # For pkg_resources
 
 [options.extras_require]

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -4153,34 +4153,34 @@ def unstack(
                 )
 
         result = self.copy(deep=False)
-        for dim in dims:
 
-            if (
-                # Dask arrays don't support assignment by index, which the fast unstack
-                # function requires.
-                # https://github.com/pydata/xarray/pull/4746#issuecomment-753282125
-                any(is_duck_dask_array(v.data) for v in self.variables.values())
-                # Sparse doesn't currently support (though we could special-case
-                # it)
-                # https://github.com/pydata/sparse/issues/422
-                or any(
-                    isinstance(v.data, sparse_array_type)
-                    for v in self.variables.values()
-                )
-                or sparse
-                # Until https://github.com/pydata/xarray/pull/4751 is resolved,
-                # we check explicitly whether it's a numpy array. Once that is
-                # resolved, explicitly exclude pint arrays.
-                # # pint doesn't implement `np.full_like` in a way that's
-                # # currently compatible.
-                # # https://github.com/pydata/xarray/pull/4746#issuecomment-753425173
-                # # or any(
-                # #     isinstance(v.data, pint_array_type) for v in self.variables.values()
-                # # )
-                or any(
-                    not isinstance(v.data, np.ndarray) for v in self.variables.values()
-                )
-            ):
+        # we want to avoid allocating an object-dtype ndarray for a MultiIndex,
+        # so we can't just access self.variables[v].data for every variable.
+        # We only check the non-index variables.
+        # https://github.com/pydata/xarray/issues/5902
+        nonindexes = [
+            self.variables[k] for k in set(self.variables) - set(self.xindexes)
+        ]
+        # Notes for each of these cases:
+        # 1. Dask arrays don't support assignment by index, which the fast unstack
+        #    function requires.
+        #    https://github.com/pydata/xarray/pull/4746#issuecomment-753282125
+        # 2. Sparse doesn't currently support (though we could special-case it)
+        #    https://github.com/pydata/sparse/issues/422
+        # 3. pint requires checking if it's a NumPy array until
+        #    https://github.com/pydata/xarray/pull/4751 is resolved.
+        #    Once that is resolved, explicitly exclude pint arrays.
+        #    pint doesn't implement `np.full_like` in a way that's
+        #    currently compatible.
+        needs_full_reindex = sparse or any(
+            is_duck_dask_array(v.data)
+            or isinstance(v.data, sparse_array_type)
+            or not isinstance(v.data, np.ndarray)
+            for v in nonindexes
+        )
+
+        for dim in dims:
+            if needs_full_reindex:
                 result = result._unstack_full_reindex(dim, fill_value, sparse)
             else:
                 result = result._unstack_once(dim, fill_value)

diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py
@@ -41,17 +41,10 @@
     # fall back for numpy < 1.20, ArrayLike adapted from numpy.typing._array_like
     if sys.version_info >= (3, 8):
         from typing import Protocol
-
-        HAVE_PROTOCOL = True
     else:
-        try:
-            from typing_extensions import Protocol
-        except ImportError:
-            HAVE_PROTOCOL = False
-        else:
-            HAVE_PROTOCOL = True
+        from typing_extensions import Protocol
 
-    if TYPE_CHECKING or HAVE_PROTOCOL:
+    if TYPE_CHECKING:
 
         class _SupportsArray(Protocol):
             def __array__(self) -> np.ndarray:

diff --git a/xarray/core/options.py b/xarray/core/options.py
@@ -6,70 +6,35 @@
 # TODO: Remove this check once python 3.7 is not supported:
 if sys.version_info >= (3, 8):
     from typing import TYPE_CHECKING, Literal, TypedDict, Union
+else:
+    from typing import TYPE_CHECKING, Union
 
-    if TYPE_CHECKING:
-        try:
-            from matplotlib.colors import Colormap
-        except ImportError:
-            Colormap = str
-
-    class T_Options(TypedDict):
-        arithmetic_join: Literal["inner", "outer", "left", "right", "exact"]
-        cmap_divergent: Union[str, "Colormap"]
-        cmap_sequential: Union[str, "Colormap"]
-        display_max_rows: int
-        display_style: Literal["text", "html"]
-        display_width: int
-        display_expand_attrs: Literal["default", True, False]
-        display_expand_coords: Literal["default", True, False]
-        display_expand_data_vars: Literal["default", True, False]
-        display_expand_data: Literal["default", True, False]
-        enable_cftimeindex: bool
-        file_cache_maxsize: int
-        keep_attrs: Literal["default", True, False]
-        warn_for_unclosed_files: bool
-        use_bottleneck: bool
+    from typing_extensions import Literal, TypedDict
 
 
-else:
-    # See GH5624, this is a convoluted way to allow type-checking to use
-    # `TypedDict` and `Literal` without requiring typing_extensions as a
-    #  required dependency to _run_ the code (it is required to type-check).
+if TYPE_CHECKING:
     try:
-        from typing import TYPE_CHECKING, Union
-
-        from typing_extensions import Literal, TypedDict
-
-        if TYPE_CHECKING:
-            try:
-                from matplotlib.colors import Colormap
-            except ImportError:
-                Colormap = str
-
-        class T_Options(TypedDict):
-            arithmetic_join: Literal["inner", "outer", "left", "right", "exact"]
-            cmap_divergent: Union[str, "Colormap"]
-            cmap_sequential: Union[str, "Colormap"]
-            display_max_rows: int
-            display_style: Literal["text", "html"]
-            display_width: int
-            display_expand_attrs: Literal["default", True, False]
-            display_expand_coords: Literal["default", True, False]
-            display_expand_data_vars: Literal["default", True, False]
-            display_expand_data: Literal["default", True, False]
-            enable_cftimeindex: bool
-            file_cache_maxsize: int
-            keep_attrs: Literal["default", True, False]
-            warn_for_unclosed_files: bool
-            use_bottleneck: bool
-
+        from matplotlib.colors import Colormap
     except ImportError:
-        from typing import TYPE_CHECKING, Any, Dict, Hashable
-
-        if TYPE_CHECKING:
-            raise
-        else:
-            T_Options = Dict[Hashable, Any]
+        Colormap = str
+
+
+class T_Options(TypedDict):
+    arithmetic_join: Literal["inner", "outer", "left", "right", "exact"]
+    cmap_divergent: Union[str, "Colormap"]
+    cmap_sequential: Union[str, "Colormap"]
+    display_max_rows: int
+    display_style: Literal["text", "html"]
+    display_width: int
+    display_expand_attrs: Literal["default", True, False]
+    display_expand_coords: Literal["default", True, False]
+    display_expand_data_vars: Literal["default", True, False]
+    display_expand_data: Literal["default", True, False]
+    enable_cftimeindex: bool
+    file_cache_maxsize: int
+    keep_attrs: Literal["default", True, False]
+    warn_for_unclosed_files: bool
+    use_bottleneck: bool
 
 
 OPTIONS: T_Options = {