diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml
index 78ead40d5a2..2468ec6267e 100644
--- a/ci/requirements/environment-windows.yml
+++ b/ci/requirements/environment-windows.yml
@@ -39,6 +39,7 @@ dependencies:
   - setuptools
   - sparse
   - toolz
+  - typing_extensions
   - zarr
   - pip:
     - numbagg
diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml
index f64ca3677cc..162faa7b74d 100644
--- a/ci/requirements/environment.yml
+++ b/ci/requirements/environment.yml
@@ -43,6 +43,7 @@ dependencies:
   - setuptools
   - sparse
   - toolz
+  - typing_extensions
   - zarr
   - pip:
     - numbagg
diff --git a/ci/requirements/py37-bare-minimum.yml b/ci/requirements/py37-bare-minimum.yml
index 408cf76fdd6..0cecf885436 100644
--- a/ci/requirements/py37-bare-minimum.yml
+++ b/ci/requirements/py37-bare-minimum.yml
@@ -13,3 +13,4 @@ dependencies:
   - numpy=1.17
   - pandas=1.0
   - setuptools=40.4
+  - typing_extensions=3.7
diff --git a/ci/requirements/py37-min-all-deps.yml b/ci/requirements/py37-min-all-deps.yml
index 7c3230f87b0..c73c5327d3b 100644
--- a/ci/requirements/py37-min-all-deps.yml
+++ b/ci/requirements/py37-min-all-deps.yml
@@ -47,6 +47,7 @@ dependencies:
   - setuptools=40.4
   - sparse=0.8
   - toolz=0.10
+  - typing_extensions=3.7
   - zarr=2.4
   - pip:
     - numbagg==0.1
diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml
index 3f82990f3b5..688dfb7a2bc 100644
--- a/ci/requirements/py38-all-but-dask.yml
+++ b/ci/requirements/py38-all-but-dask.yml
@@ -39,6 +39,7 @@ dependencies:
   - setuptools
   - sparse
   - toolz
+  - typing_extensions
   - zarr
   - pip:
     - numbagg
diff --git a/doc/api.rst b/doc/api.rst
index 4686751c536..83015cb3993 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -779,12 +779,18 @@ Weighted objects
 
     core.weighted.DataArrayWeighted
     core.weighted.DataArrayWeighted.mean
+    core.weighted.DataArrayWeighted.std
     core.weighted.DataArrayWeighted.sum
+    core.weighted.DataArrayWeighted.sum_of_squares
     core.weighted.DataArrayWeighted.sum_of_weights
+    core.weighted.DataArrayWeighted.var
     core.weighted.DatasetWeighted
     core.weighted.DatasetWeighted.mean
+    core.weighted.DatasetWeighted.std
     core.weighted.DatasetWeighted.sum
+    core.weighted.DatasetWeighted.sum_of_squares
     core.weighted.DatasetWeighted.sum_of_weights
+    core.weighted.DatasetWeighted.var
 
 
 Coarsen objects
diff --git a/doc/ecosystem.rst b/doc/ecosystem.rst
index 1c919b3040b..460541e91d7 100644
--- a/doc/ecosystem.rst
+++ b/doc/ecosystem.rst
@@ -37,6 +37,7 @@ Geosciences
 - `Spyfit `_: FTIR spectroscopy of the atmosphere
 - `windspharm `_: Spherical harmonic wind analysis in Python.
+- `wradlib `_: An Open Source Library for Weather Radar Data Processing.
 - `wrf-python `_: A collection of diagnostic and interpolation routines for use with output of the Weather Research and Forecasting (WRF-ARW) Model.
 - `xarray-simlab `_: xarray extension for computer model simulations.
 - `xarray-spatial `_: Numba-accelerated raster-based spatial processing tools (NDVI, curvature, zonal-statistics, proximity, hillshading, viewshed, etc.)
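The api.rst entries above document three new weighted reductions. A minimal usage sketch of the added API (the data and weights here are illustrative, not taken from the changeset):

```python
import xarray as xr

da = xr.DataArray([1.0, 2.0, 3.0], dims="x")
weights = xr.DataArray([0.5, 1.0, 1.5], dims="x")

weighted = da.weighted(weights)
print(weighted.mean())            # existing reduction
print(weighted.var())             # new in this changeset
print(weighted.std())             # new in this changeset
print(weighted.sum_of_squares())  # new in this changeset
```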
diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst
index 93738da9d9b..c6bc84e6ddb 100644
--- a/doc/getting-started-guide/installing.rst
+++ b/doc/getting-started-guide/installing.rst
@@ -8,6 +8,7 @@ Required dependencies
 
 - Python (3.7 or later)
 - setuptools (40.4 or later)
+- ``typing_extensions`` (3.7 or later)
 - `numpy `__ (1.17 or later)
 - `pandas `__ (1.0 or later)
diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst
index e592291d2a8..fc3c457308f 100644
--- a/doc/user-guide/computation.rst
+++ b/doc/user-guide/computation.rst
@@ -263,7 +263,7 @@ Weighted array reductions
 
 :py:class:`DataArray` and :py:class:`Dataset` objects include :py:meth:`DataArray.weighted`
 and :py:meth:`Dataset.weighted` array reduction methods. They currently
-support weighted ``sum`` and weighted ``mean``.
+support weighted ``sum``, ``mean``, ``std`` and ``var``.
 
 .. ipython:: python
 
@@ -298,13 +298,27 @@ The weighted sum corresponds to:
 
     weighted_sum = (prec * weights).sum()
     weighted_sum
 
-and the weighted mean to:
+the weighted mean to:
 
 .. ipython:: python
 
     weighted_mean = weighted_sum / weights.sum()
     weighted_mean
 
+the weighted variance to:
+
+.. ipython:: python
+
+    weighted_var = weighted_prec.sum_of_squares() / weights.sum()
+    weighted_var
+
+and the weighted standard deviation to:
+
+.. ipython:: python
+
+    weighted_std = np.sqrt(weighted_var)
+    weighted_std
+
 However, the functions also take missing values in the data into account:
 
 .. ipython:: python
 
@@ -327,7 +341,7 @@ If the weights add up to 0, ``sum`` returns 0:
 
     data.weighted(weights).sum()
 
-and ``mean`` returns ``NaN``:
+and ``mean``, ``std`` and ``var`` return ``NaN``:
 
 .. ipython:: python
 
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 516c4b5772f..8dfecdd2aa8 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -23,6 +23,8 @@ v0.19.1 (unreleased)
 New Features
 ~~~~~~~~~~~~
 
+- Add :py:meth:`var`, :py:meth:`std` and :py:meth:`sum_of_squares` to :py:meth:`Dataset.weighted` and :py:meth:`DataArray.weighted`.
+  By `Christian Jauvin `_.
 - Added a :py:func:`get_options` method to xarray's root namespace (:issue:`5698`, :pull:`5716`)
   By `Pushkar Kopparla `_.
 - Xarray now does a better job rendering variable names that are long LaTeX sequences when plotting (:issue:`5681`, :pull:`5682`).
@@ -80,11 +82,15 @@ Bug fixes
   By `Jimmy Westling `_.
 - Numbers are properly formatted in a plot's title (:issue:`5788`, :pull:`5789`).
   By `Maxime Liquet `_.
+- Faceted plots will no longer raise a `pint.UnitStrippedWarning` when a `pint.Quantity` array is plotted,
+  and will correctly display the units of the data in the colorbar (if there is one) (:pull:`5886`).
+  By `Tom Nicholas `_.
 - With backends, check for path-like objects rather than ``pathlib.Path``
   type, use ``os.fspath`` (:pull:`5879`).
   By `Mike Taves `_.
 - ``open_mfdataset()`` now accepts a single ``pathlib.Path`` object (:issue: `5881`).
   By `Panos Mavrogiorgos `_.
+- Improved performance of :py:meth:`Dataset.unstack` (:pull:`5906`).
+  By `Tom Augspurger `_.
 
 Documentation
 ~~~~~~~~~~~~~
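The computation.rst hunk above expresses the new reductions in terms of weighted sums. The same identities as a self-contained check (variable names follow the docs; the data is made up):

```python
import numpy as np
import xarray as xr

prec = xr.DataArray([1.1, 2.2, 3.3], dims="x")
weights = xr.DataArray([0.2, 0.3, 0.5], dims="x")
weighted_prec = prec.weighted(weights)

# weighted mean and (biased) weighted variance, written out by hand
weighted_mean = (prec * weights).sum() / weights.sum()
weighted_var = (weights * (prec - weighted_mean) ** 2).sum() / weights.sum()

# the built-in reductions agree with the hand-rolled formulas
np.testing.assert_allclose(weighted_var, weighted_prec.var())
np.testing.assert_allclose(np.sqrt(weighted_var), weighted_prec.std())
```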
diff --git a/requirements.txt b/requirements.txt
index 732d40cde18..0fa83c8ccc1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,4 @@
 numpy >= 1.17
 pandas >= 1.0
 setuptools >= 40.4
-typing-extensions >= 3.10
+typing-extensions >= 3.7
diff --git a/setup.cfg b/setup.cfg
index aa8ca8df0ff..2dc1b7ffeca 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -78,6 +78,7 @@ python_requires = >=3.7
 install_requires =
     numpy >= 1.17
     pandas >= 1.0
+    typing_extensions >= 3.7
     setuptools >= 40.4  # For pkg_resources
 
 [options.extras_require]
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 3055e50aaf5..0e6ae905aa8 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -4153,34 +4153,34 @@ def unstack(
         )
 
         result = self.copy(deep=False)
-        for dim in dims:
-            if (
-                # Dask arrays don't support assignment by index, which the fast unstack
-                # function requires.
-                # https://github.com/pydata/xarray/pull/4746#issuecomment-753282125
-                any(is_duck_dask_array(v.data) for v in self.variables.values())
-                # Sparse doesn't currently support (though we could special-case
-                # it)
-                # https://github.com/pydata/sparse/issues/422
-                or any(
-                    isinstance(v.data, sparse_array_type)
-                    for v in self.variables.values()
-                )
-                or sparse
-                # Until https://github.com/pydata/xarray/pull/4751 is resolved,
-                # we check explicitly whether it's a numpy array. Once that is
-                # resolved, explicitly exclude pint arrays.
-                # # pint doesn't implement `np.full_like` in a way that's
-                # # currently compatible.
-                # # https://github.com/pydata/xarray/pull/4746#issuecomment-753425173
-                # # or any(
-                # #     isinstance(v.data, pint_array_type) for v in self.variables.values()
-                # # )
-                or any(
-                    not isinstance(v.data, np.ndarray) for v in self.variables.values()
-                )
-            ):
+        # we want to avoid allocating an object-dtype ndarray for a MultiIndex,
+        # so we can't just access self.variables[v].data for every variable.
+        # We only check the non-index variables.
+        # https://github.com/pydata/xarray/issues/5902
+        nonindexes = [
+            self.variables[k] for k in set(self.variables) - set(self.xindexes)
+        ]
+        # Notes for each of these cases:
+        # 1. Dask arrays don't support assignment by index, which the fast unstack
+        #    function requires.
+        #    https://github.com/pydata/xarray/pull/4746#issuecomment-753282125
+        # 2. Sparse doesn't currently support (though we could special-case it)
+        #    https://github.com/pydata/sparse/issues/422
+        # 3. pint requires checking if it's a NumPy array until
+        #    https://github.com/pydata/xarray/pull/4751 is resolved.
+        #    Once that is resolved, explicitly exclude pint arrays.
+        #    pint doesn't implement `np.full_like` in a way that's
+        #    currently compatible.
+        needs_full_reindex = sparse or any(
+            is_duck_dask_array(v.data)
+            or isinstance(v.data, sparse_array_type)
+            or not isinstance(v.data, np.ndarray)
+            for v in nonindexes
+        )
+
+        for dim in dims:
+            if needs_full_reindex:
                 result = result._unstack_full_reindex(dim, fill_value, sparse)
             else:
                 result = result._unstack_once(dim, fill_value)
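A sketch of how the consolidated check behaves in practice (assuming dask is installed; the dataset is illustrative):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"a": (("x", "y"), np.arange(6).reshape(2, 3))},
    coords={"x": [0, 1], "y": ["a", "b", "c"]},
).stack(z=("x", "y"))

# All data variables are numpy-backed, so needs_full_reindex is False and
# unstack takes the fast _unstack_once path.
roundtripped = ds.unstack("z")

# A dask-backed variable fails the isinstance(..., np.ndarray) check, so
# the slower but more general _unstack_full_reindex path is used instead.
lazy = ds.chunk({"z": 3}).unstack("z")
```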
diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py
index 6e22c8cf0a4..09f78c5971c 100644
--- a/xarray/core/npcompat.py
+++ b/xarray/core/npcompat.py
@@ -41,17 +41,10 @@
 # fall back for numpy < 1.20, ArrayLike adapted from numpy.typing._array_like
 if sys.version_info >= (3, 8):
     from typing import Protocol
-
-    HAVE_PROTOCOL = True
 else:
-    try:
-        from typing_extensions import Protocol
-    except ImportError:
-        HAVE_PROTOCOL = False
-    else:
-        HAVE_PROTOCOL = True
+    from typing_extensions import Protocol
 
-    if TYPE_CHECKING or HAVE_PROTOCOL:
+    if TYPE_CHECKING:
 
         class _SupportsArray(Protocol):
             def __array__(self) -> np.ndarray:
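The npcompat change relies on typing_extensions now being a hard dependency (see setup.cfg above), so the try/except and the HAVE_PROTOCOL flag collapse to a plain version gate. The same pattern in isolation:

```python
import sys

# With typing_extensions required at install time, this import can no
# longer fail, so no feature flag is needed.
if sys.version_info >= (3, 8):
    from typing import Protocol
else:
    from typing_extensions import Protocol


class SupportsArray(Protocol):
    # Structural type: anything with __array__ matches, no inheritance needed.
    def __array__(self): ...
```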
diff --git a/xarray/core/options.py b/xarray/core/options.py
index 3e2df379c55..6a321d4bd2a 100644
--- a/xarray/core/options.py
+++ b/xarray/core/options.py
@@ -6,70 +6,35 @@
 
 # TODO: Remove this check once python 3.7 is not supported:
 if sys.version_info >= (3, 8):
     from typing import TYPE_CHECKING, Literal, TypedDict, Union
+else:
+    from typing import TYPE_CHECKING, Union
 
-    if TYPE_CHECKING:
-        try:
-            from matplotlib.colors import Colormap
-        except ImportError:
-            Colormap = str
-
-    class T_Options(TypedDict):
-        arithmetic_join: Literal["inner", "outer", "left", "right", "exact"]
-        cmap_divergent: Union[str, "Colormap"]
-        cmap_sequential: Union[str, "Colormap"]
-        display_max_rows: int
-        display_style: Literal["text", "html"]
-        display_width: int
-        display_expand_attrs: Literal["default", True, False]
-        display_expand_coords: Literal["default", True, False]
-        display_expand_data_vars: Literal["default", True, False]
-        display_expand_data: Literal["default", True, False]
-        enable_cftimeindex: bool
-        file_cache_maxsize: int
-        keep_attrs: Literal["default", True, False]
-        warn_for_unclosed_files: bool
-        use_bottleneck: bool
+    from typing_extensions import Literal, TypedDict
 
-else:
-    # See GH5624, this is a convoluted way to allow type-checking to use
-    # `TypedDict` and `Literal` without requiring typing_extensions as a
-    # required dependency to _run_ the code (it is required to type-check).
+if TYPE_CHECKING:
     try:
-        from typing import TYPE_CHECKING, Union
-
-        from typing_extensions import Literal, TypedDict
-
-        if TYPE_CHECKING:
-            try:
-                from matplotlib.colors import Colormap
-            except ImportError:
-                Colormap = str
-
-        class T_Options(TypedDict):
-            arithmetic_join: Literal["inner", "outer", "left", "right", "exact"]
-            cmap_divergent: Union[str, "Colormap"]
-            cmap_sequential: Union[str, "Colormap"]
-            display_max_rows: int
-            display_style: Literal["text", "html"]
-            display_width: int
-            display_expand_attrs: Literal["default", True, False]
-            display_expand_coords: Literal["default", True, False]
-            display_expand_data_vars: Literal["default", True, False]
-            display_expand_data: Literal["default", True, False]
-            enable_cftimeindex: bool
-            file_cache_maxsize: int
-            keep_attrs: Literal["default", True, False]
-            warn_for_unclosed_files: bool
-            use_bottleneck: bool
-
+        from matplotlib.colors import Colormap
     except ImportError:
-        from typing import TYPE_CHECKING, Any, Dict, Hashable
-
-        if TYPE_CHECKING:
-            raise
-        else:
-            T_Options = Dict[Hashable, Any]
+        Colormap = str
+
+
+class T_Options(TypedDict):
+    arithmetic_join: Literal["inner", "outer", "left", "right", "exact"]
+    cmap_divergent: Union[str, "Colormap"]
+    cmap_sequential: Union[str, "Colormap"]
+    display_max_rows: int
+    display_style: Literal["text", "html"]
+    display_width: int
+    display_expand_attrs: Literal["default", True, False]
+    display_expand_coords: Literal["default", True, False]
+    display_expand_data_vars: Literal["default", True, False]
+    display_expand_data: Literal["default", True, False]
+    enable_cftimeindex: bool
+    file_cache_maxsize: int
+    keep_attrs: Literal["default", True, False]
+    warn_for_unclosed_files: bool
+    use_bottleneck: bool
 
 
 OPTIONS: T_Options = {
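With T_Options hoisted out of the version check, the keys it declares are exactly the options accepted at runtime. A short usage sketch:

```python
import xarray as xr

# Each keyword corresponds to a key declared in T_Options above.
xr.set_options(display_max_rows=10, keep_attrs=True)

# Or scoped to a block with the context-manager form:
with xr.set_options(display_style="text"):
    pass
```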
diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py
index c31b24f53b5..0676d351b6f 100644
--- a/xarray/core/weighted.py
+++ b/xarray/core/weighted.py
@@ -1,4 +1,6 @@
-from typing import TYPE_CHECKING, Generic, Hashable, Iterable, Optional, Union
+from typing import TYPE_CHECKING, Generic, Hashable, Iterable, Optional, Union, cast
+
+import numpy as np
 
 from . import duck_array_ops
 from .computation import dot
@@ -35,7 +37,7 @@
 """
 
 _SUM_OF_WEIGHTS_DOCSTRING = """
-    Calculate the sum of weights, accounting for missing values in the data
+    Calculate the sum of weights, accounting for missing values in the data.
 
     Parameters
     ----------
@@ -177,13 +179,25 @@ def _sum_of_weights(
 
         return sum_of_weights.where(valid_weights)
 
+    def _sum_of_squares(
+        self,
+        da: "DataArray",
+        dim: Optional[Union[Hashable, Iterable[Hashable]]] = None,
+        skipna: Optional[bool] = None,
+    ) -> "DataArray":
+        """Reduce a DataArray by a weighted ``sum_of_squares`` along some dimension(s)."""
+
+        demeaned = da - da.weighted(self.weights).mean(dim=dim)
+
+        return self._reduce((demeaned ** 2), self.weights, dim=dim, skipna=skipna)
+
     def _weighted_sum(
         self,
         da: "DataArray",
         dim: Optional[Union[Hashable, Iterable[Hashable]]] = None,
         skipna: Optional[bool] = None,
     ) -> "DataArray":
-        """Reduce a DataArray by a by a weighted ``sum`` along some dimension(s)."""
+        """Reduce a DataArray by a weighted ``sum`` along some dimension(s)."""
 
         return self._reduce(da, self.weights, dim=dim, skipna=skipna)
 
@@ -201,6 +215,30 @@ def _weighted_mean(
 
         return weighted_sum / sum_of_weights
 
+    def _weighted_var(
+        self,
+        da: "DataArray",
+        dim: Optional[Union[Hashable, Iterable[Hashable]]] = None,
+        skipna: Optional[bool] = None,
+    ) -> "DataArray":
+        """Reduce a DataArray by a weighted ``var`` along some dimension(s)."""
+
+        sum_of_squares = self._sum_of_squares(da, dim=dim, skipna=skipna)
+
+        sum_of_weights = self._sum_of_weights(da, dim=dim)
+
+        return sum_of_squares / sum_of_weights
+
+    def _weighted_std(
+        self,
+        da: "DataArray",
+        dim: Optional[Union[Hashable, Iterable[Hashable]]] = None,
+        skipna: Optional[bool] = None,
+    ) -> "DataArray":
+        """Reduce a DataArray by a weighted ``std`` along some dimension(s)."""
+
+        return cast("DataArray", np.sqrt(self._weighted_var(da, dim, skipna)))
+
     def _implementation(self, func, dim, **kwargs):
 
         raise NotImplementedError("Use `Dataset.weighted` or `DataArray.weighted`")
@@ -215,6 +253,17 @@ def sum_of_weights(
             self._sum_of_weights, dim=dim, keep_attrs=keep_attrs
         )
 
+    def sum_of_squares(
+        self,
+        dim: Optional[Union[Hashable, Iterable[Hashable]]] = None,
+        skipna: Optional[bool] = None,
+        keep_attrs: Optional[bool] = None,
+    ) -> T_Xarray:
+
+        return self._implementation(
+            self._sum_of_squares, dim=dim, skipna=skipna, keep_attrs=keep_attrs
+        )
+
     def sum(
         self,
         dim: Optional[Union[Hashable, Iterable[Hashable]]] = None,
@@ -237,6 +286,28 @@ def mean(
             self._weighted_mean, dim=dim, skipna=skipna, keep_attrs=keep_attrs
         )
 
+    def var(
+        self,
+        dim: Optional[Union[Hashable, Iterable[Hashable]]] = None,
+        skipna: Optional[bool] = None,
+        keep_attrs: Optional[bool] = None,
+    ) -> T_Xarray:
+
+        return self._implementation(
+            self._weighted_var, dim=dim, skipna=skipna, keep_attrs=keep_attrs
+        )
+
+    def std(
+        self,
+        dim: Optional[Union[Hashable, Iterable[Hashable]]] = None,
+        skipna: Optional[bool] = None,
+        keep_attrs: Optional[bool] = None,
+    ) -> T_Xarray:
+
+        return self._implementation(
+            self._weighted_std, dim=dim, skipna=skipna, keep_attrs=keep_attrs
+        )
+
     def __repr__(self):
         """provide a nice str repr of our Weighted object"""
 
@@ -275,6 +346,18 @@ def _inject_docstring(cls, cls_name):
         cls=cls_name, fcn="mean", on_zero="NaN"
     )
 
+    cls.sum_of_squares.__doc__ = _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE.format(
+        cls=cls_name, fcn="sum_of_squares", on_zero="0"
+    )
+
+    cls.var.__doc__ = _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE.format(
+        cls=cls_name, fcn="var", on_zero="NaN"
+    )
+
+    cls.std.__doc__ = _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE.format(
+        cls=cls_name, fcn="std", on_zero="NaN"
+    )
+
 
 _inject_docstring(DataArrayWeighted, "DataArray")
 _inject_docstring(DatasetWeighted, "Dataset")
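A quick numerical check of the relationship the methods above encode: ``var`` is ``sum_of_squares`` divided by ``sum_of_weights``, i.e. the biased weighted variance (values are arbitrary):

```python
import numpy as np
import xarray as xr

da = xr.DataArray([1.0, 2.0, 4.0], dims="x")
weights = xr.DataArray([1.0, 1.0, 2.0], dims="x")

# sum of squared deviations from the weighted mean, weighted
mean = da.weighted(weights).mean()
sum_of_squares = (weights * (da - mean) ** 2).sum()

expected_var = sum_of_squares / weights.sum()
np.testing.assert_allclose(expected_var, da.weighted(weights).var())
```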
diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py
index b384dea0571..a518a78dbf6 100644
--- a/xarray/plot/facetgrid.py
+++ b/xarray/plot/facetgrid.py
@@ -173,11 +173,11 @@ def __init__(
         )
 
         # Set up the lists of names for the row and column facet variables
-        col_names = list(data[col].values) if col else []
-        row_names = list(data[row].values) if row else []
+        col_names = list(data[col].to_numpy()) if col else []
+        row_names = list(data[row].to_numpy()) if row else []
 
         if single_group:
-            full = [{single_group: x} for x in data[single_group].values]
+            full = [{single_group: x} for x in data[single_group].to_numpy()]
             empty = [None for x in range(nrow * ncol - len(full))]
             name_dicts = full + empty
         else:
@@ -251,7 +251,7 @@ def map_dataarray(self, func, x, y, **kwargs):
             raise ValueError("cbar_ax not supported by FacetGrid.")
 
         cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs(
-            func, self.data.values, **kwargs
+            func, self.data.to_numpy(), **kwargs
        )
 
         self._cmap_extend = cmap_params.get("extend")
@@ -347,7 +347,7 @@ def map_dataset(
 
         if hue and meta_data["hue_style"] == "continuous":
             cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs(
-                func, self.data[hue].values, **kwargs
+                func, self.data[hue].to_numpy(), **kwargs
             )
             kwargs["meta_data"]["cmap_params"] = cmap_params
             kwargs["meta_data"]["cbar_kwargs"] = cbar_kwargs
@@ -423,7 +423,7 @@ def _adjust_fig_for_guide(self, guide):
     def add_legend(self, **kwargs):
         self.figlegend = self.fig.legend(
             handles=self._mappables[-1],
-            labels=list(self._hue_var.values),
+            labels=list(self._hue_var.to_numpy()),
             title=self._hue_label,
             loc="center right",
             **kwargs,
@@ -619,7 +619,7 @@ def map(self, func, *args, **kwargs):
             if namedict is not None:
                 data = self.data.loc[namedict]
                 plt.sca(ax)
-                innerargs = [data[a].values for a in args]
+                innerargs = [data[a].to_numpy() for a in args]
                 maybe_mappable = func(*innerargs, **kwargs)
                 # TODO: better way to verify that an artist is mappable?
                 # https://stackoverflow.com/questions/33023036/is-it-possible-to-detect-if-a-matplotlib-artist-is-a-mappable-suitable-for-use-w#33023522
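The switch from ``.values`` to ``.to_numpy()`` above matters for duck arrays: ``.values`` coerces through ``np.asarray``, which strips pint units with a ``UnitStrippedWarning``, while ``.to_numpy()`` converts through the appropriate duck-array protocol. A sketch, assuming pint is installed:

```python
import numpy as np
import pint
import xarray as xr

ureg = pint.UnitRegistry()
da = xr.DataArray(np.ones(3) * ureg.Pa, dims="x", name="pressure")

# Converts to a plain ndarray via the duck-array-aware path
# (for pint, by taking the magnitude), without a warning.
arr = da.to_numpy()
print(type(arr))  # <class 'numpy.ndarray'>

# By contrast, da.values would emit pint.UnitStrippedWarning for a
# Quantity-backed array.
```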
diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py
index 1e1e59e2f71..60f132d07e1 100644
--- a/xarray/plot/plot.py
+++ b/xarray/plot/plot.py
@@ -1075,7 +1075,7 @@ def newplotfunc(
             # Matplotlib does not support normalising RGB data, so do it here.
             # See eg. https://github.com/matplotlib/matplotlib/pull/10220
             if robust or vmax is not None or vmin is not None:
-                darray = _rescale_imshow_rgb(darray, vmin, vmax, robust)
+                darray = _rescale_imshow_rgb(darray.as_numpy(), vmin, vmax, robust)
                 vmin, vmax, robust = None, None, False
 
         if subplot_kws is None:
@@ -1146,10 +1146,6 @@ def newplotfunc(
             else:
                 dims = (yval.dims[0], xval.dims[0])
 
-            # better to pass the ndarrays directly to plotting functions
-            xval = xval.to_numpy()
-            yval = yval.to_numpy()
-
             # May need to transpose for correct x, y labels
             # xlab may be the name of a coord, we have to check for dim names
             if imshow_rgb:
@@ -1162,6 +1158,10 @@ def newplotfunc(
             if dims != darray.dims:
                 darray = darray.transpose(*dims, transpose_coords=True)
 
+            # better to pass the ndarrays directly to plotting functions
+            xval = xval.to_numpy()
+            yval = yval.to_numpy()
+
             # Pass the data as a masked ndarray too
             zval = darray.to_masked_array(copy=False)
 
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index d5d460056aa..de69c972fc6 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -255,13 +255,13 @@ def test_missing_methods(self):
         except NotImplementedError as err:
             assert "dask" in str(err)
 
-    @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+    @pytest.mark.filterwarnings("ignore::FutureWarning")
     def test_univariate_ufunc(self):
         u = self.eager_var
         v = self.lazy_var
         self.assertLazyAndAllClose(np.sin(u), xu.sin(v))
 
-    @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+    @pytest.mark.filterwarnings("ignore::FutureWarning")
     def test_bivariate_ufunc(self):
         u = self.eager_var
         v = self.lazy_var
@@ -563,7 +563,7 @@ def duplicate_and_merge(array):
         actual = duplicate_and_merge(self.lazy_array)
         self.assertLazyAndEqual(expected, actual)
 
-    @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+    @pytest.mark.filterwarnings("ignore::FutureWarning")
     def test_ufuncs(self):
         u = self.eager_array
         v = self.lazy_array
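The ``filterwarnings`` updates above (and in the test_sparse.py hunk below) track ``xarray.ufuncs`` now emitting ``FutureWarning`` rather than ``PendingDeprecationWarning``. Plain NumPy ufuncs are the forward-compatible spelling, since they dispatch directly on xarray objects:

```python
import numpy as np
import xarray as xr

da = xr.DataArray([0.0, np.pi / 2], dims="x")

# xarray.ufuncs.sin(da) now triggers a FutureWarning; np.sin works on
# DataArray objects directly and is the supported path.
print(np.sin(da))
```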
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index 6d49e20909d..c032a781e47 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -258,6 +258,11 @@ def from_series_or_scalar(se):
 def series_reduce(da, func, dim, **kwargs):
     """convert DataArray to pd.Series, apply pd.func, then convert back to
     a DataArray. Multiple dims cannot be specified."""
+
+    # pd no longer accepts skipna=None https://github.com/pandas-dev/pandas/issues/44178
+    if kwargs.get("skipna", True) is None:
+        kwargs["skipna"] = True
+
     if dim is None or da.ndim == 1:
         se = da.to_series()
         return from_series_or_scalar(getattr(se, func)(**kwargs))
diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py
index 3d57d3dc961..ad0aafff15e 100644
--- a/xarray/tests/test_sparse.py
+++ b/xarray/tests/test_sparse.py
@@ -276,11 +276,11 @@ def test_unary_op(self):
         assert_sparse_equal(abs(self.var).data, abs(self.data))
         assert_sparse_equal(self.var.round().data, self.data.round())
 
-    @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+    @pytest.mark.filterwarnings("ignore::FutureWarning")
     def test_univariate_ufunc(self):
         assert_sparse_equal(np.sin(self.data), xu.sin(self.var).data)
 
-    @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+    @pytest.mark.filterwarnings("ignore::FutureWarning")
     def test_bivariate_ufunc(self):
         assert_sparse_equal(np.maximum(self.data, 0), xu.maximum(self.var, 0).data)
         assert_sparse_equal(np.maximum(self.data, 0), xu.maximum(0, self.var).data)
@@ -664,7 +664,7 @@ def test_stack(self):
         roundtripped = stacked.unstack()
         assert_identical(arr, roundtripped)
 
-    @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
+    @pytest.mark.filterwarnings("ignore::FutureWarning")
     def test_ufuncs(self):
         x = self.sp_xr
         assert_equal(np.sin(x), xu.sin(x))
diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
index 7bde6ce8b9f..8be20c5f81c 100644
--- a/xarray/tests/test_units.py
+++ b/xarray/tests/test_units.py
@@ -5614,7 +5614,7 @@ def test_units_in_line_plot_labels(self):
         assert ax.get_ylabel() == "pressure [pascal]"
         assert ax.get_xlabel() == "x [meters]"
 
-    def test_units_in_2d_plot_labels(self):
+    def test_units_in_2d_plot_colorbar_label(self):
         arr = np.ones((2, 3)) * unit_registry.Pa
         da = xr.DataArray(data=arr, dims=["x", "y"], name="pressure")
 
@@ -5622,3 +5622,27 @@ def test_units_in_2d_plot_labels(self):
         ax = da.plot.contourf(ax=ax, cbar_ax=cax, add_colorbar=True)
 
         assert cax.get_ylabel() == "pressure [pascal]"
+
+    def test_units_facetgrid_plot_labels(self):
+        arr = np.ones((2, 3)) * unit_registry.Pa
+        da = xr.DataArray(data=arr, dims=["x", "y"], name="pressure")
+
+        fig, (ax, cax) = plt.subplots(1, 2)
+        fgrid = da.plot.line(x="x", col="y")
+
+        assert fgrid.axes[0, 0].get_ylabel() == "pressure [pascal]"
+
+    def test_units_facetgrid_2d_imshow_plot_colorbar_labels(self):
+        arr = np.ones((2, 3, 4, 5)) * unit_registry.Pa
+        da = xr.DataArray(data=arr, dims=["x", "y", "z", "w"], name="pressure")
+
+        da.plot.imshow(x="x", y="y", col="w")  # no colorbar to check labels of
+
+    def test_units_facetgrid_2d_contourf_plot_colorbar_labels(self):
+        arr = np.ones((2, 3, 4)) * unit_registry.Pa
+        da = xr.DataArray(data=arr, dims=["x", "y", "z"], name="pressure")
+
+        fig, (ax1, ax2, ax3, cax) = plt.subplots(1, 4)
+        fgrid = da.plot.contourf(x="x", y="y", col="z")
+
+        assert fgrid.cbar.ax.get_ylabel() == "pressure [pascal]"
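The ``series_reduce`` tweak in the test_duck_array_ops.py hunk above works around pandas no longer accepting ``skipna=None`` in reductions (pandas-dev/pandas#44178). The normalization in isolation, with a hypothetical helper name:

```python
import pandas as pd

def normalized_sum(se: pd.Series, **kwargs) -> float:
    # Newer pandas rejects skipna=None; map None to the documented
    # default of True before delegating to the pandas reduction.
    if kwargs.get("skipna", True) is None:
        kwargs["skipna"] = True
    return se.sum(**kwargs)

print(normalized_sum(pd.Series([1.0, None, 2.0])))  # 3.0
```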
diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py
index 45e662f118e..36923ed49c3 100644
--- a/xarray/tests/test_weighted.py
+++ b/xarray/tests/test_weighted.py
@@ -224,6 +224,150 @@ def test_weighted_mean_bool():
     assert_equal(expected, result)
 
 
+@pytest.mark.parametrize(
+    ("weights", "expected"),
+    (([1, 2], 2 / 3), ([2, 0], 0), ([0, 0], 0), ([-1, 1], 0)),
+)
+def test_weighted_sum_of_squares_no_nan(weights, expected):
+
+    da = DataArray([1, 2])
+    weights = DataArray(weights)
+    result = da.weighted(weights).sum_of_squares()
+
+    expected = DataArray(expected)
+
+    assert_equal(expected, result)
+
+
+@pytest.mark.parametrize(
+    ("weights", "expected"),
+    (([1, 2], 0), ([2, 0], 0), ([0, 0], 0), ([-1, 1], 0)),
+)
+def test_weighted_sum_of_squares_nan(weights, expected):
+
+    da = DataArray([np.nan, 2])
+    weights = DataArray(weights)
+    result = da.weighted(weights).sum_of_squares()
+
+    expected = DataArray(expected)
+
+    assert_equal(expected, result)
+
+
+@pytest.mark.filterwarnings("error")
+@pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan]))
+@pytest.mark.parametrize("skipna", (True, False))
+@pytest.mark.parametrize("factor", [1, 2, 3.14])
+def test_weighted_var_equal_weights(da, skipna, factor):
+    # if all weights are equal (!= 0), should yield the same result as var
+
+    da = DataArray(da)
+
+    # all weights equal, scaled by `factor`
+    weights = xr.full_like(da, factor)
+
+    expected = da.var(skipna=skipna)
+    result = da.weighted(weights).var(skipna=skipna)
+
+    assert_equal(expected, result)
+
+
+@pytest.mark.parametrize(
+    ("weights", "expected"), (([4, 6], 0.24), ([1, 0], 0.0), ([0, 0], np.nan))
+)
+def test_weighted_var_no_nan(weights, expected):
+
+    da = DataArray([1, 2])
+    weights = DataArray(weights)
+    expected = DataArray(expected)
+
+    result = da.weighted(weights).var()
+
+    assert_equal(expected, result)
+
+
+@pytest.mark.parametrize(
+    ("weights", "expected"), (([4, 6], 0), ([1, 0], np.nan), ([0, 0], np.nan))
+)
+def test_weighted_var_nan(weights, expected):
+
+    da = DataArray([np.nan, 2])
+    weights = DataArray(weights)
+    expected = DataArray(expected)
+
+    result = da.weighted(weights).var()
+
+    assert_equal(expected, result)
+
+
+def test_weighted_var_bool():
+    # https://github.com/pydata/xarray/issues/4074
+    da = DataArray([1, 1])
+    weights = DataArray([True, True])
+    expected = DataArray(0)
+
+    result = da.weighted(weights).var()
+
+    assert_equal(expected, result)
+
+
+@pytest.mark.filterwarnings("error")
+@pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan]))
+@pytest.mark.parametrize("skipna", (True, False))
+@pytest.mark.parametrize("factor", [1, 2, 3.14])
+def test_weighted_std_equal_weights(da, skipna, factor):
+    # if all weights are equal (!= 0), should yield the same result as std
+
+    da = DataArray(da)
+
+    # all weights equal, scaled by `factor`
+    weights = xr.full_like(da, factor)
+
+    expected = da.std(skipna=skipna)
+    result = da.weighted(weights).std(skipna=skipna)
+
+    assert_equal(expected, result)
+
+
+@pytest.mark.parametrize(
+    ("weights", "expected"), (([4, 6], np.sqrt(0.24)), ([1, 0], 0.0), ([0, 0], np.nan))
+)
+def test_weighted_std_no_nan(weights, expected):
+
+    da = DataArray([1, 2])
+    weights = DataArray(weights)
+    expected = DataArray(expected)
+
+    result = da.weighted(weights).std()
+
+    assert_equal(expected, result)
+
+
+@pytest.mark.parametrize(
+    ("weights", "expected"), (([4, 6], 0), ([1, 0], np.nan), ([0, 0], np.nan))
+)
+def test_weighted_std_nan(weights, expected):
+
+    da = DataArray([np.nan, 2])
+    weights = DataArray(weights)
+    expected = DataArray(expected)
+
+    result = da.weighted(weights).std()
+
+    assert_equal(expected, result)
+
+
+def test_weighted_std_bool():
+    # https://github.com/pydata/xarray/issues/4074
+    da = DataArray([1, 1])
+    weights = DataArray([True, True])
+    expected = DataArray(0)
+
+    result = da.weighted(weights).std()
+
+    assert_equal(expected, result)
+
+
 def expected_weighted(da, weights, dim, skipna, operation):
     """
     Generate expected result using ``*`` and ``sum``. This is checked against
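As a sanity check on the parametrizations above, the ``([4, 6], 0.24)`` case works out by hand: for data ``[1, 2]`` the weighted mean is (4*1 + 6*2)/10 = 1.6, and the weighted variance is (4*(1 - 1.6)**2 + 6*(2 - 1.6)**2)/10 = (1.44 + 0.96)/10 = 0.24; the corresponding ``std`` case is its square root.

```python
import numpy as np
import xarray as xr

da = xr.DataArray([1, 2])
weights = xr.DataArray([4, 6])

# matches the hand calculation above
np.testing.assert_allclose(da.weighted(weights).var(), 0.24)
np.testing.assert_allclose(da.weighted(weights).std(), np.sqrt(0.24))
```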
@@ -248,6 +392,20 @@ def expected_weighted(da, weights, dim, skipna, operation):
     if operation == "mean":
         return weighted_mean
 
+    demeaned = da - weighted_mean
+    sum_of_squares = ((demeaned ** 2) * weights).sum(dim=dim, skipna=skipna)
+
+    if operation == "sum_of_squares":
+        return sum_of_squares
+
+    var = sum_of_squares / sum_of_weights
+
+    if operation == "var":
+        return var
+
+    if operation == "std":
+        return np.sqrt(var)
+
 
 def check_weighted_operations(data, weights, dim, skipna):
 
@@ -266,6 +424,21 @@ def check_weighted_operations(data, weights, dim, skipna):
     expected = expected_weighted(data, weights, dim, skipna, "mean")
     assert_allclose(expected, result)
 
+    # check weighted sum of squares
+    result = data.weighted(weights).sum_of_squares(dim, skipna=skipna)
+    expected = expected_weighted(data, weights, dim, skipna, "sum_of_squares")
+    assert_allclose(expected, result)
+
+    # check weighted var
+    result = data.weighted(weights).var(dim, skipna=skipna)
+    expected = expected_weighted(data, weights, dim, skipna, "var")
+    assert_allclose(expected, result)
+
+    # check weighted std
+    result = data.weighted(weights).std(dim, skipna=skipna)
+    expected = expected_weighted(data, weights, dim, skipna, "std")
+    assert_allclose(expected, result)
+
 
 @pytest.mark.parametrize("dim", ("a", "b", "c", ("a", "b"), ("a", "b", "c"), None))
 @pytest.mark.parametrize("add_nans", (True, False))
@@ -330,7 +503,9 @@ def test_weighted_operations_different_shapes(
     check_weighted_operations(data, weights, None, skipna)
 
 
-@pytest.mark.parametrize("operation", ("sum_of_weights", "sum", "mean"))
+@pytest.mark.parametrize(
+    "operation", ("sum_of_weights", "sum", "mean", "sum_of_squares", "var", "std")
+)
 @pytest.mark.parametrize("as_dataset", (True, False))
 @pytest.mark.parametrize("keep_attrs", (True, False, None))
 def test_weighted_operations_keep_attr(operation, as_dataset, keep_attrs):
@@ -357,7 +532,9 @@ def test_weighted_operations_keep_attr(operation, as_dataset, keep_attrs):
     assert not result.attrs
 
 
-@pytest.mark.parametrize("operation", ("sum", "mean"))
+@pytest.mark.parametrize(
+    "operation", ("sum_of_weights", "sum", "mean", "sum_of_squares", "var", "std")
+)
 def test_weighted_operations_keep_attr_da_in_ds(operation):
     # GH #3595
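The ``keep_attrs`` parametrizations above now exercise attribute propagation through every weighted reduction, including the new ones. The behavior under test, in brief (the attrs dict is illustrative):

```python
import xarray as xr

da = xr.DataArray([1.0, 2.0], dims="x", attrs={"units": "K"})
weights = xr.DataArray([1.0, 1.0], dims="x")

# keep_attrs=True carries the input's attrs onto the reduced result
print(da.weighted(weights).std(keep_attrs=True).attrs)   # {'units': 'K'}
print(da.weighted(weights).std(keep_attrs=False).attrs)  # {}
```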