diff --git a/asv_bench/benchmarks/polyfit.py b/asv_bench/benchmarks/polyfit.py new file mode 100644 index 00000000000..429ffa19baa --- /dev/null +++ b/asv_bench/benchmarks/polyfit.py @@ -0,0 +1,38 @@ +import numpy as np + +import xarray as xr + +from . import parameterized, randn, requires_dask + +NDEGS = (2, 5, 20) +NX = (10**2, 10**6) + + +class Polyval: + def setup(self, *args, **kwargs): + self.xs = {nx: xr.DataArray(randn((nx,)), dims="x", name="x") for nx in NX} + self.coeffs = { + ndeg: xr.DataArray( + randn((ndeg,)), dims="degree", coords={"degree": np.arange(ndeg)} + ) + for ndeg in NDEGS + } + + @parameterized(["nx", "ndeg"], [NX, NDEGS]) + def time_polyval(self, nx, ndeg): + x = self.xs[nx] + c = self.coeffs[ndeg] + xr.polyval(x, c).compute() + + @parameterized(["nx", "ndeg"], [NX, NDEGS]) + def peakmem_polyval(self, nx, ndeg): + x = self.xs[nx] + c = self.coeffs[ndeg] + xr.polyval(x, c).compute() + + +class PolyvalDask(Polyval): + def setup(self, *args, **kwargs): + requires_dask() + super().setup(*args, **kwargs) + self.xs = {k: v.chunk({"x": 10000}) for k, v in self.xs.items()} diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4882402073c..dfa36a4dcb0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,6 +41,9 @@ New Features - Allow passing chunks in ``**kwargs`` form to :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and :py:meth:`Variable.chunk`. (:pull:`6471`) By `Tom Nicholas `_. +- :py:meth:`xr.polyval` now supports :py:class:`Dataset` and :py:class:`DataArray` args of any shape, + is faster and requires less memory. (:pull:`6548`) + By `Michael Niklas `_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -54,6 +57,10 @@ Breaking changes - Xarray's ufuncs have been removed, now that they can be replaced by numpy's ufuncs in all supported versions of numpy. By `Maximilian Roos `_. +- :py:meth:`xr.polyval` now uses the ``coord`` argument directly instead of its index coordinate. + (:pull:`6548`) + By `Michael Niklas `_. + Deprecations ~~~~~~~~~~~~ diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 1834622d96e..1a32cda512c 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -17,12 +17,15 @@ Iterable, Mapping, Sequence, + overload, ) import numpy as np from . import dtypes, duck_array_ops, utils from .alignment import align, deep_align +from .common import zeros_like +from .duck_array_ops import datetime_to_numeric from .indexes import Index, filter_indexes_from_coords from .merge import merge_attrs, merge_coordinates_without_align from .options import OPTIONS, _get_keep_attrs @@ -1843,36 +1846,100 @@ def where(cond, x, y, keep_attrs=None): ) -def polyval(coord, coeffs, degree_dim="degree"): +@overload +def polyval(coord: DataArray, coeffs: DataArray, degree_dim: Hashable) -> DataArray: + ... + + +@overload +def polyval(coord: T_Xarray, coeffs: Dataset, degree_dim: Hashable) -> Dataset: + ... + + +@overload +def polyval(coord: Dataset, coeffs: T_Xarray, degree_dim: Hashable) -> Dataset: + ... + + +def polyval( + coord: T_Xarray, coeffs: T_Xarray, degree_dim: Hashable = "degree" +) -> T_Xarray: """Evaluate a polynomial at specific values Parameters ---------- - coord : DataArray - The 1D coordinate along which to evaluate the polynomial. - coeffs : DataArray - Coefficients of the polynomials. - degree_dim : str, default: "degree" + coord : DataArray or Dataset + Values at which to evaluate the polynomial. + coeffs : DataArray or Dataset + Coefficients of the polynomial. + degree_dim : Hashable, default: "degree" Name of the polynomial degree dimension in `coeffs`. + Returns + ------- + DataArray or Dataset + Evaluated polynomial. + See Also -------- xarray.DataArray.polyfit - numpy.polyval + numpy.polynomial.polynomial.polyval """ - from .dataarray import DataArray - from .missing import get_clean_interp_index - x = get_clean_interp_index(coord, coord.name, strict=False) + if degree_dim not in coeffs._indexes: + raise ValueError( + f"Dimension `{degree_dim}` should be a coordinate variable with labels." + ) + if not np.issubdtype(coeffs[degree_dim].dtype, int): + raise ValueError( + f"Dimension `{degree_dim}` should be of integer dtype. Received {coeffs[degree_dim].dtype} instead." + ) + max_deg = coeffs[degree_dim].max().item() + coeffs = coeffs.reindex( + {degree_dim: np.arange(max_deg + 1)}, fill_value=0, copy=False + ) + coord = _ensure_numeric(coord) + + # using Horner's method + # https://en.wikipedia.org/wiki/Horner%27s_method + res = coeffs.isel({degree_dim: max_deg}, drop=True) + zeros_like(coord) + for deg in range(max_deg - 1, -1, -1): + res *= coord + res += coeffs.isel({degree_dim: deg}, drop=True) - deg_coord = coeffs[degree_dim] + return res - lhs = DataArray( - np.vander(x, int(deg_coord.max()) + 1), - dims=(coord.name, degree_dim), - coords={coord.name: coord, degree_dim: np.arange(deg_coord.max() + 1)[::-1]}, - ) - return (lhs * coeffs).sum(degree_dim) + +def _ensure_numeric(data: T_Xarray) -> T_Xarray: + """Converts all datetime64 variables to float64 + + Parameters + ---------- + data : DataArray or Dataset + Variables with possible datetime dtypes. + + Returns + ------- + DataArray or Dataset + Variables with datetime64 dtypes converted to float64. + """ + from .dataset import Dataset + + def to_floatable(x: DataArray) -> DataArray: + if x.dtype.kind in "mM": + return x.copy( + data=datetime_to_numeric( + x.data, + offset=np.datetime64("1970-01-01"), + datetime_unit="ns", + ), + ) + return x + + if isinstance(data, Dataset): + return data.map(to_floatable) + else: + return to_floatable(data) def _calc_idxminmax( diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 7a397428ba3..127fdc5404f 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1933,37 +1933,100 @@ def test_where_attrs() -> None: assert actual.attrs == {} -@pytest.mark.parametrize("use_dask", [True, False]) -@pytest.mark.parametrize("use_datetime", [True, False]) -def test_polyval(use_dask, use_datetime) -> None: - if use_dask and not has_dask: - pytest.skip("requires dask") - - if use_datetime: - xcoord = xr.DataArray( - pd.date_range("2000-01-01", freq="D", periods=10), dims=("x",), name="x" - ) - x = xr.core.missing.get_clean_interp_index(xcoord, "x") - else: - x = np.arange(10) - xcoord = xr.DataArray(x, dims=("x",), name="x") - - da = xr.DataArray( - np.stack((1.0 + x + 2.0 * x**2, 1.0 + 2.0 * x + 3.0 * x**2)), - dims=("d", "x"), - coords={"x": xcoord, "d": [0, 1]}, - ) - coeffs = xr.DataArray( - [[2, 1, 1], [3, 2, 1]], - dims=("d", "degree"), - coords={"d": [0, 1], "degree": [2, 1, 0]}, - ) +@pytest.mark.parametrize("use_dask", [False, True]) +@pytest.mark.parametrize( + ["x", "coeffs", "expected"], + [ + pytest.param( + xr.DataArray([1, 2, 3], dims="x"), + xr.DataArray([2, 3, 4], dims="degree", coords={"degree": [0, 1, 2]}), + xr.DataArray([9, 2 + 6 + 16, 2 + 9 + 36], dims="x"), + id="simple", + ), + pytest.param( + xr.DataArray([1, 2, 3], dims="x"), + xr.DataArray( + [[0, 1], [0, 1]], dims=("y", "degree"), coords={"degree": [0, 1]} + ), + xr.DataArray([[1, 2, 3], [1, 2, 3]], dims=("y", "x")), + id="broadcast-x", + ), + pytest.param( + xr.DataArray([1, 2, 3], dims="x"), + xr.DataArray( + [[0, 1], [1, 0], [1, 1]], + dims=("x", "degree"), + coords={"degree": [0, 1]}, + ), + xr.DataArray([1, 1, 1 + 3], dims="x"), + id="shared-dim", + ), + pytest.param( + xr.DataArray([1, 2, 3], dims="x"), + xr.DataArray([1, 0, 0], dims="degree", coords={"degree": [2, 1, 0]}), + xr.DataArray([1, 2**2, 3**2], dims="x"), + id="reordered-index", + ), + pytest.param( + xr.DataArray([1, 2, 3], dims="x"), + xr.DataArray([5], dims="degree", coords={"degree": [3]}), + xr.DataArray([5, 5 * 2**3, 5 * 3**3], dims="x"), + id="sparse-index", + ), + pytest.param( + xr.DataArray([1, 2, 3], dims="x"), + xr.Dataset( + {"a": ("degree", [0, 1]), "b": ("degree", [1, 0])}, + coords={"degree": [0, 1]}, + ), + xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [1, 1, 1])}), + id="array-dataset", + ), + pytest.param( + xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [2, 3, 4])}), + xr.DataArray([1, 1], dims="degree", coords={"degree": [0, 1]}), + xr.Dataset({"a": ("x", [2, 3, 4]), "b": ("x", [3, 4, 5])}), + id="dataset-array", + ), + pytest.param( + xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("y", [2, 3, 4])}), + xr.Dataset( + {"a": ("degree", [0, 1]), "b": ("degree", [1, 1])}, + coords={"degree": [0, 1]}, + ), + xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("y", [3, 4, 5])}), + id="dataset-dataset", + ), + pytest.param( + xr.DataArray(pd.date_range("1970-01-01", freq="s", periods=3), dims="x"), + xr.DataArray([0, 1], dims="degree", coords={"degree": [0, 1]}), + xr.DataArray( + [0, 1e9, 2e9], + dims="x", + coords={"x": pd.date_range("1970-01-01", freq="s", periods=3)}, + ), + id="datetime", + ), + ], +) +def test_polyval(use_dask, x, coeffs, expected) -> None: if use_dask: - coeffs = coeffs.chunk({"d": 2}) + if not has_dask: + pytest.skip("requires dask") + coeffs = coeffs.chunk({"degree": 2}) + x = x.chunk({"x": 2}) + with raise_if_dask_computes(): + actual = xr.polyval(x, coeffs) + xr.testing.assert_allclose(actual, expected) - da_pv = xr.polyval(da.x, coeffs) - xr.testing.assert_allclose(da, da_pv.T) +def test_polyval_degree_dim_checks(): + x = (xr.DataArray([1, 2, 3], dims="x"),) + coeffs = xr.DataArray([2, 3, 4], dims="degree", coords={"degree": [0, 1, 2]}) + with pytest.raises(ValueError): + xr.polyval(x, coeffs.drop_vars("degree")) + with pytest.raises(ValueError): + xr.polyval(x, coeffs.assign_coords(degree=coeffs.degree.astype(float))) @pytest.mark.parametrize("use_dask", [False, True])