Skip to content

Commit

Permalink
polyval: Use Horner's algorithm + support chunked inputs (#6548)
Browse files Browse the repository at this point in the history
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
  • Loading branch information
4 people authored May 5, 2022
1 parent 39bda44 commit 6fbeb13
Show file tree
Hide file tree
Showing 4 changed files with 220 additions and 45 deletions.
38 changes: 38 additions & 0 deletions asv_bench/benchmarks/polyfit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import numpy as np

import xarray as xr

from . import parameterized, randn, requires_dask

NDEGS = (2, 5, 20)
NX = (10**2, 10**6)


class Polyval:
    """Benchmarks for ``xr.polyval`` over several input sizes and coefficient counts."""

    def setup(self, *args, **kwargs):
        """Create one ``x`` array per size in ``NX`` and one coefficient array per entry in ``NDEGS``."""
        inputs = {}
        for size in NX:
            inputs[size] = xr.DataArray(randn((size,)), dims="x", name="x")
        self.xs = inputs

        coefficients = {}
        for n_coeffs in NDEGS:
            # The "degree" coordinate labels each coefficient with 0 .. n_coeffs - 1.
            coefficients[n_coeffs] = xr.DataArray(
                randn((n_coeffs,)),
                dims="degree",
                coords={"degree": np.arange(n_coeffs)},
            )
        self.coeffs = coefficients

    @parameterized(["nx", "ndeg"], [NX, NDEGS])
    def time_polyval(self, nx, ndeg):
        """Evaluate the polynomial for the selected inputs and force computation."""
        xr.polyval(self.xs[nx], self.coeffs[ndeg]).compute()

    @parameterized(["nx", "ndeg"], [NX, NDEGS])
    def peakmem_polyval(self, nx, ndeg):
        """Same workload as ``time_polyval``; the name suggests peak memory is what gets recorded."""
        xr.polyval(self.xs[nx], self.coeffs[ndeg]).compute()


class PolyvalDask(Polyval):
    """Variant of ``Polyval`` whose ``x`` inputs are chunked along ``x``."""

    def setup(self, *args, **kwargs):
        """Build the base inputs, then replace each ``x`` array with a chunked version."""
        # Presumably skips or aborts the benchmark when dask is unavailable;
        # must run before anything tries to chunk.
        requires_dask()
        super().setup(*args, **kwargs)

        chunked = {}
        for size, array in self.xs.items():
            chunked[size] = array.chunk({"x": 10000})
        self.xs = chunked
7 changes: 7 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ New Features
- Allow passing chunks in ``**kwargs`` form to :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and
:py:meth:`Variable.chunk`. (:pull:`6471`)
By `Tom Nicholas <https://github.com/TomNicholas>`_.
- :py:func:`xr.polyval` now supports :py:class:`Dataset` and :py:class:`DataArray` arguments of any shape,
is faster, and requires less memory. (:pull:`6548`)
By `Michael Niklas <https://github.com/headtr1ck>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -74,6 +77,10 @@ Breaking changes
- Xarray's ufuncs have been removed, now that they can be replaced by numpy's ufuncs in all
supported versions of numpy.
By `Maximilian Roos <https://github.com/max-sixty>`_.
- :py:func:`xr.polyval` now uses the values of the ``coord`` argument directly instead of its index coordinate.
(:pull:`6548`)
By `Michael Niklas <https://github.com/headtr1ck>`_.


Deprecations
~~~~~~~~~~~~
Expand Down
101 changes: 84 additions & 17 deletions xarray/core/computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@
Iterable,
Mapping,
Sequence,
overload,
)

import numpy as np

from . import dtypes, duck_array_ops, utils
from .alignment import align, deep_align
from .common import zeros_like
from .duck_array_ops import datetime_to_numeric
from .indexes import Index, filter_indexes_from_coords
from .merge import merge_attrs, merge_coordinates_without_align
from .options import OPTIONS, _get_keep_attrs
Expand Down Expand Up @@ -1843,36 +1846,100 @@ def where(cond, x, y, keep_attrs=None):
)


@overload
def polyval(
    coord: DataArray, coeffs: DataArray, degree_dim: Hashable = "degree"
) -> DataArray:
    ...


@overload
def polyval(
    coord: T_Xarray, coeffs: Dataset, degree_dim: Hashable = "degree"
) -> Dataset:
    ...


@overload
def polyval(
    coord: Dataset, coeffs: T_Xarray, degree_dim: Hashable = "degree"
) -> Dataset:
    ...


def polyval(
    coord: T_Xarray, coeffs: T_Xarray, degree_dim: Hashable = "degree"
) -> T_Xarray:
    """Evaluate a polynomial at specific values

    Parameters
    ----------
    coord : DataArray or Dataset
        Values at which to evaluate the polynomial. Datetime-like values are
        converted to numbers by ``_ensure_numeric`` before evaluation.
    coeffs : DataArray or Dataset
        Coefficients of the polynomial. Must carry an integer-labelled index
        along ``degree_dim``; degrees missing from that index are treated as
        having a coefficient of 0.
    degree_dim : Hashable, default: "degree"
        Name of the polynomial degree dimension in `coeffs`.

    Returns
    -------
    DataArray or Dataset
        Evaluated polynomial.

    Raises
    ------
    ValueError
        If ``degree_dim`` is not an indexed coordinate of ``coeffs``, or if
        its labels are not of an integer dtype.

    See Also
    --------
    xarray.DataArray.polyfit
    numpy.polynomial.polynomial.polyval
    """
    if degree_dim not in coeffs._indexes:
        raise ValueError(
            f"Dimension `{degree_dim}` should be a coordinate variable with labels."
        )
    # Compare against the abstract ``np.integer`` so every integer width and
    # signedness is accepted; the builtin ``int`` only stands for numpy's
    # default integer type and would reject e.g. int32 labels.
    if not np.issubdtype(coeffs[degree_dim].dtype, np.integer):
        raise ValueError(
            f"Dimension `{degree_dim}` should be of integer dtype. Received {coeffs[degree_dim].dtype} instead."
        )
    max_deg = coeffs[degree_dim].max().item()
    # Make the degree axis dense and ascending (0 .. max_deg) so positional
    # index ``deg`` below always refers to the coefficient of x**deg.
    coeffs = coeffs.reindex(
        {degree_dim: np.arange(max_deg + 1)}, fill_value=0, copy=False
    )
    coord = _ensure_numeric(coord)

    # using Horner's method
    # https://en.wikipedia.org/wiki/Horner%27s_method
    # Adding zeros_like(coord) broadcasts the leading coefficient against
    # coord, so the in-place updates below already have the final shape.
    res = coeffs.isel({degree_dim: max_deg}, drop=True) + zeros_like(coord)
    for deg in range(max_deg - 1, -1, -1):
        res *= coord
        res += coeffs.isel({degree_dim: deg}, drop=True)

    return res

def _ensure_numeric(data: T_Xarray) -> T_Xarray:
    """Converts all datetime64 and timedelta64 variables to float64

    Parameters
    ----------
    data : DataArray or Dataset
        Variables with possible datetime or timedelta dtypes.

    Returns
    -------
    DataArray or Dataset
        Variables with datetime64 / timedelta64 dtypes converted to float64.
        Variables of any other dtype are returned unchanged.
    """
    from .dataset import Dataset

    def to_floatable(x: DataArray) -> DataArray:
        kind = x.dtype.kind
        if kind == "M":
            # datetime64: express as nanoseconds relative to 1970-01-01.
            return x.copy(
                data=datetime_to_numeric(
                    x.data,
                    offset=np.datetime64("1970-01-01"),
                    datetime_unit="ns",
                ),
            )
        if kind == "m":
            # timedelta64: subtracting a datetime64 offset from a timedelta is
            # not a valid numpy operation, so cast directly instead. The
            # resulting numbers are counts in the array's own timedelta unit.
            return x.astype(float)
        return x

    if isinstance(data, Dataset):
        return data.map(to_floatable)
    return to_floatable(data)


def _calc_idxminmax(
Expand Down
119 changes: 91 additions & 28 deletions xarray/tests/test_computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1933,37 +1933,100 @@ def test_where_attrs() -> None:
assert actual.attrs == {}


@pytest.mark.parametrize("use_dask", [False, True])
@pytest.mark.parametrize(
    ["x", "coeffs", "expected"],
    [
        pytest.param(
            xr.DataArray([1, 2, 3], dims="x"),
            xr.DataArray([2, 3, 4], dims="degree", coords={"degree": [0, 1, 2]}),
            xr.DataArray([9, 2 + 6 + 16, 2 + 9 + 36], dims="x"),
            id="simple",
        ),
        pytest.param(
            xr.DataArray([1, 2, 3], dims="x"),
            xr.DataArray(
                [[0, 1], [0, 1]], dims=("y", "degree"), coords={"degree": [0, 1]}
            ),
            xr.DataArray([[1, 2, 3], [1, 2, 3]], dims=("y", "x")),
            id="broadcast-x",
        ),
        pytest.param(
            xr.DataArray([1, 2, 3], dims="x"),
            xr.DataArray(
                [[0, 1], [1, 0], [1, 1]],
                dims=("x", "degree"),
                coords={"degree": [0, 1]},
            ),
            xr.DataArray([1, 1, 1 + 3], dims="x"),
            id="shared-dim",
        ),
        pytest.param(
            xr.DataArray([1, 2, 3], dims="x"),
            xr.DataArray([1, 0, 0], dims="degree", coords={"degree": [2, 1, 0]}),
            xr.DataArray([1, 2**2, 3**2], dims="x"),
            id="reordered-index",
        ),
        pytest.param(
            xr.DataArray([1, 2, 3], dims="x"),
            xr.DataArray([5], dims="degree", coords={"degree": [3]}),
            xr.DataArray([5, 5 * 2**3, 5 * 3**3], dims="x"),
            id="sparse-index",
        ),
        pytest.param(
            xr.DataArray([1, 2, 3], dims="x"),
            xr.Dataset(
                {"a": ("degree", [0, 1]), "b": ("degree", [1, 0])},
                coords={"degree": [0, 1]},
            ),
            xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [1, 1, 1])}),
            id="array-dataset",
        ),
        pytest.param(
            xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [2, 3, 4])}),
            xr.DataArray([1, 1], dims="degree", coords={"degree": [0, 1]}),
            xr.Dataset({"a": ("x", [2, 3, 4]), "b": ("x", [3, 4, 5])}),
            id="dataset-array",
        ),
        pytest.param(
            xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("y", [2, 3, 4])}),
            xr.Dataset(
                {"a": ("degree", [0, 1]), "b": ("degree", [1, 1])},
                coords={"degree": [0, 1]},
            ),
            xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("y", [3, 4, 5])}),
            id="dataset-dataset",
        ),
        pytest.param(
            xr.DataArray(pd.date_range("1970-01-01", freq="s", periods=3), dims="x"),
            xr.DataArray([0, 1], dims="degree", coords={"degree": [0, 1]}),
            xr.DataArray(
                [0, 1e9, 2e9],
                dims="x",
                coords={"x": pd.date_range("1970-01-01", freq="s", periods=3)},
            ),
            id="datetime",
        ),
    ],
)
def test_polyval(use_dask, x, coeffs, expected) -> None:
    """Check ``xr.polyval`` against hand-computed results, with and without chunked inputs."""
    if use_dask:
        if not has_dask:
            pytest.skip("requires dask")
        # Chunk both operands so the evaluation runs on chunked arrays.
        coeffs = coeffs.chunk({"degree": 2})
        x = x.chunk({"x": 2})
    # Building the result must not trigger a dask computation.
    with raise_if_dask_computes():
        actual = xr.polyval(x, coeffs)
    xr.testing.assert_allclose(actual, expected)
def test_polyval_degree_dim_checks() -> None:
    """``xr.polyval`` must reject coefficients whose degree dimension is unlabeled or non-integer."""
    # A plain DataArray: a stray trailing comma previously turned this into a
    # 1-tuple, which only went unnoticed because validation fails first.
    x = xr.DataArray([1, 2, 3], dims="x")
    coeffs = xr.DataArray([2, 3, 4], dims="degree", coords={"degree": [0, 1, 2]})
    # No coordinate labels on the degree dimension.
    with pytest.raises(ValueError, match="should be a coordinate variable"):
        xr.polyval(x, coeffs.drop_vars("degree"))
    # Degree labels present but of float dtype.
    with pytest.raises(ValueError, match="should be of integer dtype"):
        xr.polyval(x, coeffs.assign_coords(degree=coeffs.degree.astype(float)))


@pytest.mark.parametrize("use_dask", [False, True])
Expand Down

0 comments on commit 6fbeb13

Please sign in to comment.