Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: keepdims=True for xarray reductions #3033

Merged
merged 7 commits into from
Jun 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Enhancements
~~~~~~~~~~~~


- Add ``keepdims`` argument for reduce operations (:issue:`2170`)
By `Scott Wales <https://github.com/ScottWales>`_.
- netCDF chunksizes are now only dropped when original_shape is different,
not when it isn't found. (:issue:`2207`)
By `Karel van de Plassche <https://github.com/Karel-van-de-Plassche>`_.
Expand Down
18 changes: 15 additions & 3 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,14 @@ def _replace(self, variable=None, coords=None, name=__default):
return type(self)(variable, coords, name=name, fastpath=True)

def _replace_maybe_drop_dims(self, variable, name=__default):
if variable.dims == self.dims:
if variable.dims == self.dims and variable.shape == self.shape:
coords = self._coords.copy()
elif variable.dims == self.dims:
# Shape has changed (e.g. from reduce(..., keepdims=True))
new_sizes = dict(zip(self.dims, variable.shape))
coords = OrderedDict((k, v) for k, v in self._coords.items()
if v.shape == tuple(new_sizes[d]
for d in v.dims))
else:
allowed_dims = set(variable.dims)
coords = OrderedDict((k, v) for k, v in self._coords.items()
Expand Down Expand Up @@ -1637,7 +1643,8 @@ def combine_first(self, other):
"""
return ops.fillna(self, other, join="outer")

def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs):
def reduce(self, func, dim=None, axis=None, keep_attrs=None,
max-sixty marked this conversation as resolved.
Show resolved Hide resolved
keepdims=False, **kwargs):
"""Reduce this array by applying `func` along some dimension(s).

Parameters
Expand All @@ -1657,6 +1664,10 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs):
If True, the variable's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
keepdims : bool, default False
If True, the dimensions which are reduced are left in the result
as dimensions of size one. Coordinates that use these dimensions
are removed.
**kwargs : dict
Additional keyword arguments passed on to `func`.

Expand All @@ -1667,7 +1678,8 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs):
summarized data and the indicated dimension(s) removed.
"""

var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs)
var = self.variable.reduce(func, dim, axis, keep_attrs, keepdims,
**kwargs)
return self._replace_maybe_drop_dims(var)

def to_pandas(self):
Expand Down
9 changes: 7 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3132,8 +3132,8 @@ def combine_first(self, other):
out = ops.fillna(self, other, join="outer", dataset_join="outer")
return out

def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False,
allow_lazy=False, **kwargs):
def reduce(self, func, dim=None, keep_attrs=None, keepdims=False,
numeric_only=False, allow_lazy=False, **kwargs):
"""Reduce this dataset by applying `func` along some dimension(s).

Parameters
Expand All @@ -3149,6 +3149,10 @@ def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False,
If True, the dataset's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
keepdims : bool, default False
If True, the dimensions which are reduced are left in the result
as dimensions of size one. Coordinates that use these dimensions
are removed.
numeric_only : bool, optional
If True, only apply ``func`` to variables with a numeric dtype.
**kwargs : dict
Expand Down Expand Up @@ -3198,6 +3202,7 @@ def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False,
reduce_dims = None
variables[name] = var.reduce(func, dim=reduce_dims,
keep_attrs=keep_attrs,
keepdims=keepdims,
allow_lazy=allow_lazy,
**kwargs)

Expand Down
20 changes: 17 additions & 3 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1334,7 +1334,7 @@ def where(self, cond, other=dtypes.NA):
return ops.where_method(self, cond, other)

def reduce(self, func, dim=None, axis=None,
keep_attrs=None, allow_lazy=False, **kwargs):
dcherian marked this conversation as resolved.
Show resolved Hide resolved
keep_attrs=None, keepdims=False, allow_lazy=False, **kwargs):
"""Reduce this array by applying `func` along some dimension(s).

Parameters
Expand All @@ -1354,6 +1354,9 @@ def reduce(self, func, dim=None, axis=None,
If True, the variable's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
keepdims : bool, default False
If True, the dimensions which are reduced are left in the result
as dimensions of size one.
**kwargs : dict
Additional keyword arguments passed on to `func`.

Expand Down Expand Up @@ -1381,8 +1384,19 @@ def reduce(self, func, dim=None, axis=None,
else:
removed_axes = (range(self.ndim) if axis is None
else np.atleast_1d(axis) % self.ndim)
dims = [adim for n, adim in enumerate(self.dims)
if n not in removed_axes]
if keepdims:
# Insert np.newaxis for removed dims
slices = tuple(np.newaxis if i in removed_axes else
slice(None, None) for i in range(self.ndim))
Copy link
Member

@shoyer shoyer Jun 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just FYI, slice(None) is equivalent to slice(None, None)

if getattr(data, 'shape', None) is None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this would be equivalent to just `not hasattr(data, 'shape')`?

# Reduce has produced a scalar value, not an array-like
data = np.asanyarray(data)[slices]
else:
data = data[slices]
dims = self.dims
else:
dims = [adim for n, adim in enumerate(self.dims)
if n not in removed_axes]

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
Expand Down
38 changes: 38 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1976,6 +1976,44 @@ def test_reduce(self):
dims=['x', 'y']).mean('x')
assert_equal(actual, expected)

def test_reduce_keepdims(self):
    """Check that ``keepdims=True`` keeps reduced dims with size one.

    The expected results also pin down coordinate handling: coordinates
    that use a reduced dimension are dropped, the others are kept.
    """
    coords = {
        'x': [-1, -2],
        'y': ['ab', 'cd', 'ef'],
        'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
        'c': -999,
    }
    orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=['x', 'y'])

    # Reducing over every axis: only the scalar coordinate 'c' remains.
    reduced_all = orig.mean(keepdims=True)
    scalar_coords = {name: value for name, value in coords.items()
                     if name in ('c',)}
    assert_equal(
        reduced_all,
        DataArray(orig.data.mean(keepdims=True), dims=orig.dims,
                  coords=scalar_coords))

    # Both dimensions are still present, each with length one.
    for dim in ('x', 'y'):
        assert reduced_all.sizes[dim] == 1

    # Reducing over 'y' only: coordinates that involve 'y' ('y' itself
    # and 'lat') are dropped, while 'x' and 'c' are kept.
    reduced_y = orig.mean('y', keepdims=True)
    kept_coords = {name: value for name, value in coords.items()
                   if name not in ('y', 'lat')}
    assert_equal(
        reduced_y,
        DataArray(orig.data.mean(axis=1, keepdims=True), dims=orig.dims,
                  coords=kept_coords))

@requires_bottleneck
def test_reduce_keepdims_bottleneck(self):
    """Check ``keepdims=True`` when reducing with a bottleneck function."""
    import bottleneck

    orig = DataArray(
        [[-1, 0, 1], [-3, 0, 3]],
        {
            'x': [-1, -2],
            'y': ['ab', 'cd', 'ef'],
            'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
            'c': -999,
        },
        dims=['x', 'y'])

    # Bottleneck does not have its own keepdims implementation, so the
    # result is compared against the regular mean with keepdims=True.
    expected = orig.mean(keepdims=True)
    actual = orig.reduce(bottleneck.nanmean, keepdims=True)
    assert_equal(actual, expected)

def test_reduce_dtype(self):
coords = {'x': [-1, -2], 'y': ['ab', 'cd', 'ef'],
'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
Expand Down
19 changes: 19 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3858,6 +3858,25 @@ def total_sum(x):
with raises_regex(TypeError, "unexpected keyword argument 'axis'"):
ds.reduce(total_sum, dim='x')

def test_reduce_keepdims(self):
    """Check ``Dataset`` reductions with ``keepdims=True``.

    The reduced shape should match numpy reductions with keepdims=True,
    and coordinates involved in the reduction should be removed.
    """
    dims = ['x', 'y']
    ds = Dataset(
        {'a': (dims, [[0, 1, 2, 3, 4]])},
        coords={
            'y': [0, 1, 2, 3, 4],
            'x': [0],
            'lat': (['x', 'y'], [[0, 1, 2, 3, 4]]),
            'c': -999.0,
        })

    # Reduce over all dimensions: only the scalar coordinate survives.
    actual = ds.mean(keepdims=True)
    all_reduced = np.mean(ds.a, keepdims=True)
    expected = Dataset({'a': (['x', 'y'], all_reduced)},
                       coords={'c': ds.c})
    assert_identical(expected, actual)

    # Reduce over 'x' only: 'y' and 'c' are kept, 'x' and 'lat' are not.
    actual = ds.mean('x', keepdims=True)
    x_reduced = np.mean(ds.a, axis=0, keepdims=True)
    expected = Dataset({'a': (['x', 'y'], x_reduced)},
                       coords={'y': ds.y, 'c': ds.c})
    assert_identical(expected, actual)

def test_quantile(self):

ds = create_test_data(seed=123)
Expand Down
36 changes: 36 additions & 0 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1540,6 +1540,42 @@ def test_reduce_funcs(self):
assert_identical(
v.max(), Variable([], pd.Timestamp('2000-01-03')))

def test_reduce_keepdims(self):
    """Check ``Variable.mean(keepdims=True)`` against ``np.mean``."""
    v = Variable(['x', 'y'], self.d)

    # Each case pairs the xarray keyword arguments with the numpy
    # keyword arguments that select the same axes.
    cases = [
        ({}, {}),
        ({'dim': 'x'}, {'axis': 0}),
        ({'dim': 'y'}, {'axis': 1}),
        ({'dim': ['y', 'x']}, {'axis': (1, 0)}),
    ]
    for xr_kwargs, np_kwargs in cases:
        assert_identical(
            v.mean(keepdims=True, **xr_kwargs),
            Variable(v.dims,
                     np.mean(self.d, keepdims=True, **np_kwargs)))

    # A zero-dimensional variable has no dimensions to keep.
    v = Variable([], 1.0)
    expected = Variable([], np.mean(v.data, keepdims=True))
    assert_identical(v.mean(keepdims=True), expected)

@requires_dask
def test_reduce_keepdims_dask(self):
    """Check ``keepdims=True`` on a chunked variable.

    The result must still hold a dask array and match ``np.mean``.
    """
    import dask.array

    v = Variable(['x', 'y'], self.d).chunk()

    # First reduce over everything, then over 'y' (numpy axis 1) only.
    cases = (({}, {}), ({'dim': 'y'}, {'axis': 1}))
    for xr_kwargs, np_kwargs in cases:
        actual = v.mean(keepdims=True, **xr_kwargs)
        assert isinstance(actual.data, dask.array.Array)

        expected = Variable(
            v.dims, np.mean(self.d, keepdims=True, **np_kwargs))
        assert_identical(actual, expected)

def test_reduce_keep_attrs(self):
_attrs = {'units': 'test', 'long_name': 'testing'}

Expand Down