From 8cc142bc8879b929d6e07316d8d0c7541c662a79 Mon Sep 17 00:00:00 2001
From: 0x0L <0x0L@github.com>
Date: Tue, 21 Nov 2017 17:58:09 +0100
Subject: [PATCH] initial support for rank

---
 doc/api-hidden.rst             |  1 +
 doc/api.rst                    |  1 +
 doc/whats-new.rst              |  3 +++
 xarray/core/dataarray.py       | 37 ++++++++++++++++++++++++++++++++++
 xarray/tests/test_dataarray.py | 17 +++++++++++++++-
 5 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst
index c27db1e46a9..a83dcce11c5 100644
--- a/doc/api-hidden.rst
+++ b/doc/api-hidden.rst
@@ -91,6 +91,7 @@
    DataArray.T
    DataArray.cumsum
    DataArray.cumprod
+   DataArray.rank
 
    ufuncs.angle
    ufuncs.arccos
diff --git a/doc/api.rst b/doc/api.rst
index 0eb9e4c131c..9d586476f30 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -312,6 +312,7 @@ Computation
 :py:attr:`~DataArray.T`
 :py:attr:`~DataArray.cumsum`
 :py:attr:`~DataArray.cumprod`
+:py:attr:`~DataArray.rank`
 
 **Grouped operations**:
 :py:attr:`~core.groupby.DataArrayGroupBy.assign_coords`
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index dd775417132..e31fba4d054 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -208,6 +208,9 @@ Enhancements
   (:issue:`1485`).
   By `Joe Hamman <https://github.com/jhamman>`_.
 
+- New :py:meth:`~xarray.DataArray.rank`. Requires bottleneck (:issue:`1731`).
+  By `0x0L <https://github.com/0x0L>`_.
+
 **Performance improvements**
 
 - :py:func:`~xarray.concat` was computing variables that aren't in memory
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 1dac72335d2..b012708f0c5 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -19,6 +19,7 @@
 from .accessors import DatetimeAccessor
 from .alignment import align, reindex_like_indexers
 from .common import AbstractArray, BaseDataObject
+from .computation import apply_ufunc
 from .coordinates import (DataArrayCoordinates, LevelCoordinatesSource,
                           Indexes)
 from .dataset import Dataset, merge_indexes, split_indexes
@@ -1958,6 +1959,42 @@ def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False):
             interpolation=interpolation)
         return self._from_temp_dataset(ds)
 
+    def rank(self, dim):
+        """Ranks the data.
+
+        Equal values are assigned a rank that is the average of the ranks that
+        would have been otherwise assigned to all of the values within that set.
+        Ranks begin at 1, not 0.
+
+        NaNs in the input array are returned as NaNs.
+
+        Parameters
+        ----------
+        dim : str
+            Dimension over which to compute rank.
+
+        Returns
+        -------
+        ranked : DataArray
+            DataArray with the same coordinates and dtype 'float64'.
+
+        Examples
+        --------
+        >>> arr = xr.DataArray([5, 6, 7], dims='x')
+        >>> arr.rank('x')
+        <xarray.DataArray (x: 3)>
+        array([ 1.,  2.,  3.])
+        Dimensions without coordinates: x
+        """
+        import bottleneck as bn
+        axis = self.get_axis_num(dim)
+        func = bn.nanrankdata if self.dtype.kind == 'f' else bn.rankdata
+        return apply_ufunc(func, self,
+                           dask='parallelized',
+                           keep_attrs=True,
+                           output_dtypes=[np.float64],
+                           kwargs=dict(axis=axis)).transpose(*self.dims)
+
 
 # priority most be higher than Variable to properly work with binary ufuncs
 ops.inject_all_ops_and_reduce_methods(DataArray, priority=60)
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index d39232c04c8..0ba97dc1d27 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -19,7 +19,7 @@
 from xarray.tests import (
     TestCase, ReturnItem, source_ndarray, unittest, requires_dask,
     assert_identical, assert_equal, assert_allclose, assert_array_equal,
-    raises_regex, requires_scipy)
+    raises_regex, requires_scipy, requires_bottleneck)
 
 
 class TestDataArray(TestCase):
@@ -2976,6 +2976,21 @@ def test_sortby(self):
         actual = da.sortby(['x', 'y'])
         self.assertDataArrayEqual(actual, expected)
 
+    @requires_bottleneck
+    def test_rank(self):
+        # floats
+        ar = DataArray([[3, 4, np.nan, 1]])
+        expect_0 = DataArray([[1, 1, np.nan, 1]])
+        expect_1 = DataArray([[2, 3, np.nan, 1]])
+        self.assertDataArrayEqual(ar.rank('dim_0'), expect_0)
+        self.assertDataArrayEqual(ar.rank('dim_1'), expect_1)
+        # int
+        x = DataArray([3, 2, 1])
+        self.assertDataArrayEqual(x.rank('dim_0'), x)
+        # str
+        y = DataArray(['c', 'b', 'a'])
+        self.assertDataArrayEqual(y.rank('dim_0'), x)
+
 
 @pytest.fixture(params=[1])
 def da(request):