From 56c16e4bf45a3771fd9acba76d802c0199c14519 Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Sat, 16 Nov 2019 23:36:43 +0900 Subject: [PATCH] Added fill_value for unstack (#3541) * Added fill_value for unstack * remove sparse option and fix unintended changes * a bug fix * using assert_equal * assert_equals -> assert_equal --- doc/whats-new.rst | 3 +++ xarray/core/dataarray.py | 7 +++++-- xarray/core/dataset.py | 13 +++++++++---- xarray/tests/test_dataset.py | 17 +++++++++++++++++ 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c835fbeff45..6bf495713fe 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,9 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Added the ``fill_value`` option to :py:meth:`~xarray.DataArray.unstack` and + :py:meth:`~xarray.Dataset.unstack` (:issue:`3518`). + By `Keisuke Fujii `_. - Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data gap that will be filled by interpolation. By `Deepak Cherian `_. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b27a61d530b..23342fc5e0d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1726,7 +1726,9 @@ def stack( return self._from_temp_dataset(ds) def unstack( - self, dim: Union[Hashable, Sequence[Hashable], None] = None + self, + dim: Union[Hashable, Sequence[Hashable], None] = None, + fill_value: Any = dtypes.NA, ) -> "DataArray": """ Unstack existing dimensions corresponding to MultiIndexes into @@ -1739,6 +1741,7 @@ def unstack( dim : hashable or sequence of hashable, optional Dimension(s) over which to unstack. By default unstacks all MultiIndexes. + fill_value: value to be filled. By default, np.nan Returns ------- @@ -1770,7 +1773,7 @@ def unstack( -------- DataArray.stack """ - ds = self._to_temp_dataset().unstack(dim) + ds = self._to_temp_dataset().unstack(dim, fill_value) return self._from_temp_dataset(ds) def to_unstacked_dataset(self, dim, level=0): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3a83b477681..371e0d6bf26 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3333,7 +3333,7 @@ def ensure_stackable(val): return data_array - def _unstack_once(self, dim: Hashable) -> "Dataset": + def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset": index = self.get_index(dim) index = index.remove_unused_levels() full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) @@ -3342,7 +3342,7 @@ def _unstack_once(self, dim: Hashable) -> "Dataset": if index.equals(full_idx): obj = self else: - obj = self.reindex({dim: full_idx}, copy=False) + obj = self.reindex({dim: full_idx}, copy=False, fill_value=fill_value) new_dim_names = index.names new_dim_sizes = [lev.size for lev in index.levels] @@ -3368,7 +3368,11 @@ def _unstack_once(self, dim: Hashable) -> "Dataset": variables, coord_names=coord_names, indexes=indexes ) - def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": + def unstack( + self, + dim: Union[Hashable, Iterable[Hashable]] = None, + fill_value: Any = dtypes.NA, + ) -> "Dataset": """ Unstack existing dimensions corresponding to MultiIndexes into multiple new dimensions. @@ -3380,6 +3384,7 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": dim : Hashable or iterable of Hashable, optional Dimension(s) over which to unstack. By default unstacks all MultiIndexes. + fill_value: value to be filled. By default, np.nan Returns ------- @@ -3417,7 +3422,7 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": result = self.copy(deep=False) for dim in dims: - result = result._unstack_once(dim) + result = result._unstack_once(dim, fill_value) return result def update(self, other: "CoercibleMapping", inplace: bool = None) -> "Dataset": diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 780843f2e61..be40ce7c6e8 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2794,6 +2794,23 @@ def test_unstack_errors(self): with raises_regex(ValueError, "do not have a MultiIndex"): ds.unstack("x") + def test_unstack_fill_value(self): + ds = xr.Dataset( + {"var": (("x",), np.arange(6))}, + coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)}, + ) + # make ds incomplete + ds = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"]) + # test fill_value + actual = ds.unstack("index", fill_value=-1) + expected = ds.unstack("index").fillna(-1).astype(np.int) + assert actual["var"].dtype == np.int + assert_equal(actual, expected) + + actual = ds["var"].unstack("index", fill_value=-1) + expected = ds["var"].unstack("index").fillna(-1).astype(np.int) + assert actual.equals(expected) + def test_stack_unstack_fast(self): ds = Dataset( {