From 00519753da74d533527dd7eafac7ac9f2fde6b07 Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 5 Jan 2021 21:39:11 +0100 Subject: [PATCH 01/51] action to detect a keyword in the commit summary line (#4764) * add a custom action to detect keywords in the commit summary line * rename keywords to keyword * fetch one more commit using git fetch --deepen --- .github/actions/detect-ci-trigger/action.yaml | 29 +++++++++++++ .github/actions/detect-ci-trigger/script.sh | 41 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 .github/actions/detect-ci-trigger/action.yaml create mode 100644 .github/actions/detect-ci-trigger/script.sh diff --git a/.github/actions/detect-ci-trigger/action.yaml b/.github/actions/detect-ci-trigger/action.yaml new file mode 100644 index 00000000000..c255d0c57cc --- /dev/null +++ b/.github/actions/detect-ci-trigger/action.yaml @@ -0,0 +1,29 @@ +name: Detect CI Trigger +description: | + Detect a keyword used to control the CI in the subject line of a commit message. +inputs: + keyword: + description: | + The keyword to detect. + required: true +outputs: + trigger-found: + description: | + true if the keyword has been found in the subject line of the commit message + value: ${{ steps.detect-trigger.outputs.CI_TRIGGERED }} +runs: + using: "composite" + steps: + - name: detect trigger + id: detect-trigger + run: | + bash $GITHUB_ACTION_PATH/script.sh ${{ github.event_name }} ${{ inputs.keyword }} + shell: bash + - name: show detection result + run: | + echo "::group::final summary" + echo "commit message: ${{ steps.detect-trigger.outputs.COMMIT_MESSAGE }}" + echo "trigger keyword: ${{ inputs.keyword }}" + echo "trigger found: ${{ steps.detect-trigger.outputs.CI_TRIGGERED }}" + echo "::endgroup::" + shell: bash diff --git a/.github/actions/detect-ci-trigger/script.sh b/.github/actions/detect-ci-trigger/script.sh new file mode 100644 index 00000000000..d54da9b7ef0 --- /dev/null +++ b/.github/actions/detect-ci-trigger/script.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +event_name="$1" +keyword="$2" + +echo "::group::fetch a sufficient number of commits" +if [[ "$event_name" == "pull_request" ]]; then + git fetch --deepen=1 --no-tags 2>&1 +else + echo "nothing to do." 
+fi +echo "::endgroup::" + +echo "::group::extracting the commit message" +echo "event name: $event_name" +if [[ "$event_name" == "pull_request" ]]; then + ref="HEAD^2" +else + ref="HEAD" +fi + +commit_message="$(git log -n 1 --pretty=format:%s "$ref")" + +if [[ $(echo $commit_message | wc -l) -le 1 ]]; then + echo "commit message: '$commit_message'" +else + echo -e "commit message:\n--- start ---\n$commit_message\n--- end ---" +fi +echo "::endgroup::" + +echo "::group::scanning for the keyword" +echo "searching for: '$keyword'" +if echo "$commit_message" | grep -qF "$keyword"; then + result="true" +else + result="false" +fi +echo "keyword detected: $result" +echo "::endgroup::" + +echo "::set-output name=COMMIT_MESSAGE::$commit_message" +echo "::set-output name=CI_TRIGGERED::$result" From a8bbaedbce4539f82ef50037c7302814d48a4607 Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 5 Jan 2021 23:51:01 +0100 Subject: [PATCH 02/51] update the pre-commit hook versions (#4769) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 18d60f61eb9..6659366538b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ repos: # isort should run before black as black sometimes tweaks the isort output - repo: https://github.com/PyCQA/isort - rev: 5.6.4 + rev: 5.7.0 hooks: - id: isort # https://github.com/python/black#version-control-integration @@ -11,7 +11,7 @@ repos: hooks: - id: black - repo: https://github.com/keewis/blackdoc - rev: v0.3.1 + rev: v0.3.2 hooks: - id: blackdoc - repo: https://gitlab.com/pycqa/flake8 From 7298df0c05168896a9813249b54a2d11f35cfa8f Mon Sep 17 00:00:00 2001 From: rhkleijn <32801740+rhkleijn@users.noreply.github.com> Date: Wed, 6 Jan 2021 00:00:28 +0100 Subject: [PATCH 03/51] speedup attribute style access and tab completion (#4742) * speedup attribute style access and tab completion * changes from code review, whats-new entry --- doc/whats-new.rst | 3 ++- xarray/core/common.py | 32 ++++++++++++++++---------------- xarray/core/coordinates.py | 23 ----------------------- xarray/core/dataarray.py | 31 +++++++++++++++++++------------ xarray/core/dataset.py | 27 +++++++++++++++------------ xarray/core/utils.py | 29 +++++++++++++++++++++++++++++ 6 files changed, 81 insertions(+), 64 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7f41e83236f..d4579cb3631 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -76,11 +76,12 @@ Internal Changes - Run the tests in parallel using pytest-xdist (:pull:`4694`). By `Justus Magin `_ and `Mathias Hauser `_. - - Replace all usages of ``assert x.identical(y)`` with ``assert_identical(x, y)`` for clearer error messages. (:pull:`4752`); By `Maximilian Roos `_. +- Speed up attribute style access (e.g. ``ds.somevar`` instead of ``ds["somevar"]``) and tab completion + in ipython (:issue:`4741`, :pull:`4742`). By `Richard Kleijn `_. .. 
_whats-new.0.16.2: diff --git a/xarray/core/common.py b/xarray/core/common.py index 302a8e52365..283114770cf 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -209,14 +209,14 @@ def __init_subclass__(cls): ) @property - def _attr_sources(self) -> List[Mapping[Hashable, Any]]: - """List of places to look-up items for attribute-style access""" - return [] + def _attr_sources(self) -> Iterable[Mapping[Hashable, Any]]: + """Places to look-up items for attribute-style access""" + yield from () @property - def _item_sources(self) -> List[Mapping[Hashable, Any]]: - """List of places to look-up items for key-autocompletion""" - return [] + def _item_sources(self) -> Iterable[Mapping[Hashable, Any]]: + """Places to look-up items for key-autocompletion""" + yield from () def __getattr__(self, name: str) -> Any: if name not in {"__dict__", "__setstate__"}: @@ -272,26 +272,26 @@ def __dir__(self) -> List[str]: """Provide method name lookup and completion. Only provide 'public' methods. """ - extra_attrs = [ + extra_attrs = set( item - for sublist in self._attr_sources - for item in sublist + for source in self._attr_sources + for item in source if isinstance(item, str) - ] - return sorted(set(dir(type(self)) + extra_attrs)) + ) + return sorted(set(dir(type(self))) | extra_attrs) def _ipython_key_completions_(self) -> List[str]: """Provide method for the key-autocompletions in IPython. See http://ipython.readthedocs.io/en/stable/config/integrating.html#tab-completion For the details. """ - item_lists = [ + items = set( item - for sublist in self._item_sources - for item in sublist + for source in self._item_sources + for item in source if isinstance(item, str) - ] - return list(set(item_lists)) + ) + return list(items) def get_squeeze_dims( diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 846e4044a2c..37c462f79f4 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -325,29 +325,6 @@ def _ipython_key_completions_(self): return self._data._ipython_key_completions_() -class LevelCoordinatesSource(Mapping[Hashable, Any]): - """Iterator for MultiIndex level coordinates. - - Used for attribute style lookup with AttrAccessMixin. Not returned directly - by any public methods. - """ - - __slots__ = ("_data",) - - def __init__(self, data_object: "Union[DataArray, Dataset]"): - self._data = data_object - - def __getitem__(self, key): - # not necessary -- everything here can already be found in coords. 
- raise KeyError() - - def __iter__(self) -> Iterator[Hashable]: - return iter(self._data._level_coords) - - def __len__(self) -> int: - return len(self._data._level_coords) - - def assert_coordinate_consistent( obj: Union["DataArray", "Dataset"], coords: Mapping[Hashable, Variable] ) -> None: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 489642d03f7..b76c89fa56a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -46,7 +46,6 @@ from .common import AbstractArray, DataWithCoords from .coordinates import ( DataArrayCoordinates, - LevelCoordinatesSource, assert_coordinate_consistent, remap_label_indexers, ) @@ -56,7 +55,13 @@ from .indexing import is_fancy_indexer from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords from .options import OPTIONS, _get_keep_attrs -from .utils import Default, ReprObject, _default, either_dict_or_kwargs +from .utils import ( + Default, + HybridMappingProxy, + ReprObject, + _default, + either_dict_or_kwargs, +) from .variable import ( IndexVariable, Variable, @@ -721,18 +726,20 @@ def __delitem__(self, key: Any) -> None: del self.coords[key] @property - def _attr_sources(self) -> List[Mapping[Hashable, Any]]: - """List of places to look-up items for attribute-style access""" - return self._item_sources + [self.attrs] + def _attr_sources(self) -> Iterable[Mapping[Hashable, Any]]: + """Places to look-up items for attribute-style access""" + yield from self._item_sources + yield self.attrs @property - def _item_sources(self) -> List[Mapping[Hashable, Any]]: - """List of places to look-up items for key-completion""" - return [ - self.coords, - {d: self.coords[d] for d in self.dims}, - LevelCoordinatesSource(self), - ] + def _item_sources(self) -> Iterable[Mapping[Hashable, Any]]: + """Places to look-up items for key-completion""" + yield HybridMappingProxy(keys=self._coords, mapping=self.coords) + + # virtual coordinates + # uses empty dict -- everything here can already be found in self.coords. 
+ yield HybridMappingProxy(keys=self.dims, mapping={}) + yield HybridMappingProxy(keys=self._level_coords, mapping={}) def __contains__(self, key: Any) -> bool: return key in self.data diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c38e9d63c68..bcf856e877b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -58,7 +58,6 @@ ) from .coordinates import ( DatasetCoordinates, - LevelCoordinatesSource, assert_coordinate_consistent, remap_label_indexers, ) @@ -84,6 +83,7 @@ from .utils import ( Default, Frozen, + HybridMappingProxy, SortedKeysDict, _default, decode_numpy_dict_values, @@ -1341,19 +1341,22 @@ def __deepcopy__(self, memo=None) -> "Dataset": return self.copy(deep=True) @property - def _attr_sources(self) -> List[Mapping[Hashable, Any]]: - """List of places to look-up items for attribute-style access""" - return self._item_sources + [self.attrs] + def _attr_sources(self) -> Iterable[Mapping[Hashable, Any]]: + """Places to look-up items for attribute-style access""" + yield from self._item_sources + yield self.attrs @property - def _item_sources(self) -> List[Mapping[Hashable, Any]]: - """List of places to look-up items for key-completion""" - return [ - self.data_vars, - self.coords, - {d: self[d] for d in self.dims}, - LevelCoordinatesSource(self), - ] + def _item_sources(self) -> Iterable[Mapping[Hashable, Any]]: + """Places to look-up items for key-completion""" + yield self.data_vars + yield HybridMappingProxy(keys=self._coord_names, mapping=self.coords) + + # virtual coordinates + yield HybridMappingProxy(keys=self.dims, mapping=self) + + # uses empty dict -- everything here can already be found in self.coords. + yield HybridMappingProxy(keys=self._level_coords, mapping={}) def __contains__(self, key: object) -> bool: """The 'in' operator will return true or false depending on whether diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 093b30d088d..bbd80b97832 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -435,6 +435,35 @@ def FrozenDict(*args, **kwargs) -> Frozen: return Frozen(dict(*args, **kwargs)) +class HybridMappingProxy(Mapping[K, V]): + """Implements the Mapping interface. Uses the wrapped mapping for item lookup + and a separate wrapped keys collection for iteration. + + Can be used to construct a mapping object from another dict-like object without + eagerly accessing its items or when a mapping object is expected but only + iteration over keys is actually used. + + Note: HybridMappingProxy does not validate consistency of the provided `keys` + and `mapping`. It is the caller's responsibility to ensure that they are + suitable for the task at hand. 
+ """ + + __slots__ = ("_keys", "mapping") + + def __init__(self, keys: Collection[K], mapping: Mapping[K, V]): + self._keys = keys + self.mapping = mapping + + def __getitem__(self, key: K) -> V: + return self.mapping[key] + + def __iter__(self) -> Iterator[K]: + return iter(self._keys) + + def __len__(self) -> int: + return len(self._keys) + + class SortedKeysDict(MutableMapping[K, V]): """An wrapper for dictionary-like objects that always iterates over its items in sorted order by key but is otherwise equivalent to the underlying From 31d540f9d668fc5f8c1c92165f950c568778db01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mesejo-Le=C3=B3n?= Date: Wed, 6 Jan 2021 00:45:02 +0100 Subject: [PATCH 04/51] Closes #4647 DataArray transpose inconsistent with Dataset Ellipsis usage (#4767) - Add missing_dims parameter to transpose to mimic isel behavior - Add missing_dims to infix_dims to make function consistent across different methods. Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/internals.rst | 2 +- doc/plotting.rst | 2 +- doc/whats-new.rst | 2 ++ xarray/core/dataarray.py | 15 ++++++-- xarray/core/utils.py | 64 +++++++++++++++++++++++++++++----- xarray/tests/test_dataarray.py | 23 +++++++----- xarray/tests/test_dataset.py | 4 +-- xarray/tests/test_variable.py | 4 +-- 8 files changed, 90 insertions(+), 26 deletions(-) diff --git a/doc/internals.rst b/doc/internals.rst index 60d32128c60..177cc60ac0a 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -230,4 +230,4 @@ re-open it directly with Zarr: zgroup = zarr.open("rasm.zarr") print(zgroup.tree()) - dict(zgroup["Tair"].attrs) + dict(zgroup["Tair"].attrs) \ No newline at end of file diff --git a/doc/plotting.rst b/doc/plotting.rst index 3699f794ae8..dde48d47569 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -955,4 +955,4 @@ One can also make line plots with multidimensional coordinates. In this case, `` f, ax = plt.subplots(2, 1) da.plot.line(x="lon", hue="y", ax=ax[0]) @savefig plotting_example_2d_hue_xy.png - da.plot.line(x="lon", hue="x", ax=ax[1]) + da.plot.line(x="lon", hue="x", ax=ax[1]) \ No newline at end of file diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d4579cb3631..0d839bc678b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -55,6 +55,7 @@ Bug fixes - Fix a crash in orthogonal indexing on geographic coordinates with ``engine='cfgrib'`` (:issue:`4733` :pull:`4737`). By `Alessandro Amici `_ - Limit number of data rows when printing large datasets. (:issue:`4736`, :pull:`4750`). By `Jimmy Westling `_. +- Add ``missing_dims`` parameter to transpose (:issue:`4647`, :pull:`4767`). By `Daniel Mesejo `_. Documentation ~~~~~~~~~~~~~ @@ -76,6 +77,7 @@ Internal Changes - Run the tests in parallel using pytest-xdist (:pull:`4694`). By `Justus Magin `_ and `Mathias Hauser `_. + - Replace all usages of ``assert x.identical(y)`` with ``assert_identical(x, y)`` for clearer error messages. (:pull:`4752`); diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b76c89fa56a..b3a545dec73 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2120,7 +2120,12 @@ def to_unstacked_dataset(self, dim, level=0): # unstacked dataset return Dataset(data_dict) - def transpose(self, *dims: Hashable, transpose_coords: bool = True) -> "DataArray": + def transpose( + self, + *dims: Hashable, + transpose_coords: bool = True, + missing_dims: str = "raise", + ) -> "DataArray": """Return a new DataArray object with transposed dimensions. 
Parameters @@ -2130,6 +2135,12 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = True) -> "DataArra dimensions to this order. transpose_coords : bool, default: True If True, also transpose the coordinates of this DataArray. + missing_dims : {"raise", "warn", "ignore"}, default: "raise" + What to do if dimensions that should be selected from are not present in the + DataArray: + - "raise": raise an exception + - "warning": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions Returns ------- @@ -2148,7 +2159,7 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = True) -> "DataArra Dataset.transpose """ if dims: - dims = tuple(utils.infix_dims(dims, self.dims)) + dims = tuple(utils.infix_dims(dims, self.dims, missing_dims)) variable = self.variable.transpose(*dims) if transpose_coords: coords: Dict[Hashable, Variable] = {} diff --git a/xarray/core/utils.py b/xarray/core/utils.py index bbd80b97832..c0e2635d084 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -744,28 +744,32 @@ def __len__(self) -> int: return len(self._data) - num_hidden -def infix_dims(dims_supplied: Collection, dims_all: Collection) -> Iterator: +def infix_dims( + dims_supplied: Collection, dims_all: Collection, missing_dims: str = "raise" +) -> Iterator: """ - Resolves a supplied list containing an ellispsis representing other items, to + Resolves a supplied list containing an ellipsis representing other items, to a generator with the 'realized' list of all items """ if ... in dims_supplied: if len(set(dims_all)) != len(dims_all): raise ValueError("Cannot use ellipsis with repeated dims") - if len([d for d in dims_supplied if d == ...]) > 1: + if list(dims_supplied).count(...) > 1: raise ValueError("More than one ellipsis supplied") other_dims = [d for d in dims_all if d not in dims_supplied] - for d in dims_supplied: - if d == ...: + existing_dims = drop_missing_dims(dims_supplied, dims_all, missing_dims) + for d in existing_dims: + if d is ...: yield from other_dims else: yield d else: - if set(dims_supplied) ^ set(dims_all): + existing_dims = drop_missing_dims(dims_supplied, dims_all, missing_dims) + if set(existing_dims) ^ set(dims_all): raise ValueError( f"{dims_supplied} must be a permuted list of {dims_all}, unless `...` is included" ) - yield from dims_supplied + yield from existing_dims def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: @@ -805,7 +809,7 @@ def drop_dims_from_indexers( invalid = indexers.keys() - set(dims) if invalid: raise ValueError( - f"dimensions {invalid} do not exist. Expected one or more of {dims}" + f"Dimensions {invalid} do not exist. Expected one or more of {dims}" ) return indexers @@ -818,7 +822,7 @@ def drop_dims_from_indexers( invalid = indexers.keys() - set(dims) if invalid: warnings.warn( - f"dimensions {invalid} do not exist. Expected one or more of {dims}" + f"Dimensions {invalid} do not exist. Expected one or more of {dims}" ) for key in invalid: indexers.pop(key) @@ -834,6 +838,48 @@ def drop_dims_from_indexers( ) +def drop_missing_dims( + supplied_dims: Collection, dims: Collection, missing_dims: str +) -> Collection: + """Depending on the setting of missing_dims, drop any dimensions from supplied_dims that + are not present in dims. + + Parameters + ---------- + supplied_dims : dict + dims : sequence + missing_dims : {"raise", "warn", "ignore"} + """ + + if missing_dims == "raise": + supplied_dims_set = set(val for val in supplied_dims if val is not ...) 
+ invalid = supplied_dims_set - set(dims) + if invalid: + raise ValueError( + f"Dimensions {invalid} do not exist. Expected one or more of {dims}" + ) + + return supplied_dims + + elif missing_dims == "warn": + + invalid = set(supplied_dims) - set(dims) + if invalid: + warnings.warn( + f"Dimensions {invalid} do not exist. Expected one or more of {dims}" + ) + + return [val for val in supplied_dims if val in dims or val is ...] + + elif missing_dims == "ignore": + return [val for val in supplied_dims if val in dims or val is ...] + + else: + raise ValueError( + f"Unrecognised option {missing_dims} for missing_dims argument" + ) + + class UncachedAccessor: """Acts like a property, but on both classes and class instances diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 204ff706a5b..8215a9ddaac 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -797,13 +797,13 @@ def test_isel(self): assert_identical(self.dv[:3, :5], self.dv.isel(x=slice(3), y=slice(5))) with raises_regex( ValueError, - r"dimensions {'not_a_dim'} do not exist. Expected " + r"Dimensions {'not_a_dim'} do not exist. Expected " r"one or more of \('x', 'y'\)", ): self.dv.isel(not_a_dim=0) with pytest.warns( UserWarning, - match=r"dimensions {'not_a_dim'} do not exist. " + match=r"Dimensions {'not_a_dim'} do not exist. " r"Expected one or more of \('x', 'y'\)", ): self.dv.isel(not_a_dim=0, missing_dims="warn") @@ -2231,9 +2231,21 @@ def test_transpose(self): actual = da.transpose("z", ..., "x", transpose_coords=True) assert_equal(expected, actual) + # same as previous but with a missing dimension + actual = da.transpose( + "z", "y", "x", "not_a_dim", transpose_coords=True, missing_dims="ignore" + ) + assert_equal(expected, actual) + with pytest.raises(ValueError): da.transpose("x", "y") + with pytest.raises(ValueError): + da.transpose("not_a_dim", "z", "x", ...) 
+ + with pytest.warns(UserWarning): + da.transpose("not_a_dim", "y", "x", ..., missing_dims="warn") + def test_squeeze(self): assert_equal(self.dv.variable.squeeze(), self.dv.squeeze().variable) @@ -6227,7 +6239,6 @@ def da_dask(seed=123): @pytest.mark.parametrize("da", ("repeating_ints",), indirect=True) def test_isin(da): - expected = DataArray( np.asarray([[0, 0, 0], [1, 0, 0]]), dims=list("yx"), @@ -6277,7 +6288,6 @@ def test_coarsen_keep_attrs(): @pytest.mark.parametrize("da", (1, 2), indirect=True) def test_rolling_iter(da): - rolling_obj = da.rolling(time=7) rolling_obj_mean = rolling_obj.mean() @@ -6452,7 +6462,6 @@ def test_rolling_construct(center, window): @pytest.mark.parametrize("window", (1, 2, 3, 4)) @pytest.mark.parametrize("name", ("sum", "mean", "std", "max")) def test_rolling_reduce(da, center, min_periods, window, name): - if min_periods is not None and window < min_periods: min_periods = window @@ -6491,7 +6500,6 @@ def test_rolling_reduce_nonnumeric(center, min_periods, window, name): def test_rolling_count_correct(): - da = DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time") kwargs = [ @@ -6579,7 +6587,6 @@ def test_ndrolling_construct(center, fill_value): ], ) def test_rolling_keep_attrs(funcname, argument): - attrs_da = {"da_attr": "test"} data = np.linspace(10, 15, 100) @@ -6623,7 +6630,6 @@ def test_rolling_keep_attrs(funcname, argument): def test_rolling_keep_attrs_deprecated(): - attrs_da = {"da_attr": "test"} data = np.linspace(10, 15, 100) @@ -6957,7 +6963,6 @@ def test_rolling_exp(da, dim, window_type, window): @requires_numbagg def test_rolling_exp_keep_attrs(da): - attrs = {"attrs": "da"} da.attrs = attrs diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b007aa268ae..204f08c2eec 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1024,14 +1024,14 @@ def test_isel(self): data.isel(not_a_dim=slice(0, 2)) with raises_regex( ValueError, - r"dimensions {'not_a_dim'} do not exist. Expected " + r"Dimensions {'not_a_dim'} do not exist. Expected " r"one or more of " r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*", ): data.isel(not_a_dim=slice(0, 2)) with pytest.warns( UserWarning, - match=r"dimensions {'not_a_dim'} do not exist. " + match=r"Dimensions {'not_a_dim'} do not exist. " r"Expected one or more of " r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*", ): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 91eeb0448bc..e751179b84a 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1270,13 +1270,13 @@ def test_isel(self): assert_identical(v.isel(time=[]), v[[]]) with raises_regex( ValueError, - r"dimensions {'not_a_dim'} do not exist. Expected one or more of " + r"Dimensions {'not_a_dim'} do not exist. Expected one or more of " r"\('time', 'x'\)", ): v.isel(not_a_dim=0) with pytest.warns( UserWarning, - match=r"dimensions {'not_a_dim'} do not exist. Expected one or more of " + match=r"Dimensions {'not_a_dim'} do not exist. 
Expected one or more of " r"\('time', 'x'\)", ): v.isel(not_a_dim=0, missing_dims="warn") From bc49e277a45212f7ef33165d7fa1247c219ce276 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 6 Jan 2021 15:17:10 -0700 Subject: [PATCH 05/51] Trigger upstream CI on cron schedule (by default) (#4729) Co-authored-by: keewis Co-authored-by: Keewis --- .github/PULL_REQUEST_TEMPLATE.md | 3 +++ .github/workflows/upstream-dev-ci.yaml | 22 +++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 91e55284e34..03e046512fb 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -5,3 +5,6 @@ - [ ] Passes `isort . && black . && mypy . && flake8` - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` - [ ] New functions/methods are listed in `api.rst` + + +By default, the upstream dev CI is disabled on pull request and push events. You can override this behavior per commit by adding a `[test-upstream]` tag to the first line of the commit message. diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 629b1c27b05..cb5e1272d91 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -11,10 +11,30 @@ on: workflow_dispatch: # allows you to trigger the workflow run manually jobs: + detect-ci-trigger: + name: detect upstream-dev ci trigger + runs-on: ubuntu-latest + if: github.event_name == 'push' || github.event_name == 'pull_request' + outputs: + triggered: ${{ steps.detect-trigger.outputs.trigger-found }} + steps: + - uses: actions/checkout@v2 + - uses: ./.github/actions/detect-ci-trigger + id: detect-trigger + with: + keyword: "[test-upstream]" + upstream-dev: name: upstream-dev runs-on: ubuntu-latest - if: github.repository == 'pydata/xarray' + needs: detect-ci-trigger + if: | + always() + && github.repository == 'pydata/xarray' + && ( + (github.event_name == 'scheduled' || github.event_name == 'workflow_dispatch') + || needs.detect-ci-trigger.outputs.triggered == 'true' + ) defaults: run: shell: bash -l {0} From 477e75af4cd2c5fa1f88820a5c6c6ba0501c5a01 Mon Sep 17 00:00:00 2001 From: keewis Date: Thu, 7 Jan 2021 19:15:17 +0100 Subject: [PATCH 06/51] drop support for python 3.6 (#4720) * stop to testing on python 3.6 * update the trove classifiers * update the required minimum python version * update the minimum python version in the install docs * update the pipelines ci matrix * update the minimum versions check ci * update whats-new.rst * update the PR number * upgrade setuptools to 40.4 * update the required setuptools version in the install instructions * update the policy override for setuptools * update whats-new.rst * update the minimum required version of setuptools in setup.cfg * update the remaining mentions of the setuptools version --- azure-pipelines.yml | 18 +++---- ci/min_deps_check.py | 6 ++- ci/requirements/py36.yml | 49 ------------------- ...bare-minimum.yml => py37-bare-minimum.yml} | 4 +- ...min-all-deps.yml => py37-min-all-deps.yml} | 8 +-- ...{py36-min-nep18.yml => py37-min-nep18.yml} | 4 +- doc/installing.rst | 6 +-- doc/whats-new.rst | 12 +++++ requirements.txt | 2 +- setup.cfg | 8 +-- 10 files changed, 40 insertions(+), 77 deletions(-) delete mode 100644 ci/requirements/py36.yml rename ci/requirements/{py36-bare-minimum.yml => py37-bare-minimum.yml} (84%) rename ci/requirements/{py36-min-all-deps.yml => 
py37-min-all-deps.yml} (90%) rename ci/requirements/{py36-min-nep18.yml => py37-min-nep18.yml} (91%) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 2cbf55f52ca..f9bfa032a98 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -8,14 +8,12 @@ jobs: - job: Linux strategy: matrix: - py36-bare-minimum: - conda_env: py36-bare-minimum - py36-min-all-deps: - conda_env: py36-min-all-deps - py36-min-nep18: - conda_env: py36-min-nep18 - py36: - conda_env: py36 + py37-bare-minimum: + conda_env: py37-bare-minimum + py37-min-all-deps: + conda_env: py37-min-all-deps + py37-min-nep18: + conda_env: py37-min-nep18 py37: conda_env: py37 py38: @@ -121,6 +119,6 @@ jobs: - template: ci/azure/add-conda-to-path.yml - bash: | conda install -y pyyaml - python ci/min_deps_check.py ci/requirements/py36-bare-minimum.yml - python ci/min_deps_check.py ci/requirements/py36-min-all-deps.yml + python ci/min_deps_check.py ci/requirements/py37-bare-minimum.yml + python ci/min_deps_check.py ci/requirements/py37-min-all-deps.yml displayName: minimum versions policy diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py index 95c6ff22aa2..5f94c81ef8a 100755 --- a/ci/min_deps_check.py +++ b/ci/min_deps_check.py @@ -35,9 +35,11 @@ # setuptools-scm doesn't work with setuptools < 36.7 (Nov 2017). # The conda metadata is malformed for setuptools < 38.4 (Jan 2018) # (it's missing a timestamp which prevents this tool from working). + # setuptools < 40.4 (Sep 2018) from conda-forge cannot be installed into a py37 + # environment # TODO remove this special case and the matching note in installing.rst - # after July 2021. - "setuptools": (38, 4), + # after March 2022. + "setuptools": (40, 4), } has_errors = False diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml deleted file mode 100644 index 97fe92e2271..00000000000 --- a/ci/requirements/py36.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: xarray-tests -channels: - - conda-forge - - nodefaults -dependencies: - - python=3.6 - - black - - boto3 - - bottleneck - - cartopy - - cdms2 - - cfgrib - - cftime - - coveralls - - dask - - distributed - - flake8 - - h5netcdf - - h5py=2 - - hdf5 - - hypothesis - - iris - - isort - - lxml # Optional dep of pydap - - matplotlib-base - - mypy=0.790 # Must match .pre-commit-config.yaml - - nc-time-axis - - netcdf4 - - numba - - numpy - - pandas - - pint - - pip - - pseudonetcdf - - pydap - # - pynio: not compatible with netCDF4>1.5.3; only tested in py36-bare-minimum - - pytest - - pytest-cov - - pytest-env - - pytest-xdist - - rasterio - - scipy - - seaborn - - setuptools - - sparse - - toolz - - zarr - - pip: - - numbagg diff --git a/ci/requirements/py36-bare-minimum.yml b/ci/requirements/py37-bare-minimum.yml similarity index 84% rename from ci/requirements/py36-bare-minimum.yml rename to ci/requirements/py37-bare-minimum.yml index f9255d719de..fbeb87032b7 100644 --- a/ci/requirements/py36-bare-minimum.yml +++ b/ci/requirements/py37-bare-minimum.yml @@ -3,7 +3,7 @@ channels: - conda-forge - nodefaults dependencies: - - python=3.6 + - python=3.7 - coveralls - pip - pytest @@ -12,4 +12,4 @@ dependencies: - pytest-xdist - numpy=1.15 - pandas=0.25 - - setuptools=38.4 + - setuptools=40.4 diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py37-min-all-deps.yml similarity index 90% rename from ci/requirements/py36-min-all-deps.yml rename to ci/requirements/py37-min-all-deps.yml index bb25ffcdc76..feef86ddf5c 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py37-min-all-deps.yml @@ 
-7,7 +7,7 @@ dependencies: # Run ci/min_deps_check.py to verify that this file respects the policy. # When upgrading python, numpy, or pandas, must also change # doc/installing.rst and setup.py. - - python=3.6 + - python=3.7 - black - boto3=1.9 - bottleneck=1.2 @@ -33,7 +33,7 @@ dependencies: - numba=0.46 - numpy=1.15 - pandas=0.25 - # - pint # See py36-min-nep18.yml + # - pint # See py37-min-nep18.yml - pip - pseudonetcdf=3.0 - pydap=3.2 @@ -45,8 +45,8 @@ dependencies: - rasterio=1.0 - scipy=1.3 - seaborn=0.9 - - setuptools=38.4 - # - sparse # See py36-min-nep18.yml + - setuptools=40.4 + # - sparse # See py37-min-nep18.yml - toolz=0.10 - zarr=2.3 - pip: diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py37-min-nep18.yml similarity index 91% rename from ci/requirements/py36-min-nep18.yml rename to ci/requirements/py37-min-nep18.yml index 3171c90992d..aea86261a0e 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py37-min-nep18.yml @@ -5,7 +5,7 @@ channels: dependencies: # Optional dependencies that require NEP18, such as sparse and pint, # require drastically newer packages than everything else - - python=3.6 + - python=3.7 - coveralls - dask=2.9 - distributed=2.9 @@ -18,5 +18,5 @@ dependencies: - pytest-env - pytest-xdist - scipy=1.3 - - setuptools=38.4 + - setuptools=40.4 - sparse=0.8 diff --git a/doc/installing.rst b/doc/installing.rst index 45999b254ad..d5783e557e1 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -6,8 +6,8 @@ Installation Required dependencies --------------------- -- Python (3.6 or later) -- setuptools (38.4 or later) +- Python (3.7 or later) +- setuptools (40.4 or later) - `numpy `__ (1.15 or later) - `pandas `__ (0.25 or later) @@ -100,7 +100,7 @@ dependencies: - **Python:** 42 months (`NEP-29 `_) -- **setuptools:** 42 months (but no older than 38.4) +- **setuptools:** 42 months (but no older than 40.4) - **numpy:** 24 months (`NEP-29 `_) - **dask and dask.distributed:** 12 months (but no older than 2.9) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0d839bc678b..70b3c407b36 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,18 @@ v0.16.3 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ +- xarray no longer supports python 3.6 + + The minimum versions of some other dependencies were changed: + ============ ====== ==== + Package Old New + ============ ====== ==== + Python 3.6 3.7 + setuptools 38.4 40.4 + ============ ====== ==== + + (:issue:`4688`, :pull:`4720`) + By `Justus Magin `_. - As a result of :pull:`4684` the default units encoding for datetime-like values (``np.datetime64[ns]`` or ``cftime.datetime``) will now always be set such that ``int64`` values can be used. 
In the past, no units diff --git a/requirements.txt b/requirements.txt index 3cbeb368c09..23eff8f07cb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,4 @@ numpy >= 1.15 pandas >= 0.25 -setuptools >= 38.4 +setuptools >= 40.4 diff --git a/setup.cfg b/setup.cfg index b92db683779..a695191bf02 100644 --- a/setup.cfg +++ b/setup.cfg @@ -64,21 +64,21 @@ classifiers = Intended Audience :: Science/Research Programming Language :: Python Programming Language :: Python :: 3 - Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 Topic :: Scientific/Engineering [options] packages = find: zip_safe = False # https://mypy.readthedocs.io/en/latest/installed_packages.html include_package_data = True -python_requires = >=3.6 +python_requires = >=3.7 install_requires = numpy >= 1.15 pandas >= 0.25 - setuptools >= 38.4 # For pkg_resources + setuptools >= 40.4 # For pkg_resources setup_requires = - setuptools >= 38.4 + setuptools >= 40.4 setuptools_scm From 540b7ef4376cf1bc1a450ce2764f4546a0147e70 Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Thu, 7 Jan 2021 14:47:13 -0500 Subject: [PATCH 07/51] DOC: update hyperlink for xskillscore (#4778) --- doc/related-projects.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 7be187c92d5..456cb64197f 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -73,7 +73,7 @@ Extend xarray capabilities - `xrft `_: Fourier transforms for xarray data. - `xr-scipy `_: A lightweight scipy wrapper for xarray. - `X-regression `_: Multiple linear regression from Statsmodels library coupled with Xarray library. -- `xskillscore `_: Metrics for verifying forecasts. +- `xskillscore `_: Metrics for verifying forecasts. - `xyzpy `_: Easily generate high dimensional data, including parallelization. 
Visualization From 01a0fafdb385872c52473262bb980f933ed570d6 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 7 Jan 2021 22:00:26 +0100 Subject: [PATCH 08/51] CI: ignore some warnings (#4773) * CI: ignore some warnings * remove test that is no longer necessary --- xarray/tests/test_backends_file_manager.py | 5 ----- xarray/tests/test_computation.py | 4 ++-- xarray/tests/test_variable.py | 3 +++ 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_backends_file_manager.py b/xarray/tests/test_backends_file_manager.py index eb8236b8613..16f059c7bad 100644 --- a/xarray/tests/test_backends_file_manager.py +++ b/xarray/tests/test_backends_file_manager.py @@ -202,11 +202,6 @@ def test_file_manager_read(tmpdir, file_cache): manager.close() -def test_file_manager_invalid_kwargs(): - with pytest.raises(TypeError): - CachingFileManager(open, "dummy", mode="w", invalid=True) - - def test_file_manager_acquire_context(tmpdir, file_cache): path = str(tmpdir.join("testing.txt")) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index a1c8e75b6bc..4890536a5d7 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -924,11 +924,11 @@ def test_vectorize_dask_dtype_meta(): vectorize=True, dask="parallelized", output_dtypes=[int], - dask_gufunc_kwargs=dict(meta=np.ndarray((0, 0), dtype=np.float)), + dask_gufunc_kwargs=dict(meta=np.ndarray((0, 0), dtype=float)), ) assert_identical(expected, actual) - assert np.float == actual.dtype + assert float == actual.dtype def pandas_median_add(x, y): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index e751179b84a..41bf24c7f88 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -835,6 +835,9 @@ def test_getitem_error(self): ], ) @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + @pytest.mark.filterwarnings( + r"ignore:dask.array.pad.+? converts integers to floats." 
+ ) def test_pad(self, mode, xr_arg, np_arg): data = np.arange(4 * 3 * 2).reshape(4, 3, 2) v = self.cls(["x", "y", "z"], data) From 1ae977867b59d5075df4b21f3ce61e3221e7cb87 Mon Sep 17 00:00:00 2001 From: rhkleijn <32801740+rhkleijn@users.noreply.github.com> Date: Fri, 8 Jan 2021 00:28:47 +0100 Subject: [PATCH 09/51] improve typing of OrderedSet (#4774) * improve typing of OrderedSet * change Any to Hashable --- xarray/core/missing.py | 2 +- xarray/core/utils.py | 16 ++++++---------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index f608468ed9f..a2cdae55cb1 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -651,7 +651,7 @@ def interp(var, indexes_coords, method, **kwargs): out_dims.update(indexes_coords[d][1].dims) else: out_dims.add(d) - result = result.transpose(*tuple(out_dims)) + result = result.transpose(*out_dims) return result diff --git a/xarray/core/utils.py b/xarray/core/utils.py index c0e2635d084..1eff91e3608 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -9,7 +9,6 @@ import warnings from enum import Enum from typing import ( - AbstractSet, Any, Callable, Collection, @@ -509,17 +508,14 @@ class OrderedSet(MutableSet[T]): __slots__ = ("_d",) - def __init__(self, values: AbstractSet[T] = None): + def __init__(self, values: Iterable[T] = None): self._d = {} if values is not None: - # Disable type checking - both mypy and PyCharm believe that - # we're altering the type of self in place (see signature of - # MutableSet.__ior__) - self |= values # type: ignore + self.update(values) # Required methods for MutableSet - def __contains__(self, value: object) -> bool: + def __contains__(self, value: Hashable) -> bool: return value in self._d def __iter__(self) -> Iterator[T]: @@ -536,9 +532,9 @@ def discard(self, value: T) -> None: # Additional methods - def update(self, values: AbstractSet[T]) -> None: - # See comment on __init__ re. 
type checking - self |= values # type: ignore + def update(self, values: Iterable[T]) -> None: + for v in values: + self._d[v] = None def __repr__(self) -> str: return "{}({!r})".format(type(self).__name__, list(self)) From 5ddb8d58b3639de7ce748b64ab60f709c58858f0 Mon Sep 17 00:00:00 2001 From: alexamici Date: Fri, 8 Jan 2021 10:33:19 +0100 Subject: [PATCH 10/51] Remove special case in guess_engines (#4777) * Remove special case in guess_engines * Sync the error messages between APIv1 and APIv2 --- xarray/backends/plugins.py | 5 ----- xarray/core/utils.py | 1 + xarray/tests/test_backends.py | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 6e9045c1c7f..d5799a78f91 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -104,11 +104,6 @@ def list_engines(): def guess_engine(store_spec): engines = list_engines() - # use the pre-defined selection order for netCDF files - for engine in ["netcdf4", "h5netcdf", "scipy"]: - if engine in engines and engines[engine].guess_can_open(store_spec): - return engine - for engine, backend in engines.items(): try: if backend.guess_can_open and backend.guess_can_open(store_spec): diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 1eff91e3608..e76db3c8501 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -636,6 +636,7 @@ def read_magic_number(filename_or_obj, count=8): elif isinstance(filename_or_obj, io.IOBase): if filename_or_obj.tell() != 0: raise ValueError( + "cannot guess the engine, " "file-like object read/write pointer not at the start of the file, " "please close and reopen, or use a context manager" ) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 2d20f7f63b3..3750c0715ae 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2661,7 +2661,7 @@ def test_open_fileobj(self): open_dataset(f, engine="scipy") f.seek(8) - with raises_regex(ValueError, "read/write pointer not at the start"): + with raises_regex(ValueError, "cannot guess the engine"): open_dataset(f) From 8ff8113cb164e9f7eef648e05358d371c7d69f75 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 8 Jan 2021 16:55:38 +0100 Subject: [PATCH 11/51] Speed up missing._get_interpolator (#4776) * Speed up _get_interpolator Importing scipy.interpolate is slow and should only be done when necessary. Test case from 200ms to 6ms. * typos * retain info from the except. 
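A minimal standalone sketch of the deferred-import pattern this patch adopts (illustrative names, not the exact xarray helper): importing scipy.interpolate is comparatively slow, so the import is only performed once a scipy-backed method is actually requested, and the numpy-only fast paths never pay for it.

def lazy_scipy_interpolant(name, method):
    # Defer the slow `from scipy import interpolate` until a scipy-backed
    # interpolator is actually requested; methods like "linear" and "nearest"
    # are handled with numpy alone and never trigger this import.
    try:
        from scipy import interpolate
    except ImportError as e:
        raise ImportError(f"Interpolation with method {method} requires scipy.") from e
    return getattr(interpolate, name)

# scipy.interpolate is imported only at this point, not at module load time:
PchipInterpolator = lazy_scipy_interpolant("PchipInterpolator", "pchip")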
--- xarray/core/missing.py | 46 +++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index a2cdae55cb1..477c7f40547 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -437,6 +437,16 @@ def bfill(arr, dim=None, limit=None): ).transpose(*arr.dims) +def _import_interpolant(interpolant, method): + """Import interpolant from scipy.interpolate.""" + try: + from scipy import interpolate + + return getattr(interpolate, interpolant) + except ImportError as e: + raise ImportError(f"Interpolation with method {method} requires scipy.") from e + + def _get_interpolator(method, vectorizeable_only=False, **kwargs): """helper function to select the appropriate interpolator class @@ -459,12 +469,6 @@ def _get_interpolator(method, vectorizeable_only=False, **kwargs): "akima", ] - has_scipy = True - try: - from scipy import interpolate - except ImportError: - has_scipy = False - # prioritize scipy.interpolate if ( method == "linear" @@ -475,32 +479,29 @@ def _get_interpolator(method, vectorizeable_only=False, **kwargs): interp_class = NumpyInterpolator elif method in valid_methods: - if not has_scipy: - raise ImportError("Interpolation with method `%s` requires scipy" % method) - if method in interp1d_methods: kwargs.update(method=method) interp_class = ScipyInterpolator elif vectorizeable_only: raise ValueError( - "{} is not a vectorizeable interpolator. " - "Available methods are {}".format(method, interp1d_methods) + f"{method} is not a vectorizeable interpolator. " + f"Available methods are {interp1d_methods}" ) elif method == "barycentric": - interp_class = interpolate.BarycentricInterpolator + interp_class = _import_interpolant("BarycentricInterpolator", method) elif method == "krog": - interp_class = interpolate.KroghInterpolator + interp_class = _import_interpolant("KroghInterpolator", method) elif method == "pchip": - interp_class = interpolate.PchipInterpolator + interp_class = _import_interpolant("PchipInterpolator", method) elif method == "spline": kwargs.update(method=method) interp_class = SplineInterpolator elif method == "akima": - interp_class = interpolate.Akima1DInterpolator + interp_class = _import_interpolant("Akima1DInterpolator", method) else: - raise ValueError("%s is not a valid scipy interpolator" % method) + raise ValueError(f"{method} is not a valid scipy interpolator") else: - raise ValueError("%s is not a valid interpolator" % method) + raise ValueError(f"{method} is not a valid interpolator") return interp_class, kwargs @@ -512,18 +513,13 @@ def _get_interpolator_nd(method, **kwargs): """ valid_methods = ["linear", "nearest"] - try: - from scipy import interpolate - except ImportError: - raise ImportError("Interpolation with method `%s` requires scipy" % method) - if method in valid_methods: kwargs.update(method=method) - interp_class = interpolate.interpn + interp_class = _import_interpolant("interpn", method) else: raise ValueError( - "%s is not a valid interpolator for interpolating " - "over multiple dimensions." % method + f"{method} is not a valid interpolator for interpolating " + "over multiple dimensions." 
) return interp_class, kwargs From db6f4be2ca6aa57c49b4be29c7352763cf3f3f78 Mon Sep 17 00:00:00 2001 From: keewis Date: Mon, 11 Jan 2021 12:45:22 +0100 Subject: [PATCH 12/51] use conda.api instead of parallel calls to the conda binary (#4775) * use conda.api instead of parallel calls to the conda binary * don't select releases without release dates * update the format to be wide enough to fit matplotlib-base * don't verify the version using the filename * filter invalid / missing dates before retrieving the metadata --- ci/min_deps_check.py | 64 +++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py index 5f94c81ef8a..5bb4c155410 100755 --- a/ci/min_deps_check.py +++ b/ci/min_deps_check.py @@ -2,14 +2,15 @@ publication date. Compare it against requirements/py36-min-all-deps.yml to verify the policy on obsolete dependencies is being followed. Print a pretty report :) """ -import subprocess +import itertools import sys -from concurrent.futures import ThreadPoolExecutor from datetime import datetime, timedelta from typing import Dict, Iterator, Optional, Tuple +import conda.api import yaml +CHANNELS = ["conda-forge", "defaults"] IGNORE_DEPS = { "black", "coveralls", @@ -91,30 +92,23 @@ def query_conda(pkg: str) -> Dict[Tuple[int, int], datetime]: Return map of {(major version, minor version): publication date} """ - stdout = subprocess.check_output( - ["conda", "search", pkg, "--info", "-c", "defaults", "-c", "conda-forge"] - ) - out = {} # type: Dict[Tuple[int, int], datetime] - major = None - minor = None - - for row in stdout.decode("utf-8").splitlines(): - label, _, value = row.partition(":") - label = label.strip() - if label == "file name": - value = value.strip()[len(pkg) :] - smajor, sminor = value.split("-")[1].split(".")[:2] - major = int(smajor) - minor = int(sminor) - if label == "timestamp": - assert major is not None - assert minor is not None - ts = datetime.strptime(value.split()[0].strip(), "%Y-%m-%d") - - if (major, minor) in out: - out[major, minor] = min(out[major, minor], ts) - else: - out[major, minor] = ts + + def metadata(entry): + version = entry.version + + time = datetime.fromtimestamp(entry.timestamp) + major, minor = map(int, version.split(".")[:2]) + + return (major, minor), time + + raw_data = conda.api.SubdirData.query_all(pkg, channels=CHANNELS) + data = sorted(metadata(entry) for entry in raw_data if entry.timestamp != 0) + + release_dates = { + version: [time for _, time in group if time is not None] + for version, group in itertools.groupby(data, key=lambda x: x[0]) + } + out = {version: min(dates) for version, dates in release_dates.items() if dates} # Hardcoded fix to work around incorrect dates in conda if pkg == "python": @@ -202,16 +196,14 @@ def fmt_version(major: int, minor: int, patch: int = None) -> str: def main() -> None: fname = sys.argv[1] - with ThreadPoolExecutor(8) as ex: - futures = [ - ex.submit(process_pkg, pkg, major, minor, patch) - for pkg, major, minor, patch in parse_requirements(fname) - ] - rows = [f.result() for f in futures] - - print("Package Required Policy Status") - print("------------- -------------------- -------------------- ------") - fmt = "{:13} {:7} ({:10}) {:7} ({:10}) {}" + rows = [ + process_pkg(pkg, major, minor, patch) + for pkg, major, minor, patch in parse_requirements(fname) + ] + + print("Package Required Policy Status") + print("----------------- -------------------- -------------------- ------") + fmt = "{:17} 
{:7} ({:10}) {:7} ({:10}) {}" for row in rows: print(fmt.format(*row)) From d241aa4a4ae11e0278881f16f38ca0be17b4298c Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 11 Jan 2021 05:11:04 -0700 Subject: [PATCH 13/51] Migrate CI from azure pipelines to GitHub Actions (#4730) * Run isort, black, mypy, flake8 checks via pre-commit * Fix line ending * Fix end of line * Fix mypy pre-commit hook. Thanks @keewis * Add main CI * Add additional CI * Fetch all history for all branches and tags * Add windows environment * Import xarray * Add doctests workflow * Add minimum version policy workflow * Simplify if logic * Add flaky and backend-api-v2 settings * Fix if elif else statements * Fix typo * Remove azure pipelines configurations * Fix environment file name * Upload code coverage for additional CI * Cache conda pkgs_dir * Fix cache key * Remove unnecessary cache number variable * Use `runner.os` instead of `matrix.os` * Use RUNNER_OS env variable * Disable name for the time being * Another attempt at setting name * `runner.os` doesn't work. Use `matrix.os` instead * Update env creation guidelines * Add `pre-commit run --all-files` check * Update blackdoc version * Add new pre-commit hooks * Add some of the out-of-the box hooks * Formatting only * Remove bad change * Remove isort and add pre-commit * Fix bad merge * Enable `cfgrib` on windows for the time being * Disable cfgrib on windows * Remove coveralls * Formatting only * Remove remaining reference to azure pipelines * Remove py 3.6 from CI matrix * Use py37 * Remove all references to py36 env file * Add check for skip ci * rename job to `detect ci trigger` * [skip ci] Empty commit * [skip-ci] Test skip CI trigger * Update PR template * Fix typ * GH markdown doesn't like lists in * Remove the `-OO` flag for consistency --- .github/ISSUE_TEMPLATE/config.yml | 4 +- .github/PULL_REQUEST_TEMPLATE.md | 9 +- .github/stale.yml | 2 +- .github/workflows/ci-additional.yaml | 188 ++++++++++++++++++ .github/workflows/ci-pre-commit.yml | 16 ++ .github/workflows/ci.yaml | 102 ++++++++++ .github/workflows/upstream-dev-ci.yaml | 10 +- .pre-commit-config.yaml | 7 + README.rst | 4 +- azure-pipelines.yml | 124 ------------ ci/azure/add-conda-to-path.yml | 18 -- ci/azure/install.yml | 68 ------- ci/azure/unit-tests.yml | 39 ---- ci/min_deps_check.py | 4 +- ...37-windows.yml => environment-windows.yml} | 9 +- ci/requirements/{py38.yml => environment.yml} | 9 +- ci/requirements/py37.yml | 49 ----- ci/requirements/py38-all-but-dask.yml | 2 +- doc/_templates/autosummary/base.rst | 2 +- doc/contributing.rst | 43 ++-- doc/gallery/README.txt | 1 - doc/groupby.rst | 6 +- doc/howdoi.rst | 1 - doc/indexing.rst | 2 +- doc/installing.rst | 4 +- doc/internals.rst | 2 +- doc/interpolation.rst | 4 +- doc/io.rst | 4 +- doc/plotting.rst | 2 +- doc/quick-overview.rst | 2 +- doc/reshaping.rst | 4 +- doc/weather-climate.rst | 2 +- doc/whats-new.rst | 2 +- licenses/PYTHON_LICENSE | 2 +- xarray/tests/data/example.ict | 2 +- xarray/tests/test_plot.py | 2 +- 36 files changed, 369 insertions(+), 382 deletions(-) create mode 100644 .github/workflows/ci-additional.yaml create mode 100644 .github/workflows/ci-pre-commit.yml create mode 100644 .github/workflows/ci.yaml delete mode 100644 azure-pipelines.yml delete mode 100644 ci/azure/add-conda-to-path.yml delete mode 100644 ci/azure/install.yml delete mode 100644 ci/azure/unit-tests.yml rename ci/requirements/{py37-windows.yml => environment-windows.yml} (77%) rename ci/requirements/{py38.yml => environment.yml} (82%) delete 
mode 100644 ci/requirements/py37.yml diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 00c65e99767..0ad7e5f3e13 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -4,7 +4,5 @@ contact_links: url: https://github.com/pydata/xarray/discussions about: | Ask questions and discuss with other community members here. - If you have a question like "How do I concatenate a list of datasets?" then + If you have a question like "How do I concatenate a list of datasets?" then please include a self-contained reproducible example if possible. - - \ No newline at end of file diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 03e046512fb..74f3fe2430f 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,9 +2,14 @@ - [ ] Closes #xxxx - [ ] Tests added -- [ ] Passes `isort . && black . && mypy . && flake8` +- [ ] Passes `pre-commit run --all-files` - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` - [ ] New functions/methods are listed in `api.rst` -By default, the upstream dev CI is disabled on pull request and push events. You can override this behavior per commit by adding a `[test-upstream]` tag to the first line of the commit message. + +

+<details>
+<summary>Overriding CI behaviors</summary>
+By default, the upstream dev CI is disabled on pull request and push events. You can override this behavior per commit by adding a `[test-upstream]` tag to the first line of the commit message. For documentation-only commits, you can skip the CI per commit by adding a `[skip-ci]` tag to the first line of the commit message
+</details>
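These tags are read by the detect-ci-trigger composite action added earlier in this series: its script.sh extracts the subject line of the relevant commit with `git log -n 1 --pretty=format:%s` and runs a fixed-string grep for the keyword. A small illustrative Python equivalent of that check (a hypothetical helper, not part of the repository):

import subprocess

def ci_keyword_in_subject(keyword, ref="HEAD"):
    # Mirrors .github/actions/detect-ci-trigger/script.sh: look only at the
    # commit subject line, then do a literal (non-regex) substring match.
    subject = subprocess.run(
        ["git", "log", "-n", "1", "--pretty=format:%s", ref],
        capture_output=True,
        text=True,
        check=True,
    ).stdout
    return keyword in subject

# e.g. a commit titled "Fix docs typo [skip-ci]" skips the main test jobs, while
# "[test-upstream] check against upstream dask" enables the upstream-dev CI run.
print(ci_keyword_in_subject("[skip-ci]"))
print(ci_keyword_in_subject("[test-upstream]"))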
diff --git a/.github/stale.yml b/.github/stale.yml index f4835b5eeec..f4057844d01 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -56,4 +56,4 @@ limitPerRun: 1 # start with a small number # issues: # exemptLabels: -# - confirmed \ No newline at end of file +# - confirmed diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml new file mode 100644 index 00000000000..1e19f594853 --- /dev/null +++ b/.github/workflows/ci-additional.yaml @@ -0,0 +1,188 @@ +name: CI Additional +on: + push: + branches: + - "*" + pull_request: + branches: + - "*" + workflow_dispatch: # allows you to trigger manually + +jobs: + detect-ci-trigger: + name: detect ci trigger + runs-on: ubuntu-latest + if: github.event_name == 'push' || github.event_name == 'pull_request' + outputs: + triggered: ${{ steps.detect-trigger.outputs.trigger-found }} + steps: + - uses: actions/checkout@v2 + - uses: ./.github/actions/detect-ci-trigger + id: detect-trigger + with: + keyword: "[skip-ci]" + + test: + name: ${{ matrix.os }} ${{ matrix.env }} + runs-on: ${{ matrix.os }} + needs: detect-ci-trigger + if: needs.detect-ci-trigger.outputs.triggered == 'false' + defaults: + run: + shell: bash -l {0} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] + env: + [ + "py37-bare-minimum", + "py37-min-all-deps", + "py37-min-nep18", + "py38-all-but-dask", + "py38-backend-api-v2", + "py38-flaky", + ] + steps: + - name: Cancel previous runs + uses: styfle/cancel-workflow-action@0.6.0 + with: + access_token: ${{ github.token }} + - uses: actions/checkout@v2 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. + + - name: Set environment variables + run: | + if [[ ${{ matrix.env }} == "py38-backend-api-v2" ]] ; + then + echo "CONDA_ENV_FILE=ci/requirements/environment.yml" >> $GITHUB_ENV + echo "XARRAY_BACKEND_API=v2" >> $GITHUB_ENV + + elif [[ ${{ matrix.env }} == "py38-flaky" ]] ; + then + echo "CONDA_ENV_FILE=ci/requirements/environment.yml" >> $GITHUB_ENV + echo "PYTEST_EXTRA_FLAGS=--run-flaky --run-network-tests" >> $GITHUB_ENV + + else + echo "CONDA_ENV_FILE=ci/requirements/${{ matrix.env }}.yml" >> $GITHUB_ENV + fi + - name: Cache conda + uses: actions/cache@v2 + with: + path: ~/conda_pkgs_dir + key: + ${{ runner.os }}-conda-${{ matrix.env }}-${{ + hashFiles('ci/requirements/**.yml') }} + + - uses: conda-incubator/setup-miniconda@v2 + with: + channels: conda-forge + channel-priority: strict + mamba-version: "*" + activate-environment: xarray-tests + auto-update-conda: false + python-version: 3.8 + use-only-tar-bz2: true + + - name: Install conda dependencies + run: | + mamba env update -f $CONDA_ENV_FILE + + - name: Install xarray + run: | + python -m pip install --no-deps -e . 
+ + - name: Version info + run: | + conda info -a + conda list + python xarray/util/print_versions.py + - name: Import xarray + run: | + python -c "import xarray" + - name: Run tests + run: | + python -m pytest -n 4 \ + --cov=xarray \ + --cov-report=xml \ + $PYTEST_EXTRA_FLAGS + + - name: Upload code coverage to Codecov + uses: codecov/codecov-action@v1 + with: + file: ./coverage.xml + flags: unittests,${{ matrix.env }} + env_vars: RUNNER_OS + name: codecov-umbrella + fail_ci_if_error: false + doctest: + name: Doctests + runs-on: "ubuntu-latest" + needs: detect-ci-trigger + if: needs.detect-ci-trigger.outputs.triggered == 'false' + defaults: + run: + shell: bash -l {0} + + steps: + - name: Cancel previous runs + uses: styfle/cancel-workflow-action@0.6.0 + with: + access_token: ${{ github.token }} + - uses: actions/checkout@v2 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. + - uses: conda-incubator/setup-miniconda@v2 + with: + channels: conda-forge + channel-priority: strict + mamba-version: "*" + activate-environment: xarray-tests + auto-update-conda: false + python-version: "3.8" + + - name: Install conda dependencies + run: | + mamba env update -f ci/requirements/environment.yml + - name: Install xarray + run: | + python -m pip install --no-deps -e . + - name: Version info + run: | + conda info -a + conda list + python xarray/util/print_versions.py + - name: Run doctests + run: | + python -m pytest --doctest-modules xarray --ignore xarray/tests + + min-version-policy: + name: Minimum Version Policy + runs-on: "ubuntu-latest" + needs: detect-ci-trigger + if: needs.detect-ci-trigger.outputs.triggered == 'false' + defaults: + run: + shell: bash -l {0} + + steps: + - name: Cancel previous runs + uses: styfle/cancel-workflow-action@0.6.0 + with: + access_token: ${{ github.token }} + - uses: actions/checkout@v2 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. 
+ - uses: conda-incubator/setup-miniconda@v2 + with: + channels: conda-forge + channel-priority: strict + mamba-version: "*" + auto-update-conda: false + + - name: minimum versions policy + run: | + mamba install -y pyyaml + python ci/min_deps_check.py ci/requirements/py37-bare-minimum.yml + python ci/min_deps_check.py ci/requirements/py37-min-all-deps.yml diff --git a/.github/workflows/ci-pre-commit.yml b/.github/workflows/ci-pre-commit.yml new file mode 100644 index 00000000000..1ab5642367e --- /dev/null +++ b/.github/workflows/ci-pre-commit.yml @@ -0,0 +1,16 @@ +name: linting + +on: + push: + branches: "*" + pull_request: + branches: "*" + +jobs: + linting: + name: "pre-commit hooks" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + - uses: pre-commit/action@v2.0.0 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 00000000000..849d8a445a5 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,102 @@ +name: CI +on: + push: + branches: + - "*" + pull_request: + branches: + - "*" + workflow_dispatch: # allows you to trigger manually + +jobs: + detect-ci-trigger: + name: detect ci trigger + runs-on: ubuntu-latest + if: github.event_name == 'push' || github.event_name == 'pull_request' + outputs: + triggered: ${{ steps.detect-trigger.outputs.trigger-found }} + steps: + - uses: actions/checkout@v2 + - uses: ./.github/actions/detect-ci-trigger + id: detect-trigger + with: + keyword: "[skip-ci]" + test: + name: ${{ matrix.os }} py${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + needs: detect-ci-trigger + if: needs.detect-ci-trigger.outputs.triggered == 'false' + defaults: + run: + shell: bash -l {0} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ["3.7", "3.8"] + steps: + - name: Cancel previous runs + uses: styfle/cancel-workflow-action@0.6.0 + with: + access_token: ${{ github.token }} + - uses: actions/checkout@v2 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. + - name: Set environment variables + run: | + if [[ ${{ matrix.os }} == windows* ]] ; + then + echo "CONDA_ENV_FILE=ci/requirements/environment-windows.yml" >> $GITHUB_ENV + else + echo "CONDA_ENV_FILE=ci/requirements/environment.yml" >> $GITHUB_ENV + + fi + echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV + + - name: Cache conda + uses: actions/cache@v2 + with: + path: ~/conda_pkgs_dir + key: + ${{ runner.os }}-conda-py${{ matrix.python-version }}-${{ + hashFiles('ci/requirements/**.yml') }} + - uses: conda-incubator/setup-miniconda@v2 + with: + channels: conda-forge + channel-priority: strict + mamba-version: "*" + activate-environment: xarray-tests + auto-update-conda: false + python-version: ${{ matrix.python-version }} + use-only-tar-bz2: true + + - name: Install conda dependencies + run: | + mamba env update -f $CONDA_ENV_FILE + + - name: Install xarray + run: | + python -m pip install --no-deps -e . 
+ + - name: Version info + run: | + conda info -a + conda list + python xarray/util/print_versions.py + - name: Import xarray + run: | + python -c "import xarray" + - name: Run tests + run: | + python -m pytest -n 4 \ + --cov=xarray \ + --cov-report=xml + + - name: Upload code coverage to Codecov + uses: codecov/codecov-action@v1 + with: + file: ./coverage.xml + flags: unittests + env_vars: RUNNER_OS,PYTHON_VERSION + name: codecov-umbrella + fail_ci_if_error: false diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index cb5e1272d91..bb325c1837e 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -1,4 +1,4 @@ -name: CI +name: CI Upstream on: push: branches: @@ -23,7 +23,7 @@ jobs: id: detect-trigger with: keyword: "[test-upstream]" - + upstream-dev: name: upstream-dev runs-on: ubuntu-latest @@ -60,7 +60,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Set up conda environment run: | - mamba env update -f ci/requirements/py38.yml + mamba env update -f ci/requirements/environment.yml bash ci/install-upstream-wheels.sh conda list - name: import xarray @@ -148,8 +148,8 @@ jobs: } const result = await github.graphql(query, variables) - // If no issue is open, create a new issue, - // else update the body of the existing issue. + // If no issue is open, create a new issue, + // else update the body of the existing issue. if (result.repository.issues.edges.length === 0) { github.issues.create({ owner: variables.owner, diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6659366538b..b0fa21a7bf9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,11 @@ # https://pre-commit.com/ repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml # isort should run before black as black sometimes tweaks the isort output - repo: https://github.com/PyCQA/isort rev: 5.7.0 @@ -22,6 +28,7 @@ repos: rev: v0.790 # Must match ci/requirements/*.yml hooks: - id: mypy + exclude: "properties|asv_bench" # run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194 # - repo: https://github.com/asottile/pyupgrade # rev: v1.22.1 diff --git a/README.rst b/README.rst index 0b3b66c468d..e258a8ccd23 100644 --- a/README.rst +++ b/README.rst @@ -1,8 +1,8 @@ xarray: N-D labeled arrays and datasets ======================================= -.. image:: https://dev.azure.com/xarray/xarray/_apis/build/status/pydata.xarray?branchName=master - :target: https://dev.azure.com/xarray/xarray/_build/latest?definitionId=1&branchName=master +.. image:: https://github.com/pydata/xarray/workflows/CI/badge.svg?branch=master + :target: https://github.com/pydata/xarray/actions?query=workflow%3ACI .. image:: https://codecov.io/gh/pydata/xarray/branch/master/graph/badge.svg :target: https://codecov.io/gh/pydata/xarray .. 
image:: https://readthedocs.org/projects/xray/badge/?version=latest diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index f9bfa032a98..00000000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,124 +0,0 @@ -variables: - pytest_extra_flags: '' - allow_failure: false - upstream_dev: false - -jobs: - -- job: Linux - strategy: - matrix: - py37-bare-minimum: - conda_env: py37-bare-minimum - py37-min-all-deps: - conda_env: py37-min-all-deps - py37-min-nep18: - conda_env: py37-min-nep18 - py37: - conda_env: py37 - py38: - conda_env: py38 - py38-backend-api-v2: - conda_env: py38 - environment_variables: XARRAY_BACKEND_API=v2 - py38-all-but-dask: - conda_env: py38-all-but-dask - py38-upstream-dev: - conda_env: py38 - upstream_dev: true - allow_failure: true - py38-flaky: - conda_env: py38 - pytest_extra_flags: --run-flaky --run-network-tests - allow_failure: true - pool: - vmImage: 'ubuntu-16.04' - steps: - - template: ci/azure/unit-tests.yml - -- job: MacOSX - strategy: - matrix: - py38: - conda_env: py38 - pool: - vmImage: 'macOS-10.15' - steps: - - template: ci/azure/unit-tests.yml - -- job: Windows - strategy: - matrix: - py37: - conda_env: py37-windows - pool: - vmImage: 'windows-2019' - steps: - - template: ci/azure/unit-tests.yml - -- job: LintFlake8 - pool: - vmImage: 'ubuntu-16.04' - steps: - - task: UsePythonVersion@0 - - bash: python -m pip install flake8 - displayName: Install flake8 - - bash: flake8 - displayName: flake8 lint checks - -- job: FormattingBlack - pool: - vmImage: 'ubuntu-16.04' - steps: - - task: UsePythonVersion@0 - - bash: python -m pip install black - displayName: Install black - - bash: black --diff --check . - displayName: black formatting check - -- job: Doctests - variables: - conda_env: py38 - pool: - vmImage: 'ubuntu-16.04' - steps: - - template: ci/azure/install.yml - - bash: | - source activate xarray-tests - python -m pytest --doctest-modules xarray --ignore xarray/tests - displayName: Run doctests - -- job: TypeChecking - variables: - conda_env: py38 - pool: - vmImage: 'ubuntu-16.04' - steps: - - template: ci/azure/install.yml - - bash: | - source activate xarray-tests - mypy . - displayName: mypy type checks - -- job: isort - variables: - conda_env: py38 - pool: - vmImage: 'ubuntu-16.04' - steps: - - template: ci/azure/install.yml - - bash: | - source activate xarray-tests - isort --check . 
- displayName: isort formatting checks - -- job: MinimumVersionsPolicy - pool: - vmImage: 'ubuntu-16.04' - steps: - - template: ci/azure/add-conda-to-path.yml - - bash: | - conda install -y pyyaml - python ci/min_deps_check.py ci/requirements/py37-bare-minimum.yml - python ci/min_deps_check.py ci/requirements/py37-min-all-deps.yml - displayName: minimum versions policy diff --git a/ci/azure/add-conda-to-path.yml b/ci/azure/add-conda-to-path.yml deleted file mode 100644 index e5173835388..00000000000 --- a/ci/azure/add-conda-to-path.yml +++ /dev/null @@ -1,18 +0,0 @@ -# https://docs.microsoft.com/en-us/azure/devops/pipelines/languages/anaconda -steps: - -- bash: | - echo "##vso[task.prependpath]$CONDA/bin" - displayName: Add conda to PATH (Linux) - condition: eq(variables['Agent.OS'], 'Linux') - -- bash: | - echo "##vso[task.prependpath]$CONDA/bin" - sudo chown -R $USER $CONDA - displayName: Add conda to PATH (OS X) - condition: eq(variables['Agent.OS'], 'Darwin') - -- powershell: | - Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" - displayName: Add conda to PATH (Windows) - condition: eq(variables['Agent.OS'], 'Windows_NT') diff --git a/ci/azure/install.yml b/ci/azure/install.yml deleted file mode 100644 index 2b080287669..00000000000 --- a/ci/azure/install.yml +++ /dev/null @@ -1,68 +0,0 @@ -parameters: - env_file: ci/requirements/$CONDA_ENV.yml - -steps: - -- template: add-conda-to-path.yml - -- bash: | - conda install -y mamba -c conda-forge - source activate # required for mamba on windows - mamba env create -n xarray-tests --file ${{ parameters.env_file }} - displayName: Install conda dependencies - -# TODO: add sparse back in, once Numba works with the development version of -# NumPy again: https://github.com/pydata/xarray/issues/4146 -- bash: | - source activate xarray-tests - conda uninstall -y --force \ - numpy \ - scipy \ - pandas \ - matplotlib \ - dask \ - distributed \ - zarr \ - cftime \ - rasterio \ - pint \ - bottleneck \ - sparse - python -m pip install \ - -i https://pypi.anaconda.org/scipy-wheels-nightly/simple \ - --no-deps \ - --pre \ - --upgrade \ - numpy \ - scipy \ - pandas - python -m pip install \ - -f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com \ - --no-deps \ - --pre \ - --upgrade \ - matplotlib - python -m pip install \ - --no-deps \ - --upgrade \ - git+https://github.com/dask/dask \ - git+https://github.com/dask/distributed \ - git+https://github.com/zarr-developers/zarr \ - git+https://github.com/Unidata/cftime \ - git+https://github.com/mapbox/rasterio \ - git+https://github.com/hgrecco/pint \ - git+https://github.com/pydata/bottleneck - condition: eq(variables['UPSTREAM_DEV'], 'true') - displayName: Install upstream dev dependencies - -- bash: | - source activate xarray-tests - python -m pip install --no-deps -e . 
- displayName: Install xarray - -- bash: | - source activate xarray-tests - conda info -a - conda list - python xarray/util/print_versions.py - displayName: Version info diff --git a/ci/azure/unit-tests.yml b/ci/azure/unit-tests.yml deleted file mode 100644 index 0fb2d53cc36..00000000000 --- a/ci/azure/unit-tests.yml +++ /dev/null @@ -1,39 +0,0 @@ -steps: - -- template: install.yml - -- bash: | - source activate xarray-tests - python -OO -c "import xarray" - displayName: Import xarray - -# Work around for allowed test failures: -# https://github.com/microsoft/azure-pipelines-tasks/issues/9302 -- bash: | - source activate xarray-tests - $(environment_variables) pytest -n 4 \ - --junitxml=junit/test-results.xml \ - --cov=xarray \ - --cov-report=xml \ - $(pytest_extra_flags) \ - || ( \ - [ "$ALLOW_FAILURE" = "true" ] \ - && echo "##vso[task.logissue type=warning]Allowed CI failure!!" \ - && echo "##vso[task.complete result=SucceededWithIssues]" \ - ) - displayName: Run tests - -- bash: | - curl https://codecov.io/bash > codecov.sh - bash codecov.sh -t 688f4d53-31bb-49b5-8370-4ce6f792cf3d - displayName: Upload coverage to codecov.io - -# TODO: publish coverage results to Azure, once we can merge them across -# multiple jobs: https://stackoverflow.com/questions/56776185 - -- task: PublishTestResults@2 - condition: succeededOrFailed() - inputs: - testResultsFiles: '**/test-*.xml' - failTaskOnFailedTests: false - testRunTitle: '$(Agent.JobName)' diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py index 5bb4c155410..3ffab645e8e 100755 --- a/ci/min_deps_check.py +++ b/ci/min_deps_check.py @@ -1,5 +1,5 @@ """Fetch from conda database all available versions of the xarray dependencies and their -publication date. Compare it against requirements/py36-min-all-deps.yml to verify the +publication date. Compare it against requirements/py37-min-all-deps.yml to verify the policy on obsolete dependencies is being followed. 
Print a pretty report :) """ import itertools @@ -56,7 +56,7 @@ def warning(msg: str) -> None: def parse_requirements(fname) -> Iterator[Tuple[str, int, int, Optional[int]]]: - """Load requirements/py36-min-all-deps.yml + """Load requirements/py37-min-all-deps.yml Yield (package name, major version, minor version, [patch version]) """ diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/environment-windows.yml similarity index 77% rename from ci/requirements/py37-windows.yml rename to ci/requirements/environment-windows.yml index 6dd8be768b1..6de2bc8dc64 100644 --- a/ci/requirements/py37-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -2,27 +2,21 @@ name: xarray-tests channels: - conda-forge dependencies: - - python=3.7 - - black - boto3 - bottleneck - cartopy # - cdms2 # Not available on Windows - # - cfgrib # Causes Python interpreter crash on Windows + # - cfgrib # Causes Python interpreter crash on Windows: https://github.com/pydata/xarray/pull/3340 - cftime - - coveralls - dask - distributed - - flake8 - h5netcdf - h5py=2 - hdf5 - hypothesis - iris - - isort - lxml # Optional dep of pydap - matplotlib-base - - mypy=0.790 # Must match .pre-commit-config.yaml - nc-time-axis - netcdf4 - numba @@ -30,6 +24,7 @@ dependencies: - pandas - pint - pip + - pre-commit - pseudonetcdf - pydap # - pynio # Not available on Windows diff --git a/ci/requirements/py38.yml b/ci/requirements/environment.yml similarity index 82% rename from ci/requirements/py38.yml rename to ci/requirements/environment.yml index 7e31216c285..0f59d9570c8 100644 --- a/ci/requirements/py38.yml +++ b/ci/requirements/environment.yml @@ -3,27 +3,21 @@ channels: - conda-forge - nodefaults dependencies: - - python=3.8 - - black - boto3 - bottleneck - cartopy - cdms2 - cfgrib - cftime - - coveralls - dask - distributed - - flake8 - h5netcdf - h5py=2 - hdf5 - hypothesis - iris - - isort - lxml # Optional dep of pydap - matplotlib-base - - mypy=0.790 # Must match .pre-commit-config.yaml - nc-time-axis - netcdf4 - numba @@ -31,9 +25,10 @@ dependencies: - pandas - pint - pip=20.2 + - pre-commit - pseudonetcdf - pydap - # - pynio: not compatible with netCDF4>1.5.3; only tested in py36-bare-minimum + # - pynio: not compatible with netCDF4>1.5.3; only tested in py37-bare-minimum - pytest - pytest-cov - pytest-env diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml deleted file mode 100644 index 34d8b3a3fc2..00000000000 --- a/ci/requirements/py37.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: xarray-tests -channels: - - conda-forge - - nodefaults -dependencies: - - python=3.7 - - black - - boto3 - - bottleneck - - cartopy - - cdms2 - - cfgrib - - cftime - - coveralls - - dask - - distributed - - flake8 - - h5netcdf - - h5py=2 - - hdf5 - - hypothesis - - iris - - isort - - lxml # Optional dep of pydap - - matplotlib-base - - mypy=0.790 # Must match .pre-commit-config.yaml - - nc-time-axis - - netcdf4 - - numba - - numpy - - pandas - - pint - - pip - - pseudonetcdf - - pydap - # - pynio: not compatible with netCDF4>1.5.3; only tested in py36-bare-minimum - - pytest - - pytest-cov - - pytest-env - - pytest-xdist - - rasterio - - scipy - - seaborn - - setuptools - - sparse - - toolz - - zarr - - pip: - - numbagg diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml index 5eb8fa39cef..14930f5272d 100644 --- a/ci/requirements/py38-all-but-dask.yml +++ b/ci/requirements/py38-all-but-dask.yml @@ -30,7 +30,7 @@ dependencies: - pip - pseudonetcdf - pydap - # - pynio: not compatible 
with netCDF4>1.5.3; only tested in py36-bare-minimum + # - pynio: not compatible with netCDF4>1.5.3; only tested in py37-bare-minimum - pytest - pytest-cov - pytest-env diff --git a/doc/_templates/autosummary/base.rst b/doc/_templates/autosummary/base.rst index e6726a71d7c..53f2a29c193 100644 --- a/doc/_templates/autosummary/base.rst +++ b/doc/_templates/autosummary/base.rst @@ -1,3 +1,3 @@ :github_url: {{ fullname | github_url | escape_underscores }} -{% extends "!autosummary/base.rst" %} \ No newline at end of file +{% extends "!autosummary/base.rst" %} diff --git a/doc/contributing.rst b/doc/contributing.rst index 39863ec96c7..9c4ce5a0af2 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -152,8 +152,13 @@ We'll now kick off a two-step process: .. code-block:: sh # Create and activate the build environment - # This is for Linux and MacOS. On Windows, use py37-windows.yml instead. - conda env create -f ci/requirements/py37.yml + conda create -c conda-forge -n xarray-tests python=3.8 + + # This is for Linux and MacOS + conda env update -f ci/requirements/environment.yml + + # On windows, use environment-windows.yml instead + conda env update -f ci/requirements/environment-windows.yml conda activate xarray-tests @@ -363,28 +368,9 @@ xarray uses several tools to ensure a consistent code format throughout the proj - `mypy `_ for static type checking on `type hints `_ -``pip``:: - - pip install black flake8 isort mypy blackdoc - -and then run from the root of the xarray repository:: - - isort . - black -t py36 . - blackdoc -t py36 . - flake8 - mypy . - -to auto-format your code. Additionally, many editors have plugins that will -apply ``black`` as you edit files. - -Optionally, you may wish to setup `pre-commit hooks `_ +We highly recommend that you setup `pre-commit hooks `_ to automatically run all the above tools every time you make a git commit. This -can be done by installing ``pre-commit``:: - - pip install pre-commit - -and then running:: +can be done by running:: pre-commit install @@ -406,12 +392,8 @@ Testing With Continuous Integration ----------------------------------- The *xarray* test suite runs automatically the -`Azure Pipelines `__, -continuous integration service, once your pull request is submitted. However, -if you wish to run the test suite on a branch prior to submitting the pull -request, then Azure Pipelines -`needs to be configured `_ -for your GitHub repository. +`GitHub Actions `__, +continuous integration service, once your pull request is submitted. A pull-request will be considered for merging when you have an all 'green' build. If any tests are failing, then you will get a red 'X', where you can click through to see the @@ -857,8 +839,7 @@ PR checklist - **Properly format your code** and verify that it passes the formatting guidelines set by `Black `_ and `Flake8 `_. See `"Code formatting" `_. You can use `pre-commit `_ to run these automatically on each commit. - - Run ``black .`` in the root directory. This may modify some files. Confirm and commit any formatting changes. - - Run ``flake8`` in the root directory. If this fails, it will log an error message. + - Run ``pre-commit run --all-files`` in the root directory. This may modify some files. Confirm and commit any formatting changes. - **Push your code and** `create a PR on GitHub `_. - **Use a helpful title for your pull request** by summarizing the main contributions rather than using the latest commit message. If the PR addresses an `issue `_, please `reference it `_. 
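Put together, a minimal local setup following the updated contributing instructions above might look like the following sketch (it assumes a Linux/macOS shell, a working conda installation, and a clone of the repository; on Windows, ci/requirements/environment-windows.yml is used instead):

    # create and populate the development environment
    conda create -c conda-forge -n xarray-tests python=3.8
    conda env update -f ci/requirements/environment.yml
    conda activate xarray-tests

    # install xarray itself, then set up the pre-commit hooks
    python -m pip install --no-deps -e .
    pre-commit install

    # run all formatting and lint checks once over the whole repository
    pre-commit run --all-files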
diff --git a/doc/gallery/README.txt b/doc/gallery/README.txt index b17f803696b..63f7d477cf4 100644 --- a/doc/gallery/README.txt +++ b/doc/gallery/README.txt @@ -2,4 +2,3 @@ Gallery ======= - diff --git a/doc/groupby.rst b/doc/groupby.rst index c72a26c45ea..d0c0b1849f9 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -124,7 +124,7 @@ dimensions *other than* the provided one: .. note:: We use an ellipsis (`...`) here to indicate we want to reduce over all - other dimensions + other dimensions First and last @@ -220,10 +220,10 @@ may be desirable: da.groupby_bins("lon", [0, 45, 50]).sum() These methods group by `lon` values. It is also possible to groupby each -cell in a grid, regardless of value, by stacking multiple dimensions, +cell in a grid, regardless of value, by stacking multiple dimensions, applying your function, and then unstacking the result: .. ipython:: python stacked = da.stack(gridcell=["ny", "nx"]) - stacked.groupby("gridcell").sum(...).unstack("gridcell") \ No newline at end of file + stacked.groupby("gridcell").sum(...).unstack("gridcell") diff --git a/doc/howdoi.rst b/doc/howdoi.rst index 84c0c786027..3604d66bd0c 100644 --- a/doc/howdoi.rst +++ b/doc/howdoi.rst @@ -59,4 +59,3 @@ How do I ... - ``obj.dt.ceil``, ``obj.dt.floor``, ``obj.dt.round``. See :ref:`dt_accessor` for more. * - make a mask that is ``True`` where an object contains any of the values in a array - :py:meth:`Dataset.isin`, :py:meth:`DataArray.isin` - diff --git a/doc/indexing.rst b/doc/indexing.rst index 58064582354..78766b8fd81 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -548,7 +548,7 @@ you can supply a :py:class:`~xarray.DataArray` with a coordinate, x=xr.DataArray([0, 1, 6], dims="z", coords={"z": ["a", "b", "c"]}), y=xr.DataArray([0, 1, 0], dims="z"), ) - + Analogously, label-based pointwise-indexing is also possible by the ``.sel`` method: diff --git a/doc/installing.rst b/doc/installing.rst index d5783e557e1..99b8b621aed 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -118,9 +118,9 @@ at the moment of publishing the xarray release is guaranteed to work. You can see the actual minimum tested versions: - `For NEP-18 libraries - `_ + `_ - `For everything else - `_ + `_ .. _installation-instructions: diff --git a/doc/internals.rst b/doc/internals.rst index 177cc60ac0a..60d32128c60 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -230,4 +230,4 @@ re-open it directly with Zarr: zgroup = zarr.open("rasm.zarr") print(zgroup.tree()) - dict(zgroup["Tair"].attrs) \ No newline at end of file + dict(zgroup["Tair"].attrs) diff --git a/doc/interpolation.rst b/doc/interpolation.rst index c2922813e15..9a3b7a7ee2d 100644 --- a/doc/interpolation.rst +++ b/doc/interpolation.rst @@ -68,7 +68,7 @@ by specifying the time periods required. Interpolation of data indexed by a :py:class:`~xarray.CFTimeIndex` is also allowed. See :ref:`CFTimeIndex` for examples. - + .. note:: Currently, our interpolation only works for regular grids. @@ -317,4 +317,4 @@ The remapping can be done as follows dsi = ds.interp(lon=lon, lat=lat) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample4.png width=8in - axes[1].set_title("Remapped data") \ No newline at end of file + axes[1].set_title("Remapped data") diff --git a/doc/io.rst b/doc/io.rst index e4fa57689a6..2e46879929b 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -1107,7 +1107,7 @@ We recommend installing PyNIO via conda:: conda install -c conda-forge pynio .. note:: - + PyNIO is no longer actively maintained and conflicts with netcdf4 > 1.5.3. 
The PyNIO backend may be moved outside of xarray in the future. @@ -1152,4 +1152,4 @@ Third party libraries More formats are supported by extension libraries: -- `xarray-mongodb `_: Store xarray objects on MongoDB \ No newline at end of file +- `xarray-mongodb `_: Store xarray objects on MongoDB diff --git a/doc/plotting.rst b/doc/plotting.rst index dde48d47569..3699f794ae8 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -955,4 +955,4 @@ One can also make line plots with multidimensional coordinates. In this case, `` f, ax = plt.subplots(2, 1) da.plot.line(x="lon", hue="y", ax=ax[0]) @savefig plotting_example_2d_hue_xy.png - da.plot.line(x="lon", hue="x", ax=ax[1]) \ No newline at end of file + da.plot.line(x="lon", hue="x", ax=ax[1]) diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst index e3d1456f017..1a2bc809550 100644 --- a/doc/quick-overview.rst +++ b/doc/quick-overview.rst @@ -153,7 +153,7 @@ Visualizing your datasets is quick and convenient: data.plot() Note the automatic labeling with names and units. Our effort in adding metadata attributes has paid off! Many aspects of these figures are customizable: see :ref:`plotting`. - + pandas ------ diff --git a/doc/reshaping.rst b/doc/reshaping.rst index d07929b3b8f..81fd4a6d35e 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -19,7 +19,7 @@ Reordering dimensions --------------------- To reorder dimensions on a :py:class:`~xarray.DataArray` or across all variables -on a :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.transpose`. An +on a :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.transpose`. An ellipsis (`...`) can be use to represent all other dimensions: .. ipython:: python @@ -288,4 +288,4 @@ As a shortcut, you can refer to existing coordinates by name: ds.sortby("x") ds.sortby(["y", "x"]) - ds.sortby(["y", "x"], ascending=False) \ No newline at end of file + ds.sortby(["y", "x"], ascending=False) diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index cb2921e2ed1..db612d74859 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -136,7 +136,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: da.time.dt.ceil("3D") da.time.dt.floor("5D") da.time.dt.round("2D") - + - Group-by operations based on datetime accessor attributes (e.g. by month of the year): diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 70b3c407b36..c969453b108 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -90,7 +90,7 @@ Internal Changes By `Justus Magin `_ and `Mathias Hauser `_. -- Replace all usages of ``assert x.identical(y)`` with ``assert_identical(x, y)`` +- Replace all usages of ``assert x.identical(y)`` with ``assert_identical(x, y)`` for clearer error messages. (:pull:`4752`); By `Maximilian Roos `_. diff --git a/licenses/PYTHON_LICENSE b/licenses/PYTHON_LICENSE index 43829c533b9..88251f5b6e8 100644 --- a/licenses/PYTHON_LICENSE +++ b/licenses/PYTHON_LICENSE @@ -251,4 +251,4 @@ FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT -OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. \ No newline at end of file +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
diff --git a/xarray/tests/data/example.ict b/xarray/tests/data/example.ict index bc04888fb80..41bbfeb996c 100644 --- a/xarray/tests/data/example.ict +++ b/xarray/tests/data/example.ict @@ -28,4 +28,4 @@ Start_UTC, lat, lon, elev, TEST_ppbv, TESTM_ppbv 43200, 41.00000, -71.00000, 5, 1.2345, 2.220 46800, 42.00000, -72.00000, 15, 2.3456, -9999 50400, 42.00000, -73.00000, 20, 3.4567, -7777 -50400, 42.00000, -74.00000, 25, 4.5678, -8888 \ No newline at end of file +50400, 42.00000, -74.00000, 25, 4.5678, -8888 diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index d730b6dc1c6..2f4a4edd436 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -2240,7 +2240,7 @@ def test_datetime_hue(self, hue_style): ds2.plot.scatter(x="A", y="B", hue="hue", hue_style=hue_style) def test_facetgrid_hue_style(self): - # Can't move this to pytest.mark.parametrize because py36-bare-minimum + # Can't move this to pytest.mark.parametrize because py37-bare-minimum # doesn't have matplotlib. for hue_style, map_type in ( ("discrete", list), From 3c1ed2006b55fc38e7cf0d6500e8048ffed461ea Mon Sep 17 00:00:00 2001 From: keewis Date: Mon, 11 Jan 2021 13:53:43 +0100 Subject: [PATCH 14/51] install conda as a library in the minimum dependency check CI (#4792) * install conda as a library * use python 3.8 instead of 3.9 as it seems conda is not ready for it, yet. * install py38 when initially setting up the environment --- .github/workflows/ci-additional.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 1e19f594853..00f8ea42b97 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -180,9 +180,10 @@ jobs: channel-priority: strict mamba-version: "*" auto-update-conda: false + python-version: "3.8" - name: minimum versions policy run: | - mamba install -y pyyaml + mamba install -y pyyaml conda python ci/min_deps_check.py ci/requirements/py37-bare-minimum.yml python ci/min_deps_check.py ci/requirements/py37-min-all-deps.yml From 81ed5075fa02ac484f7448f15e0d7e621ab8b187 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 12 Jan 2021 01:21:18 +0100 Subject: [PATCH 15/51] Print number of variables in repr (#4762) * Print number of variables in repr * Tweak title only when there's many rows Workaround to avoid having to redo every single doctest... It is really only necessary when the data rows are limited. But I find it a bit difficult to count the rows quickly past like 7. * Remove min() No need to limit max_rows now because the if condition handles that. 
--- xarray/core/formatting.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 8dd8d43efab..282620e3569 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -372,7 +372,9 @@ def _mapping_repr(mapping, title, summarizer, col_width=None, max_rows=None): max_rows = OPTIONS["display_max_rows"] summary = [f"{title}:"] if mapping: - if len(mapping) > max_rows: + len_mapping = len(mapping) + if len_mapping > max_rows: + summary = [f"{summary[0]} ({max_rows}/{len_mapping})"] first_rows = max_rows // 2 + max_rows % 2 items = list(mapping.items()) summary += [summarizer(k, v, col_width) for k, v in items[:first_rows]] From 9bb0302b8dc46687a4118bab10285aa831d967c4 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 12 Jan 2021 11:15:27 +0100 Subject: [PATCH 16/51] Always force dask arrays to float in missing.interp_func (#4771) * scipy.interpolate.interp1d always forces to float. * Copy type-check from scipy.interpolate.interp1d * Update missing.py * Test that pre- and post-compute dtypes matches * Update test_missing.py --- xarray/core/missing.py | 9 ++++++++- xarray/tests/test_missing.py | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 477c7f40547..8d112b4603c 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -730,6 +730,13 @@ def interp_func(var, x, new_x, method, kwargs): # if usefull, re-use localize for each chunk of new_x localize = (method in ["linear", "nearest"]) and (new_x[0].chunks is not None) + # scipy.interpolate.interp1d always forces to float. + # Use the same check for blockwise as well: + if not issubclass(var.dtype.type, np.inexact): + dtype = np.float_ + else: + dtype = var.dtype + return da.blockwise( _dask_aware_interpnd, out_ind, @@ -738,7 +745,7 @@ def interp_func(var, x, new_x, method, kwargs): interp_kwargs=kwargs, localize=localize, concatenate=True, - dtype=var.dtype, + dtype=dtype, new_axes=new_axes, ) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 21d82b1948b..2ab3508b667 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -370,6 +370,20 @@ def test_interpolate_dask_raises_for_invalid_chunk_dim(): da.interpolate_na("time") +@requires_dask +@requires_scipy +@pytest.mark.parametrize("dtype, method", [(int, "linear"), (int, "nearest")]) +def test_interpolate_dask_expected_dtype(dtype, method): + da = xr.DataArray( + data=np.array([0, 1], dtype=dtype), + dims=["time"], + coords=dict(time=np.array([0, 1])), + ).chunk(dict(time=2)) + da = da.interp(time=np.array([0, 0.5, 1, 2]), method=method) + + assert da.dtype == da.compute().dtype + + @requires_bottleneck def test_ffill(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x") From f52a95cbe694336fe47bc5a42c713bee8ad74d64 Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 12 Jan 2021 17:44:48 +0100 Subject: [PATCH 17/51] Fix interval labels with units (#4794) * make sure we actually get the correct axis label * append the interval position suffix before the units * update whats-new.rst * also update the call in facetgrid * test both x and y using parametrize --- doc/whats-new.rst | 2 ++ xarray/plot/facetgrid.py | 4 +++- xarray/plot/plot.py | 13 ++++++------- xarray/plot/utils.py | 10 ++++++---- xarray/tests/test_plot.py | 14 ++++++++++++++ 5 files changed, 31 insertions(+), 12 deletions(-) diff --git 
a/doc/whats-new.rst b/doc/whats-new.rst index c969453b108..f1137b7b2a2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -68,6 +68,8 @@ Bug fixes By `Alessandro Amici `_ - Limit number of data rows when printing large datasets. (:issue:`4736`, :pull:`4750`). By `Jimmy Westling `_. - Add ``missing_dims`` parameter to transpose (:issue:`4647`, :pull:`4767`). By `Daniel Mesejo `_. +- Resolve intervals before appending other metadata to labels when plotting (:issue:`4322`, :pull:`4794`). + By `Justus Magin `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index 8ed8815a060..58b38251352 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -306,9 +306,11 @@ def map_dataarray_line( ) self._mappables.append(mappable) - _, _, hueplt, xlabel, ylabel, huelabel = _infer_line_data( + xplt, yplt, hueplt, huelabel = _infer_line_data( darray=self.data.loc[self.name_dicts.flat[0]], x=x, y=y, hue=hue ) + xlabel = label_from_attrs(xplt) + ylabel = label_from_attrs(yplt) self._hue_var = hueplt self._hue_label = huelabel diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 2f10240e1b7..8a57e17e5e8 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -107,10 +107,7 @@ def _infer_line_data(darray, x, y, hue): huelabel = label_from_attrs(darray[huename]) hueplt = darray[huename] - xlabel = label_from_attrs(xplt) - ylabel = label_from_attrs(yplt) - - return xplt, yplt, hueplt, xlabel, ylabel, huelabel + return xplt, yplt, hueplt, huelabel def plot( @@ -292,12 +289,14 @@ def line( assert "args" not in kwargs ax = get_axis(figsize, size, aspect, ax) - xplt, yplt, hueplt, xlabel, ylabel, hue_label = _infer_line_data(darray, x, y, hue) + xplt, yplt, hueplt, hue_label = _infer_line_data(darray, x, y, hue) # Remove pd.Intervals if contained in xplt.values and/or yplt.values. - xplt_val, yplt_val, xlabel, ylabel, kwargs = _resolve_intervals_1dplot( - xplt.values, yplt.values, xlabel, ylabel, kwargs + xplt_val, yplt_val, x_suffix, y_suffix, kwargs = _resolve_intervals_1dplot( + xplt.values, yplt.values, kwargs ) + xlabel = label_from_attrs(xplt, extra=x_suffix) + ylabel = label_from_attrs(yplt, extra=y_suffix) _ensure_plottable(xplt_val, yplt_val) diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 3eca90a1dfe..16c67e154fc 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -503,12 +503,14 @@ def _interval_to_double_bound_points(xarray, yarray): return xarray, yarray -def _resolve_intervals_1dplot(xval, yval, xlabel, ylabel, kwargs): +def _resolve_intervals_1dplot(xval, yval, kwargs): """ Helper function to replace the values of x and/or y coordinate arrays containing pd.Interval with their mid-points or - for step plots - double points which double the length. """ + x_suffix = "" + y_suffix = "" # Is it a step plot? 
(see matplotlib.Axes.step) if kwargs.get("drawstyle", "").startswith("steps-"): @@ -534,13 +536,13 @@ def _resolve_intervals_1dplot(xval, yval, xlabel, ylabel, kwargs): # Convert intervals to mid points and adjust labels if _valid_other_type(xval, [pd.Interval]): xval = _interval_to_mid_points(xval) - xlabel += "_center" + x_suffix = "_center" if _valid_other_type(yval, [pd.Interval]): yval = _interval_to_mid_points(yval) - ylabel += "_center" + y_suffix = "_center" # return converted arguments - return xval, yval, xlabel, ylabel, kwargs + return xval, yval, x_suffix, y_suffix, kwargs def _resolve_intervals_2dplot(val, func_name): diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 2f4a4edd436..471bbb7051e 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -592,6 +592,20 @@ def test_coord_with_interval_xy(self): bins = [-1, 0, 1, 2] self.darray.groupby_bins("dim_0", bins).mean(...).dim_0_bins.plot() + @pytest.mark.parametrize("dim", ("x", "y")) + def test_labels_with_units_with_interval(self, dim): + """Test line plot with intervals and a units attribute.""" + bins = [-1, 0, 1, 2] + arr = self.darray.groupby_bins("dim_0", bins).mean(...) + arr.dim_0_bins.attrs["units"] = "m" + + (mappable,) = arr.plot(**{dim: "dim_0_bins"}) + ax = mappable.figure.gca() + actual = getattr(ax, f"get_{dim}label")() + + expected = "dim_0_bins_center [m]" + assert actual == expected + class TestPlot1D(PlotTestCase): @pytest.fixture(autouse=True) From fb67358ceb0c386560e6a6991dd937292ba54d46 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Wed, 13 Jan 2021 18:09:05 +0100 Subject: [PATCH 18/51] coords: retain str dtype (#4759) * coords: retain str dtype * fix doctests * update what's new * fix multiindex repr * rename function * ensure minimum str dtype * fix EOL spaces --- doc/whats-new.rst | 3 +++ xarray/core/alignment.py | 12 ++++++---- xarray/core/concat.py | 4 ++-- xarray/core/dataarray.py | 4 ++-- xarray/core/dataset.py | 6 ++--- xarray/core/merge.py | 4 +++- xarray/core/utils.py | 19 +++++++++++++++ xarray/core/variable.py | 4 ++++ xarray/tests/test_concat.py | 44 ++++++++++++++++++++++++++++++++++ xarray/tests/test_dataarray.py | 33 +++++++++++++++++++++++++ xarray/tests/test_dataset.py | 34 ++++++++++++++++++++++++++ xarray/tests/test_utils.py | 27 +++++++++++++++++++++ xarray/tests/test_variable.py | 11 +++++++++ 13 files changed, 193 insertions(+), 12 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f1137b7b2a2..398c332433f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -66,6 +66,9 @@ Bug fixes By `Anderson Banihirwe `_ - Fix a crash in orthogonal indexing on geographic coordinates with ``engine='cfgrib'`` (:issue:`4733` :pull:`4737`). By `Alessandro Amici `_ +- Coordinates with dtype ``str`` or ``bytes`` now retain their dtype on many operations, + e.g. ``reindex``, ``align``, ``concat``, ``assign``, previously they were cast to an object dtype + (:issue:`2658` and :issue:`4543`) by `Mathias Hauser `_. - Limit number of data rows when printing large datasets. (:issue:`4736`, :pull:`4750`). By `Jimmy Westling `_. - Add ``missing_dims`` parameter to transpose (:issue:`4647`, :pull:`4767`). By `Daniel Mesejo `_. - Resolve intervals before appending other metadata to labels when plotting (:issue:`4322`, :pull:`4794`). diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 21bda8ef8d7..debf3aad96a 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -19,7 +19,7 @@ from . 
import dtypes, utils from .indexing import get_indexer_nd -from .utils import is_dict_like, is_full_slice +from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str from .variable import IndexVariable, Variable if TYPE_CHECKING: @@ -278,10 +278,12 @@ def align( return (obj.copy(deep=copy),) all_indexes = defaultdict(list) + all_coords = defaultdict(list) unlabeled_dim_sizes = defaultdict(set) for obj in objects: for dim in obj.dims: if dim not in exclude: + all_coords[dim].append(obj.coords[dim]) try: index = obj.indexes[dim] except KeyError: @@ -306,7 +308,7 @@ def align( any(not index.equals(other) for other in matching_indexes) or dim in unlabeled_dim_sizes ): - joined_indexes[dim] = index + joined_indexes[dim] = indexes[dim] else: if ( any( @@ -318,9 +320,11 @@ def align( if join == "exact": raise ValueError(f"indexes along dimension {dim!r} are not equal") index = joiner(matching_indexes) + # make sure str coords are not cast to object + index = maybe_coerce_to_str(index, all_coords[dim]) joined_indexes[dim] = index else: - index = matching_indexes[0] + index = all_coords[dim][0] if dim in unlabeled_dim_sizes: unlabeled_sizes = unlabeled_dim_sizes[dim] @@ -583,7 +587,7 @@ def reindex_variables( args: tuple = (var.attrs, var.encoding) else: args = () - reindexed[dim] = IndexVariable((dim,), target, *args) + reindexed[dim] = IndexVariable((dim,), indexers[dim], *args) for dim in sizes: if dim not in indexes and dim in indexers: diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 1275d002cd3..5cda5aa903c 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -187,7 +187,7 @@ def concat( array([[0, 1, 2], [3, 4, 5]]) Coordinates: - * x (x) object 'a' 'b' + * x (x) >> xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim") @@ -503,7 +503,7 @@ def ensure_common_dims(vars): for k in datasets[0].variables: if k in concat_over: try: - vars = ensure_common_dims([ds.variables[k] for ds in datasets]) + vars = ensure_common_dims([ds[k].variable for ds in datasets]) except KeyError: raise ValueError("%r is not present in all datasets." % k) combined = concat_vars(vars, dim, positions) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b3a545dec73..6fdda8fc418 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1325,8 +1325,8 @@ def broadcast_like( [ 2.2408932 , 1.86755799, -0.97727788], [ nan, nan, nan]]) Coordinates: - * x (x) object 'a' 'b' 'c' - * y (y) object 'a' 'b' 'c' + * x (x) Dimensions: (station: 4) Coordinates: - * station (station) object 'boston' 'austin' 'seattle' 'lincoln' + * station (station) Dimensions: (station: 4) Coordinates: - * station (station) object 'boston' 'austin' 'seattle' 'lincoln' + * station (station) Dimensions: (station: 4) Coordinates: - * station (station) object 'boston' 'austin' 'seattle' 'lincoln' + * station (station) pd.Index: """Given an array, safely cast it to a pandas.Index. 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 0a6eef44c90..797de65bbcf 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -48,6 +48,7 @@ ensure_us_time_resolution, infix_dims, is_duck_array, + maybe_coerce_to_str, ) NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( @@ -2523,6 +2524,9 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): indices = nputils.inverse_permutation(np.concatenate(positions)) data = data.take(indices) + # keep as str if possible as pandas.Index uses object (converts to numpy array) + data = maybe_coerce_to_str(data, variables) + attrs = dict(first_var.attrs) if not shortcut: for var in variables: diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 0d5507b6879..7416cab13ed 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -376,6 +376,30 @@ def test_concat_fill_value(self, fill_value): actual = concat(datasets, dim="t", fill_value=fill_value) assert_identical(actual, expected) + @pytest.mark.parametrize("dtype", [str, bytes]) + @pytest.mark.parametrize("dim", ["x1", "x2"]) + def test_concat_str_dtype(self, dtype, dim): + + data = np.arange(4).reshape([2, 2]) + + da1 = Dataset( + { + "data": (["x1", "x2"], data), + "x1": [0, 1], + "x2": np.array(["a", "b"], dtype=dtype), + } + ) + da2 = Dataset( + { + "data": (["x1", "x2"], data), + "x1": np.array([1, 2]), + "x2": np.array(["c", "d"], dtype=dtype), + } + ) + actual = concat([da1, da2], dim=dim) + + assert np.issubdtype(actual.x2.dtype, dtype) + class TestConcatDataArray: def test_concat(self): @@ -525,6 +549,26 @@ def test_concat_combine_attrs_kwarg(self): actual = concat([da1, da2], dim="x", combine_attrs=combine_attrs) assert_identical(actual, expected[combine_attrs]) + @pytest.mark.parametrize("dtype", [str, bytes]) + @pytest.mark.parametrize("dim", ["x1", "x2"]) + def test_concat_str_dtype(self, dtype, dim): + + data = np.arange(4).reshape([2, 2]) + + da1 = DataArray( + data=data, + dims=["x1", "x2"], + coords={"x1": [0, 1], "x2": np.array(["a", "b"], dtype=dtype)}, + ) + da2 = DataArray( + data=data, + dims=["x1", "x2"], + coords={"x1": np.array([1, 2]), "x2": np.array(["c", "d"], dtype=dtype)}, + ) + actual = concat([da1, da2], dim=dim) + + assert np.issubdtype(actual.x2.dtype, dtype) + @pytest.mark.parametrize("attr1", ({"a": {"meta": [10, 20, 30]}}, {"a": [1, 2, 3]}, {})) @pytest.mark.parametrize("attr2", ({"a": [1, 2, 3]}, {})) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 8215a9ddaac..3ead427e22e 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1568,6 +1568,19 @@ def test_reindex_fill_value(self, fill_value): ) assert_identical(expected, actual) + @pytest.mark.parametrize("dtype", [str, bytes]) + def test_reindex_str_dtype(self, dtype): + + data = DataArray( + [1, 2], dims="x", coords={"x": np.array(["a", "b"], dtype=dtype)} + ) + + actual = data.reindex(x=data.x) + expected = data + + assert_identical(expected, actual) + assert actual.dtype == expected.dtype + def test_rename(self): renamed = self.dv.rename("bar") assert_identical(renamed.to_dataset(), self.ds.rename({"foo": "bar"})) @@ -3435,6 +3448,26 @@ def test_align_without_indexes_errors(self): DataArray([1, 2], coords=[("x", [0, 1])]), ) + def test_align_str_dtype(self): + + a = DataArray([0, 1], dims=["x"], coords={"x": ["a", "b"]}) + b = DataArray([1, 2], dims=["x"], coords={"x": ["b", "c"]}) + + expected_a = DataArray( + [0, 1, np.NaN], dims=["x"], coords={"x": ["a", 
"b", "c"]} + ) + expected_b = DataArray( + [np.NaN, 1, 2], dims=["x"], coords={"x": ["a", "b", "c"]} + ) + + actual_a, actual_b = xr.align(a, b, join="outer") + + assert_identical(expected_a, actual_a) + assert expected_a.x.dtype == actual_a.x.dtype + + assert_identical(expected_b, actual_b) + assert expected_b.x.dtype == actual_b.x.dtype + def test_broadcast_arrays(self): x = DataArray([1, 2], coords=[("a", [-1, -2])], name="x") y = DataArray([1, 2], coords=[("b", [3, 4])], name="y") diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 204f08c2eec..bd1938455b1 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1949,6 +1949,16 @@ def test_reindex_like_fill_value(self, fill_value): ) assert_identical(expected, actual) + @pytest.mark.parametrize("dtype", [str, bytes]) + def test_reindex_str_dtype(self, dtype): + data = Dataset({"data": ("x", [1, 2]), "x": np.array(["a", "b"], dtype=dtype)}) + + actual = data.reindex(x=data.x) + expected = data + + assert_identical(expected, actual) + assert actual.x.dtype == expected.x.dtype + @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"foo": 2, "bar": 1}]) def test_align_fill_value(self, fill_value): x = Dataset({"foo": DataArray([1, 2], dims=["x"], coords={"x": [1, 2]})}) @@ -2134,6 +2144,22 @@ def test_align_non_unique(self): with raises_regex(ValueError, "cannot reindex or align"): align(x, y) + def test_align_str_dtype(self): + + a = Dataset({"foo": ("x", [0, 1]), "x": ["a", "b"]}) + b = Dataset({"foo": ("x", [1, 2]), "x": ["b", "c"]}) + + expected_a = Dataset({"foo": ("x", [0, 1, np.NaN]), "x": ["a", "b", "c"]}) + expected_b = Dataset({"foo": ("x", [np.NaN, 1, 2]), "x": ["a", "b", "c"]}) + + actual_a, actual_b = xr.align(a, b, join="outer") + + assert_identical(expected_a, actual_a) + assert expected_a.x.dtype == actual_a.x.dtype + + assert_identical(expected_b, actual_b) + assert expected_b.x.dtype == actual_b.x.dtype + def test_broadcast(self): ds = Dataset( {"foo": 0, "bar": ("x", [1]), "baz": ("y", [2, 3])}, {"c": ("x", [4])} @@ -3420,6 +3446,14 @@ def test_setitem_align_new_indexes(self): ) assert_identical(ds, expected) + @pytest.mark.parametrize("dtype", [str, bytes]) + def test_setitem_str_dtype(self, dtype): + + ds = xr.Dataset(coords={"x": np.array(["x", "y"], dtype=dtype)}) + ds["foo"] = xr.DataArray(np.array([0, 0]), dims=["x"]) + + assert np.issubdtype(ds.x.dtype, dtype) + def test_assign(self): ds = Dataset() actual = ds.assign(x=[0, 1, 2], y=2) diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 5f8b1770bd3..193c45f01cd 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -39,6 +39,33 @@ def test_safe_cast_to_index(): assert expected.dtype == actual.dtype +@pytest.mark.parametrize( + "a, b, expected", [["a", "b", np.array(["a", "b"])], [1, 2, pd.Index([1, 2])]] +) +def test_maybe_coerce_to_str(a, b, expected): + + a = np.array([a]) + b = np.array([b]) + index = pd.Index(a).append(pd.Index(b)) + + actual = utils.maybe_coerce_to_str(index, [a, b]) + + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype + + +def test_maybe_coerce_to_str_minimal_str_dtype(): + + a = np.array(["a", "a_long_string"]) + index = pd.Index(["a"]) + + actual = utils.maybe_coerce_to_str(index, [a]) + expected = np.array("a") + + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype + + @requires_cftime def test_safe_cast_to_index_cftimeindex(): date_types = _all_cftime_date_types() diff --git 
a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 41bf24c7f88..e1ae3e1f258 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2094,6 +2094,17 @@ def test_concat_multiindex(self): assert_identical(actual, expected) assert isinstance(actual.to_index(), pd.MultiIndex) + @pytest.mark.parametrize("dtype", [str, bytes]) + def test_concat_str_dtype(self, dtype): + + a = IndexVariable("x", np.array(["a"], dtype=dtype)) + b = IndexVariable("x", np.array(["b"], dtype=dtype)) + expected = IndexVariable("x", np.array(["a", "b"], dtype=dtype)) + + actual = IndexVariable.concat([a, b]) + assert actual.identical(expected) + assert np.issubdtype(actual.dtype, dtype) + def test_coordinate_alias(self): with pytest.warns(Warning, match="deprecated"): x = Coordinate("x", [1, 2, 3]) From 1ce8938f1d783971b56d22a5a077d8cbddc836a0 Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 13 Jan 2021 22:35:43 +0100 Subject: [PATCH 19/51] don't skip the scheduled CI (#4806) --- .github/workflows/upstream-dev-ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index bb325c1837e..b36ab410d8c 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -32,7 +32,7 @@ jobs: always() && github.repository == 'pydata/xarray' && ( - (github.event_name == 'scheduled' || github.event_name == 'workflow_dispatch') + (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') || needs.detect-ci-trigger.outputs.triggered == 'true' ) defaults: From 747fe26881af073e23b9b09796664f2dd2d4821b Mon Sep 17 00:00:00 2001 From: keewis Date: Thu, 14 Jan 2021 00:02:32 +0100 Subject: [PATCH 20/51] scatter plot by order of the first appearance of hue (#4723) * plot by order of first appearance * use ravel to avoid copying the data * update whats-new.rst * add a test to make sure the legend labels and the mappable labels match * test with upstream-dev [test-upstream] * add a comment about the reason for using pd.unique [skip-ci] * empty commit [skip-ci] --- doc/whats-new.rst | 2 ++ xarray/plot/dataset_plot.py | 7 +++++-- xarray/tests/test_plot.py | 11 +++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 398c332433f..db10ec653c5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -64,6 +64,8 @@ Bug fixes By `Richard Kleijn `_ . - Remove dictionary unpacking when using ``.loc`` to avoid collision with ``.sel`` parameters (:pull:`4695`). By `Anderson Banihirwe `_ +- Fix the legend created by :py:meth:`Dataset.plot.scatter` (:issue:`4641`, :pull:`4723`). + By `Justus Magin `_. - Fix a crash in orthogonal indexing on geographic coordinates with ``engine='cfgrib'`` (:issue:`4733` :pull:`4737`). By `Alessandro Amici `_ - Coordinates with dtype ``str`` or ``bytes`` now retain their dtype on many operations, diff --git a/xarray/plot/dataset_plot.py b/xarray/plot/dataset_plot.py index 7ba0f93f33a..6d942e1b0fa 100644 --- a/xarray/plot/dataset_plot.py +++ b/xarray/plot/dataset_plot.py @@ -291,7 +291,7 @@ def newplotfunc( allargs = locals().copy() allargs["plotfunc"] = globals()[plotfunc.__name__] allargs["data"] = ds - # TODO dcherian: why do I need to remove kwargs? 
+ # remove kwargs to avoid passing the information twice for arg in ["meta_data", "kwargs", "ds"]: del allargs[arg] @@ -422,7 +422,10 @@ def scatter(ds, x, y, ax, **kwargs): if hue_style == "discrete": primitive = [] - for label in np.unique(data["hue"].values): + # use pd.unique instead of np.unique because that keeps the order of the labels, + # which is important to keep them in sync with the ones used in + # FacetGrid.add_legend + for label in pd.unique(data["hue"].values.ravel()): mask = data["hue"] == label if data["sizes"] is not None: kwargs.update(s=data["sizes"].where(mask, drop=True).values.flatten()) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 471bbb7051e..47b15446f1d 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -2290,6 +2290,17 @@ def test_legend_labels(self): lines = ds2.plot.scatter(x="A", y="B", hue="hue") assert [t.get_text() for t in lines[0].axes.get_legend().texts] == ["a", "b"] + def test_legend_labels_facetgrid(self): + ds2 = self.ds.copy() + ds2["hue"] = ["d", "a", "c", "b"] + g = ds2.plot.scatter(x="A", y="B", hue="hue", col="col") + legend_labels = tuple(t.get_text() for t in g.figlegend.texts) + attached_labels = [ + tuple(m.get_label() for m in mappables_per_ax) + for mappables_per_ax in g._mappables + ] + assert list(set(attached_labels)) == [legend_labels] + def test_add_legend_by_default(self): sc = self.ds.plot.scatter(x="A", y="B", hue="hue") assert len(sc.figure.axes) == 2 From 3721725754f2491da48aeba506e1b036e340b6a6 Mon Sep 17 00:00:00 2001 From: keewis Date: Thu, 14 Jan 2021 00:44:19 +0100 Subject: [PATCH 21/51] fix the ci trigger action (#4805) * add some more diagnostics * also print the log after fetching [skip-ci] * try using the current ref to fetch and checkout the FETCH_HEAD * add some more options and print the log before checkout * don't add a progress report when printing to a file [skip-ci] * add back the progress report but also use -q [skip-ci] * replace the backticks with tt html tags [skip-ci] * skip the auto-fetch within the action [skip-ci] --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/actions/detect-ci-trigger/script.sh | 16 +++++++++++----- .github/workflows/ci-additional.yaml | 2 ++ .github/workflows/ci.yaml | 2 ++ .github/workflows/upstream-dev-ci.yaml | 2 ++ 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 74f3fe2430f..09ef053bb39 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -11,5 +11,5 @@

Overriding CI behaviors

- By default, the upstream dev CI is disabled on pull request and push events. You can override this behavior per commit by adding a `[test-upstream]` tag to the first line of the commit message. For documentation-only commits, you can skip the CI per commit by adding a `[skip-ci]` tag to the first line of the commit message + By default, the upstream dev CI is disabled on pull request and push events. You can override this behavior per commit by adding a [test-upstream] tag to the first line of the commit message. For documentation-only commits, you can skip the CI per commit by adding a [skip-ci] tag to the first line of the commit message diff --git a/.github/actions/detect-ci-trigger/script.sh b/.github/actions/detect-ci-trigger/script.sh index d54da9b7ef0..c98175a5a08 100644 --- a/.github/actions/detect-ci-trigger/script.sh +++ b/.github/actions/detect-ci-trigger/script.sh @@ -3,11 +3,17 @@ event_name="$1" keyword="$2" echo "::group::fetch a sufficient number of commits" -if [[ "$event_name" == "pull_request" ]]; then - git fetch --deepen=1 --no-tags 2>&1 -else - echo "nothing to do." -fi +echo "skipped" +# git log -n 5 2>&1 +# if [[ "$event_name" == "pull_request" ]]; then +# ref=$(git log -1 --format='%H') +# git -c protocol.version=2 fetch --deepen=2 --no-tags --prune --progress -q origin $ref 2>&1 +# git log FETCH_HEAD +# git checkout FETCH_HEAD +# else +# echo "nothing to do." +# fi +# git log -n 5 2>&1 echo "::endgroup::" echo "::group::extracting the commit message" diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 00f8ea42b97..fdc61f2f4f7 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -17,6 +17,8 @@ jobs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - uses: actions/checkout@v2 + with: + fetch-depth: 2 - uses: ./.github/actions/detect-ci-trigger id: detect-trigger with: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 849d8a445a5..7d7326eb5c2 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -17,6 +17,8 @@ jobs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - uses: actions/checkout@v2 + with: + fetch-depth: 2 - uses: ./.github/actions/detect-ci-trigger id: detect-trigger with: diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index b36ab410d8c..29fd745fbcc 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -19,6 +19,8 @@ jobs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - uses: actions/checkout@v2 + with: + fetch-depth: 2 - uses: ./.github/actions/detect-ci-trigger id: detect-trigger with: From 800ccb3098495cc4c4952097e89de52aaf22f118 Mon Sep 17 00:00:00 2001 From: keewis Date: Fri, 15 Jan 2021 00:45:06 +0100 Subject: [PATCH 22/51] add a version info step to the upstream-dev CI (#4815) * add a version info step to the upstream-dev CI [test-upstream] [skip-ci] * Merge branch 'master' into version-info-step --- .github/workflows/upstream-dev-ci.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 29fd745fbcc..dda762878c5 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -64,7 +64,11 @@ jobs: run: | mamba env update -f ci/requirements/environment.yml bash ci/install-upstream-wheels.sh + - name: Version info + run: | + conda info -a conda list + 
python xarray/util/print_versions.py - name: import xarray run: | python -c 'import xarray' From 84df75d366edaaa0af172047145a3409cac9bb3a Mon Sep 17 00:00:00 2001 From: Julien Seguinot Date: Fri, 15 Jan 2021 18:22:27 +0100 Subject: [PATCH 23/51] Expand user dir paths (~) in open_mfdataset and to_zarr. (#4795) * Normalize wildcard paths in open_mfdataset. * Document normalized mfdataset paths in what's new. * Also normalize paths in to_zarr. --- doc/whats-new.rst | 3 +++ xarray/backends/api.py | 14 ++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index db10ec653c5..51b16d65a62 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -75,6 +75,9 @@ Bug fixes - Add ``missing_dims`` parameter to transpose (:issue:`4647`, :pull:`4767`). By `Daniel Mesejo `_. - Resolve intervals before appending other metadata to labels when plotting (:issue:`4322`, :pull:`4794`). By `Justus Magin `_. +- Expand user directory paths (e.g. ``~/``) in :py:func:`open_mfdataset` and + :py:meth:`Dataset.to_zarr` (:issue:`4783`, :pull:`4795`). + By `Julien Seguinot `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index faa7e6cf3d3..4958062a262 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -887,7 +887,7 @@ def open_mfdataset( paths ) ) - paths = sorted(glob(paths)) + paths = sorted(glob(_normalize_path(paths))) else: paths = [str(p) if isinstance(p, Path) else p for p in paths] @@ -1386,10 +1386,11 @@ def to_zarr( See `Dataset.to_zarr` for full API docs. """ - if isinstance(store, Path): - store = str(store) - if isinstance(chunk_store, Path): - chunk_store = str(store) + + # expand str and Path arguments + store = _normalize_path(store) + chunk_store = _normalize_path(chunk_store) + if encoding is None: encoding = {} @@ -1419,9 +1420,6 @@ def to_zarr( "compute=False before writing data." ) - if isinstance(store, Path): - store = str(store) - # validate Dataset keys, DataArray names, and attr keys/values _validate_dataset_names(dataset) _validate_attrs(dataset) From a2b1712afd957deaf189c9b1a04e469596d853c9 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 15 Jan 2021 19:19:55 +0100 Subject: [PATCH 24/51] fix decode for scale/ offset list (#4802) * fix decode for scale/ offset list * typo --- doc/whats-new.rst | 2 ++ xarray/coding/variables.py | 4 ++-- xarray/tests/test_coding.py | 14 +++++++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 51b16d65a62..88994a5bfc0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -75,6 +75,8 @@ Bug fixes - Add ``missing_dims`` parameter to transpose (:issue:`4647`, :pull:`4767`). By `Daniel Mesejo `_. - Resolve intervals before appending other metadata to labels when plotting (:issue:`4322`, :pull:`4794`). By `Justus Magin `_. +- Fix regression when decoding a variable with a ``scale_factor`` and ``add_offset`` given + as a list of length one (:issue:`4631`) by `Mathias Hauser `_. - Expand user directory paths (e.g. ``~/``) in :py:func:`open_mfdataset` and :py:meth:`Dataset.to_zarr` (:issue:`4783`, :pull:`4795`). By `Julien Seguinot `_. 
diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 80277e9cd4e..b035ff82086 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -270,9 +270,9 @@ def decode(self, variable, name=None): add_offset = pop_to(attrs, encoding, "add_offset", name=name) dtype = _choose_float_dtype(data.dtype, "add_offset" in attrs) if np.ndim(scale_factor) > 0: - scale_factor = scale_factor.item() + scale_factor = np.asarray(scale_factor).item() if np.ndim(add_offset) > 0: - add_offset = add_offset.item() + add_offset = np.asarray(add_offset).item() transform = partial( _scale_offset_decoding, scale_factor=scale_factor, diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 0f191049284..e0df7782aa7 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -8,7 +8,7 @@ from xarray.coding import variables from xarray.conventions import decode_cf_variable, encode_cf_variable -from . import assert_equal, assert_identical, requires_dask +from . import assert_allclose, assert_equal, assert_identical, requires_dask with suppress(ImportError): import dask.array as da @@ -105,3 +105,15 @@ def test_scaling_converts_to_float32(dtype): roundtripped = coder.decode(encoded) assert_identical(original, roundtripped) assert roundtripped.dtype == np.float32 + + +@pytest.mark.parametrize("scale_factor", (10, [10])) +@pytest.mark.parametrize("add_offset", (0.1, [0.1])) +def test_scaling_offset_as_list(scale_factor, add_offset): + # test for #4631 + encoding = dict(scale_factor=scale_factor, add_offset=add_offset) + original = xr.Variable(("x",), np.arange(10.0), encoding=encoding) + coder = variables.CFScaleOffsetCoder() + encoded = coder.encode(original) + roundtripped = coder.decode(encoded) + assert_allclose(original, roundtripped) From 2a433855789cd20d31f61917a01d0a26a0a9e91a Mon Sep 17 00:00:00 2001 From: alexamici Date: Mon, 18 Jan 2021 16:19:34 +0100 Subject: [PATCH 25/51] Remove the references to `_file_obj` outside low level code paths, change to `_close` (#4809) * Move from _file_obj object to _close function * Remove all references to _close outside of low level * Fix type hints * Cleanup code style * Fix non-trivial type hint problem * Revert adding the `close` argument and add a set_close instead * Remove helper class for an easier helper function + code style * Add set_close docstring * Code style * Revert changes in _replace to keep cose as an exception See: https://github.com/pydata/xarray/pull/4809/files#r557628298 * One more bit to revert * One more bit to revert * Add What's New entry * Use set_close setter * Apply suggestions from code review Co-authored-by: Stephan Hoyer * Rename user-visible argument * Sync wording in docstrings. Co-authored-by: Stephan Hoyer --- doc/whats-new.rst | 2 ++ xarray/backends/api.py | 25 +++++++++---------------- xarray/backends/apiv2.py | 2 +- xarray/backends/rasterio_.py | 2 +- xarray/backends/store.py | 3 +-- xarray/conventions.py | 6 +++--- xarray/core/common.py | 29 ++++++++++++++++++++++++----- xarray/core/dataarray.py | 5 +++-- xarray/core/dataset.py | 17 +++++++++-------- 9 files changed, 53 insertions(+), 38 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 88994a5bfc0..09bd56dbe94 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -108,6 +108,8 @@ Internal Changes By `Maximilian Roos `_. - Speed up attribute style access (e.g. ``ds.somevar`` instead of ``ds["somevar"]``) and tab completion in ipython (:issue:`4741`, :pull:`4742`). By `Richard Kleijn `_. 
+- Added the ``set_close`` method to ``Dataset`` and ``DataArray`` for backends to specify how to voluntarily release + all resources. (:pull:`4809`). By `Alessandro Amici `_. .. _whats-new.0.16.2: diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 4958062a262..81314588784 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -522,7 +522,7 @@ def maybe_decode_store(store, chunks): else: ds2 = ds - ds2._file_obj = ds._file_obj + ds2.set_close(ds._close) return ds2 filename_or_obj = _normalize_path(filename_or_obj) @@ -701,7 +701,7 @@ def open_dataarray( else: (data_array,) = dataset.data_vars.values() - data_array._file_obj = dataset._file_obj + data_array.set_close(dataset._close) # Reset names if they were changed during saving # to ensure that we can 'roundtrip' perfectly @@ -715,17 +715,6 @@ def open_dataarray( return data_array -class _MultiFileCloser: - __slots__ = ("file_objs",) - - def __init__(self, file_objs): - self.file_objs = file_objs - - def close(self): - for f in self.file_objs: - f.close() - - def open_mfdataset( paths, chunks=None, @@ -918,14 +907,14 @@ def open_mfdataset( getattr_ = getattr datasets = [open_(p, **open_kwargs) for p in paths] - file_objs = [getattr_(ds, "_file_obj") for ds in datasets] + closers = [getattr_(ds, "_close") for ds in datasets] if preprocess is not None: datasets = [preprocess(ds) for ds in datasets] if parallel: # calling compute here will return the datasets/file_objs lists, # the underlying datasets will still be stored as dask arrays - datasets, file_objs = dask.compute(datasets, file_objs) + datasets, closers = dask.compute(datasets, closers) # Combine all datasets, closing them in case of a ValueError try: @@ -963,7 +952,11 @@ def open_mfdataset( ds.close() raise - combined._file_obj = _MultiFileCloser(file_objs) + def multi_file_closer(): + for closer in closers: + closer() + + combined.set_close(multi_file_closer) # read global attributes from the attrs_file or from the first dataset if attrs_file is not None: diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 0f98291983d..d31fc9ea773 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -90,7 +90,7 @@ def _dataset_from_backend_dataset( **extra_tokens, ) - ds._file_obj = backend_ds._file_obj + ds.set_close(backend_ds._close) # Ensure source filename always stored in dataset object (GH issue #2550) if "source" not in ds.encoding: diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index a0500c7e1c2..c689c1e99d7 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -361,6 +361,6 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc result = result.chunk(chunks, name_prefix=name_prefix, token=token) # Make the file closeable - result._file_obj = manager + result.set_close(manager.close) return result diff --git a/xarray/backends/store.py b/xarray/backends/store.py index d314a9c3ca9..20fa13af202 100644 --- a/xarray/backends/store.py +++ b/xarray/backends/store.py @@ -19,7 +19,6 @@ def open_backend_dataset_store( decode_timedelta=None, ): vars, attrs = store.load() - file_obj = store encoding = store.get_encoding() vars, attrs, coord_names = conventions.decode_cf_variables( @@ -36,7 +35,7 @@ def open_backend_dataset_store( ds = Dataset(vars, attrs=attrs) ds = ds.set_coords(coord_names.intersection(vars)) - ds._file_obj = file_obj + ds.set_close(store.close) ds.encoding = encoding return ds diff --git a/xarray/conventions.py b/xarray/conventions.py
index bb0b92c77a1..e33ae53b31d 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -576,12 +576,12 @@ def decode_cf( vars = obj._variables attrs = obj.attrs extra_coords = set(obj.coords) - file_obj = obj._file_obj + close = obj._close encoding = obj.encoding elif isinstance(obj, AbstractDataStore): vars, attrs = obj.load() extra_coords = set() - file_obj = obj + close = obj.close encoding = obj.get_encoding() else: raise TypeError("can only decode Dataset or DataStore objects") @@ -599,7 +599,7 @@ def decode_cf( ) ds = Dataset(vars, attrs=attrs) ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) - ds._file_obj = file_obj + ds.set_close(close) ds.encoding = encoding return ds diff --git a/xarray/core/common.py b/xarray/core/common.py index 283114770cf..a69ba03a7a4 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -11,6 +11,7 @@ Iterator, List, Mapping, + Optional, Tuple, TypeVar, Union, @@ -330,7 +331,9 @@ def get_squeeze_dims( class DataWithCoords(SupportsArithmetic, AttrAccessMixin): """Shared base class for Dataset and DataArray.""" - __slots__ = () + _close: Optional[Callable[[], None]] + + __slots__ = ("_close",) _rolling_exp_cls = RollingExp @@ -1263,11 +1266,27 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): return ops.where_method(self, cond, other) + def set_close(self, close: Optional[Callable[[], None]]) -> None: + """Register the function that releases any resources linked to this object. + + This method controls how xarray cleans up resources associated + with this object when the ``.close()`` method is called. It is mostly + intended for backend developers and it is rarely needed by regular + end-users. + + Parameters + ---------- + close : callable + The function that when called like ``close()`` releases + any resources linked to this object. + """ + self._close = close + def close(self: Any) -> None: - """Close any files linked to this object""" - if self._file_obj is not None: - self._file_obj.close() - self._file_obj = None + """Release any resources linked to this object.""" + if self._close is not None: + self._close() + self._close = None def isnull(self, keep_attrs: bool = None): """Test each value in the array for whether it is a missing value. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 6fdda8fc418..e13ea44baad 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -344,6 +344,7 @@ class DataArray(AbstractArray, DataWithCoords): _cache: Dict[str, Any] _coords: Dict[Any, Variable] + _close: Optional[Callable[[], None]] _indexes: Optional[Dict[Hashable, pd.Index]] _name: Optional[Hashable] _variable: Variable @@ -351,7 +352,7 @@ class DataArray(AbstractArray, DataWithCoords): __slots__ = ( "_cache", "_coords", - "_file_obj", + "_close", "_indexes", "_name", "_variable", @@ -421,7 +422,7 @@ def __init__( # public interface. 
self._indexes = indexes - self._file_obj = None + self._close = None def _replace( self, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 7edc2fab067..136edffb202 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -636,6 +636,7 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): _coord_names: Set[Hashable] _dims: Dict[Hashable, int] _encoding: Optional[Dict[Hashable, Any]] + _close: Optional[Callable[[], None]] _indexes: Optional[Dict[Hashable, pd.Index]] _variables: Dict[Hashable, Variable] @@ -645,7 +646,7 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): "_coord_names", "_dims", "_encoding", - "_file_obj", + "_close", "_indexes", "_variables", "__weakref__", @@ -687,7 +688,7 @@ def __init__( ) self._attrs = dict(attrs) if attrs is not None else None - self._file_obj = None + self._close = None self._encoding = None self._variables = variables self._coord_names = coord_names @@ -703,7 +704,7 @@ def load_store(cls, store, decoder=None) -> "Dataset": if decoder: variables, attributes = decoder(variables, attributes) obj = cls(variables, attrs=attributes) - obj._file_obj = store + obj.set_close(store.close) return obj @property @@ -876,7 +877,7 @@ def __dask_postcompute__(self): self._attrs, self._indexes, self._encoding, - self._file_obj, + self._close, ) return self._dask_postcompute, args @@ -896,7 +897,7 @@ def __dask_postpersist__(self): self._attrs, self._indexes, self._encoding, - self._file_obj, + self._close, ) return self._dask_postpersist, args @@ -1007,7 +1008,7 @@ def _construct_direct( attrs=None, indexes=None, encoding=None, - file_obj=None, + close=None, ): """Shortcut around __init__ for internal use when we want to skip costly validation @@ -1020,7 +1021,7 @@ def _construct_direct( obj._dims = dims obj._indexes = indexes obj._attrs = attrs - obj._file_obj = file_obj + obj._close = close obj._encoding = encoding return obj @@ -2122,7 +2123,7 @@ def isel( attrs=self._attrs, indexes=indexes, encoding=self._encoding, - file_obj=self._file_obj, + close=self._close, ) def _isel_fancy( From ba42c08af9afbd9e79d47bda404bf4a92a7314a0 Mon Sep 17 00:00:00 2001 From: alexamici Date: Mon, 18 Jan 2021 16:21:15 +0100 Subject: [PATCH 26/51] Fix RST. --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 09bd56dbe94..e873a76cab0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,8 +39,8 @@ Breaking changes always be set such that ``int64`` values can be used. In the past, no units finer than "seconds" were chosen, which would sometimes mean that ``float64`` values were required, which would lead to inaccurate I/O round-trips. -- remove deprecated ``autoclose`` kwargs from :py:func:`open_dataset` (:pull: `4725`). - By `Aureliana Barghini `_ +- remove deprecated ``autoclose`` kwargs from :py:func:`open_dataset` (:pull:`4725`). + By `Aureliana Barghini `_. 
New Features From 295606707a0464cd13727794a979f5b709cd92a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mesejo-Le=C3=B3n?= Date: Tue, 19 Jan 2021 00:59:08 +0100 Subject: [PATCH 27/51] Add drop_isel (#4819) * Closes #4658 - Use get_index(dim) in drop_sel - Add drop_isel * address issues in PR * extract dict creation out of the loop --- doc/api.rst | 2 + doc/whats-new.rst | 1 + xarray/core/dataarray.py | 22 +++++++++++ xarray/core/dataset.py | 67 +++++++++++++++++++++++++++++++++- xarray/tests/test_dataarray.py | 6 +++ xarray/tests/test_dataset.py | 36 +++++++++++++++++- 6 files changed, 131 insertions(+), 3 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index ceab7dcc976..9cb02441d37 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -126,6 +126,7 @@ Indexing Dataset.isel Dataset.sel Dataset.drop_sel + Dataset.drop_isel Dataset.head Dataset.tail Dataset.thin @@ -307,6 +308,7 @@ Indexing DataArray.isel DataArray.sel DataArray.drop_sel + DataArray.drop_isel DataArray.head DataArray.tail DataArray.thin diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e873a76cab0..16b0cbf4ea1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -80,6 +80,7 @@ Bug fixes - Expand user directory paths (e.g. ``~/``) in :py:func:`open_mfdataset` and :py:meth:`Dataset.to_zarr` (:issue:`4783`, :pull:`4795`). By `Julien Seguinot `_. +- Add :py:meth:`Dataset.drop_isel` and :py:meth:`DataArray.drop_isel` (:issue:`4658`, :pull:`4819`). By `Daniel Mesejo `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e13ea44baad..f062b70aac1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2248,6 +2248,28 @@ def drop_sel( ds = self._to_temp_dataset().drop_sel(labels, errors=errors) return self._from_temp_dataset(ds) + def drop_isel(self, indexers=None, **indexers_kwargs): + """Drop index positions from this DataArray. + + Parameters + ---------- + indexers : mapping of hashable to Any + Index locations to drop + **indexers_kwargs : {dim: position, ...}, optional + The keyword arguments form of ``dim`` and ``positions`` + + Returns + ------- + dropped : DataArray + + Raises + ------ + IndexError + """ + dataset = self._to_temp_dataset() + dataset = dataset.drop_isel(indexers=indexers, **indexers_kwargs) + return self._from_temp_dataset(dataset) + def dropna( self, dim: Hashable, how: str = "any", thresh: int = None ) -> "DataArray": diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 136edffb202..8954ebfcc38 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4054,13 +4054,78 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): labels_for_dim = [labels_for_dim] labels_for_dim = np.asarray(labels_for_dim) try: - index = self.indexes[dim] + index = self.get_index(dim) except KeyError: raise ValueError("dimension %r does not have coordinate labels" % dim) new_index = index.drop(labels_for_dim, errors=errors) ds = ds.loc[{dim: new_index}] return ds + def drop_isel(self, indexers=None, **indexers_kwargs): + """Drop index positions from this Dataset. 
+ + Parameters + ---------- + indexers : mapping of hashable to Any + Index locations to drop + **indexers_kwargs : {dim: position, ...}, optional + The keyword arguments form of ``dim`` and ``positions`` + + Returns + ------- + dropped : Dataset + + Raises + ------ + IndexError + + Examples + -------- + >>> data = np.arange(6).reshape(2, 3) + >>> labels = ["a", "b", "c"] + >>> ds = xr.Dataset({"A": (["x", "y"], data), "y": labels}) + >>> ds + + Dimensions: (x: 2, y: 3) + Coordinates: + * y (y) >> ds.drop_isel(y=[0, 2]) + + Dimensions: (x: 2, y: 1) + Coordinates: + * y (y) >> ds.drop_isel(y=1) + + Dimensions: (x: 2, y: 2) + Coordinates: + * y (y) "Dataset": diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 3ead427e22e..afb234029dc 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2327,6 +2327,12 @@ def test_drop_index_labels(self): with pytest.warns(DeprecationWarning): arr.drop([0, 1, 3], dim="y", errors="ignore") + def test_drop_index_positions(self): + arr = DataArray(np.random.randn(2, 3), dims=["x", "y"]) + actual = arr.drop_sel(y=[0, 1]) + expected = arr[:, 2:] + assert_identical(actual, expected) + def test_dropna(self): x = np.random.randn(4, 4) x[::2, 0] = np.nan diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index bd1938455b1..f71b8ec7741 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2371,8 +2371,12 @@ def test_drop_index_labels(self): data.drop(DataArray(["a", "b", "c"]), dim="x", errors="ignore") assert_identical(expected, actual) - with raises_regex(ValueError, "does not have coordinate labels"): - data.drop_sel(y=1) + actual = data.drop_sel(y=[1]) + expected = data.isel(y=[0, 2]) + assert_identical(expected, actual) + + with raises_regex(KeyError, "not found in axis"): + data.drop_sel(x=0) def test_drop_labels_by_keyword(self): data = Dataset( @@ -2410,6 +2414,34 @@ def test_drop_labels_by_keyword(self): with pytest.raises(ValueError): data.drop(dim="x", x="a") + def test_drop_labels_by_position(self): + data = Dataset( + {"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)} + ) + # Basic functionality. 
+ assert len(data.coords["x"]) == 2 + + actual = data.drop_isel(x=0) + expected = data.drop_sel(x="a") + assert_identical(expected, actual) + + actual = data.drop_isel(x=[0]) + expected = data.drop_sel(x=["a"]) + assert_identical(expected, actual) + + actual = data.drop_isel(x=[0, 1]) + expected = data.drop_sel(x=["a", "b"]) + assert_identical(expected, actual) + assert actual.coords["x"].size == 0 + + actual = data.drop_isel(x=[0, 1], y=range(0, 6, 2)) + expected = data.drop_sel(x=["a", "b"], y=range(0, 6, 2)) + assert_identical(expected, actual) + assert actual.coords["x"].size == 0 + + with pytest.raises(KeyError): + data.drop_isel(z=1) + def test_drop_dims(self): data = xr.Dataset( { From 7dbbdcafed7f796ab77039ff797bcd31d9185903 Mon Sep 17 00:00:00 2001 From: aurghs <35919497+aurghs@users.noreply.github.com> Date: Tue, 19 Jan 2021 11:10:25 +0100 Subject: [PATCH 28/51] Bugfix in list_engine (#4811) * fix list_engine * fix store engine and netcdf4 * reve * revert changes in guess_engine * add resister of backend if dependencies aere instralled * style mypy * fix import * use import instead of importlib * black * replace ImportError with ModuleNotFoundError * fix typo * fix typos * remove else * Revert remove imports inside backends functions * Revert remove imports inside cfgrib * modify check on imports inside the backends * remove not used import --- xarray/backends/cfgrib_.py | 20 ++++++++++++++++++-- xarray/backends/common.py | 4 ++++ xarray/backends/h5netcdf_.py | 20 ++++++++++++++++---- xarray/backends/netCDF4_.py | 15 +++++++++++++-- xarray/backends/plugins.py | 24 +----------------------- xarray/backends/pseudonetcdf_.py | 20 ++++++++++++++++++-- xarray/backends/pydap_.py | 20 ++++++++++++++++++-- xarray/backends/pynio_.py | 20 ++++++++++++++++++-- xarray/backends/scipy_.py | 20 +++++++++++++++++--- xarray/backends/store.py | 5 ++++- xarray/backends/zarr.py | 15 +++++++++++++-- xarray/tests/test_plugins.py | 2 +- 12 files changed, 141 insertions(+), 44 deletions(-) diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index d4933e370c7..4a0ac7d67f9 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -5,10 +5,23 @@ from ..core import indexing from ..core.utils import Frozen, FrozenDict, close_on_error from ..core.variable import Variable -from .common import AbstractDataStore, BackendArray, BackendEntrypoint +from .common import ( + BACKEND_ENTRYPOINTS, + AbstractDataStore, + BackendArray, + BackendEntrypoint, +) from .locks import SerializableLock, ensure_lock from .store import open_backend_dataset_store +try: + import cfgrib + + has_cfgrib = True +except ModuleNotFoundError: + has_cfgrib = False + + # FIXME: Add a dedicated lock, even if ecCodes is supposed to be thread-safe # in most circumstances. 
See: # https://confluence.ecmwf.int/display/ECC/Frequently+Asked+Questions @@ -38,7 +51,6 @@ class CfGribDataStore(AbstractDataStore): """ def __init__(self, filename, lock=None, **backend_kwargs): - import cfgrib if lock is None: lock = ECCODES_LOCK @@ -129,3 +141,7 @@ def open_backend_dataset_cfgrib( cfgrib_backend = BackendEntrypoint( open_dataset=open_backend_dataset_cfgrib, guess_can_open=guess_can_open_cfgrib ) + + +if has_cfgrib: + BACKEND_ENTRYPOINTS["cfgrib"] = cfgrib_backend diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 72a63957662..adb70658fab 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -1,6 +1,7 @@ import logging import time import traceback +from typing import Dict import numpy as np @@ -349,3 +350,6 @@ def __init__(self, open_dataset, open_dataset_parameters=None, guess_can_open=No self.open_dataset = open_dataset self.open_dataset_parameters = open_dataset_parameters self.guess_can_open = guess_can_open + + +BACKEND_ENTRYPOINTS: Dict[str, BackendEntrypoint] = {} diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index b2996369ee7..562600de4b6 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -8,7 +8,12 @@ from ..core import indexing from ..core.utils import FrozenDict, is_remote_uri, read_magic_number from ..core.variable import Variable -from .common import BackendEntrypoint, WritableCFDataStore, find_root_and_group +from .common import ( + BACKEND_ENTRYPOINTS, + BackendEntrypoint, + WritableCFDataStore, + find_root_and_group, +) from .file_manager import CachingFileManager, DummyFileManager from .locks import HDF5_LOCK, combine_locks, ensure_lock, get_write_lock from .netCDF4_ import ( @@ -20,6 +25,13 @@ ) from .store import open_backend_dataset_store +try: + import h5netcdf + + has_h5netcdf = True +except ModuleNotFoundError: + has_h5netcdf = False + class H5NetCDFArrayWrapper(BaseNetCDF4Array): def get_array(self, needs_lock=True): @@ -85,8 +97,6 @@ class H5NetCDFStore(WritableCFDataStore): def __init__(self, manager, group=None, mode=None, lock=HDF5_LOCK, autoclose=False): - import h5netcdf - if isinstance(manager, (h5netcdf.File, h5netcdf.Group)): if group is None: root, group = find_root_and_group(manager) @@ -122,7 +132,6 @@ def open( invalid_netcdf=None, phony_dims=None, ): - import h5netcdf if isinstance(filename, bytes): raise ValueError( @@ -375,3 +384,6 @@ def open_backend_dataset_h5netcdf( h5netcdf_backend = BackendEntrypoint( open_dataset=open_backend_dataset_h5netcdf, guess_can_open=guess_can_open_h5netcdf ) + +if has_h5netcdf: + BACKEND_ENTRYPOINTS["h5netcdf"] = h5netcdf_backend diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 0e35270ea9a..5bb4eec837b 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -12,6 +12,7 @@ from ..core.utils import FrozenDict, close_on_error, is_remote_uri from ..core.variable import Variable from .common import ( + BACKEND_ENTRYPOINTS, BackendArray, BackendEntrypoint, WritableCFDataStore, @@ -23,6 +24,14 @@ from .netcdf3 import encode_nc3_attr_value, encode_nc3_variable from .store import open_backend_dataset_store +try: + import netCDF4 + + has_netcdf4 = True +except ModuleNotFoundError: + has_netcdf4 = False + + # This lookup table maps from dtype.byteorder to a readable endian # string used by netCDF4. 
_endian_lookup = {"=": "native", ">": "big", "<": "little", "|": "native"} @@ -298,7 +307,6 @@ class NetCDF4DataStore(WritableCFDataStore): def __init__( self, manager, group=None, mode=None, lock=NETCDF4_PYTHON_LOCK, autoclose=False ): - import netCDF4 if isinstance(manager, netCDF4.Dataset): if group is None: @@ -335,7 +343,6 @@ def open( lock_maker=None, autoclose=False, ): - import netCDF4 if isinstance(filename, pathlib.Path): filename = os.fspath(filename) @@ -563,3 +570,7 @@ def open_backend_dataset_netcdf4( netcdf4_backend = BackendEntrypoint( open_dataset=open_backend_dataset_netcdf4, guess_can_open=guess_can_open_netcdf4 ) + + +if has_netcdf4: + BACKEND_ENTRYPOINTS["netcdf4"] = netcdf4_backend diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index d5799a78f91..6d3ec7e7da5 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -2,33 +2,11 @@ import inspect import itertools import logging -import typing as T import warnings import pkg_resources -from .cfgrib_ import cfgrib_backend -from .common import BackendEntrypoint -from .h5netcdf_ import h5netcdf_backend -from .netCDF4_ import netcdf4_backend -from .pseudonetcdf_ import pseudonetcdf_backend -from .pydap_ import pydap_backend -from .pynio_ import pynio_backend -from .scipy_ import scipy_backend -from .store import store_backend -from .zarr import zarr_backend - -BACKEND_ENTRYPOINTS: T.Dict[str, BackendEntrypoint] = { - "store": store_backend, - "netcdf4": netcdf4_backend, - "h5netcdf": h5netcdf_backend, - "scipy": scipy_backend, - "pseudonetcdf": pseudonetcdf_backend, - "zarr": zarr_backend, - "cfgrib": cfgrib_backend, - "pydap": pydap_backend, - "pynio": pynio_backend, -} +from .common import BACKEND_ENTRYPOINTS def remove_duplicates(backend_entrypoints): diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index d9128d1d503..c2bfd519bed 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -3,11 +3,24 @@ from ..core import indexing from ..core.utils import Frozen, FrozenDict, close_on_error from ..core.variable import Variable -from .common import AbstractDataStore, BackendArray, BackendEntrypoint +from .common import ( + BACKEND_ENTRYPOINTS, + AbstractDataStore, + BackendArray, + BackendEntrypoint, +) from .file_manager import CachingFileManager from .locks import HDF5_LOCK, NETCDFC_LOCK, combine_locks, ensure_lock from .store import open_backend_dataset_store +try: + from PseudoNetCDF import pncopen + + has_pseudonetcdf = True +except ModuleNotFoundError: + has_pseudonetcdf = False + + # psuedonetcdf can invoke netCDF libraries internally PNETCDF_LOCK = combine_locks([HDF5_LOCK, NETCDFC_LOCK]) @@ -40,7 +53,6 @@ class PseudoNetCDFDataStore(AbstractDataStore): @classmethod def open(cls, filename, lock=None, mode=None, **format_kwargs): - from PseudoNetCDF import pncopen keywords = {"kwargs": format_kwargs} # only include mode if explicitly passed @@ -138,3 +150,7 @@ def open_backend_dataset_pseudonetcdf( open_dataset=open_backend_dataset_pseudonetcdf, open_dataset_parameters=open_dataset_parameters, ) + + +if has_pseudonetcdf: + BACKEND_ENTRYPOINTS["pseudonetcdf"] = pseudonetcdf_backend diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 4995045a739..c5ce943a10a 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -4,9 +4,22 @@ from ..core.pycompat import integer_types from ..core.utils import Frozen, FrozenDict, close_on_error, is_dict_like, is_remote_uri from ..core.variable import 
Variable -from .common import AbstractDataStore, BackendArray, BackendEntrypoint, robust_getitem +from .common import ( + BACKEND_ENTRYPOINTS, + AbstractDataStore, + BackendArray, + BackendEntrypoint, + robust_getitem, +) from .store import open_backend_dataset_store +try: + import pydap.client + + has_pydap = True +except ModuleNotFoundError: + has_pydap = False + class PydapArrayWrapper(BackendArray): def __init__(self, array): @@ -74,7 +87,6 @@ def __init__(self, ds): @classmethod def open(cls, url, session=None): - import pydap.client ds = pydap.client.open_url(url, session=session) return cls(ds) @@ -133,3 +145,7 @@ def open_backend_dataset_pydap( pydap_backend = BackendEntrypoint( open_dataset=open_backend_dataset_pydap, guess_can_open=guess_can_open_pydap ) + + +if has_pydap: + BACKEND_ENTRYPOINTS["pydap"] = pydap_backend diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py index dc6c47935e8..261daa69880 100644 --- a/xarray/backends/pynio_.py +++ b/xarray/backends/pynio_.py @@ -3,11 +3,24 @@ from ..core import indexing from ..core.utils import Frozen, FrozenDict, close_on_error from ..core.variable import Variable -from .common import AbstractDataStore, BackendArray, BackendEntrypoint +from .common import ( + BACKEND_ENTRYPOINTS, + AbstractDataStore, + BackendArray, + BackendEntrypoint, +) from .file_manager import CachingFileManager from .locks import HDF5_LOCK, NETCDFC_LOCK, SerializableLock, combine_locks, ensure_lock from .store import open_backend_dataset_store +try: + import Nio + + has_pynio = True +except ModuleNotFoundError: + has_pynio = False + + # PyNIO can invoke netCDF libraries internally # Add a dedicated lock just in case NCL as well isn't thread-safe. NCL_LOCK = SerializableLock() @@ -45,7 +58,6 @@ class NioDataStore(AbstractDataStore): """Store for accessing datasets via PyNIO""" def __init__(self, filename, mode="r", lock=None, **kwargs): - import Nio if lock is None: lock = PYNIO_LOCK @@ -119,3 +131,7 @@ def open_backend_dataset_pynio( pynio_backend = BackendEntrypoint(open_dataset=open_backend_dataset_pynio) + + +if has_pynio: + BACKEND_ENTRYPOINTS["pynio"] = pynio_backend diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 873a91f9c07..df51d07d686 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -6,12 +6,24 @@ from ..core.indexing import NumpyIndexingAdapter from ..core.utils import Frozen, FrozenDict, close_on_error, read_magic_number from ..core.variable import Variable -from .common import BackendArray, BackendEntrypoint, WritableCFDataStore +from .common import ( + BACKEND_ENTRYPOINTS, + BackendArray, + BackendEntrypoint, + WritableCFDataStore, +) from .file_manager import CachingFileManager, DummyFileManager from .locks import ensure_lock, get_write_lock from .netcdf3 import encode_nc3_attr_value, encode_nc3_variable, is_valid_nc3_name from .store import open_backend_dataset_store +try: + import scipy.io + + has_scipy = True +except ModuleNotFoundError: + has_scipy = False + def _decode_string(s): if isinstance(s, bytes): @@ -61,8 +73,6 @@ def __setitem__(self, key, value): def _open_scipy_netcdf(filename, mode, mmap, version): import gzip - import scipy.io - # if the string ends with .gz, then gunzip and open as netcdf file if isinstance(filename, str) and filename.endswith(".gz"): try: @@ -271,3 +281,7 @@ def open_backend_dataset_scipy( scipy_backend = BackendEntrypoint( open_dataset=open_backend_dataset_scipy, guess_can_open=guess_can_open_scipy ) + + +if has_scipy: + 
BACKEND_ENTRYPOINTS["scipy"] = scipy_backend diff --git a/xarray/backends/store.py b/xarray/backends/store.py index 20fa13af202..66fca0d39c3 100644 --- a/xarray/backends/store.py +++ b/xarray/backends/store.py @@ -1,6 +1,6 @@ from .. import conventions from ..core.dataset import Dataset -from .common import AbstractDataStore, BackendEntrypoint +from .common import BACKEND_ENTRYPOINTS, AbstractDataStore, BackendEntrypoint def guess_can_open_store(store_spec): @@ -44,3 +44,6 @@ def open_backend_dataset_store( store_backend = BackendEntrypoint( open_dataset=open_backend_dataset_store, guess_can_open=guess_can_open_store ) + + +BACKEND_ENTRYPOINTS["store"] = store_backend diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 3b4b3a3d9d5..ceeb23cac9b 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -9,6 +9,7 @@ from ..core.utils import FrozenDict, HiddenKeyDict, close_on_error from ..core.variable import Variable from .common import ( + BACKEND_ENTRYPOINTS, AbstractWritableDataStore, BackendArray, BackendEntrypoint, @@ -16,6 +17,14 @@ ) from .store import open_backend_dataset_store +try: + import zarr + + has_zarr = True +except ModuleNotFoundError: + has_zarr = False + + # need some special secret attributes to tell us the dimensions DIMENSION_KEY = "_ARRAY_DIMENSIONS" @@ -289,7 +298,6 @@ def open_group( append_dim=None, write_region=None, ): - import zarr # zarr doesn't support pathlib.Path objects yet. zarr-python#601 if isinstance(store, pathlib.Path): @@ -409,7 +417,6 @@ def store( dimension on which the zarray will be appended only needed in append mode """ - import zarr existing_variables = { vn for vn in variables if _encode_variable_name(vn) in self.ds @@ -705,3 +712,7 @@ def open_backend_dataset_zarr( zarr_backend = BackendEntrypoint(open_dataset=open_backend_dataset_zarr) + + +if has_zarr: + BACKEND_ENTRYPOINTS["zarr"] = zarr_backend diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index 110ef47209f..38ebce6da1a 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -92,7 +92,7 @@ def test_set_missing_parameters_raise_error(): with pytest.raises(TypeError): plugins.set_missing_parameters({"engine": backend}) - backend = plugins.BackendEntrypoint( + backend = common.BackendEntrypoint( dummy_open_dataset_kwargs, ("filename_or_obj", "decoder") ) plugins.set_missing_parameters({"engine": backend}) From 93ea177bdd49e205047a1416c2342fb645afafa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mesejo-Le=C3=B3n?= Date: Wed, 20 Jan 2021 05:12:06 +0100 Subject: [PATCH 29/51] fix issues in drop_sel and drop_isel (#4828) --- xarray/core/dataset.py | 18 +++++++++++++----- xarray/tests/test_dataarray.py | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 8954ebfcc38..874e26ff465 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4021,9 +4021,17 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): Examples -------- - >>> data = np.random.randn(2, 3) + >>> data = np.arange(6).reshape(2, 3) >>> labels = ["a", "b", "c"] >>> ds = xr.Dataset({"A": (["x", "y"], data), "y": labels}) + >>> ds + + Dimensions: (x: 2, y: 3) + Coordinates: + * y (y) >> ds.drop_sel(y=["a", "c"]) Dimensions: (x: 2, y: 1) @@ -4031,7 +4039,7 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): * y (y) >> ds.drop_sel(y="b") Dimensions: (x: 2, y: 2) @@ -4039,12 +4047,12 @@ def drop_sel(self, labels=None, *, errors="raise", 
**labels_kwargs): * y (y) Date: Wed, 20 Jan 2021 23:24:04 -0800 Subject: [PATCH 30/51] Move skip ci instructions to contributing guide (#4829) --- .github/PULL_REQUEST_TEMPLATE.md | 8 -------- doc/contributing.rst | 1 + 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 09ef053bb39..c7ea19a53cb 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -5,11 +5,3 @@ - [ ] Passes `pre-commit run --all-files` - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` - [ ] New functions/methods are listed in `api.rst` - - - -

- Overriding CI behaviors -

- By default, the upstream dev CI is disabled on pull request and push events. You can override this behavior per commit by adding a [test-upstream] tag to the first line of the commit message. For documentation-only commits, you can skip the CI per commit by adding a [skip-ci] tag to the first line of the commit message -
diff --git a/doc/contributing.rst b/doc/contributing.rst index 9c4ce5a0af2..439791cbbd6 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -836,6 +836,7 @@ PR checklist - Write new tests if needed. See `"Test-driven development/code writing" `_. - Test the code using `Pytest `_. Running all tests (type ``pytest`` in the root directory) takes a while, so feel free to only run the tests you think are needed based on your PR (example: ``pytest xarray/tests/test_dataarray.py``). CI will catch any failing tests. + - By default, the upstream dev CI is disabled on pull request and push events. You can override this behavior per commit by adding a [test-upstream] tag to the first line of the commit message. For documentation-only commits, you can skip the CI per commit by adding a "[skip-ci]" tag to the first line of the commit message. - **Properly format your code** and verify that it passes the formatting guidelines set by `Black `_ and `Flake8 `_. See `"Code formatting" `_. You can use `pre-commit `_ to run these automatically on each commit. From d555172c7d069ca9cf7a9a32bfd5f422be133861 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 24 Jan 2021 15:46:48 -0800 Subject: [PATCH 31/51] Allow swap_dims to take kwargs (#4841) --- doc/whats-new.rst | 5 ++++- xarray/core/dataarray.py | 9 ++++++++- xarray/core/dataset.py | 10 +++++++++- xarray/tests/test_dataarray.py | 10 ++++++++++ xarray/tests/test_dataset.py | 7 +++++++ 5 files changed, 38 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 16b0cbf4ea1..0f2bf423449 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -46,7 +46,10 @@ Breaking changes New Features ~~~~~~~~~~~~ - Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with large number of variables. - By `Deepak Cherian `_ + By `Deepak Cherian `_. +- :py:meth:`DataArray.swap_dims` & :py:meth:`Dataset.swap_dims` now accept dims + in the form of kwargs as well as a dict, like most similar methods. + By `Maximilian Roos `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f062b70aac1..2fef3edbc43 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1699,7 +1699,9 @@ def rename( new_name_or_name_dict = cast(Hashable, new_name_or_name_dict) return self._replace(name=new_name_or_name_dict) - def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "DataArray": + def swap_dims( + self, dims_dict: Mapping[Hashable, Hashable] = None, **dims_kwargs + ) -> "DataArray": """Returns a new DataArray with swapped dimensions. Parameters @@ -1708,6 +1710,10 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "DataArray": Dictionary whose keys are current dimension names and whose values are new names. + **dim_kwargs : {dim: , ...}, optional + The keyword arguments form of ``dims_dict``. + One of dims_dict or dims_kwargs must be provided. 
+ Returns ------- swapped : DataArray @@ -1749,6 +1755,7 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "DataArray": DataArray.rename Dataset.swap_dims """ + dims_dict = either_dict_or_kwargs(dims_dict, dims_kwargs, "swap_dims") ds = self._to_temp_dataset().swap_dims(dims_dict) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 874e26ff465..f8718377104 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3155,7 +3155,9 @@ def rename_vars( ) return self._replace(variables, coord_names, dims=dims, indexes=indexes) - def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "Dataset": + def swap_dims( + self, dims_dict: Mapping[Hashable, Hashable] = None, **dims_kwargs + ) -> "Dataset": """Returns a new object with swapped dimensions. Parameters @@ -3164,6 +3166,10 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "Dataset": Dictionary whose keys are current dimension names and whose values are new names. + **dim_kwargs : {existing_dim: new_dim, ...}, optional + The keyword arguments form of ``dims_dict``. + One of dims_dict or dims_kwargs must be provided. + Returns ------- swapped : Dataset @@ -3214,6 +3220,8 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "Dataset": """ # TODO: deprecate this method in favor of a (less confusing) # rename_dims() method that only renames dimensions. + + dims_dict = either_dict_or_kwargs(dims_dict, dims_kwargs, "swap_dims") for k, v in dims_dict.items(): if k not in self.dims: raise ValueError( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index adf282ff34c..fc84687511e 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1639,6 +1639,16 @@ def test_swap_dims(self): expected.indexes[dim_name], actual.indexes[dim_name] ) + # as kwargs + array = DataArray(np.random.randn(3), {"x": list("abc")}, "x") + expected = DataArray(array.values, {"x": ("y", list("abc"))}, dims="y") + actual = array.swap_dims(x="y") + assert_identical(expected, actual) + for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): + pd.testing.assert_index_equal( + expected.indexes[dim_name], actual.indexes[dim_name] + ) + # multiindex case idx = pd.MultiIndex.from_arrays([list("aab"), list("yzz")], names=["y1", "y2"]) array = DataArray(np.random.randn(3), {"y": ("x", idx)}, "x") diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index f71b8ec7741..fed9098701b 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2748,6 +2748,13 @@ def test_swap_dims(self): actual = original.swap_dims({"x": "u"}) assert_identical(expected, actual) + # as kwargs + expected = Dataset( + {"y": ("u", list("abc")), "z": 42}, coords={"x": ("u", [1, 2, 3])} + ) + actual = original.swap_dims(x="u") + assert_identical(expected, actual) + # handle multiindex case idx = pd.MultiIndex.from_arrays([list("aab"), list("yzz")], names=["y1", "y2"]) original = Dataset({"x": [1, 2, 3], "y": ("x", idx), "z": 42}) From a0c71c1508f34345ad7eef244cdbbe224e031c1b Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 24 Jan 2021 15:48:04 -0800 Subject: [PATCH 32/51] Faster unstacking (#4746) * Significantly improve unstacking performance * Hack to get sparse tests passing * Use the existing unstack function for dask & sparse * Add whatsnew * Require numpy 1.17 for new unstack * Also special case pint * Revert "Also special case pint" 
This reverts commit b33adedbfbd92df0f4188568691c7e2915bf8c19. * Only run fast unstack on numpy arrays * Update asvs for unstacking * Update whatsnew --- asv_bench/benchmarks/unstacking.py | 15 ++++-- doc/whats-new.rst | 7 ++- xarray/core/dataset.py | 75 ++++++++++++++++++++++++++++-- xarray/core/variable.py | 68 +++++++++++++++++++++++++-- 4 files changed, 153 insertions(+), 12 deletions(-) diff --git a/asv_bench/benchmarks/unstacking.py b/asv_bench/benchmarks/unstacking.py index 342475b96df..8d0c3932870 100644 --- a/asv_bench/benchmarks/unstacking.py +++ b/asv_bench/benchmarks/unstacking.py @@ -7,18 +7,23 @@ class Unstacking: def setup(self): - data = np.random.RandomState(0).randn(1, 1000, 500) - self.ds = xr.DataArray(data).stack(flat_dim=["dim_1", "dim_2"]) + data = np.random.RandomState(0).randn(500, 1000) + self.da_full = xr.DataArray(data, dims=list("ab")).stack(flat_dim=[...]) + self.da_missing = self.da_full[:-1] + self.df_missing = self.da_missing.to_pandas() def time_unstack_fast(self): - self.ds.unstack("flat_dim") + self.da_full.unstack("flat_dim") def time_unstack_slow(self): - self.ds[:, ::-1].unstack("flat_dim") + self.da_missing.unstack("flat_dim") + + def time_unstack_pandas_slow(self): + self.df_missing.unstack() class UnstackingDask(Unstacking): def setup(self, *args, **kwargs): requires_dask() super().setup(**kwargs) - self.ds = self.ds.chunk({"flat_dim": 50}) + self.da_full = self.da_full.chunk({"flat_dim": 50}) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0f2bf423449..488d8baa650 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -17,7 +17,7 @@ What's New .. _whats-new.0.16.3: -v0.16.3 (unreleased) +v0.17.0 (unreleased) -------------------- Breaking changes @@ -45,6 +45,11 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Significantly higher ``unstack`` performance on numpy-backed arrays which + contain missing values; 8x faster in our benchmark, and 2x faster than pandas. + (:pull:`4746`); + By `Maximilian Roos `_. + - Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with large number of variables. By `Deepak Cherian `_. 
- :py:meth:`DataArray.swap_dims` & :py:meth:`Dataset.swap_dims` now accept dims diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f8718377104..a73e299e27a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4,6 +4,7 @@ import sys import warnings from collections import defaultdict +from distutils.version import LooseVersion from html import escape from numbers import Number from operator import methodcaller @@ -79,7 +80,7 @@ ) from .missing import get_clean_interp_index from .options import OPTIONS, _get_keep_attrs -from .pycompat import is_duck_dask_array +from .pycompat import is_duck_dask_array, sparse_array_type from .utils import ( Default, Frozen, @@ -3715,7 +3716,40 @@ def ensure_stackable(val): return data_array - def _unstack_once(self, dim: Hashable, fill_value, sparse) -> "Dataset": + def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset": + index = self.get_index(dim) + index = remove_unused_levels_categories(index) + + variables: Dict[Hashable, Variable] = {} + indexes = {k: v for k, v in self.indexes.items() if k != dim} + + for name, var in self.variables.items(): + if name != dim: + if dim in var.dims: + if isinstance(fill_value, Mapping): + fill_value_ = fill_value[name] + else: + fill_value_ = fill_value + + variables[name] = var._unstack_once( + index=index, dim=dim, fill_value=fill_value_ + ) + else: + variables[name] = var + + for name, lev in zip(index.names, index.levels): + variables[name] = IndexVariable(name, lev) + indexes[name] = lev + + coord_names = set(self._coord_names) - {dim} | set(index.names) + + return self._replace_with_new_dims( + variables, coord_names=coord_names, indexes=indexes + ) + + def _unstack_full_reindex( + self, dim: Hashable, fill_value, sparse: bool + ) -> "Dataset": index = self.get_index(dim) index = remove_unused_levels_categories(index) full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) @@ -3812,7 +3846,38 @@ def unstack( result = self.copy(deep=False) for dim in dims: - result = result._unstack_once(dim, fill_value, sparse) + + if ( + # Dask arrays don't support assignment by index, which the fast unstack + # function requires. + # https://github.com/pydata/xarray/pull/4746#issuecomment-753282125 + any(is_duck_dask_array(v.data) for v in self.variables.values()) + # Sparse doesn't currently support (though we could special-case + # it) + # https://github.com/pydata/sparse/issues/422 + or any( + isinstance(v.data, sparse_array_type) + for v in self.variables.values() + ) + or sparse + # numpy full_like only added `shape` in 1.17 + or LooseVersion(np.__version__) < LooseVersion("1.17") + # Until https://github.com/pydata/xarray/pull/4751 is resolved, + # we check explicitly whether it's a numpy array. Once that is + # resolved, explicitly exclude pint arrays. + # # pint doesn't implement `np.full_like` in a way that's + # # currently compatible. 
+ # # https://github.com/pydata/xarray/pull/4746#issuecomment-753425173 + # # or any( + # # isinstance(v.data, pint_array_type) for v in self.variables.values() + # # ) + or any( + not isinstance(v.data, np.ndarray) for v in self.variables.values() + ) + ): + result = result._unstack_full_reindex(dim, fill_value, sparse) + else: + result = result._unstack_once(dim, fill_value) return result def update(self, other: "CoercibleMapping") -> "Dataset": @@ -4982,6 +5047,10 @@ def _set_numpy_data_from_dataframe( self[name] = (dims, values) return + # NB: similar, more general logic, now exists in + # variable.unstack_once; we could consider combining them at some + # point. + shape = tuple(lev.size for lev in idx.levels) indexer = tuple(idx.codes) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 797de65bbcf..64c1895da59 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -10,6 +10,7 @@ Any, Dict, Hashable, + List, Mapping, Optional, Sequence, @@ -1488,7 +1489,7 @@ def set_dims(self, dims, shape=None): ) return expanded_var.transpose(*dims) - def _stack_once(self, dims, new_dim): + def _stack_once(self, dims: List[Hashable], new_dim: Hashable): if not set(dims) <= set(self.dims): raise ValueError("invalid existing dimensions: %s" % dims) @@ -1544,7 +1545,15 @@ def stack(self, dimensions=None, **dimensions_kwargs): result = result._stack_once(dims, new_dim) return result - def _unstack_once(self, dims, old_dim): + def _unstack_once_full( + self, dims: Mapping[Hashable, int], old_dim: Hashable + ) -> "Variable": + """ + Unstacks the variable without needing an index. + + Unlike `_unstack_once`, this function requires the existing dimension to + contain the full product of the new dimensions. + """ new_dim_names = tuple(dims.keys()) new_dim_sizes = tuple(dims.values()) @@ -1573,6 +1582,53 @@ def _unstack_once(self, dims, old_dim): return Variable(new_dims, new_data, self._attrs, self._encoding, fastpath=True) + def _unstack_once( + self, + index: pd.MultiIndex, + dim: Hashable, + fill_value=dtypes.NA, + ) -> "Variable": + """ + Unstacks this variable given an index to unstack and the name of the + dimension to which the index refers. + """ + + reordered = self.transpose(..., dim) + + new_dim_sizes = [lev.size for lev in index.levels] + new_dim_names = index.names + indexer = index.codes + + # Potentially we could replace `len(other_dims)` with just `-1` + other_dims = [d for d in self.dims if d != dim] + new_shape = list(reordered.shape[: len(other_dims)]) + new_dim_sizes + new_dims = reordered.dims[: len(other_dims)] + new_dim_names + + if fill_value is dtypes.NA: + is_missing_values = np.prod(new_shape) > np.prod(self.shape) + if is_missing_values: + dtype, fill_value = dtypes.maybe_promote(self.dtype) + else: + dtype = self.dtype + fill_value = dtypes.get_fill_value(dtype) + else: + dtype = self.dtype + + # Currently fails on sparse due to https://github.com/pydata/sparse/issues/422 + data = np.full_like( + self.data, + fill_value=fill_value, + shape=new_shape, + dtype=dtype, + ) + + # Indexer is a list of lists of locations. Each list is the locations + # on the new dimension. This is robust to the data being sparse; in that + # case the destinations will be NaN / zero. + data[(..., *indexer)] = reordered + + return self._replace(dims=new_dims, data=data) + def unstack(self, dimensions=None, **dimensions_kwargs): """ Unstack an existing dimension into multiple new dimensions. 
@@ -1580,6 +1636,10 @@ def unstack(self, dimensions=None, **dimensions_kwargs): New dimensions will be added at the end, and the order of the data along each new dimension will be in contiguous (C) order. + Note that unlike ``DataArray.unstack`` and ``Dataset.unstack``, this + method requires the existing dimension to contain the full product of + the new dimensions. + Parameters ---------- dimensions : mapping of hashable to mapping of hashable to int @@ -1598,11 +1658,13 @@ def unstack(self, dimensions=None, **dimensions_kwargs): See also -------- Variable.stack + DataArray.unstack + Dataset.unstack """ dimensions = either_dict_or_kwargs(dimensions, dimensions_kwargs, "unstack") result = self for old_dim, dims in dimensions.items(): - result = result._unstack_once(dims, old_dim) + result = result._unstack_once_full(dims, old_dim) return result def fillna(self, value): From d524d72c6cc97a87787117dd39c642254754bac4 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 26 Jan 2021 18:30:19 +0100 Subject: [PATCH 33/51] iris update doc url (#4845) --- doc/conf.py | 2 +- doc/faq.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index d83e966f3fa..14b28b4e471 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -411,7 +411,7 @@ intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), - "iris": ("https://scitools.org.uk/iris/docs/latest", None), + "iris": ("https://scitools-iris.readthedocs.io/en/latest", None), "numpy": ("https://numpy.org/doc/stable", None), "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), "numba": ("https://numba.pydata.org/numba-doc/latest", None), diff --git a/doc/faq.rst b/doc/faq.rst index a2b8be47e06..a2151cc4b37 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -166,7 +166,7 @@ different approaches to handling metadata: Iris strictly interprets `CF conventions`_. Iris particularly shines at mapping, thanks to its integration with Cartopy_. -.. _Iris: http://scitools.org.uk/iris/ +.. _Iris: https://scitools-iris.readthedocs.io/en/stable/ .. _Cartopy: http://scitools.org.uk/cartopy/docs/latest/ `UV-CDAT`__ is another Python library that implements in-memory netCDF-like From a4bb7e1dc80c7e413dd9b459671d10a666b395e7 Mon Sep 17 00:00:00 2001 From: Michael Mann Date: Tue, 26 Jan 2021 12:53:46 -0500 Subject: [PATCH 34/51] Update related-projects.rst (#4844) adding mention of geowombat for remote sensing applications Co-authored-by: Keewis --- doc/related-projects.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 456cb64197f..0a010195d6d 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -15,6 +15,7 @@ Geosciences - `aospy `_: Automated analysis and management of gridded climate data. - `climpred `_: Analysis of ensemble forecast models for climate prediction. - `geocube `_: Tool to convert geopandas vector data into rasterized xarray data. +- `GeoWombat `_: Utilities for analysis of remotely sensed and gridded raster data at scale (easily tame Landsat, Sentinel, Quickbird, and PlanetScope). - `infinite-diff `_: xarray-based finite-differencing, focused on gridded climate/meterology data - `marc_analysis `_: Analysis package for CESM/MARC experiments and output. - `MetPy `_: A collection of tools in Python for reading, visualizing, and performing calculations with weather data. 
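Before the next patch, a short usage-level sketch of the unstack fast path from
#4746 above, with a toy dataset made up for illustration. With plain numpy-backed
data ``Dataset.unstack`` now takes the scatter-assignment route; dask- or
sparse-backed data (and anything else that is not a plain numpy array) still goes
through the full MultiIndex reindex, per the dispatch in the ``Dataset.unstack``
hunk further up:

    import pandas as pd
    import xarray as xr

    index = pd.MultiIndex.from_tuples(
        [("a", 0), ("a", 1), ("b", 0)], names=["x", "y"]
    )
    ds = xr.Dataset({"v": ("z", [1.0, 2.0, 3.0])}, coords={"z": index})

    # numpy-backed data hits the fast path; the missing ("b", 1) cell is
    # filled with fill_value (NaN by default)
    unstacked = ds.unstack("z", fill_value=-1.0)
    # unstacked["v"].values -> [[ 1.,  2.],
    #                           [ 3., -1.]]

Chunking the same dataset first (``ds.chunk({"z": 2}).unstack("z")``) would take the
``_unstack_full_reindex`` route instead, since dask arrays do not support the
assignment by index that the fast path relies on.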
From 9fea799761ae178e586c59d1a67f480abecf2637 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Wed, 27 Jan 2021 09:05:30 +0100 Subject: [PATCH 35/51] weighted: small improvements (#4818) * weighted: small improvements * use T_DataWithCoords --- xarray/core/common.py | 11 ++++++++- xarray/core/weighted.py | 49 +++++++++++++++-------------------------- 2 files changed, 28 insertions(+), 32 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index a69ba03a7a4..c5836c68759 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -3,6 +3,7 @@ from html import escape from textwrap import dedent from typing import ( + TYPE_CHECKING, Any, Callable, Dict, @@ -32,6 +33,12 @@ ALL_DIMS = ... +if TYPE_CHECKING: + from .dataarray import DataArray + from .weighted import Weighted + +T_DataWithCoords = TypeVar("T_DataWithCoords", bound="DataWithCoords") + C = TypeVar("C") T = TypeVar("T") @@ -772,7 +779,9 @@ def groupby_bins( }, ) - def weighted(self, weights): + def weighted( + self: T_DataWithCoords, weights: "DataArray" + ) -> "Weighted[T_DataWithCoords]": """ Weighted operations. diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index dbd4e1ad103..449a7200ee7 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -1,13 +1,16 @@ -from typing import TYPE_CHECKING, Hashable, Iterable, Optional, Union, overload +from typing import TYPE_CHECKING, Generic, Hashable, Iterable, Optional, TypeVar, Union from . import duck_array_ops from .computation import dot -from .options import _get_keep_attrs from .pycompat import is_duck_dask_array if TYPE_CHECKING: + from .common import DataWithCoords # noqa: F401 from .dataarray import DataArray, Dataset +T_DataWithCoords = TypeVar("T_DataWithCoords", bound="DataWithCoords") + + _WEIGHTED_REDUCE_DOCSTRING_TEMPLATE = """ Reduce this {cls}'s data by a weighted ``{fcn}`` along some dimension(s). @@ -56,7 +59,7 @@ """ -class Weighted: +class Weighted(Generic[T_DataWithCoords]): """An object that implements weighted operations. You should create a Weighted object by using the ``DataArray.weighted`` or @@ -70,15 +73,7 @@ class Weighted: __slots__ = ("obj", "weights") - @overload - def __init__(self, obj: "DataArray", weights: "DataArray") -> None: - ... - - @overload - def __init__(self, obj: "Dataset", weights: "DataArray") -> None: - ... 
- - def __init__(self, obj, weights): + def __init__(self, obj: T_DataWithCoords, weights: "DataArray"): """ Create a Weighted object @@ -121,8 +116,8 @@ def _weight_check(w): else: _weight_check(weights.data) - self.obj = obj - self.weights = weights + self.obj: T_DataWithCoords = obj + self.weights: "DataArray" = weights @staticmethod def _reduce( @@ -146,7 +141,6 @@ def _reduce( # `dot` does not broadcast arrays, so this avoids creating a large # DataArray (if `weights` has additional dimensions) - # maybe add fasttrack (`(da * weights).sum(dims=dim, skipna=skipna)`) return dot(da, weights, dims=dim) def _sum_of_weights( @@ -203,7 +197,7 @@ def sum_of_weights( self, dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, keep_attrs: Optional[bool] = None, - ) -> Union["DataArray", "Dataset"]: + ) -> T_DataWithCoords: return self._implementation( self._sum_of_weights, dim=dim, keep_attrs=keep_attrs @@ -214,7 +208,7 @@ def sum( dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, - ) -> Union["DataArray", "Dataset"]: + ) -> T_DataWithCoords: return self._implementation( self._weighted_sum, dim=dim, skipna=skipna, keep_attrs=keep_attrs @@ -225,7 +219,7 @@ def mean( dim: Optional[Union[Hashable, Iterable[Hashable]]] = None, skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, - ) -> Union["DataArray", "Dataset"]: + ) -> T_DataWithCoords: return self._implementation( self._weighted_mean, dim=dim, skipna=skipna, keep_attrs=keep_attrs @@ -239,22 +233,15 @@ def __repr__(self): return f"{klass} with weights along dimensions: {weight_dims}" -class DataArrayWeighted(Weighted): - def _implementation(self, func, dim, **kwargs): - - keep_attrs = kwargs.pop("keep_attrs") - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - - weighted = func(self.obj, dim=dim, **kwargs) - - if keep_attrs: - weighted.attrs = self.obj.attrs +class DataArrayWeighted(Weighted["DataArray"]): + def _implementation(self, func, dim, **kwargs) -> "DataArray": - return weighted + dataset = self.obj._to_temp_dataset() + dataset = dataset.map(func, dim=dim, **kwargs) + return self.obj._from_temp_dataset(dataset) -class DatasetWeighted(Weighted): +class DatasetWeighted(Weighted["Dataset"]): def _implementation(self, func, dim, **kwargs) -> "Dataset": return self.obj.map(func, dim=dim, **kwargs) From 8cc34cb412ba89ebca12fc84f76a9e452628f1bc Mon Sep 17 00:00:00 2001 From: Aureliana Barghini <35919497+aurghs@users.noreply.github.com> Date: Thu, 28 Jan 2021 16:20:59 +0100 Subject: [PATCH 36/51] WIP: backend interface, now it uses subclassing (#4836) * draft * working version * fix: instantiate BackendEtrypoints * rename AbstractBackendEntrypoint in BackendEntrypoint * fix plugins tests * style * style * raise NotImplemetedError if BackendEntrypoint.open_dataset is not implemented --- xarray/backends/cfgrib_.py | 106 +++++++++++++++--------------- xarray/backends/common.py | 15 +++-- xarray/backends/h5netcdf_.py | 107 +++++++++++++++---------------- xarray/backends/netCDF4_.py | 107 +++++++++++++++---------------- xarray/backends/plugins.py | 14 ++-- xarray/backends/pseudonetcdf_.py | 96 ++++++++++++++------------- xarray/backends/pydap_.py | 68 +++++++++----------- xarray/backends/pynio_.py | 64 +++++++++--------- xarray/backends/scipy_.py | 94 +++++++++++++-------------- xarray/backends/store.py | 84 ++++++++++++------------ xarray/backends/zarr.py | 81 ++++++++++++----------- xarray/tests/test_plugins.py | 65 
++++++++++--------- 12 files changed, 446 insertions(+), 455 deletions(-) diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 4a0ac7d67f9..65c5bc2a02b 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -12,7 +12,7 @@ BackendEntrypoint, ) from .locks import SerializableLock, ensure_lock -from .store import open_backend_dataset_store +from .store import StoreBackendEntrypoint try: import cfgrib @@ -86,62 +86,58 @@ def get_encoding(self): return encoding -def guess_can_open_cfgrib(store_spec): - try: - _, ext = os.path.splitext(store_spec) - except TypeError: - return False - return ext in {".grib", ".grib2", ".grb", ".grb2"} - - -def open_backend_dataset_cfgrib( - filename_or_obj, - *, - mask_and_scale=True, - decode_times=None, - concat_characters=None, - decode_coords=None, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - lock=None, - indexpath="{path}.{short_hash}.idx", - filter_by_keys={}, - read_keys=[], - encode_cf=("parameter", "time", "geography", "vertical"), - squeeze=True, - time_dims=("time", "step"), -): - - store = CfGribDataStore( +class CfgribfBackendEntrypoint(BackendEntrypoint): + def guess_can_open(self, store_spec): + try: + _, ext = os.path.splitext(store_spec) + except TypeError: + return False + return ext in {".grib", ".grib2", ".grb", ".grb2"} + + def open_dataset( + self, filename_or_obj, - indexpath=indexpath, - filter_by_keys=filter_by_keys, - read_keys=read_keys, - encode_cf=encode_cf, - squeeze=squeeze, - time_dims=time_dims, - lock=lock, - ) - - with close_on_error(store): - ds = open_backend_dataset_store( - store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + *, + mask_and_scale=True, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + lock=None, + indexpath="{path}.{short_hash}.idx", + filter_by_keys={}, + read_keys=[], + encode_cf=("parameter", "time", "geography", "vertical"), + squeeze=True, + time_dims=("time", "step"), + ): + + store = CfGribDataStore( + filename_or_obj, + indexpath=indexpath, + filter_by_keys=filter_by_keys, + read_keys=read_keys, + encode_cf=encode_cf, + squeeze=squeeze, + time_dims=time_dims, + lock=lock, ) - return ds - - -cfgrib_backend = BackendEntrypoint( - open_dataset=open_backend_dataset_cfgrib, guess_can_open=guess_can_open_cfgrib -) + store_entrypoint = StoreBackendEntrypoint() + with close_on_error(store): + ds = store_entrypoint.open_dataset( + store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + return ds if has_cfgrib: - BACKEND_ENTRYPOINTS["cfgrib"] = cfgrib_backend + BACKEND_ENTRYPOINTS["cfgrib"] = CfgribfBackendEntrypoint diff --git a/xarray/backends/common.py b/xarray/backends/common.py index adb70658fab..e2905d0866b 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -1,7 +1,7 @@ import logging import time import traceback -from typing import Dict +from typing import Dict, Tuple, Type, Union import numpy as np @@ -344,12 +344,13 @@ def encode(self, variables, attributes): class BackendEntrypoint: - __slots__ = ("guess_can_open", "open_dataset", "open_dataset_parameters") + 
open_dataset_parameters: Union[Tuple, None] = None - def __init__(self, open_dataset, open_dataset_parameters=None, guess_can_open=None): - self.open_dataset = open_dataset - self.open_dataset_parameters = open_dataset_parameters - self.guess_can_open = guess_can_open + def open_dataset(self): + raise NotImplementedError + def guess_can_open(self, store_spec): + return False -BACKEND_ENTRYPOINTS: Dict[str, BackendEntrypoint] = {} + +BACKEND_ENTRYPOINTS: Dict[str, Type[BackendEntrypoint]] = {} diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 562600de4b6..aa892c4f89c 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -23,7 +23,7 @@ _get_datatype, _nc4_require_group, ) -from .store import open_backend_dataset_store +from .store import StoreBackendEntrypoint try: import h5netcdf @@ -328,62 +328,61 @@ def close(self, **kwargs): self._manager.close(**kwargs) -def guess_can_open_h5netcdf(store_spec): - try: - return read_magic_number(store_spec).startswith(b"\211HDF\r\n\032\n") - except TypeError: - pass - - try: - _, ext = os.path.splitext(store_spec) - except TypeError: - return False - - return ext in {".nc", ".nc4", ".cdf"} - - -def open_backend_dataset_h5netcdf( - filename_or_obj, - *, - mask_and_scale=True, - decode_times=None, - concat_characters=None, - decode_coords=None, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - format=None, - group=None, - lock=None, - invalid_netcdf=None, - phony_dims=None, -): - - store = H5NetCDFStore.open( +class H5netcdfBackendEntrypoint(BackendEntrypoint): + def guess_can_open(self, store_spec): + try: + return read_magic_number(store_spec).startswith(b"\211HDF\r\n\032\n") + except TypeError: + pass + + try: + _, ext = os.path.splitext(store_spec) + except TypeError: + return False + + return ext in {".nc", ".nc4", ".cdf"} + + def open_dataset( + self, filename_or_obj, - format=format, - group=group, - lock=lock, - invalid_netcdf=invalid_netcdf, - phony_dims=phony_dims, - ) + *, + mask_and_scale=True, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + format=None, + group=None, + lock=None, + invalid_netcdf=None, + phony_dims=None, + ): - ds = open_backend_dataset_store( - store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, - ) - return ds + store = H5NetCDFStore.open( + filename_or_obj, + format=format, + group=group, + lock=lock, + invalid_netcdf=invalid_netcdf, + phony_dims=phony_dims, + ) + store_entrypoint = StoreBackendEntrypoint() + + ds = store_entrypoint.open_dataset( + store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + return ds -h5netcdf_backend = BackendEntrypoint( - open_dataset=open_backend_dataset_h5netcdf, guess_can_open=guess_can_open_h5netcdf -) if has_h5netcdf: - BACKEND_ENTRYPOINTS["h5netcdf"] = h5netcdf_backend + BACKEND_ENTRYPOINTS["h5netcdf"] = H5netcdfBackendEntrypoint diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 5bb4eec837b..e3d87aaf83f 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -22,7 +22,7 @@ from .file_manager import CachingFileManager, 
DummyFileManager from .locks import HDF5_LOCK, NETCDFC_LOCK, combine_locks, ensure_lock, get_write_lock from .netcdf3 import encode_nc3_attr_value, encode_nc3_variable -from .store import open_backend_dataset_store +from .store import StoreBackendEntrypoint try: import netCDF4 @@ -512,65 +512,62 @@ def close(self, **kwargs): self._manager.close(**kwargs) -def guess_can_open_netcdf4(store_spec): - if isinstance(store_spec, str) and is_remote_uri(store_spec): - return True - try: - _, ext = os.path.splitext(store_spec) - except TypeError: - return False - return ext in {".nc", ".nc4", ".cdf"} - - -def open_backend_dataset_netcdf4( - filename_or_obj, - mask_and_scale=True, - decode_times=None, - concat_characters=None, - decode_coords=None, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - group=None, - mode="r", - format="NETCDF4", - clobber=True, - diskless=False, - persist=False, - lock=None, - autoclose=False, -): +class NetCDF4BackendEntrypoint(BackendEntrypoint): + def guess_can_open(self, store_spec): + if isinstance(store_spec, str) and is_remote_uri(store_spec): + return True + try: + _, ext = os.path.splitext(store_spec) + except TypeError: + return False + return ext in {".nc", ".nc4", ".cdf"} - store = NetCDF4DataStore.open( + def open_dataset( + self, filename_or_obj, - mode=mode, - format=format, - group=group, - clobber=clobber, - diskless=diskless, - persist=persist, - lock=lock, - autoclose=autoclose, - ) + mask_and_scale=True, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + group=None, + mode="r", + format="NETCDF4", + clobber=True, + diskless=False, + persist=False, + lock=None, + autoclose=False, + ): - with close_on_error(store): - ds = open_backend_dataset_store( - store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + store = NetCDF4DataStore.open( + filename_or_obj, + mode=mode, + format=format, + group=group, + clobber=clobber, + diskless=diskless, + persist=persist, + lock=lock, + autoclose=autoclose, ) - return ds - -netcdf4_backend = BackendEntrypoint( - open_dataset=open_backend_dataset_netcdf4, guess_can_open=guess_can_open_netcdf4 -) + store_entrypoint = StoreBackendEntrypoint() + with close_on_error(store): + ds = store_entrypoint.open_dataset( + store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + return ds if has_netcdf4: - BACKEND_ENTRYPOINTS["netcdf4"] = netcdf4_backend + BACKEND_ENTRYPOINTS["netcdf4"] = NetCDF4BackendEntrypoint diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 6d3ec7e7da5..b8cd2bf6378 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -36,6 +36,7 @@ def remove_duplicates(backend_entrypoints): def detect_parameters(open_dataset): signature = inspect.signature(open_dataset) parameters = signature.parameters + parameters_list = [] for name, param in parameters.items(): if param.kind in ( inspect.Parameter.VAR_KEYWORD, @@ -45,7 +46,9 @@ def detect_parameters(open_dataset): f"All the parameters in {open_dataset!r} signature should be explicit. 
" "*args and **kwargs is not supported" ) - return tuple(parameters) + if name != "self": + parameters_list.append(name) + return tuple(parameters_list) def create_engines_dict(backend_entrypoints): @@ -57,8 +60,8 @@ def create_engines_dict(backend_entrypoints): return engines -def set_missing_parameters(engines): - for name, backend in engines.items(): +def set_missing_parameters(backend_entrypoints): + for name, backend in backend_entrypoints.items(): if backend.open_dataset_parameters is None: open_dataset = backend.open_dataset backend.open_dataset_parameters = detect_parameters(open_dataset) @@ -70,7 +73,10 @@ def build_engines(entrypoints): external_backend_entrypoints = create_engines_dict(pkg_entrypoints) backend_entrypoints.update(external_backend_entrypoints) set_missing_parameters(backend_entrypoints) - return backend_entrypoints + engines = {} + for name, backend in backend_entrypoints.items(): + engines[name] = backend() + return engines @functools.lru_cache(maxsize=1) diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index c2bfd519bed..80485fce459 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -11,7 +11,7 @@ ) from .file_manager import CachingFileManager from .locks import HDF5_LOCK, NETCDFC_LOCK, combine_locks, ensure_lock -from .store import open_backend_dataset_store +from .store import StoreBackendEntrypoint try: from PseudoNetCDF import pncopen @@ -100,57 +100,55 @@ def close(self): self._manager.close() -def open_backend_dataset_pseudonetcdf( - filename_or_obj, - mask_and_scale=False, - decode_times=None, - concat_characters=None, - decode_coords=None, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - mode=None, - lock=None, - **format_kwargs, -): - - store = PseudoNetCDFDataStore.open( - filename_or_obj, lock=lock, mode=mode, **format_kwargs +class PseudoNetCDFBackendEntrypoint(BackendEntrypoint): + + # *args and **kwargs are not allowed in open_backend_dataset_ kwargs, + # unless the open_dataset_parameters are explicity defined like this: + open_dataset_parameters = ( + "filename_or_obj", + "mask_and_scale", + "decode_times", + "concat_characters", + "decode_coords", + "drop_variables", + "use_cftime", + "decode_timedelta", + "mode", + "lock", ) - with close_on_error(store): - ds = open_backend_dataset_store( - store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + def open_dataset( + self, + filename_or_obj, + mask_and_scale=False, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + mode=None, + lock=None, + **format_kwargs, + ): + store = PseudoNetCDFDataStore.open( + filename_or_obj, lock=lock, mode=mode, **format_kwargs ) - return ds - - -# *args and **kwargs are not allowed in open_backend_dataset_ kwargs, -# unless the open_dataset_parameters are explicity defined like this: -open_dataset_parameters = ( - "filename_or_obj", - "mask_and_scale", - "decode_times", - "concat_characters", - "decode_coords", - "drop_variables", - "use_cftime", - "decode_timedelta", - "mode", - "lock", -) -pseudonetcdf_backend = BackendEntrypoint( - open_dataset=open_backend_dataset_pseudonetcdf, - open_dataset_parameters=open_dataset_parameters, -) + + store_entrypoint = StoreBackendEntrypoint() + with close_on_error(store): + ds = 
store_entrypoint.open_dataset( + store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + return ds if has_pseudonetcdf: - BACKEND_ENTRYPOINTS["pseudonetcdf"] = pseudonetcdf_backend + BACKEND_ENTRYPOINTS["pseudonetcdf"] = PseudoNetCDFBackendEntrypoint diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index c5ce943a10a..7f8622ca66e 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -11,7 +11,7 @@ BackendEntrypoint, robust_getitem, ) -from .store import open_backend_dataset_store +from .store import StoreBackendEntrypoint try: import pydap.client @@ -107,45 +107,41 @@ def get_dimensions(self): return Frozen(self.ds.dimensions) -def guess_can_open_pydap(store_spec): - return isinstance(store_spec, str) and is_remote_uri(store_spec) +class PydapBackendEntrypoint(BackendEntrypoint): + def guess_can_open(self, store_spec): + return isinstance(store_spec, str) and is_remote_uri(store_spec) - -def open_backend_dataset_pydap( - filename_or_obj, - mask_and_scale=True, - decode_times=None, - concat_characters=None, - decode_coords=None, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - session=None, -): - - store = PydapDataStore.open( + def open_dataset( + self, filename_or_obj, - session=session, - ) - - with close_on_error(store): - ds = open_backend_dataset_store( - store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + mask_and_scale=True, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + session=None, + ): + store = PydapDataStore.open( + filename_or_obj, + session=session, ) - return ds - -pydap_backend = BackendEntrypoint( - open_dataset=open_backend_dataset_pydap, guess_can_open=guess_can_open_pydap -) + store_entrypoint = StoreBackendEntrypoint() + with close_on_error(store): + ds = store_entrypoint.open_dataset( + store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + return ds if has_pydap: - BACKEND_ENTRYPOINTS["pydap"] = pydap_backend + BACKEND_ENTRYPOINTS["pydap"] = PydapBackendEntrypoint diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py index 261daa69880..41c99efd076 100644 --- a/xarray/backends/pynio_.py +++ b/xarray/backends/pynio_.py @@ -11,7 +11,7 @@ ) from .file_manager import CachingFileManager from .locks import HDF5_LOCK, NETCDFC_LOCK, SerializableLock, combine_locks, ensure_lock -from .store import open_backend_dataset_store +from .store import StoreBackendEntrypoint try: import Nio @@ -97,41 +97,39 @@ def close(self): self._manager.close() -def open_backend_dataset_pynio( - filename_or_obj, - mask_and_scale=True, - decode_times=None, - concat_characters=None, - decode_coords=None, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - mode="r", - lock=None, -): - - store = NioDataStore( +class PynioBackendEntrypoint(BackendEntrypoint): + def open_dataset( filename_or_obj, - mode=mode, - lock=lock, - ) - - with close_on_error(store): - ds = 
open_backend_dataset_store( - store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + mask_and_scale=True, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + mode="r", + lock=None, + ): + store = NioDataStore( + filename_or_obj, + mode=mode, + lock=lock, ) - return ds - -pynio_backend = BackendEntrypoint(open_dataset=open_backend_dataset_pynio) + store_entrypoint = StoreBackendEntrypoint() + with close_on_error(store): + ds = store_entrypoint.open_dataset( + store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + return ds if has_pynio: - BACKEND_ENTRYPOINTS["pynio"] = pynio_backend + BACKEND_ENTRYPOINTS["pynio"] = PynioBackendEntrypoint diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index df51d07d686..ddc157ed8e4 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -15,7 +15,7 @@ from .file_manager import CachingFileManager, DummyFileManager from .locks import ensure_lock, get_write_lock from .netcdf3 import encode_nc3_attr_value, encode_nc3_variable, is_valid_nc3_name -from .store import open_backend_dataset_store +from .store import StoreBackendEntrypoint try: import scipy.io @@ -232,56 +232,54 @@ def close(self): self._manager.close() -def guess_can_open_scipy(store_spec): - try: - return read_magic_number(store_spec).startswith(b"CDF") - except TypeError: - pass +class ScipyBackendEntrypoint(BackendEntrypoint): + def guess_can_open(self, store_spec): + try: + return read_magic_number(store_spec).startswith(b"CDF") + except TypeError: + pass - try: - _, ext = os.path.splitext(store_spec) - except TypeError: - return False - return ext in {".nc", ".nc4", ".cdf", ".gz"} - - -def open_backend_dataset_scipy( - filename_or_obj, - mask_and_scale=True, - decode_times=None, - concat_characters=None, - decode_coords=None, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - mode="r", - format=None, - group=None, - mmap=None, - lock=None, -): - - store = ScipyDataStore( - filename_or_obj, mode=mode, format=format, group=group, mmap=mmap, lock=lock - ) - with close_on_error(store): - ds = open_backend_dataset_store( - store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, - ) - return ds + try: + _, ext = os.path.splitext(store_spec) + except TypeError: + return False + return ext in {".nc", ".nc4", ".cdf", ".gz"} + + def open_dataset( + self, + filename_or_obj, + mask_and_scale=True, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + mode="r", + format=None, + group=None, + mmap=None, + lock=None, + ): + store = ScipyDataStore( + filename_or_obj, mode=mode, format=format, group=group, mmap=mmap, lock=lock + ) -scipy_backend = BackendEntrypoint( - open_dataset=open_backend_dataset_scipy, guess_can_open=guess_can_open_scipy -) + store_entrypoint = StoreBackendEntrypoint() + with close_on_error(store): + ds = store_entrypoint.open_dataset( 
+ store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + return ds if has_scipy: - BACKEND_ENTRYPOINTS["scipy"] = scipy_backend + BACKEND_ENTRYPOINTS["scipy"] = ScipyBackendEntrypoint diff --git a/xarray/backends/store.py b/xarray/backends/store.py index 66fca0d39c3..d57b3ab9df8 100644 --- a/xarray/backends/store.py +++ b/xarray/backends/store.py @@ -3,47 +3,43 @@ from .common import BACKEND_ENTRYPOINTS, AbstractDataStore, BackendEntrypoint -def guess_can_open_store(store_spec): - return isinstance(store_spec, AbstractDataStore) - - -def open_backend_dataset_store( - store, - *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, -): - vars, attrs = store.load() - encoding = store.get_encoding() - - vars, attrs, coord_names = conventions.decode_cf_variables( - vars, - attrs, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, - ) - - ds = Dataset(vars, attrs=attrs) - ds = ds.set_coords(coord_names.intersection(vars)) - ds.set_close(store.close) - ds.encoding = encoding - - return ds - - -store_backend = BackendEntrypoint( - open_dataset=open_backend_dataset_store, guess_can_open=guess_can_open_store -) - - -BACKEND_ENTRYPOINTS["store"] = store_backend +class StoreBackendEntrypoint(BackendEntrypoint): + def guess_can_open(self, store_spec): + return isinstance(store_spec, AbstractDataStore) + + def open_dataset( + self, + store, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + ): + vars, attrs = store.load() + encoding = store.get_encoding() + + vars, attrs, coord_names = conventions.decode_cf_variables( + vars, + attrs, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + + ds = Dataset(vars, attrs=attrs) + ds = ds.set_coords(coord_names.intersection(vars)) + ds.set_close(store.close) + ds.encoding = encoding + + return ds + + +BACKEND_ENTRYPOINTS["store"] = StoreBackendEntrypoint diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index ceeb23cac9b..1d667a38b53 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -15,7 +15,7 @@ BackendEntrypoint, _encode_variable_name, ) -from .store import open_backend_dataset_store +from .store import StoreBackendEntrypoint try: import zarr @@ -670,49 +670,48 @@ def open_zarr( return ds -def open_backend_dataset_zarr( - filename_or_obj, - mask_and_scale=True, - decode_times=None, - concat_characters=None, - decode_coords=None, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - group=None, - mode="r", - synchronizer=None, - consolidated=False, - consolidate_on_close=False, - chunk_store=None, -): - - store = ZarrStore.open_group( +class ZarrBackendEntrypoint(BackendEntrypoint): + def open_dataset( + self, filename_or_obj, - group=group, - mode=mode, - synchronizer=synchronizer, - consolidated=consolidated, - consolidate_on_close=consolidate_on_close, - 
chunk_store=chunk_store, - ) - - with close_on_error(store): - ds = open_backend_dataset_store( - store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + mask_and_scale=True, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + group=None, + mode="r", + synchronizer=None, + consolidated=False, + consolidate_on_close=False, + chunk_store=None, + ): + store = ZarrStore.open_group( + filename_or_obj, + group=group, + mode=mode, + synchronizer=synchronizer, + consolidated=consolidated, + consolidate_on_close=consolidate_on_close, + chunk_store=chunk_store, ) - return ds - -zarr_backend = BackendEntrypoint(open_dataset=open_backend_dataset_zarr) + store_entrypoint = StoreBackendEntrypoint() + with close_on_error(store): + ds = store_entrypoint.open_dataset( + store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + return ds if has_zarr: - BACKEND_ENTRYPOINTS["zarr"] = zarr_backend + BACKEND_ENTRYPOINTS["zarr"] = ZarrBackendEntrypoint diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index 38ebce6da1a..64a1c563dba 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -6,19 +6,24 @@ from xarray.backends import common, plugins -def dummy_open_dataset_args(filename_or_obj, *args): - pass +class DummyBackendEntrypointArgs(common.BackendEntrypoint): + def open_dataset(filename_or_obj, *args): + pass -def dummy_open_dataset_kwargs(filename_or_obj, **kwargs): - pass +class DummyBackendEntrypointKwargs(common.BackendEntrypoint): + def open_dataset(filename_or_obj, **kwargs): + pass -def dummy_open_dataset(filename_or_obj, *, decoder): - pass +class DummyBackendEntrypoint1(common.BackendEntrypoint): + def open_dataset(self, filename_or_obj, *, decoder): + pass -dummy_cfgrib = common.BackendEntrypoint(dummy_open_dataset) +class DummyBackendEntrypoint2(common.BackendEntrypoint): + def open_dataset(self, filename_or_obj, *, decoder): + pass @pytest.fixture @@ -65,46 +70,48 @@ def test_create_engines_dict(): def test_set_missing_parameters(): - backend_1 = common.BackendEntrypoint(dummy_open_dataset) - backend_2 = common.BackendEntrypoint(dummy_open_dataset, ("filename_or_obj",)) + backend_1 = DummyBackendEntrypoint1 + backend_2 = DummyBackendEntrypoint2 + backend_2.open_dataset_parameters = ("filename_or_obj",) engines = {"engine_1": backend_1, "engine_2": backend_2} plugins.set_missing_parameters(engines) assert len(engines) == 2 - engine_1 = engines["engine_1"] - assert engine_1.open_dataset_parameters == ("filename_or_obj", "decoder") - engine_2 = engines["engine_2"] - assert engine_2.open_dataset_parameters == ("filename_or_obj",) + assert backend_1.open_dataset_parameters == ("filename_or_obj", "decoder") + assert backend_2.open_dataset_parameters == ("filename_or_obj",) + + backend = DummyBackendEntrypointKwargs() + backend.open_dataset_parameters = ("filename_or_obj", "decoder") + plugins.set_missing_parameters({"engine": backend}) + assert backend.open_dataset_parameters == ("filename_or_obj", "decoder") + + backend = DummyBackendEntrypointArgs() + backend.open_dataset_parameters = ("filename_or_obj", 
"decoder") + plugins.set_missing_parameters({"engine": backend}) + assert backend.open_dataset_parameters == ("filename_or_obj", "decoder") def test_set_missing_parameters_raise_error(): - backend = common.BackendEntrypoint(dummy_open_dataset_args) + backend = DummyBackendEntrypointKwargs() with pytest.raises(TypeError): plugins.set_missing_parameters({"engine": backend}) - backend = common.BackendEntrypoint( - dummy_open_dataset_args, ("filename_or_obj", "decoder") - ) - plugins.set_missing_parameters({"engine": backend}) - - backend = common.BackendEntrypoint(dummy_open_dataset_kwargs) + backend = DummyBackendEntrypointArgs() with pytest.raises(TypeError): plugins.set_missing_parameters({"engine": backend}) - backend = common.BackendEntrypoint( - dummy_open_dataset_kwargs, ("filename_or_obj", "decoder") - ) - plugins.set_missing_parameters({"engine": backend}) - -@mock.patch("pkg_resources.EntryPoint.load", mock.MagicMock(return_value=dummy_cfgrib)) +@mock.patch( + "pkg_resources.EntryPoint.load", + mock.MagicMock(return_value=DummyBackendEntrypoint1), +) def test_build_engines(): - dummy_cfgrib_pkg_entrypoint = pkg_resources.EntryPoint.parse( + dummy_pkg_entrypoint = pkg_resources.EntryPoint.parse( "cfgrib = xarray.tests.test_plugins:backend_1" ) - backend_entrypoints = plugins.build_engines([dummy_cfgrib_pkg_entrypoint]) - assert backend_entrypoints["cfgrib"] is dummy_cfgrib + backend_entrypoints = plugins.build_engines([dummy_pkg_entrypoint]) + assert isinstance(backend_entrypoints["cfgrib"], DummyBackendEntrypoint1) assert backend_entrypoints["cfgrib"].open_dataset_parameters == ( "filename_or_obj", "decoder", From f4b95cd28f5f34ed5ef6cbd9280904fb5449c2a7 Mon Sep 17 00:00:00 2001 From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com> Date: Fri, 29 Jan 2021 22:59:29 +0000 Subject: [PATCH 37/51] dim -> coord in DataArray.integrate (#3993) * changed arg and docstrings * test warning is raised * updated kwarg in test * updated what's new * Fix error checking * Fix whats-new * fix docstring * small fix Co-authored-by: dcherian --- doc/whats-new.rst | 7 +++++++ xarray/core/dataarray.py | 35 ++++++++++++++++++++++++++++------- xarray/core/dataset.py | 14 ++++++++------ xarray/tests/test_dataset.py | 3 +++ xarray/tests/test_units.py | 2 +- 5 files changed, 47 insertions(+), 14 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 488d8baa650..471e91a8512 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -42,6 +42,13 @@ Breaking changes - remove deprecated ``autoclose`` kwargs from :py:func:`open_dataset` (:pull:`4725`). By `Aureliana Barghini `_. +Deprecations +~~~~~~~~~~~~ + +- ``dim`` argument to :py:meth:`DataArray.integrate` is being deprecated in + favour of a ``coord`` argument, for consistency with :py:meth:`Dataset.integrate`. + For now using ``dim`` issues a ``FutureWarning``. By `Tom Nicholas `_. + New Features ~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 2fef3edbc43..0155cdc4e19 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3481,21 +3481,26 @@ def differentiate( return self._from_temp_dataset(ds) def integrate( - self, dim: Union[Hashable, Sequence[Hashable]], datetime_unit: str = None + self, + coord: Union[Hashable, Sequence[Hashable]] = None, + datetime_unit: str = None, + *, + dim: Union[Hashable, Sequence[Hashable]] = None, ) -> "DataArray": - """ integrate the array with the trapezoidal rule. + """Integrate along the given coordinate using the trapezoidal rule. .. 
note:: - This feature is limited to simple cartesian geometry, i.e. dim + This feature is limited to simple cartesian geometry, i.e. coord must be one dimensional. Parameters ---------- + coord: hashable, or a sequence of hashable + Coordinate(s) used for the integration. dim : hashable, or sequence of hashable Coordinate(s) used for the integration. - datetime_unit : {"Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", \ - "ps", "fs", "as"}, optional - Can be used to specify the unit if datetime coordinate is used. + datetime_unit: {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ + 'ps', 'fs', 'as'}, optional Returns ------- @@ -3503,6 +3508,7 @@ def integrate( See also -------- + Dataset.integrate numpy.trapz: corresponding numpy function Examples @@ -3528,7 +3534,22 @@ def integrate( array([5.4, 6.6, 7.8]) Dimensions without coordinates: y """ - ds = self._to_temp_dataset().integrate(dim, datetime_unit) + if dim is not None and coord is not None: + raise ValueError( + "Cannot pass both 'dim' and 'coord'. Please pass only 'coord' instead." + ) + + if dim is not None and coord is None: + coord = dim + msg = ( + "The `dim` keyword argument to `DataArray.integrate` is " + "being replaced with `coord`, for consistency with " + "`Dataset.integrate`. Please pass `coord` instead." + " `dim` will be removed in version 0.19.0." + ) + warnings.warn(msg, FutureWarning, stacklevel=2) + + ds = self._to_temp_dataset().integrate(coord, datetime_unit) return self._from_temp_dataset(ds) def unify_chunks(self) -> "DataArray": diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a73e299e27a..8376b4875f9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5963,8 +5963,10 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None): variables[k] = v return self._replace(variables) - def integrate(self, coord, datetime_unit=None): - """ integrate the array with the trapezoidal rule. + def integrate( + self, coord: Union[Hashable, Sequence[Hashable]], datetime_unit: str = None + ) -> "Dataset": + """Integrate along the given coordinate using the trapezoidal rule. .. note:: This feature is limited to simple cartesian geometry, i.e. coord @@ -5972,11 +5974,11 @@ def integrate(self, coord, datetime_unit=None): Parameters ---------- - coord: str, or sequence of str + coord: hashable, or a sequence of hashable Coordinate(s) used for the integration. - datetime_unit : {"Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", \ - "ps", "fs", "as"}, optional - Can be specify the unit if datetime coordinate is used. + datetime_unit: {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ + 'ps', 'fs', 'as'}, optional + Specify the unit if datetime coordinate is used. 
Returns ------- diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index fed9098701b..db47faa8d2b 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6603,6 +6603,9 @@ def test_integrate(dask): with pytest.raises(ValueError): da.integrate("x2d") + with pytest.warns(FutureWarning): + da.integrate(dim="x") + @pytest.mark.parametrize("dask", [True, False]) @pytest.mark.parametrize("which_datetime", ["np", "cftime"]) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index bb3127e90b5..76dd830de23 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -3681,7 +3681,7 @@ def test_stacking_reordering(self, func, dtype): ( method("diff", dim="x"), method("differentiate", coord="x"), - method("integrate", dim="x"), + method("integrate", coord="x"), method("quantile", q=[0.25, 0.75]), method("reduce", func=np.sum, dim="x"), pytest.param(lambda x: x.dot(x), id="method_dot"), From 39048f95c5048b95505abc3afaec3bf386cbdf10 Mon Sep 17 00:00:00 2001 From: keewis Date: Sat, 30 Jan 2021 00:05:57 +0100 Subject: [PATCH 38/51] speed up the repr for big MultiIndex objects (#4846) * print the repr of a multiindex using only a subset of the coordinate values * don't index if we have less items than available width * don't try to shorten arrays which are way too short * col_width seems to be the maximum number of elements, not characters * add a asv benchmark * Apply suggestions from code review Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- asv_bench/benchmarks/repr.py | 18 ++++++++++++++++++ xarray/core/formatting.py | 11 +++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 asv_bench/benchmarks/repr.py diff --git a/asv_bench/benchmarks/repr.py b/asv_bench/benchmarks/repr.py new file mode 100644 index 00000000000..b218c0be870 --- /dev/null +++ b/asv_bench/benchmarks/repr.py @@ -0,0 +1,18 @@ +import pandas as pd + +import xarray as xr + + +class ReprMultiIndex: + def setup(self, key): + index = pd.MultiIndex.from_product( + [range(10000), range(10000)], names=("level_0", "level_1") + ) + series = pd.Series(range(100000000), index=index) + self.da = xr.DataArray(series) + + def time_repr(self): + repr(self.da) + + def time_repr_html(self): + self.da._repr_html_() diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 282620e3569..0c1be1cc175 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -300,11 +300,18 @@ def _summarize_coord_multiindex(coord, col_width, marker): def _summarize_coord_levels(coord, col_width, marker="-"): + if len(coord) > 100 and col_width < len(coord): + n_values = col_width + indices = list(range(0, n_values)) + list(range(-n_values, 0)) + subset = coord[indices] + else: + subset = coord + return "\n".join( summarize_variable( - lname, coord.get_level_variable(lname), col_width, marker=marker + lname, subset.get_level_variable(lname), col_width, marker=marker ) - for lname in coord.level_names + for lname in subset.level_names ) From 5735e163bea43ec9bc3c2e640fbf25a1d4a9d0c0 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 30 Jan 2021 17:26:03 +0100 Subject: [PATCH 39/51] Add units if "unit" is in the attrs. 
(#4850) --- xarray/plot/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 16c67e154fc..601b23a3065 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -460,6 +460,8 @@ def label_from_attrs(da, extra=""): if da.attrs.get("units"): units = " [{}]".format(da.attrs["units"]) + elif da.attrs.get("unit"): + units = " [{}]".format(da.attrs["unit"]) else: units = "" From 110c85778a7dd000634618660699ade3148f6d97 Mon Sep 17 00:00:00 2001 From: keewis Date: Sat, 6 Feb 2021 23:02:30 +0100 Subject: [PATCH 40/51] temporarily pin dask (#4873) --- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 6de2bc8dc64..9455ef2f127 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -8,7 +8,7 @@ dependencies: # - cdms2 # Not available on Windows # - cfgrib # Causes Python interpreter crash on Windows: https://github.com/pydata/xarray/pull/3340 - cftime - - dask + - dask<2021.02.0 - distributed - h5netcdf - h5py=2 diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 0f59d9570c8..7261b5b6954 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -9,7 +9,7 @@ dependencies: - cdms2 - cfgrib - cftime - - dask + - dask<2021.02.0 - distributed - h5netcdf - h5py=2 From ec7f628bf38b37df213fe3b5ad68d3f70824b864 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Sun, 7 Feb 2021 22:57:33 +0100 Subject: [PATCH 41/51] fix da.pad example for numpy 1.20 (#4865) --- xarray/core/dataarray.py | 23 +++++++++++------------ xarray/tests/test_dataarray.py | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 0155cdc4e19..540230766a5 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3851,9 +3851,8 @@ def pad( Notes ----- - By default when ``mode="constant"`` and ``constant_values=None``, integer types will be - promoted to ``float`` and padded with ``np.nan``. To avoid type promotion - specify ``constant_values=np.nan`` + For ``mode="constant"`` and ``constant_values=None``, integer types will be + promoted to ``float`` and padded with ``np.nan``. 
Examples -------- @@ -3880,16 +3879,16 @@ def pad( * x (x) float64 nan 0.0 1.0 nan * y (y) int64 10 20 30 40 z (x) float64 nan 100.0 200.0 nan - >>> da.pad(x=1, constant_values=np.nan) + + Careful, ``constant_values`` are coerced to the data type of the array which may + lead to a loss of precision: + + >>> da.pad(x=1, constant_values=1.23456789) - array([[-9223372036854775808, -9223372036854775808, -9223372036854775808, - -9223372036854775808], - [ 0, 1, 2, - 3], - [ 10, 11, 12, - 13], - [-9223372036854775808, -9223372036854775808, -9223372036854775808, - -9223372036854775808]]) + array([[ 1, 1, 1, 1], + [ 0, 1, 2, 3], + [10, 11, 12, 13], + [ 1, 1, 1, 1]]) Coordinates: * x (x) float64 nan 0.0 1.0 nan * y (y) int64 10 20 30 40 diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index fc84687511e..8d599c7a715 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4472,6 +4472,26 @@ def test_pad_constant(self): assert actual.shape == (7, 4, 5) assert_identical(actual, expected) + ar = xr.DataArray([9], dims="x") + + actual = ar.pad(x=1) + expected = xr.DataArray([np.NaN, 9, np.NaN], dims="x") + assert_identical(actual, expected) + + actual = ar.pad(x=1, constant_values=1.23456) + expected = xr.DataArray([1, 9, 1], dims="x") + assert_identical(actual, expected) + + if LooseVersion(np.__version__) >= "1.20": + with pytest.raises(ValueError, match="cannot convert float NaN to integer"): + ar.pad(x=1, constant_values=np.NaN) + else: + actual = ar.pad(x=1, constant_values=np.NaN) + expected = xr.DataArray( + [-9223372036854775808, 9, -9223372036854775808], dims="x" + ) + assert_identical(actual, expected) + def test_pad_coords(self): ar = DataArray( np.arange(3 * 4 * 5).reshape(3, 4, 5), From a5f53e203c52a7605d5db799864046471115d04f Mon Sep 17 00:00:00 2001 From: keewis Date: Sun, 7 Feb 2021 23:34:48 +0100 Subject: [PATCH 42/51] don't skip the doctests CI (#4869) * don't skip the doctests CI with skip-ci * empty commit [skip-ci] --- .github/workflows/ci-additional.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index fdc61f2f4f7..3579e18dbff 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -121,8 +121,6 @@ jobs: doctest: name: Doctests runs-on: "ubuntu-latest" - needs: detect-ci-trigger - if: needs.detect-ci-trigger.outputs.triggered == 'false' defaults: run: shell: bash -l {0} From c83dfd1f97ebb6d44c9ebf97fbb6f159a414729c Mon Sep 17 00:00:00 2001 From: keewis Date: Sun, 7 Feb 2021 23:46:38 +0100 Subject: [PATCH 43/51] =?UTF-8?q?v=C3=A9lin=20(#4872)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 5 +++ xarray/backends/locks.py | 2 +- xarray/backends/lru_cache.py | 2 +- xarray/backends/rasterio_.py | 9 ++-- xarray/backends/zarr.py | 6 +-- xarray/coding/cftime_offsets.py | 5 +-- xarray/coding/cftimeindex.py | 2 +- xarray/coding/frequencies.py | 4 +- xarray/coding/times.py | 4 +- xarray/conventions.py | 7 ++- xarray/core/accessor_dt.py | 4 +- xarray/core/alignment.py | 4 +- xarray/core/common.py | 33 +++++--------- xarray/core/computation.py | 24 +++++------ xarray/core/dataarray.py | 56 ++++++++---------------- xarray/core/dataset.py | 76 +++++++++++---------------------- xarray/core/duck_array_ops.py | 30 ++++++------- xarray/core/extensions.py | 5 +-- xarray/core/groupby.py | 11 +++-- xarray/core/indexes.py | 4 +- xarray/core/indexing.py | 
16 +++---- xarray/core/missing.py | 36 ++++++++-------- xarray/core/nputils.py | 2 +- xarray/core/parallel.py | 5 +-- xarray/core/rolling.py | 2 +- xarray/core/variable.py | 21 ++++----- xarray/plot/facetgrid.py | 2 +- xarray/plot/plot.py | 1 - xarray/plot/utils.py | 11 +++-- xarray/testing.py | 8 ++-- 30 files changed, 161 insertions(+), 236 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b0fa21a7bf9..20f1af72a11 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,6 +24,11 @@ repos: rev: 3.8.4 hooks: - id: flake8 + # - repo: https://github.com/Carreau/velin + # rev: 0.0.8 + # hooks: + # - id: velin + # args: ["--write", "--compact"] - repo: https://github.com/pre-commit/mirrors-mypy rev: v0.790 # Must match ci/requirements/*.yml hooks: diff --git a/xarray/backends/locks.py b/xarray/backends/locks.py index bb876a432c8..5303ea49381 100644 --- a/xarray/backends/locks.py +++ b/xarray/backends/locks.py @@ -67,7 +67,7 @@ def _get_scheduler(get=None, collection=None) -> Optional[str]: None is returned if no dask scheduler is active. - See also + See Also -------- dask.base.get_scheduler """ diff --git a/xarray/backends/lru_cache.py b/xarray/backends/lru_cache.py index 5ca49a0311a..48030903036 100644 --- a/xarray/backends/lru_cache.py +++ b/xarray/backends/lru_cache.py @@ -34,7 +34,7 @@ def __init__(self, maxsize: int, on_evict: Callable[[K, V], Any] = None): ---------- maxsize : int Integer maximum number of items to hold in the cache. - on_evict: callable, optional + on_evict : callable, optional Function to call like ``on_evict(key, value)`` when items are evicted. """ diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index c689c1e99d7..d776b116ea8 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -52,9 +52,9 @@ def shape(self): def _get_indexer(self, key): """Get indexer for rasterio array. - Parameter - --------- - key: tuple of int + Parameters + ---------- + key : tuple of int Returns ------- @@ -63,7 +63,7 @@ def _get_indexer(self, key): squeeze_axis: axes to be squeezed np_ind: indexer for loaded numpy array - See also + See Also -------- indexing.decompose_indexer """ @@ -180,7 +180,6 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc nx, ny = da.sizes['x'], da.sizes['y'] x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform - Parameters ---------- filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 1d667a38b53..04fdeac6450 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -462,7 +462,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No check_encoding_set : list-like List of variables that should be checked for invalid encoding values - writer : + writer unlimited_dims : list-like List of dimension names that should be treated as unlimited dimensions. @@ -566,7 +566,7 @@ def open_zarr( based on the variable's zarr chunks. If `chunks=None`, zarr array data will lazily convert to numpy arrays upon access. This accepts all the chunk specifications as Dask does. 
- overwrite_encoded_chunks: bool, optional + overwrite_encoded_chunks : bool, optional Whether to drop the zarr chunks encoded for each variable when a dataset is loaded with specified chunk sizes (default: False) decode_cf : bool, optional @@ -605,7 +605,7 @@ def open_zarr( {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} into timedelta objects. If False, leave them encoded as numbers. If None (default), assume the same value of decode_time. - use_cftime: bool, optional + use_cftime : bool, optional Only relevant if encoded dates come from a standard calendar (e.g. "gregorian", "proleptic_gregorian", "standard", or not specified). If None (default), attempt to decode times to diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 3c92c816e12..177a0fd831b 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -796,7 +796,7 @@ def cftime_range( periods : int, optional Number of periods to generate. freq : str or None, default: "D" - Frequency strings can have multiples, e.g. "5H". + Frequency strings can have multiples, e.g. "5H". normalize : bool, default: False Normalize start/end dates to midnight before generating date range. name : str, default: None @@ -813,7 +813,6 @@ def cftime_range( Notes ----- - This function is an analog of ``pandas.date_range`` for use in generating sequences of ``cftime.datetime`` objects. It supports most of the features of ``pandas.date_range`` (e.g. specifying how the index is @@ -911,7 +910,6 @@ def cftime_range( | Q(S)-DEC | Quarter frequency, anchored at the end (or beginning) of December | +----------+--------------------------------------------------------------------+ - Finally, the following calendar aliases are supported. +--------------------------------+---------------------------------------+ @@ -932,7 +930,6 @@ def cftime_range( Examples -------- - This function returns a ``CFTimeIndex``, populated with ``cftime.datetime`` objects associated with the specified calendar type, e.g. diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index e414740d420..948bff1056a 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -516,7 +516,7 @@ def shift(self, n, freq): ------- CFTimeIndex - See also + See Also -------- pandas.DatetimeIndex.shift diff --git a/xarray/coding/frequencies.py b/xarray/coding/frequencies.py index fa11d05923f..c83c766f071 100644 --- a/xarray/coding/frequencies.py +++ b/xarray/coding/frequencies.py @@ -62,8 +62,8 @@ def infer_freq(index): Parameters ---------- index : CFTimeIndex, DataArray, DatetimeIndex, TimedeltaIndex, Series - If not passed a CFTimeIndex, this simply calls `pandas.infer_freq`. - If passed a Series or a DataArray will use the values of the series (NOT THE INDEX). + If not passed a CFTimeIndex, this simply calls `pandas.infer_freq`. + If passed a Series or a DataArray will use the values of the series (NOT THE INDEX). Returns ------- diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 3d877a169f5..ac2b1fb280d 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -191,7 +191,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): Note that time unit in `units` must not be smaller than microseconds and not larger than days. - See also + See Also -------- cftime.num2date """ @@ -407,7 +407,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): Unlike `date2num`, this function can handle datetime64 arrays. 
- See also + See Also -------- cftime.date2num """ diff --git a/xarray/conventions.py b/xarray/conventions.py index e33ae53b31d..93e765e5622 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -624,7 +624,7 @@ def cf_decoder( concat_characters : bool Should character arrays be concatenated to strings, for example: ["h", "e", "l", "l", "o"] -> "hello" - mask_and_scale: bool + mask_and_scale : bool Lazily scale (using scale_factor and add_offset) and mask (using _FillValue). decode_times : bool @@ -637,7 +637,7 @@ def cf_decoder( decoded_attributes : dict A dictionary mapping from attribute name to values. - See also + See Also -------- decode_cf_variable """ @@ -747,7 +747,6 @@ def cf_encoder(variables, attributes): This includes masking, scaling, character array handling, and CF-time encoding. - Parameters ---------- variables : dict @@ -762,7 +761,7 @@ def cf_encoder(variables, attributes): encoded_attributes : dict A dictionary mapping from attribute name to value - See also + See Also -------- decode_cf_variable, encode_cf_variable """ diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 3fc682f8c32..ec67534c651 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -322,8 +322,8 @@ def strftime(self, date_format): def isocalendar(self): """Dataset containing ISO year, week number, and weekday. - Note - ---- + Notes + ----- The iso year and weekday differ from the nominal year and weekday. """ diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index debf3aad96a..98cbadcb25c 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -135,7 +135,6 @@ def align( Examples -------- - >>> import xarray as xr >>> x = xr.DataArray( ... [[25, 35], [10, 24]], @@ -532,7 +531,7 @@ def reindex_variables( the input. In either case, new xarray objects are always returned. fill_value : scalar, optional Value to use for newly missing values - sparse: bool, optional + sparse : bool, optional Use an sparse-array Returns @@ -704,7 +703,6 @@ def broadcast(*args, exclude=None): Examples -------- - Broadcast two data arrays against one another to fill out their dimensions: >>> a = xr.DataArray([1, 2, 3], dims="x") diff --git a/xarray/core/common.py b/xarray/core/common.py index c5836c68759..88155234020 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -187,7 +187,7 @@ def sizes(self: Any) -> Mapping[Hashable, int]: Immutable. - See also + See Also -------- Dataset.sizes """ @@ -409,7 +409,6 @@ def assign_coords(self, coords=None, **coords_kwargs): defined and attached to an existing dimension using a tuple with the first element the dimension name and the second element the values for this new coordinate. - **coords_kwargs : optional The keyword arguments form of ``coords``. One of ``coords`` or ``coords_kwargs`` must be provided. @@ -470,7 +469,7 @@ def assign_coords(self, coords=None, **coords_kwargs): is possible, but you cannot reference other variables created within the same ``assign_coords`` call. - See also + See Also -------- Dataset.assign Dataset.swap_dims @@ -498,7 +497,7 @@ def assign_attrs(self, *args, **kwargs): assigned : same type as caller A new object with the new attrs in addition to the existing data. 
- See also + See Also -------- Dataset.assign """ @@ -537,7 +536,6 @@ def pipe( Notes ----- - Use ``.pipe`` when chaining together functions that expect xarray or pandas objects, e.g., instead of writing @@ -561,7 +559,6 @@ def pipe( Examples -------- - >>> import numpy as np >>> import xarray as xr >>> x = xr.Dataset( @@ -813,7 +810,7 @@ def rolling( Parameters ---------- - dim: dict, optional + dim : dict, optional Mapping from the dimension name to create the rolling iterator along (e.g. `time`) to its moving window size. min_periods : int, default: None @@ -1101,7 +1098,6 @@ def resample( References ---------- - .. [1] http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases """ # TODO support non-string indexer after removing the old API. @@ -1189,7 +1185,6 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): Examples -------- - >>> import numpy as np >>> a = xr.DataArray(np.arange(25).reshape(5, 5), dims=("x", "y")) >>> a @@ -1235,7 +1230,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [15., nan, nan, nan]]) Dimensions without coordinates: x, y - See also + See Also -------- numpy.where : corresponding numpy function where : equivalent function @@ -1386,14 +1381,13 @@ def isin(self, test_elements): Examples -------- - >>> array = xr.DataArray([1, 2, 3], dims="x") >>> array.isin([1, 3]) array([ True, False, True]) Dimensions without coordinates: x - See also + See Also -------- numpy.isin """ @@ -1452,7 +1446,6 @@ def astype( * 'same_kind' means only safe casts or casts within a kind, like float64 to float32, are allowed. * 'unsafe' means any data conversions may be done. - subok : bool, optional If True, then sub-classes will be passed-through, otherwise the returned array will be forced to be a base-class array. @@ -1477,7 +1470,7 @@ def astype( Make sure to only supply these arguments if the underlying array class supports them. - See also + See Also -------- numpy.ndarray.astype dask.array.Array.astype @@ -1533,7 +1526,6 @@ def full_like(other, fill_value, dtype: DTypeLike = None): Examples -------- - >>> import numpy as np >>> import xarray as xr >>> x = xr.DataArray( @@ -1609,9 +1601,8 @@ def full_like(other, fill_value, dtype: DTypeLike = None): a (x) bool True True True b (x) float64 2.0 2.0 2.0 - See also + See Also -------- - zeros_like ones_like @@ -1692,7 +1683,6 @@ def zeros_like(other, dtype: DTypeLike = None): Examples -------- - >>> import numpy as np >>> import xarray as xr >>> x = xr.DataArray( @@ -1724,9 +1714,8 @@ def zeros_like(other, dtype: DTypeLike = None): * lat (lat) int64 1 2 * lon (lon) int64 0 1 2 - See also + See Also -------- - ones_like full_like @@ -1752,7 +1741,6 @@ def ones_like(other, dtype: DTypeLike = None): Examples -------- - >>> import numpy as np >>> import xarray as xr >>> x = xr.DataArray( @@ -1776,9 +1764,8 @@ def ones_like(other, dtype: DTypeLike = None): * lat (lat) int64 1 2 * lon (lon) int64 0 1 2 - See also + See Also -------- - zeros_like full_like diff --git a/xarray/core/computation.py b/xarray/core/computation.py index e0d9ff4b218..e68c6b2629d 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -885,11 +885,11 @@ def apply_ufunc( Value used in place of missing variables on Dataset inputs when the datasets do not share the exact same ``data_vars``. Required if ``dataset_join not in {'inner', 'exact'}``, otherwise ignored. - keep_attrs: bool, optional + keep_attrs : bool, optional Whether to copy attributes from the first argument to the output. 
- kwargs: dict, optional + kwargs : dict, optional Optional keyword arguments passed directly on to call ``func``. - dask: {"forbidden", "allowed", "parallelized"}, default: "forbidden" + dask : {"forbidden", "allowed", "parallelized"}, default: "forbidden" How to handle applying to objects containing lazy data in the form of dask arrays: @@ -925,7 +925,6 @@ def apply_ufunc( Examples -------- - Calculate the vector magnitude of two arguments: >>> def magnitude(a, b): @@ -1011,7 +1010,7 @@ def earth_mover_distance(first_samples, works well with numba's vectorize and guvectorize. Further explanation with examples are provided in the xarray documentation [3]_. - See also + See Also -------- numpy.broadcast_arrays numba.vectorize @@ -1162,10 +1161,10 @@ def cov(da_a, da_b, dim=None, ddof=1): ------- covariance : DataArray - See also + See Also -------- pandas.Series.cov : corresponding pandas function - xarray.corr: respective function to calculate correlation + xarray.corr : respective function to calculate correlation Examples -------- @@ -1240,7 +1239,7 @@ def corr(da_a, da_b, dim=None): ------- correlation: DataArray - See also + See Also -------- pandas.Series.corr : corresponding pandas function xarray.cov : underlying covariance function @@ -1361,7 +1360,6 @@ def dot(*arrays, dims=None, **kwargs): Examples -------- - >>> import numpy as np >>> import xarray as xr >>> da_a = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=["a", "b"]) @@ -1496,7 +1494,6 @@ def where(cond, x, y): All dimension coordinates on `x` and `y` must be aligned with each other and with `cond`. - Parameters ---------- cond : scalar, array, Variable, DataArray or Dataset @@ -1566,10 +1563,11 @@ def where(cond, x, y): [0, 0]]) Dimensions without coordinates: x, y - See also + See Also -------- numpy.where : corresponding numpy function - Dataset.where, DataArray.where : equivalent methods + Dataset.where, DataArray.where : + equivalent methods """ # alignment for three arguments is complicated, so don't support it yet return apply_ufunc( @@ -1595,7 +1593,7 @@ def polyval(coord, coeffs, degree_dim="degree"): degree_dim : str, default: "degree" Name of the polynomial degree dimension in `coeffs`. - See also + See Also -------- xarray.DataArray.polyfit numpy.polyval diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 540230766a5..fc53dbee26e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -945,7 +945,6 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": Examples -------- - Shallow versus deep copy >>> array = xr.DataArray([1, 2, 3], dims="x", coords={"x": ["a", "b", "c"]}) @@ -1294,7 +1293,6 @@ def broadcast_like( Examples -------- - >>> arr1 = xr.DataArray( ... np.random.randn(2, 3), ... dims=("x", "y"), @@ -1709,7 +1707,6 @@ def swap_dims( dims_dict : dict-like Dictionary whose keys are current dimension names and whose values are new names. - **dim_kwargs : {dim: , ...}, optional The keyword arguments form of ``dims_dict``. One of dims_dict or dims_kwargs must be provided. @@ -1721,7 +1718,6 @@ def swap_dims( Examples -------- - >>> arr = xr.DataArray( ... data=[0, 1], ... dims="x", @@ -1751,7 +1747,6 @@ def swap_dims( See Also -------- - DataArray.rename Dataset.swap_dims """ @@ -1769,7 +1764,6 @@ def expand_dims( the corresponding position in the array shape. The new object is a view into the underlying array, not a copy. - If dim is already a scalar coordinate, it will be promoted to a 1D coordinate consisting of a single value. 
@@ -1965,7 +1959,6 @@ def stack( Examples -------- - >>> arr = xr.DataArray( ... np.arange(6).reshape(2, 3), ... coords=[("x", ["a", "b"]), ("y", [0, 1, 2])], @@ -2026,7 +2019,6 @@ def unstack( Examples -------- - >>> arr = xr.DataArray( ... np.arange(6).reshape(2, 3), ... coords=[("x", ["a", "b"]), ("y", [0, 1, 2])], @@ -2191,7 +2183,7 @@ def drop_vars( ---------- names : hashable or iterable of hashable Name(s) of variables to drop. - errors: {"raise", "ignore"}, optional + errors : {"raise", "ignore"}, optional If 'raise' (default), raises a ValueError error if any of the variable passed are not in the dataset. If 'ignore', any given names that are in the DataArray are dropped and no error is raised. @@ -2357,7 +2349,6 @@ def interpolate_na( provided. - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their respective :py:class:`scipy.interpolate` classes. - use_coordinate : bool or str, default: True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if @@ -2369,7 +2360,7 @@ def interpolate_na( or None for no limit. This filling is done regardless of the size of the gap in the data. To only interpolate over gaps less than a given length, see ``max_gap``. - max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default: None + max_gap : int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default: None Maximum size of gap, a continuous sequence of NaNs, that will be filled. Use None for no limit. When interpolating along a datetime64 dimension and ``use_coordinate=True``, ``max_gap`` can be one of the following: @@ -2404,7 +2395,7 @@ def interpolate_na( interpolated: DataArray Filled in DataArray. - See also + See Also -------- numpy.interp scipy.interpolate @@ -2716,7 +2707,7 @@ def to_dict(self, data: bool = True) -> dict: Whether to include the actual data in the dictionary. When set to False, returns just the schema. - See also + See Also -------- DataArray.from_dict """ @@ -2757,7 +2748,7 @@ def from_dict(cls, d: dict) -> "DataArray": ------- obj : xarray.DataArray - See also + See Also -------- DataArray.to_dict Dataset.from_dict @@ -2794,7 +2785,7 @@ def from_series(cls, series: pd.Series, sparse: bool = False) -> "DataArray": If sparse=True, creates a sparse array instead of a dense NumPy array. Requires the pydata/sparse package. - See also + See Also -------- xarray.Dataset.from_dataframe """ @@ -3047,7 +3038,6 @@ def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArr `n` matches numpy's behavior and is different from pandas' first argument named `periods`. - Examples -------- >>> arr = xr.DataArray([5, 5, 6, 6], [[1, 2, 3, 4]], ["x"]) @@ -3087,7 +3077,7 @@ def shift( Integer offset to shift along each of the given dimensions. Positive offsets shift to the right; negative offsets shift to the left. - fill_value: scalar, optional + fill_value : scalar, optional Value to use for newly missing values **shifts_kwargs The keyword arguments form of ``shifts``. @@ -3099,13 +3089,12 @@ def shift( DataArray with the same coordinates and attributes but shifted data. - See also + See Also -------- roll Examples -------- - >>> arr = xr.DataArray([5, 6, 7], dims="x") >>> arr.shift(x=1) @@ -3149,13 +3138,12 @@ def roll( rolled : DataArray DataArray with the same attributes but rolled data and coordinates. 
- See also + See Also -------- shift Examples -------- - >>> arr = xr.DataArray([5, 6, 7], dims="x") >>> arr.roll(x=1) @@ -3195,14 +3183,13 @@ def dot( result : DataArray Array resulting from the dot product over all shared dimensions. - See also + See Also -------- dot numpy.tensordot Examples -------- - >>> da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4)) >>> da = xr.DataArray(da_vals, dims=["x", "y", "z"]) >>> dm_vals = np.arange(4) @@ -3265,7 +3252,6 @@ def sortby( Examples -------- - >>> da = xr.DataArray( ... np.random.rand(5), ... coords=[pd.date_range("1/1/2000", periods=5)], @@ -3338,7 +3324,6 @@ def quantile( Examples -------- - >>> da = xr.DataArray( ... data=[[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]], ... coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]}, @@ -3410,7 +3395,6 @@ def rank( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], dims="x") >>> arr.rank("x") @@ -3557,12 +3541,10 @@ def unify_chunks(self) -> "DataArray": Returns ------- - DataArray with consistent chunk sizes for all dask-array variables See Also -------- - dask.array.core.unify_chunks """ ds = self._to_temp_dataset().unify_chunks() @@ -3623,12 +3605,11 @@ def map_blocks( See Also -------- - dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks, + dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks xarray.DataArray.map_blocks Examples -------- - Calculate an anomaly from climatology using ``.groupby()``. Using ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``, its indices, and its methods like ``.groupby()``. @@ -3728,7 +3709,7 @@ def polyfit( polyfit_covariance The covariance matrix of the polynomial coefficient estimates (only included if `full=False` and `cov=True`) - See also + See Also -------- numpy.polyfit """ @@ -3845,7 +3826,7 @@ def pad( padded : DataArray DataArray with the padded coordinates and data. - See also + See Also -------- DataArray.shift, DataArray.roll, DataArray.bfill, DataArray.ffill, numpy.pad, dask.array.pad @@ -3856,7 +3837,6 @@ def pad( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0, 1, 2])]) >>> arr.pad(x=(1, 2), constant_values=0) @@ -3948,13 +3928,12 @@ def idxmin( New `DataArray` object with `idxmin` applied to its data and the indicated dimension removed. - See also + See Also -------- Dataset.idxmin, DataArray.idxmax, DataArray.min, DataArray.argmin Examples -------- - >>> array = xr.DataArray( ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} ... ) @@ -4045,13 +4024,12 @@ def idxmax( New `DataArray` object with `idxmax` applied to its data and the indicated dimension removed. - See also + See Also -------- Dataset.idxmax, DataArray.idxmin, DataArray.max, DataArray.argmax Examples -------- - >>> array = xr.DataArray( ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} ... ) @@ -4139,7 +4117,7 @@ def argmin( ------- result : DataArray or dict of DataArray - See also + See Also -------- Variable.argmin, DataArray.idxmin @@ -4242,7 +4220,7 @@ def argmax( ------- result : DataArray or dict of DataArray - See also + See Also -------- Variable.argmax, DataArray.idxmax diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 8376b4875f9..6a609bfe7b7 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -762,7 +762,7 @@ def sizes(self) -> Mapping[Hashable, int]: This is an alias for `Dataset.dims` provided for the benefit of consistency with `DataArray.sizes`. 
- See also + See Also -------- DataArray.sizes """ @@ -1159,7 +1159,6 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": Examples -------- - Shallow copy versus deep copy >>> da = xr.DataArray(np.random.randn(2, 3)) @@ -1549,7 +1548,7 @@ def set_coords(self, names: "Union[Hashable, Iterable[Hashable]]") -> "Dataset": ------- Dataset - See also + See Also -------- Dataset.swap_dims """ @@ -1751,17 +1750,17 @@ def to_zarr( Nested dictionary with variable names as keys and dictionaries of variable specific encodings as values, e.g., ``{"my_variable": {"dtype": "int16", "scale_factor": 0.1,}, ...}`` - compute: bool, optional + compute : bool, optional If True write array data immediately, otherwise return a ``dask.delayed.Delayed`` object that can be computed to write array data later. Metadata is always updated eagerly. - consolidated: bool, optional + consolidated : bool, optional If True, apply zarr's `consolidate_metadata` function to the store after writing metadata. - append_dim: hashable, optional + append_dim : hashable, optional If set, the dimension along which the data will be appended. All other dimensions on overriden variables must remain the same size. - region: dict, optional + region : dict, optional Optional mapping from dimension names to integer slices along dataset dimensions to indicate the region of existing zarr array(s) in which to write this dataset's data. For example, @@ -1832,7 +1831,7 @@ def info(self, buf=None) -> None: See Also -------- pandas.DataFrame.assign - ncdump: netCDF's ncdump + ncdump : netCDF's ncdump """ if buf is None: # pragma: no cover buf = sys.stdout @@ -2232,7 +2231,6 @@ def sel( in this dataset, unless vectorized indexing was triggered by using an array indexer, in which case the data will be a copy. - See Also -------- Dataset.isel @@ -2263,7 +2261,6 @@ def head( The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. - See Also -------- Dataset.tail @@ -2310,7 +2307,6 @@ def tail( The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. - See Also -------- Dataset.head @@ -2360,7 +2356,6 @@ def thin( The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. - See Also -------- Dataset.head @@ -2536,7 +2531,6 @@ def reindex( Examples -------- - Create a dataset with some fictional data. >>> import xarray as xr @@ -2750,7 +2744,7 @@ def interp( in any order and they are sorted first. If True, interpolated coordinates are assumed to be an array of monotonically increasing values. - kwargs: dict, optional + kwargs : dict, optional Additional keyword arguments passed to scipy's interpolator. Valid options and their behavior depend on if 1-dimensional or multi-dimensional interpolation is used. @@ -2952,7 +2946,7 @@ def interp_like( in any order and they are sorted first. If True, interpolated coordinates are assumed to be an array of monotonically increasing values. - kwargs: dict, optional + kwargs : dict, optional Additional keyword passed to scipy's interpolator. Returns @@ -3166,7 +3160,6 @@ def swap_dims( dims_dict : dict-like Dictionary whose keys are current dimension names and whose values are new names. - **dim_kwargs : {existing_dim: new_dim, ...}, optional The keyword arguments form of ``dims_dict``. One of dims_dict or dims_kwargs must be provided. 
@@ -3215,7 +3208,6 @@ def swap_dims( See Also -------- - Dataset.rename DataArray.swap_dims """ @@ -3586,7 +3578,7 @@ def stack( stacked : Dataset Dataset with stacked data. - See also + See Also -------- Dataset.unstack """ @@ -3815,7 +3807,7 @@ def unstack( unstacked : Dataset Dataset with unstacked data. - See also + See Also -------- Dataset.stack """ @@ -3894,7 +3886,6 @@ def update(self, other: "CoercibleMapping") -> "Dataset": - mapping {var name: (dimension name, array-like)} - mapping {var name: (tuple of dimension names, array-like)} - Returns ------- updated : Dataset @@ -4375,7 +4366,6 @@ def fillna(self, value: Any) -> "Dataset": Examples -------- - >>> import numpy as np >>> import xarray as xr >>> ds = xr.Dataset( @@ -4452,7 +4442,6 @@ def interpolate_na( ---------- dim : str Specifies the dimension along which to interpolate. - method : str, optional String indicating which method to use for interpolation: @@ -4464,7 +4453,6 @@ def interpolate_na( provided. - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their respective :py:class:`scipy.interpolate` classes. - use_coordinate : bool, str, default: True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if @@ -4507,7 +4495,7 @@ def interpolate_na( interpolated: Dataset Filled in Dataset. - See also + See Also -------- numpy.interp scipy.interpolate @@ -5101,7 +5089,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas ------- New Dataset. - See also + See Also -------- xarray.DataArray.from_series pandas.DataFrame.to_xarray @@ -5232,7 +5220,7 @@ def to_dict(self, data=True): Whether to include the actual data in the dictionary. When set to False, returns just the schema. - See also + See Also -------- Dataset.from_dict """ @@ -5453,9 +5441,7 @@ def diff(self, dim, n=1, label="upper"): ------- difference : same type as caller The n-th order finite difference of this object. - .. note:: - `n` matches numpy's behavior and is different from pandas' first argument named `periods`. @@ -5543,13 +5529,12 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): Dataset with the same coordinates and attributes but shifted data variables. - See also + See Also -------- roll Examples -------- - >>> ds = xr.Dataset({"foo": ("x", list("abcde"))}) >>> ds.shift(x=2) @@ -5588,7 +5573,6 @@ def roll(self, shifts=None, roll_coords=None, **shifts_kwargs): Parameters ---------- - shifts : dict, optional A dict with keys matching dimensions and values given by integers to rotate each of the given dimensions. Positive @@ -5607,13 +5591,12 @@ def roll(self, shifts=None, roll_coords=None, **shifts_kwargs): Dataset with the same coordinates and attributes but rolled variables. - See also + See Also -------- shift Examples -------- - >>> ds = xr.Dataset({"foo": ("x", list("abcde"))}) >>> ds.roll(x=2) @@ -5680,10 +5663,10 @@ def sortby(self, variables, ascending=True): Parameters ---------- - variables: str, DataArray, or list of str or DataArray + variables : str, DataArray, or list of str or DataArray 1D DataArray objects or name(s) of 1D variable(s) in coords/data_vars whose values are used to sort the dataset. - ascending: bool, optional + ascending : bool, optional Whether to sort by ascending or descending order. Returns @@ -5771,7 +5754,6 @@ def quantile( Examples -------- - >>> ds = xr.Dataset( ... {"a": (("x", "y"), [[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]])}, ... 
coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]}, @@ -6177,12 +6159,10 @@ def unify_chunks(self) -> "Dataset": Returns ------- - Dataset with consistent chunk sizes for all dask-array variables See Also -------- - dask.array.core.unify_chunks """ @@ -6257,7 +6237,6 @@ def map_blocks( When provided, ``attrs`` on variables in `template` are copied over to the result. Any ``attrs`` set by ``func`` will be ignored. - Returns ------- A single DataArray or Dataset with dask backend, reassembled from the outputs of the @@ -6274,12 +6253,11 @@ def map_blocks( See Also -------- - dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks, + dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks xarray.DataArray.map_blocks Examples -------- - Calculate an anomaly from climatology using ``.groupby()``. Using ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``, its indices, and its methods like ``.groupby()``. @@ -6365,7 +6343,6 @@ def polyfit( Whether to return to the covariance matrix in addition to the coefficients. The matrix is not scaled if `cov='unscaled'`. - Returns ------- polyfit_results : Dataset @@ -6390,7 +6367,7 @@ def polyfit( The rank of the coefficient matrix in the least-squares fit is deficient. The warning is not raised with in-memory (not dask) data and `full=True`. - See also + See Also -------- numpy.polyfit """ @@ -6627,7 +6604,7 @@ def pad( padded : Dataset Dataset with the padded coordinates and data. - See also + See Also -------- Dataset.shift, Dataset.roll, Dataset.bfill, Dataset.ffill, numpy.pad, dask.array.pad @@ -6639,7 +6616,6 @@ def pad( Examples -------- - >>> ds = xr.Dataset({"foo": ("x", range(5))}) >>> ds.pad(x=(1, 2)) @@ -6728,13 +6704,12 @@ def idxmin( New `Dataset` object with `idxmin` applied to its data and the indicated dimension removed. - See also + See Also -------- DataArray.idxmin, Dataset.idxmax, Dataset.min, Dataset.argmin Examples -------- - >>> array1 = xr.DataArray( ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} ... ) @@ -6826,13 +6801,12 @@ def idxmax( New `Dataset` object with `idxmax` applied to its data and the indicated dimension removed. - See also + See Also -------- DataArray.idxmax, Dataset.idxmin, Dataset.max, Dataset.argmax Examples -------- - >>> array1 = xr.DataArray( ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} ... ) @@ -6912,7 +6886,7 @@ def argmin(self, dim=None, axis=None, **kwargs): ------- result : Dataset - See also + See Also -------- DataArray.argmin @@ -6975,7 +6949,7 @@ def argmax(self, dim=None, axis=None, **kwargs): ------- result : Dataset - See also + See Also -------- DataArray.argmax diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index e6c3aae5bf8..9c8c42d0491 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -405,21 +405,21 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): Parameters ---------- - da : array-like - Input data - offset: None, datetime or cftime.datetime - Datetime offset. If None, this is set by default to the array's minimum - value to reduce round off errors. - datetime_unit: {None, Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as} - If not None, convert output to a given datetime unit. Note that some - conversions are not allowed due to non-linear relationships between units. - dtype: dtype - Output dtype. + array : array-like + Input data + offset : None, datetime or cftime.datetime + Datetime offset. 
If None, this is set by default to the array's minimum + value to reduce round off errors. + datetime_unit : {None, Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as} + If not None, convert output to a given datetime unit. Note that some + conversions are not allowed due to non-linear relationships between units. + dtype : dtype + Output dtype. Returns ------- array - Numerical representation of datetime object relative to an offset. + Numerical representation of datetime object relative to an offset. Notes ----- @@ -463,12 +463,12 @@ def timedelta_to_numeric(value, datetime_unit="ns", dtype=float): Parameters ---------- value : datetime.timedelta, numpy.timedelta64, pandas.Timedelta, str - Time delta representation. + Time delta representation. datetime_unit : {Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as} - The time units of the output values. Note that some conversions are not allowed due to - non-linear relationships between units. + The time units of the output values. Note that some conversions are not allowed due to + non-linear relationships between units. dtype : type - The output data type. + The output data type. """ import datetime as dt diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index ee4c3ebc9e6..9b7b060107b 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -69,7 +69,7 @@ def register_dataarray_accessor(name): Name under which the accessor should be registered. A warning is issued if this name conflicts with a preexisting attribute. - See also + See Also -------- register_dataset_accessor """ @@ -87,7 +87,6 @@ def register_dataset_accessor(name): Examples -------- - In your library code: >>> @xr.register_dataset_accessor("geo") @@ -115,7 +114,7 @@ def register_dataset_accessor(name): (10.0, 5.0) >>> ds.geo.plot() # plots data on a map - See also + See Also -------- register_dataarray_accessor """ diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index e1e5a0fabe8..824f2767153 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -541,7 +541,7 @@ def fillna(self, value): ------- same type as the grouped object - See also + See Also -------- Dataset.fillna DataArray.fillna @@ -590,12 +590,11 @@ def quantile( See Also -------- - numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile, + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile DataArray.quantile Examples -------- - >>> da = xr.DataArray( ... [[1.3, 8.4, 0.7, 6.9], [0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]], ... coords={"x": [0, 0, 1], "y": [1, 1, 2, 2]}, @@ -672,7 +671,7 @@ def where(self, cond, other=dtypes.NA): ------- same type as the grouped object - See also + See Also -------- Dataset.where """ @@ -698,7 +697,7 @@ def last(self, skipna=None, keep_attrs=None): def assign_coords(self, coords=None, **coords_kwargs): """Assign coordinates by group. - See also + See Also -------- Dataset.assign_coords Dataset.swap_dims @@ -996,7 +995,7 @@ def reduce_dataset(ds): def assign(self, **kwargs): """Assign data variables by group. - See also + See Also -------- Dataset.assign """ diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index a5d1896e74c..0c4a28db93d 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -47,7 +47,7 @@ def __init__(self, indexes): Parameters ---------- indexes : Dict[Any, pandas.Index] - Indexes held by this object. + Indexes held by this object. 
""" self._indexes = indexes @@ -75,7 +75,7 @@ def default_indexes( Parameters ---------- coords : Mapping[Any, xarray.Variable] - Coordinate variables from which to draw default indexes. + Coordinate variables from which to draw default indexes. dims : iterable Iterable of dimension names. diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 843feb04479..dff6d75d5b7 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -787,11 +787,11 @@ def _combine_indexers(old_key, shape, new_key): Parameters ---------- - old_key: ExplicitIndexer + old_key : ExplicitIndexer The first indexer for the original array - shape: tuple of ints + shape : tuple of ints Shape of the original array to be indexed by old_key - new_key: + new_key The second indexer for indexing original[old_key] """ if not isinstance(old_key, VectorizedIndexer): @@ -841,7 +841,7 @@ def explicit_indexing_adapter( Shape of the indexed array. indexing_support : IndexingSupport enum Form of indexing supported by raw_indexing_method. - raw_indexing_method: callable + raw_indexing_method : callable Function (like ndarray.__getitem__) that when called with indexing key in the form of a tuple returns an indexed array. @@ -895,8 +895,8 @@ def _decompose_vectorized_indexer( Parameters ---------- - indexer: VectorizedIndexer - indexing_support: one of IndexerSupport entries + indexer : VectorizedIndexer + indexing_support : one of IndexerSupport entries Returns ------- @@ -977,8 +977,8 @@ def _decompose_outer_indexer( Parameters ---------- - indexer: OuterIndexer or BasicIndexer - indexing_support: One of the entries of IndexingSupport + indexer : OuterIndexer or BasicIndexer + indexing_support : One of the entries of IndexingSupport Returns ------- diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 8d112b4603c..695affa84c1 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -216,20 +216,20 @@ def get_clean_interp_index( Parameters ---------- arr : DataArray - Array to interpolate or fit to a curve. + Array to interpolate or fit to a curve. dim : str - Name of dimension along which to fit. + Name of dimension along which to fit. use_coordinate : str or bool - If use_coordinate is True, the coordinate that shares the name of the - dimension along which interpolation is being performed will be used as the - x values. If False, the x values are set as an equally spaced sequence. + If use_coordinate is True, the coordinate that shares the name of the + dimension along which interpolation is being performed will be used as the + x values. If False, the x values are set as an equally spaced sequence. strict : bool - Whether to raise errors if the index is either non-unique or non-monotonic (default). + Whether to raise errors if the index is either non-unique or non-monotonic (default). Returns ------- Variable - Numerical values for the x-coordinates. + Numerical values for the x-coordinates. Notes ----- @@ -589,16 +589,16 @@ def interp(var, indexes_coords, method, **kwargs): Parameters ---------- - var: Variable - index_coords: + var : Variable + indexes_coords Mapping from dimension name to a pair of original and new coordinates. Original coordinates should be sorted in strictly ascending order. Note that all the coordinates should be Variable objects. - method: string + method : string One of {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'}. For multidimensional interpolation, only {'linear', 'nearest'} can be used. 
- **kwargs: + **kwargs keyword arguments to be passed to scipy.interpolate Returns @@ -658,17 +658,17 @@ def interp_func(var, x, new_x, method, kwargs): Parameters ---------- - var: np.ndarray or dask.array.Array + var : np.ndarray or dask.array.Array Array to be interpolated. The final dimension is interpolated. - x: a list of 1d array. + x : a list of 1d array. Original coordinates. Should not contain NaN. - new_x: a list of 1d array + new_x : a list of 1d array New coordinates. Should not contain NaN. - method: string + method : string {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'} for 1-dimensional interpolation. {'linear', 'nearest'} for multidimensional interpolation - **kwargs: + **kwargs Optional keyword arguments to be passed to scipy.interpolator Returns @@ -676,8 +676,8 @@ def interp_func(var, x, new_x, method, kwargs): interpolated: array Interpolated array - Note - ---- + Notes + ----- This requiers scipy installed. See Also diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index c65c22f5384..7e382903046 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -160,7 +160,7 @@ def _rolling_window(a, window, axis=-1): ---------- a : array_like Array to add rolling window to - axis: int + axis : int axis position along which rolling window will be applied. window : int Size of rolling window diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 20b4b9f9eb3..d2cdb94d01a 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -183,7 +183,6 @@ def map_blocks( This function must return either a single DataArray or a single Dataset. This function cannot add a new chunked dimension. - obj : DataArray, Dataset Passed to the function as its first argument, one block at a time. args : sequence @@ -201,7 +200,6 @@ def map_blocks( When provided, ``attrs`` on variables in `template` are copied over to the result. Any ``attrs`` set by ``func`` will be ignored. - Returns ------- A single DataArray or Dataset with dask backend, reassembled from the outputs of the @@ -218,12 +216,11 @@ def map_blocks( See Also -------- - dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks, + dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks xarray.DataArray.map_blocks Examples -------- - Calculate an anomaly from climatology using ``.groupby()``. Using ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``, its indices, and its methods like ``.groupby()``. diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 39d889244dc..f25d798d9f1 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -721,7 +721,7 @@ def __init__(self, obj, windows, boundary, side, coord_func, keep_attrs): multiple of window size. If 'trim', the excess indexes are trimed. If 'pad', NA will be padded. side : 'left' or 'right' or mapping from dimension to 'left' or 'right' - coord_func: mapping from coordinate name to func. + coord_func : mapping from coordinate name to func. Returns ------- diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 64c1895da59..45553eb9b1e 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -403,7 +403,6 @@ def astype( * 'same_kind' means only safe casts or casts within a kind, like float64 to float32, are allowed. * 'unsafe' means any data conversions may be done. - subok : bool, optional If True, then sub-classes will be passed-through, otherwise the returned array will be forced to be a base-class array. 
@@ -428,7 +427,7 @@ def astype( Make sure to only supply these arguments if the underlying array class supports them. - See also + See Also -------- numpy.ndarray.astype dask.array.Array.astype @@ -606,8 +605,8 @@ def _broadcast_indexes(self, key): """Prepare an indexing key for an indexing operation. Parameters - ----------- - key: int, slice, array-like, dict or tuple of integer, slice and array-like + ---------- + key : int, slice, array-like, dict or tuple of integer, slice and array-like Any valid input for indexing. Returns @@ -929,7 +928,6 @@ def copy(self, deep=True, data=None): Examples -------- - Shallow copy versus deep copy >>> var = xr.Variable(data=[1, 2, 3], dims="x") @@ -1225,7 +1223,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): Integer offset to shift along each of the given dimensions. Positive offsets shift to the right; negative offsets shift to the left. - fill_value: scalar, optional + fill_value : scalar, optional Value to use for newly missing values **shifts_kwargs The keyword arguments form of ``shifts``. @@ -1535,7 +1533,7 @@ def stack(self, dimensions=None, **dimensions_kwargs): stacked : Variable Variable with the same attributes but stacked data. - See also + See Also -------- Variable.unstack """ @@ -1655,7 +1653,7 @@ def unstack(self, dimensions=None, **dimensions_kwargs): unstacked : Variable Variable with the same attributes but unstacked data. - See also + See Also -------- Variable.stack DataArray.unstack @@ -1900,7 +1898,6 @@ def quantile( * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. * midpoint: ``(i + j) / 2``. - keep_attrs : bool, optional If True, the variable's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new @@ -1917,7 +1914,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile, + numpy.nanquantile, pandas.Series.quantile, Dataset.quantile DataArray.quantile """ @@ -2432,7 +2429,7 @@ def argmin( ------- result : Variable or dict of Variable - See also + See Also -------- DataArray.argmin, DataArray.idxmin """ @@ -2477,7 +2474,7 @@ def argmax( ------- result : Variable or dict of Variable - See also + See Also -------- DataArray.argmax, DataArray.idxmax """ diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index 58b38251352..bfa400d7ba4 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -233,7 +233,7 @@ def map_dataarray(self, func, x, y, **kwargs): plotting method such as `xarray.plot.imshow` x, y : string Names of the coordinates to plot on x, y axes - kwargs : + kwargs additional keyword arguments to func Returns diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 8a57e17e5e8..75fa786ecc5 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -351,7 +351,6 @@ def step(darray, *args, where="pre", drawstyle=None, ds=None, **kwargs): :py:func:`pandas.Interval` values, e.g. as a result of :py:func:`xarray.Dataset.groupby_bins`. In this case, the actual boundaries of the interval are used. - *args, **kwargs : optional Additional arguments following :py:func:`xarray.plot.line` """ diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 601b23a3065..ffe796987c5 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -159,12 +159,12 @@ def _determine_cmap_params( Use some heuristics to set good defaults for colorbar and range. 
Parameters - ========== - plot_data: Numpy array + ---------- + plot_data : Numpy array Doesn't handle xarray objects Returns - ======= + ------- cmap_params : dict Use depends on the type of the plotting function """ @@ -791,15 +791,14 @@ def _process_cmap_cbar_kwargs( ): """ Parameters - ========== + ---------- func : plotting function data : ndarray, Data values Returns - ======= + ------- cmap_params - cbar_kwargs """ cbar_kwargs = {} if cbar_kwargs is None else dict(cbar_kwargs) diff --git a/xarray/testing.py b/xarray/testing.py index ca72a4bee8e..1d79ae8df7d 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -54,9 +54,9 @@ def assert_equal(a, b): b : xarray.Dataset, xarray.DataArray or xarray.Variable The second object to compare. - See also + See Also -------- - assert_identical, assert_allclose, Dataset.equals, DataArray.equals, + assert_identical, assert_allclose, Dataset.equals, DataArray.equals numpy.testing.assert_array_equal """ __tracebackhide__ = True @@ -82,7 +82,7 @@ def assert_identical(a, b): b : xarray.Dataset, xarray.DataArray or xarray.Variable The second object to compare. - See also + See Also -------- assert_equal, assert_allclose, Dataset.equals, DataArray.equals """ @@ -120,7 +120,7 @@ def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True): This is useful for testing serialization methods on Python 3 that return saved strings as bytes. - See also + See Also -------- assert_identical, assert_equal, numpy.testing.assert_allclose """ From 46591d28d9fbbfc184aaf4075d330b1c8f070627 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 7 Feb 2021 18:12:29 -0500 Subject: [PATCH 44/51] Modify _encode_datetime_with_cftime for compatibility with cftime > 1.4.0 (#4871) --- doc/whats-new.rst | 2 ++ xarray/coding/times.py | 2 +- xarray/tests/test_coding_times.py | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 471e91a8512..f619284437b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -96,6 +96,8 @@ Bug fixes :py:meth:`Dataset.to_zarr` (:issue:`4783`, :pull:`4795`). By `Julien Seguinot `_. - Add :py:meth:`Dataset.drop_isel` and :py:meth:`DataArray.drop_isel` (:issue:`4658`, :pull:`4819`). By `Daniel Mesejo `_. +- Fix time encoding bug associated with using cftime versions greater than + 1.4.0 with xarray (:issue:`4870`, :pull:`4871`). By `Spencer Clark `_. 
Documentation ~~~~~~~~~~~~~ diff --git a/xarray/coding/times.py b/xarray/coding/times.py index ac2b1fb280d..39ad2f57c1e 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -391,7 +391,7 @@ def _encode_datetime_with_cftime(dates, units, calendar): def encode_datetime(d): return np.nan if d is None else cftime.date2num(d, units, calendar) - return np.vectorize(encode_datetime)(dates) + return np.array([encode_datetime(d) for d in dates.ravel()]).reshape(dates.shape) def cast_to_int_if_safe(num): diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index dfd558f737e..d8412f82374 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -8,6 +8,7 @@ from xarray import DataArray, Dataset, Variable, coding, conventions, decode_cf from xarray.coding.times import ( + _encode_datetime_with_cftime, cftime_to_nptime, decode_cf_datetime, encode_cf_datetime, @@ -995,3 +996,18 @@ def test_encode_decode_roundtrip(freq): encoded = conventions.encode_cf_variable(variable) decoded = conventions.decode_cf_variable("time", encoded) assert_equal(variable, decoded) + + +@requires_cftime +def test__encode_datetime_with_cftime(): + # See GH 4870. cftime versions > 1.4.0 required us to adapt the + # way _encode_datetime_with_cftime was written. + import cftime + + calendar = "gregorian" + times = cftime.num2date([0, 1], "hours since 2000-01-01", calendar) + + encoding_units = "days since 2000-01-01" + expected = cftime.date2num(times, encoding_units, calendar) + result = _encode_datetime_with_cftime(times, encoding_units, calendar) + np.testing.assert_equal(result, expected) From f98d6f065db2ad1f8911cb22aa04b4e0210ecee4 Mon Sep 17 00:00:00 2001 From: keewis Date: Mon, 8 Feb 2021 00:26:52 +0100 Subject: [PATCH 45/51] small fixes for the docstrings of swap_dims and integrate (#4867) * update the docstrings of integrate * also mention the removal version in whats-new.rst * update the docstrings of swap_dims [skip-ci] * add the PR [skip-ci] --- doc/whats-new.rst | 4 +++- xarray/core/dataarray.py | 11 ++++++----- xarray/core/dataset.py | 8 ++++---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f619284437b..4300f1b188a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,7 +47,9 @@ Deprecations - ``dim`` argument to :py:meth:`DataArray.integrate` is being deprecated in favour of a ``coord`` argument, for consistency with :py:meth:`Dataset.integrate`. - For now using ``dim`` issues a ``FutureWarning``. By `Tom Nicholas `_. + For now using ``dim`` issues a ``FutureWarning``. It will be removed in + version 0.19.0 (:pull:`3993`). + By `Tom Nicholas `_. New Features diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index fc53dbee26e..7ca5ff50eba 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1707,7 +1707,7 @@ def swap_dims( dims_dict : dict-like Dictionary whose keys are current dimension names and whose values are new names. - **dim_kwargs : {dim: , ...}, optional + **dims_kwargs : {existing_dim: new_dim, ...}, optional The keyword arguments form of ``dims_dict``. One of dims_dict or dims_kwargs must be provided. @@ -3479,21 +3479,22 @@ def integrate( Parameters ---------- - coord: hashable, or a sequence of hashable + coord : hashable, or sequence of hashable Coordinate(s) used for the integration. dim : hashable, or sequence of hashable Coordinate(s) used for the integration. 
- datetime_unit: {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ + datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as'}, optional + Specify the unit if a datetime coordinate is used. Returns ------- - integrated: DataArray + integrated : DataArray See also -------- Dataset.integrate - numpy.trapz: corresponding numpy function + numpy.trapz : corresponding numpy function Examples -------- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6a609bfe7b7..7d51adb5244 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3160,7 +3160,7 @@ def swap_dims( dims_dict : dict-like Dictionary whose keys are current dimension names and whose values are new names. - **dim_kwargs : {existing_dim: new_dim, ...}, optional + **dims_kwargs : {existing_dim: new_dim, ...}, optional The keyword arguments form of ``dims_dict``. One of dims_dict or dims_kwargs must be provided. @@ -5956,9 +5956,9 @@ def integrate( Parameters ---------- - coord: hashable, or a sequence of hashable + coord : hashable, or sequence of hashable Coordinate(s) used for the integration. - datetime_unit: {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ + datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as'}, optional Specify the unit if datetime coordinate is used. @@ -5969,7 +5969,7 @@ def integrate( See also -------- DataArray.integrate - numpy.trapz: corresponding numpy function + numpy.trapz : corresponding numpy function Examples -------- From 4e97b33cd0f2f272d5c4c17db9caf7a8cb84ca2d Mon Sep 17 00:00:00 2001 From: keewis Date: Mon, 8 Feb 2021 01:18:50 +0100 Subject: [PATCH 46/51] update pre-commit hooks (#4874) * update the blackdoc hook version * remove mypy from the environments * remove the reference to the conda requirements files * also remove isort, black and flake8 --- .pre-commit-config.yaml | 4 ++-- ci/requirements/py37-min-all-deps.yml | 4 ---- ci/requirements/py38-all-but-dask.yml | 4 ---- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 20f1af72a11..bc3bc8f2ba8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,7 @@ repos: hooks: - id: black - repo: https://github.com/keewis/blackdoc - rev: v0.3.2 + rev: v0.3.3 hooks: - id: blackdoc - repo: https://gitlab.com/pycqa/flake8 @@ -30,7 +30,7 @@ repos: # - id: velin # args: ["--write", "--compact"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.790 # Must match ci/requirements/*.yml + rev: v0.790 hooks: - id: mypy exclude: "properties|asv_bench" diff --git a/ci/requirements/py37-min-all-deps.yml b/ci/requirements/py37-min-all-deps.yml index feef86ddf5c..166836243b4 100644 --- a/ci/requirements/py37-min-all-deps.yml +++ b/ci/requirements/py37-min-all-deps.yml @@ -8,7 +8,6 @@ dependencies: # When upgrading python, numpy, or pandas, must also change # doc/installing.rst and setup.py. 
- python=3.7 - - black - boto3=1.9 - bottleneck=1.2 - cartopy=0.17 @@ -18,16 +17,13 @@ dependencies: - coveralls - dask=2.9 - distributed=2.9 - - flake8 - h5netcdf=0.7 - h5py=2.9 # Policy allows for 2.10, but it's a conflict-fest - hdf5=1.10 - hypothesis - iris=2.2 - - isort - lxml=4.4 # Optional dep of pydap - matplotlib-base=3.1 - - mypy=0.782 # Must match .pre-commit-config.yaml - nc-time-axis=1.2 - netcdf4=1.4 - numba=0.46 diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml index 14930f5272d..51ec48cc6b1 100644 --- a/ci/requirements/py38-all-but-dask.yml +++ b/ci/requirements/py38-all-but-dask.yml @@ -4,7 +4,6 @@ channels: - nodefaults dependencies: - python=3.8 - - black - boto3 - bottleneck - cartopy @@ -12,15 +11,12 @@ dependencies: - cfgrib - cftime - coveralls - - flake8 - h5netcdf - h5py=2 - hdf5 - hypothesis - - isort - lxml # Optional dep of pydap - matplotlib-base - - mypy=0.790 # Must match .pre-commit-config.yaml - nc-time-axis - netcdf4 - numba From 45c3618050ed303228e4bb8011d3068eb2b80f3c Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 8 Feb 2021 18:33:37 +0100 Subject: [PATCH 47/51] ensure warnings cannot become errors in assert_ (#4864) * ensure warnings cannot become errors in assert_ * also assert_duckarray_allclose * add whats new * ensure warnings are raised * Update doc/whats-new.rst * use a decorator --- doc/whats-new.rst | 4 +++- xarray/testing.py | 19 +++++++++++++++++ xarray/tests/test_testing.py | 40 ++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4300f1b188a..e5eb4680878 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -130,6 +130,8 @@ Internal Changes in ipython (:issue:`4741`, :pull:`4742`). By `Richard Kleijn `_. - Added the ``set_close`` method to ``Dataset`` and ``DataArray`` for beckends to specify how to voluntary release all resources. (:pull:`#4809`), By `Alessandro Amici `_. +- Ensure warnings cannot be turned into exceptions in :py:func:`testing.assert_equal` and + the other ``assert_*`` functions (:pull:`4864`). By `Mathias Hauser `_. .. _whats-new.0.16.2: @@ -146,7 +148,7 @@ Deprecations - :py:attr:`~core.accessor_dt.DatetimeAccessor.weekofyear` and :py:attr:`~core.accessor_dt.DatetimeAccessor.week` have been deprecated. Use ``DataArray.dt.isocalendar().week`` - instead (:pull:`4534`). By `Mathias Hauser `_, + instead (:pull:`4534`). By `Mathias Hauser `_. `Maximilian Roos `_, and `Spencer Clark `_. - :py:attr:`DataArray.rolling` and :py:attr:`Dataset.rolling` no longer support passing ``keep_attrs`` via its constructor. Pass ``keep_attrs`` via the applied function, i.e. 
use diff --git a/xarray/testing.py b/xarray/testing.py index 1d79ae8df7d..e8b5f04ef85 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -1,5 +1,6 @@ """Testing functions exposed to the user API""" import functools +import warnings from typing import Hashable, Set, Union import numpy as np @@ -21,6 +22,19 @@ ) +def ensure_warnings(func): + # sometimes tests elevate warnings to errors + # -> make sure that does not happen in the assert_* functions + @functools.wraps(func) + def wrapper(*args, **kwargs): + with warnings.catch_warnings(): + warnings.simplefilter("always") + + return func(*args, **kwargs) + + return wrapper + + def _decode_string_data(data): if data.dtype.kind == "S": return np.core.defchararray.decode(data, "utf-8", "replace") @@ -38,6 +52,7 @@ def _data_allclose_or_equiv(arr1, arr2, rtol=1e-05, atol=1e-08, decode_bytes=Tru return duck_array_ops.allclose_or_equiv(arr1, arr2, rtol=rtol, atol=atol) +@ensure_warnings def assert_equal(a, b): """Like :py:func:`numpy.testing.assert_array_equal`, but for xarray objects. @@ -69,6 +84,7 @@ def assert_equal(a, b): raise TypeError("{} not supported by assertion comparison".format(type(a))) +@ensure_warnings def assert_identical(a, b): """Like :py:func:`xarray.testing.assert_equal`, but also matches the objects' names and attributes. @@ -99,6 +115,7 @@ def assert_identical(a, b): raise TypeError("{} not supported by assertion comparison".format(type(a))) +@ensure_warnings def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True): """Like :py:func:`numpy.testing.assert_allclose`, but for xarray objects. @@ -182,6 +199,7 @@ def _format_message(x, y, err_msg, verbose): return "\n".join(parts) +@ensure_warnings def assert_duckarray_allclose( actual, desired, rtol=1e-07, atol=0, err_msg="", verbose=True ): @@ -192,6 +210,7 @@ def assert_duckarray_allclose( assert allclose, _format_message(actual, desired, err_msg=err_msg, verbose=verbose) +@ensure_warnings def assert_duckarray_equal(x, y, err_msg="", verbose=True): """ Like `np.testing.assert_array_equal`, but for duckarrays """ __tracebackhide__ = True diff --git a/xarray/tests/test_testing.py b/xarray/tests/test_testing.py index 30ea6aaaee9..b6dec846c54 100644 --- a/xarray/tests/test_testing.py +++ b/xarray/tests/test_testing.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np import pytest @@ -127,3 +129,41 @@ def test_assert_duckarray_equal(duckarray, obj1, obj2): b = duckarray(obj2) xr.testing.assert_duckarray_equal(a, b) + + +@pytest.mark.parametrize( + "func", + [ + "assert_equal", + "assert_identical", + "assert_allclose", + "assert_duckarray_equal", + "assert_duckarray_allclose", + ], +) +def test_ensure_warnings_not_elevated(func): + # make sure warnings are not elevated to errors in the assertion functions + # e.g. 
by @pytest.mark.filterwarnings("error") + # see https://github.com/pydata/xarray/pull/4760#issuecomment-774101639 + + # define a custom Variable class that raises a warning in assert_* + class WarningVariable(xr.Variable): + @property # type: ignore + def dims(self): + warnings.warn("warning in test") + return super().dims + + def __array__(self): + warnings.warn("warning in test") + return super().__array__() + + a = WarningVariable("x", [1]) + b = WarningVariable("x", [2]) + + with warnings.catch_warnings(record=True) as w: + # elevate warnings to errors + warnings.filterwarnings("error") + with pytest.raises(AssertionError): + getattr(xr.testing, func)(a, b) + + assert len(w) > 0 From 59088a0a5dbbb6c62ba248caf19f984d05b18bc6 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 8 Feb 2021 18:42:41 +0100 Subject: [PATCH 48/51] update pre-commit hooks (mypy) (#4883) --- .pre-commit-config.yaml | 2 +- setup.cfg | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bc3bc8f2ba8..8bd67c81964 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,7 +30,7 @@ repos: # - id: velin # args: ["--write", "--compact"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.790 + rev: v0.800 hooks: - id: mypy exclude: "properties|asv_bench" diff --git a/setup.cfg b/setup.cfg index a695191bf02..72d28d3ca6f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -164,6 +164,8 @@ force_to_top = true default_section = THIRDPARTY known_first_party = xarray +[mypy] + # Most of the numerical computing stack doesn't have type annotations yet. [mypy-affine.*] ignore_missing_imports = True From 47889eece9768d421dc5c0764e513a4a4a913297 Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 10 Feb 2021 20:50:16 +0100 Subject: [PATCH 49/51] add a drop_conflicts strategy for merging attrs (#4827) --- doc/whats-new.rst | 4 ++ xarray/core/combine.py | 12 ++-- xarray/core/concat.py | 6 +- xarray/core/merge.py | 29 ++++++-- xarray/tests/test_combine.py | 11 +++ xarray/tests/test_concat.py | 129 +++++++++++++++++++++++++++++------ xarray/tests/test_merge.py | 85 +++++++++++++++++++++++ 7 files changed, 247 insertions(+), 29 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e5eb4680878..2574cd2e60a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -60,6 +60,10 @@ New Features By `Maximilian Roos `_. - Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with large number of variables. + By `Deepak Cherian `_ +- add ``"drop_conflicts"`` to the strategies supported by the ``combine_attrs`` kwarg + (:issue:`4749`, :pull:`4827`). + By `Justus Magin `_. By `Deepak Cherian `_. - :py:meth:`DataArray.swap_dims` & :py:meth:`Dataset.swap_dims` now accept dims in the form of kwargs as well as a dict, like most similar methods. diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 86ed1870302..573247937b7 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -412,14 +412,16 @@ def combine_nested( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. 
- combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \ - default: "drop" + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, default: "drop" String indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. + - "drop_conflicts": attrs from all objects are combined, any that have + the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. @@ -625,14 +627,16 @@ def combine_by_coords( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \ - default: "drop" + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, default: "drop" String indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. + - "drop_conflicts": attrs from all objects are combined, any that have + the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 5cda5aa903c..7a958eb1404 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -142,14 +142,16 @@ def concat( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \ - default: "override" + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, default: "override" String indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. + - "drop_conflicts": attrs from all objects are combined, any that have + the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. diff --git a/xarray/core/merge.py b/xarray/core/merge.py index d29a9e1ff02..14beeff3db5 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -20,7 +20,7 @@ from . import dtypes, pdcompat from .alignment import deep_align from .duck_array_ops import lazy_array_equiv -from .utils import Frozen, compat_dict_union, dict_equiv +from .utils import Frozen, compat_dict_union, dict_equiv, equivalent from .variable import Variable, as_variable, assert_unique_multiindex_level_names if TYPE_CHECKING: @@ -513,6 +513,24 @@ def merge_attrs(variable_attrs, combine_attrs): "the same. 
Merging %s with %s" % (str(result), str(attrs)) ) return result + elif combine_attrs == "drop_conflicts": + result = {} + dropped_keys = set() + for attrs in variable_attrs: + result.update( + { + key: value + for key, value in attrs.items() + if key not in result and key not in dropped_keys + } + ) + result = { + key: value + for key, value in result.items() + if key not in attrs or equivalent(attrs[key], value) + } + dropped_keys |= {key for key in attrs if key not in result} + return result elif combine_attrs == "identical": result = dict(variable_attrs[0]) for attrs in variable_attrs[1:]: @@ -556,7 +574,8 @@ def merge_core( Compatibility checks to use when merging variables. join : {"outer", "inner", "left", "right"}, optional How to combine objects with different indexes. - combine_attrs : {"drop", "identical", "no_conflicts", "override"}, optional + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, optional How to combine attributes of objects priority_arg : int, optional Optional argument in `objects` that takes precedence over the others. @@ -668,14 +687,16 @@ def merge( Value to use for newly missing values. If a dict-like, maps variable names to fill values. Use a data array's name to refer to its values. - combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \ - default: "drop" + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, default: "drop" String indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. + - "drop_conflicts": attrs from all objects are combined, any that have + the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. 
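As a minimal sketch of how the new "drop_conflicts" strategy documented above behaves once this patch is applied (the dataset and attribute names here are illustrative, not taken from the patch):

    import xarray as xr

    ds1 = xr.Dataset(attrs={"units": "m", "source": "model_a"})
    ds2 = xr.Dataset(attrs={"units": "m", "source": "model_b"})

    # attrs that agree ("units") are kept; attrs with conflicting values ("source") are dropped
    merged = xr.merge([ds1, ds2], combine_attrs="drop_conflicts")
    print(merged.attrs)  # {'units': 'm'}
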
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 109b78f05a9..522b98cf864 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -732,6 +732,17 @@ def test_combine_coords_combine_attrs_identical(self): objs, concat_dim="x", join="outer", combine_attrs="identical" ) + def test_combine_nested_combine_attrs_drop_conflicts(self): + objs = [ + Dataset({"x": [0], "y": [0]}, attrs={"a": 1, "b": 2, "c": 3}), + Dataset({"x": [1], "y": [1]}, attrs={"a": 1, "b": 0, "d": 3}), + ] + expected = Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1, "c": 3, "d": 3}) + actual = combine_nested( + objs, concat_dim="x", join="outer", combine_attrs="drop_conflicts" + ) + assert_identical(expected, actual) + def test_infer_order_from_coords(self): data = create_test_data() objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 7416cab13ed..beed48a35fc 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -258,27 +258,118 @@ def test_concat_join_kwarg(self): ) assert_identical(actual, expected) - def test_concat_combine_attrs_kwarg(self): - ds1 = Dataset({"a": ("x", [0])}, coords={"x": [0]}, attrs={"b": 42}) - ds2 = Dataset({"a": ("x", [0])}, coords={"x": [1]}, attrs={"b": 42, "c": 43}) - - expected = {} - expected["drop"] = Dataset({"a": ("x", [0, 0])}, {"x": [0, 1]}) - expected["no_conflicts"] = Dataset( - {"a": ("x", [0, 0])}, {"x": [0, 1]}, {"b": 42, "c": 43} - ) - expected["override"] = Dataset({"a": ("x", [0, 0])}, {"x": [0, 1]}, {"b": 42}) - - with raises_regex(ValueError, "combine_attrs='identical'"): - actual = concat([ds1, ds2], dim="x", combine_attrs="identical") - with raises_regex(ValueError, "combine_attrs='no_conflicts'"): - ds3 = ds2.copy(deep=True) - ds3.attrs["b"] = 44 - actual = concat([ds1, ds3], dim="x", combine_attrs="no_conflicts") + @pytest.mark.parametrize( + "combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception", + [ + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 1, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + False, + ), + ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), + ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 4, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + True, + ), + ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), + ( + "override", + {"a": 1, "b": 2}, + {"a": 4, "b": 5, "c": 3}, + {"a": 1, "b": 2}, + False, + ), + ( + "drop_conflicts", + {"a": 41, "b": 42, "c": 43}, + {"b": 2, "c": 43, "d": 44}, + {"a": 41, "c": 43, "d": 44}, + False, + ), + ], + ) + def test_concat_combine_attrs_kwarg( + self, combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception + ): + ds1 = Dataset({"a": ("x", [0])}, coords={"x": [0]}, attrs=var1_attrs) + ds2 = Dataset({"a": ("x", [0])}, coords={"x": [1]}, attrs=var2_attrs) + + if expect_exception: + with pytest.raises(ValueError, match=f"combine_attrs='{combine_attrs}'"): + concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) + else: + actual = concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) + expected = Dataset( + {"a": ("x", [0, 0])}, {"x": [0, 1]}, attrs=expected_attrs + ) - for combine_attrs in expected: + assert_identical(actual, expected) + + @pytest.mark.skip(reason="not implemented, yet (see #4827)") + 
@pytest.mark.parametrize( + "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception", + [ + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 1, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + False, + ), + ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), + ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 4, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + True, + ), + ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), + ( + "override", + {"a": 1, "b": 2}, + {"a": 4, "b": 5, "c": 3}, + {"a": 1, "b": 2}, + False, + ), + ( + "drop_conflicts", + {"a": 41, "b": 42, "c": 43}, + {"b": 2, "c": 43, "d": 44}, + {"a": 41, "c": 43, "d": 44}, + False, + ), + ], + ) + def test_concat_combine_attrs_kwarg_variables( + self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception + ): + """check that combine_attrs is used on data variables and coords""" + ds1 = Dataset({"a": ("x", [0], attrs1)}, coords={"x": ("x", [0], attrs1)}) + ds2 = Dataset({"a": ("x", [0], attrs2)}, coords={"x": ("x", [1], attrs2)}) + + if expect_exception: + with pytest.raises(ValueError, match=f"combine_attrs='{combine_attrs}'"): + concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) + else: actual = concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) - assert_identical(actual, expected[combine_attrs]) + expected = Dataset( + {"a": ("x", [0, 0], expected_attrs)}, + {"x": ("x", [0, 1], expected_attrs)}, + ) + + assert_identical(actual, expected) def test_concat_promote_shape(self): # mixed dims within variables diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 34b138e1f6a..27e2b10dcbc 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -92,6 +92,20 @@ def test_merge_arrays_attrs_default(self): {"a": 1, "b": 2}, False, ), + ( + "drop_conflicts", + {"a": 1, "b": 2, "c": 3}, + {"b": 1, "c": 3, "d": 4}, + {"a": 1, "c": 3, "d": 4}, + False, + ), + ( + "drop_conflicts", + {"a": 1, "b": np.array([2]), "c": np.array([3])}, + {"b": 1, "c": np.array([3]), "d": 4}, + {"a": 1, "c": np.array([3]), "d": 4}, + False, + ), ], ) def test_merge_arrays_attrs( @@ -109,6 +123,68 @@ def test_merge_arrays_attrs( expected.attrs = expected_attrs assert_identical(actual, expected) + @pytest.mark.skip(reason="not implemented, yet (see #4827)") + @pytest.mark.parametrize( + "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception", + [ + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 1, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + False, + ), + ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), + ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 4, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + True, + ), + ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), + ( + "override", + {"a": 1, "b": 2}, + {"a": 4, "b": 5, "c": 3}, + {"a": 1, "b": 2}, + False, + ), + ( + "drop_conflicts", + {"a": 1, "b": 2, "c": 3}, + {"b": 1, "c": 3, "d": 4}, + {"a": 1, "c": 3, "d": 4}, + False, + ), + ], + ) + def test_merge_arrays_attrs_variables( + self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception + ): + """check that combine_attrs is used 
on data variables and coords""" + data = create_test_data() + data1 = data.copy() + data1.var1.attrs = attrs1 + data1.dim1.attrs = attrs1 + data2 = data.copy() + data2.var1.attrs = attrs2 + data2.dim1.attrs = attrs2 + + if expect_exception: + with raises_regex(MergeError, "combine_attrs"): + actual = xr.merge([data1, data2], combine_attrs=combine_attrs) + else: + actual = xr.merge([data1, data2], combine_attrs=combine_attrs) + expected = data.copy() + expected.var1.attrs = expected_attrs + expected.dim1.attrs = expected_attrs + + assert_identical(actual, expected) + def test_merge_attrs_override_copy(self): ds1 = xr.Dataset(attrs={"x": 0}) ds2 = xr.Dataset(attrs={"x": 1}) @@ -116,6 +192,15 @@ def test_merge_attrs_override_copy(self): ds3.attrs["x"] = 2 assert ds1.x == 0 + def test_merge_attrs_drop_conflicts(self): + ds1 = xr.Dataset(attrs={"a": 0, "b": 0, "c": 0}) + ds2 = xr.Dataset(attrs={"b": 0, "c": 1, "d": 0}) + ds3 = xr.Dataset(attrs={"a": 0, "b": 1, "c": 0, "e": 0}) + + actual = xr.merge([ds1, ds2, ds3], combine_attrs="drop_conflicts") + expected = xr.Dataset(attrs={"a": 0, "d": 0, "e": 0}) + assert_identical(actual, expected) + def test_merge_dicts_simple(self): actual = xr.merge([{"foo": 0}, {"bar": "one"}, {"baz": 3.5}]) expected = xr.Dataset({"foo": 0, "bar": "one", "baz": 3.5}) From aba46ccd907dc1e68bba42aae0c7aa549b067749 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Wed, 10 Feb 2021 17:42:06 -0400 Subject: [PATCH 50/51] Fix `bounds_error=True` ignored with 1D interpolation (#4855) Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 ++ xarray/core/missing.py | 2 +- xarray/tests/test_interp.py | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2574cd2e60a..a0f8e935670 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -102,6 +102,8 @@ Bug fixes :py:meth:`Dataset.to_zarr` (:issue:`4783`, :pull:`4795`). By `Julien Seguinot `_. - Add :py:meth:`Dataset.drop_isel` and :py:meth:`DataArray.drop_isel` (:issue:`4658`, :pull:`4819`). By `Daniel Mesejo `_. +- Ensure that :py:meth:`Dataset.interp` raises ``ValueError`` when interpolating outside coordinate range and ``bounds_error=True`` (:issue:`4854`, :pull:`4855`). + By `Leif Denby `_. - Fix time encoding bug associated with using cftime versions greater than 1.4.0 with xarray (:issue:`4870`, :pull:`4871`). By `Spencer Clark `_. 
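A rough usage sketch of the interpolation fix described in the whats-new entry above, assuming this patch is applied and scipy is installed; it mirrors the test added below rather than introducing anything new:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.sin(0.3 * np.arange(4)), [("time", np.arange(4))])

    # bounds_error=True is now passed through to scipy, so out-of-range points raise
    try:
        da.interp(time=3.5, kwargs={"bounds_error": True})
    except ValueError:
        pass  # raised as requested
    # the default behaviour is unchanged: out-of-range points are filled with NaN
    da.interp(time=3.5)
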
diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 695affa84c1..e6dd8b537a0 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -154,7 +154,7 @@ def __init__( yi, kind=self.method, fill_value=fill_value, - bounds_error=False, + bounds_error=bounds_error, assume_sorted=assume_sorted, copy=copy, **self.cons_kwargs, diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index 20d5fb12a62..cdfc46bbedf 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -866,3 +866,18 @@ def test_interpolate_chunk_advanced(method): z = z.chunk(3) actual = da.interp(t=0.5, x=x, y=y, z=z, kwargs=kwargs, method=method) assert_identical(actual, expected) + + +@requires_scipy +def test_interp1d_bounds_error(): + """Ensure exception on bounds error is raised if requested""" + da = xr.DataArray( + np.sin(0.3 * np.arange(4)), + [("time", np.arange(4))], + ) + + with pytest.raises(ValueError): + da.interp(time=3.5, kwargs=dict(bounds_error=True)) + + # default is to fill with nans, so this should pass + da.interp(time=3.5) From 10f0227a1667c5ab3c88465ff1572065322cde77 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Wed, 10 Feb 2021 16:44:25 -0500 Subject: [PATCH 51/51] Ensure maximum accuracy when encoding and decoding cftime.datetime values (#4758) --- doc/whats-new.rst | 7 +++ xarray/coding/cftime_offsets.py | 30 +++++++++++- xarray/coding/times.py | 74 +++++++++++++++++++---------- xarray/tests/__init__.py | 1 + xarray/tests/test_cftime_offsets.py | 30 ++++++++++++ xarray/tests/test_coding_times.py | 45 ++++++++++++++++-- 6 files changed, 155 insertions(+), 32 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a0f8e935670..4b06003b630 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,6 +54,13 @@ Deprecations New Features ~~~~~~~~~~~~ +- Xarray now leverages updates as of cftime version 1.4.1, which enable exact I/O + roundtripping of ``cftime.datetime`` objects (:pull:`4758`). + By `Spencer Clark `_. +- :py:meth:`~xarray.cftime_range` and :py:meth:`DataArray.resample` now support + millisecond (``"L"`` or ``"ms"``) and microsecond (``"U"`` or ``"us"``) frequencies + for ``cftime.datetime`` coordinates (:issue:`4097`, :pull:`4758`). + By `Spencer Clark `_. - Significantly higher ``unstack`` performance on numpy-backed arrays which contain missing values; 8x faster in our benchmark, and 2x faster than pandas. (:pull:`4746`); diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 177a0fd831b..c25d5296c41 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -576,6 +576,26 @@ def __apply__(self, other): return other + self.as_timedelta() +class Millisecond(BaseCFTimeOffset): + _freq = "L" + + def as_timedelta(self): + return timedelta(milliseconds=self.n) + + def __apply__(self, other): + return other + self.as_timedelta() + + +class Microsecond(BaseCFTimeOffset): + _freq = "U" + + def as_timedelta(self): + return timedelta(microseconds=self.n) + + def __apply__(self, other): + return other + self.as_timedelta() + + _FREQUENCIES = { "A": YearEnd, "AS": YearBegin, @@ -590,6 +610,10 @@ def __apply__(self, other): "T": Minute, "min": Minute, "S": Second, + "L": Millisecond, + "ms": Millisecond, + "U": Microsecond, + "us": Microsecond, "AS-JAN": partial(YearBegin, month=1), "AS-FEB": partial(YearBegin, month=2), "AS-MAR": partial(YearBegin, month=3), @@ -824,7 +848,7 @@ def cftime_range( `ISO-8601 format `_. 
- It supports many, but not all, frequencies supported by ``pandas.date_range``. For example it does not currently support any of - the business-related, semi-monthly, or sub-second frequencies. + the business-related or semi-monthly frequencies. - Compound sub-monthly frequencies are not supported, e.g. '1H1min', as these can easily be written in terms of the finest common resolution, e.g. '61min'. @@ -855,6 +879,10 @@ def cftime_range( +--------+--------------------------+ | S | Second frequency | +--------+--------------------------+ + | L, ms | Millisecond frequency | + +--------+--------------------------+ + | U, us | Microsecond frequency | + +--------+--------------------------+ Any multiples of the following anchored offsets are also supported. diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 39ad2f57c1e..a1822393dc1 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1,6 +1,6 @@ import re import warnings -from datetime import datetime +from datetime import datetime, timedelta from distutils.version import LooseVersion from functools import partial @@ -35,6 +35,26 @@ "D": int(1e9) * 60 * 60 * 24, } +_US_PER_TIME_DELTA = { + "microseconds": 1, + "milliseconds": 1_000, + "seconds": 1_000_000, + "minutes": 60 * 1_000_000, + "hours": 60 * 60 * 1_000_000, + "days": 24 * 60 * 60 * 1_000_000, +} + +_NETCDF_TIME_UNITS_CFTIME = [ + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", +] + +_NETCDF_TIME_UNITS_NUMPY = _NETCDF_TIME_UNITS_CFTIME + ["nanoseconds"] + TIME_UNITS = frozenset( [ "days", @@ -225,9 +245,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): if calendar in _STANDARD_CALENDARS: dates = cftime_to_nptime(dates) elif use_cftime: - dates = _decode_datetime_with_cftime( - flat_num_dates.astype(float), units, calendar - ) + dates = _decode_datetime_with_cftime(flat_num_dates, units, calendar) else: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) @@ -262,25 +280,33 @@ def decode_cf_timedelta(num_timedeltas, units): return result.reshape(num_timedeltas.shape) +def _unit_timedelta_cftime(units): + return timedelta(microseconds=_US_PER_TIME_DELTA[units]) + + +def _unit_timedelta_numpy(units): + numpy_units = _netcdf_to_numpy_timeunit(units) + return np.timedelta64(_NS_PER_TIME_DELTA[numpy_units], "ns") + + def _infer_time_units_from_diff(unique_timedeltas): - # Note that the modulus operator was only implemented for np.timedelta64 - # arrays as of NumPy version 1.16.0. Once our minimum version of NumPy - # supported is greater than or equal to this we will no longer need to cast - # unique_timedeltas to a TimedeltaIndex. In the meantime, however, the - # modulus operator works for TimedeltaIndex objects. 
- unique_deltas_as_index = pd.TimedeltaIndex(unique_timedeltas) - for time_unit in [ - "days", - "hours", - "minutes", - "seconds", - "milliseconds", - "microseconds", - "nanoseconds", - ]: - delta_ns = _NS_PER_TIME_DELTA[_netcdf_to_numpy_timeunit(time_unit)] - unit_delta = np.timedelta64(delta_ns, "ns") - if np.all(unique_deltas_as_index % unit_delta == np.timedelta64(0, "ns")): + if unique_timedeltas.dtype == np.dtype("O"): + time_units = _NETCDF_TIME_UNITS_CFTIME + unit_timedelta = _unit_timedelta_cftime + zero_timedelta = timedelta(microseconds=0) + timedeltas = unique_timedeltas + else: + time_units = _NETCDF_TIME_UNITS_NUMPY + unit_timedelta = _unit_timedelta_numpy + zero_timedelta = np.timedelta64(0, "ns") + # Note that the modulus operator was only implemented for np.timedelta64 + # arrays as of NumPy version 1.16.0. Once our minimum version of NumPy + # supported is greater than or equal to this we will no longer need to cast + # unique_timedeltas to a TimedeltaIndex. In the meantime, however, the + # modulus operator works for TimedeltaIndex objects. + timedeltas = pd.TimedeltaIndex(unique_timedeltas) + for time_unit in time_units: + if np.all(timedeltas % unit_timedelta(time_unit) == zero_timedelta): return time_unit return "seconds" @@ -309,10 +335,6 @@ def infer_datetime_units(dates): reference_date = dates[0] if len(dates) > 0 else "1970-01-01" reference_date = format_cftime_datetime(reference_date) unique_timedeltas = np.unique(np.diff(dates)) - if unique_timedeltas.dtype == np.dtype("O"): - # Convert to np.timedelta64 objects using pandas to work around a - # NumPy casting bug: https://github.com/numpy/numpy/issues/11096 - unique_timedeltas = to_timedelta_unboxed(unique_timedeltas) units = _infer_time_units_from_diff(unique_timedeltas) return f"{units} since {reference_date}" diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 7c18f1a8c8a..a7761aefa3d 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -68,6 +68,7 @@ def LooseVersion(vstring): has_pseudonetcdf, requires_pseudonetcdf = _importorskip("PseudoNetCDF") has_cftime, requires_cftime = _importorskip("cftime") has_cftime_1_1_0, requires_cftime_1_1_0 = _importorskip("cftime", minversion="1.1.0.0") +has_cftime_1_4_1, requires_cftime_1_4_1 = _importorskip("cftime", minversion="1.4.1") has_dask, requires_dask = _importorskip("dask") has_bottleneck, requires_bottleneck = _importorskip("bottleneck") has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis") diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 3efcf8039c6..b1ecf059f2f 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -10,6 +10,8 @@ BaseCFTimeOffset, Day, Hour, + Microsecond, + Millisecond, Minute, MonthBegin, MonthEnd, @@ -181,6 +183,14 @@ def test_to_offset_offset_input(offset): ("2min", Minute(n=2)), ("S", Second()), ("2S", Second(n=2)), + ("L", Millisecond(n=1)), + ("2L", Millisecond(n=2)), + ("ms", Millisecond(n=1)), + ("2ms", Millisecond(n=2)), + ("U", Microsecond(n=1)), + ("2U", Microsecond(n=2)), + ("us", Microsecond(n=1)), + ("2us", Microsecond(n=2)), ], ids=_id_func, ) @@ -299,6 +309,8 @@ def test_to_cftime_datetime_error_type_error(): Hour(), Minute(), Second(), + Millisecond(), + Microsecond(), ] _EQ_TESTS_B = [ BaseCFTimeOffset(n=2), @@ -316,6 +328,8 @@ def test_to_cftime_datetime_error_type_error(): Hour(n=2), Minute(n=2), Second(n=2), + Millisecond(n=2), + Microsecond(n=2), ] @@ -340,6 +354,8 @@ def 
test_neq(a, b): Hour(n=2), Minute(n=2), Second(n=2), + Millisecond(n=2), + Microsecond(n=2), ] @@ -360,6 +376,8 @@ def test_eq(a, b): (Hour(), Hour(n=3)), (Minute(), Minute(n=3)), (Second(), Second(n=3)), + (Millisecond(), Millisecond(n=3)), + (Microsecond(), Microsecond(n=3)), ] @@ -387,6 +405,8 @@ def test_rmul(offset, expected): (Hour(), Hour(n=-1)), (Minute(), Minute(n=-1)), (Second(), Second(n=-1)), + (Millisecond(), Millisecond(n=-1)), + (Microsecond(), Microsecond(n=-1)), ], ids=_id_func, ) @@ -399,6 +419,8 @@ def test_neg(offset, expected): (Hour(n=2), (1, 1, 1, 2)), (Minute(n=2), (1, 1, 1, 0, 2)), (Second(n=2), (1, 1, 1, 0, 0, 2)), + (Millisecond(n=2), (1, 1, 1, 0, 0, 0, 2000)), + (Microsecond(n=2), (1, 1, 1, 0, 0, 0, 2)), ] @@ -427,6 +449,8 @@ def test_radd_sub_monthly(offset, expected_date_args, calendar): (Hour(n=2), (1, 1, 2, 22)), (Minute(n=2), (1, 1, 2, 23, 58)), (Second(n=2), (1, 1, 2, 23, 59, 58)), + (Millisecond(n=2), (1, 1, 2, 23, 59, 59, 998000)), + (Microsecond(n=2), (1, 1, 2, 23, 59, 59, 999998)), ], ids=_id_func, ) @@ -802,6 +826,8 @@ def test_add_quarter_end_onOffset( ((1, 1, 1), Hour(), True), ((1, 1, 1), Minute(), True), ((1, 1, 1), Second(), True), + ((1, 1, 1), Millisecond(), True), + ((1, 1, 1), Microsecond(), True), ], ids=_id_func, ) @@ -865,6 +891,8 @@ def test_onOffset_month_or_quarter_or_year_end( (Hour(), (1, 3, 2, 1, 1), (1, 3, 2, 1, 1)), (Minute(), (1, 3, 2, 1, 1, 1), (1, 3, 2, 1, 1, 1)), (Second(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1)), + (Millisecond(), (1, 3, 2, 1, 1, 1, 1000), (1, 3, 2, 1, 1, 1, 1000)), + (Microsecond(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1)), ], ids=_id_func, ) @@ -914,6 +942,8 @@ def test_rollforward(calendar, offset, initial_date_args, partial_expected_date_ (Hour(), (1, 3, 2, 1, 1), (1, 3, 2, 1, 1)), (Minute(), (1, 3, 2, 1, 1, 1), (1, 3, 2, 1, 1, 1)), (Second(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1)), + (Millisecond(), (1, 3, 2, 1, 1, 1, 1000), (1, 3, 2, 1, 1, 1, 1000)), + (Microsecond(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1)), ], ids=_id_func, ) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index d8412f82374..eda32d31148 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1,4 +1,5 @@ import warnings +from datetime import timedelta from itertools import product import numpy as np @@ -6,7 +7,15 @@ import pytest from pandas.errors import OutOfBoundsDatetime -from xarray import DataArray, Dataset, Variable, coding, conventions, decode_cf +from xarray import ( + DataArray, + Dataset, + Variable, + cftime_range, + coding, + conventions, + decode_cf, +) from xarray.coding.times import ( _encode_datetime_with_cftime, cftime_to_nptime, @@ -19,7 +28,15 @@ from xarray.core.common import contains_cftime_datetimes from xarray.testing import assert_equal -from . import arm_xfail, assert_array_equal, has_cftime, requires_cftime, requires_dask +from . 
import ( + arm_xfail, + assert_array_equal, + has_cftime, + has_cftime_1_4_1, + requires_cftime, + requires_cftime_1_4_1, + requires_dask, +) _NON_STANDARD_CALENDARS_SET = { "noleap", @@ -973,8 +990,13 @@ def test_decode_ambiguous_time_warns(calendar): @pytest.mark.parametrize("encoding_units", FREQUENCIES_TO_ENCODING_UNITS.values()) @pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) -def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq): - times = pd.date_range("2000", periods=3, freq=freq) +@pytest.mark.parametrize("date_range", [pd.date_range, cftime_range]) +def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq, date_range): + if not has_cftime_1_4_1 and date_range == cftime_range: + pytest.skip("Test requires cftime 1.4.1.") + if (freq == "N" or encoding_units == "nanoseconds") and date_range == cftime_range: + pytest.skip("Nanosecond frequency is not valid for cftime dates.") + times = date_range("2000", periods=3, freq=freq) units = f"{encoding_units} since 2000-01-01" encoded, _, _ = coding.times.encode_cf_datetime(times, units) @@ -987,7 +1009,7 @@ def test_encode_cf_datetime_defaults_to_correct_dtype(encoding_units, freq): @pytest.mark.parametrize("freq", FREQUENCIES_TO_ENCODING_UNITS.keys()) -def test_encode_decode_roundtrip(freq): +def test_encode_decode_roundtrip_datetime64(freq): # See GH 4045. Prior to GH 4684 this test would fail for frequencies of # "S", "L", "U", and "N". initial_time = pd.date_range("1678-01-01", periods=1) @@ -998,6 +1020,19 @@ def test_encode_decode_roundtrip(freq): assert_equal(variable, decoded) +@requires_cftime_1_4_1 +@pytest.mark.parametrize("freq", ["U", "L", "S", "T", "H", "D"]) +def test_encode_decode_roundtrip_cftime(freq): + initial_time = cftime_range("0001", periods=1) + times = initial_time.append( + cftime_range("0001", periods=2, freq=freq) + timedelta(days=291000 * 365) + ) + variable = Variable(["time"], times) + encoded = conventions.encode_cf_variable(variable) + decoded = conventions.decode_cf_variable("time", encoded, use_cftime=True) + assert_equal(variable, decoded) + + @requires_cftime def test__encode_datetime_with_cftime(): # See GH 4870. cftime versions > 1.4.0 required us to adapt the