From ba48fbcd6ee14e0bbd8887a970a1125fde6769f0 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Thu, 24 Oct 2019 12:48:45 -0400 Subject: [PATCH 01/27] Html repr (#3425) * add CSS style and internal functions for html repr * move CSS code to its own file in a new static directory * add repr of array objects + some refactoring and fixes * add _repr_html_ methods to dataset, dataarray and variable * fix encoding issue in read CSS * fix some CSS for compatibility with notebook (tested 5.2) * use CSS grid + add icons to show/hide attrs and data repr * Changing title of icons to make tooltips better * Adding option to set repr back to classic * Adding support for multiindexes * Getting rid of some spans and fixing alignment * Forgot to check in css [skip ci] * Overflow on hover * Cleaning up css * Fixing indentation * Replacing + icon with db icon * Unifying input css * Renaming stylesheet [skip ci] * Improving styling of attributes * Using the repr functions * Using dask array _repr_html_ * Fixing alignment of Dimensions * Make sure to include subdirs in package * Adding static to manifest * Trying to include css files * Fixing css discrepancies in colab * Adding in lots of escapes and also f-strings * Adding some tests for formatting_html * linting * classic -> text * linting more * Adding tests for new option * Trying to get better coverage * reformatting * Fixing up test * Last tests hopefully * Fixing dask test to work with lower version * More black * Added what's new section * classic -> text Co-Authored-By: Deepak Cherian * Fixing up dt/dl for jlab * Directly change dl objects for attrs section --- MANIFEST.in | 1 + doc/whats-new.rst | 6 + setup.py | 4 +- xarray/core/common.py | 10 +- xarray/core/dataset.py | 7 + xarray/core/formatting_html.py | 274 ++++++++++++++++++++ xarray/core/options.py | 7 + xarray/static/css/style.css | 310 +++++++++++++++++++++++ xarray/static/html/icons-svg-inline.html | 17 ++ xarray/tests/test_formatting_html.py | 132 ++++++++++ xarray/tests/test_options.py | 37 +++ 11 files changed, 802 insertions(+), 3 deletions(-) mode change 100644 => 100755 setup.py create mode 100644 xarray/core/formatting_html.py create mode 100644 xarray/static/css/style.css create mode 100644 xarray/static/html/icons-svg-inline.html create mode 100644 xarray/tests/test_formatting_html.py diff --git a/MANIFEST.in b/MANIFEST.in index a006660e5fb..4d5c34f622c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,3 +6,4 @@ prune doc/generated global-exclude .DS_Store include versioneer.py include xarray/_version.py +recursive-include xarray/static * diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9d3e64badb8..12bed8f332e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,12 @@ New Features ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head)``. Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken. +- Added new :py:meth:`Dataset._repr_html_` and :py:meth:`DataArray._repr_html_` to improve + representation of objects in jupyter. By default this feature is turned off + for now. Enable it with :py:meth:`xarray.set_options(display_style="html")`. + (:pull:`3425`) by `Benoit Bovy `_ and + `Julia Signell `_. 
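For a sense of how the new option is meant to be used, here is a minimal
sketch (the example dataset is illustrative, not part of the patch)::

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": (("x", "y"), np.zeros((3, 4)))})

    # enable the HTML repr globally ...
    xr.set_options(display_style="html")

    # ... or only temporarily, as a context manager
    with xr.set_options(display_style="html"):
        html = ds._repr_html_()  # the hook Jupyter calls to render rich output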
+ Bug fixes ~~~~~~~~~ - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 08d4f54764f..cba0c74aa3a --- a/setup.py +++ b/setup.py @@ -104,5 +104,7 @@ tests_require=TESTS_REQUIRE, url=URL, packages=find_packages(), - package_data={"xarray": ["py.typed", "tests/data/*"]}, + package_data={ + "xarray": ["py.typed", "tests/data/*", "static/css/*", "static/html/*"] + }, ) diff --git a/xarray/core/common.py b/xarray/core/common.py index 45d860a1797..1a8cf34ed39 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1,5 +1,6 @@ import warnings from contextlib import suppress +from html import escape from textwrap import dedent from typing import ( Any, @@ -18,10 +19,10 @@ import numpy as np import pandas as pd -from . import dtypes, duck_array_ops, formatting, ops +from . import dtypes, duck_array_ops, formatting, formatting_html, ops from .arithmetic import SupportsArithmetic from .npcompat import DTypeLike -from .options import _get_keep_attrs +from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .rolling_exp import RollingExp from .utils import Frozen, ReprObject, either_dict_or_kwargs @@ -134,6 +135,11 @@ def __array__(self: Any, dtype: DTypeLike = None) -> np.ndarray: def __repr__(self) -> str: return formatting.array_repr(self) + def _repr_html_(self): + if OPTIONS["display_style"] == "text": + return f"
<pre>{escape(repr(self))}</pre>
" + return formatting_html.array_repr(self) + def _iter(self: Any) -> Iterator[Any]: for n in range(len(self)): yield self[n] diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 12d5cbdc9f3..eba580f84bd 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3,6 +3,7 @@ import sys import warnings from collections import defaultdict +from html import escape from numbers import Number from pathlib import Path from typing import ( @@ -39,6 +40,7 @@ dtypes, duck_array_ops, formatting, + formatting_html, groupby, ops, resample, @@ -1619,6 +1621,11 @@ def to_zarr( def __repr__(self) -> str: return formatting.dataset_repr(self) + def _repr_html_(self): + if OPTIONS["display_style"] == "text": + return f"
<pre>{escape(repr(self))}</pre>
" + return formatting_html.dataset_repr(self) + def info(self, buf=None) -> None: """ Concise summary of a Dataset variables and attributes. diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py new file mode 100644 index 00000000000..b03ecc12962 --- /dev/null +++ b/xarray/core/formatting_html.py @@ -0,0 +1,274 @@ +import uuid +import pkg_resources +from collections import OrderedDict +from functools import partial +from html import escape + +from .formatting import inline_variable_array_repr, short_data_repr + + +CSS_FILE_PATH = "/".join(("static", "css", "style.css")) +CSS_STYLE = pkg_resources.resource_string("xarray", CSS_FILE_PATH).decode("utf8") + + +ICONS_SVG_PATH = "/".join(("static", "html", "icons-svg-inline.html")) +ICONS_SVG = pkg_resources.resource_string("xarray", ICONS_SVG_PATH).decode("utf8") + + +def short_data_repr_html(array): + """Format "data" for DataArray and Variable.""" + internal_data = getattr(array, "variable", array)._data + if hasattr(internal_data, "_repr_html_"): + return internal_data._repr_html_() + return escape(short_data_repr(array)) + + +def format_dims(dims, coord_names): + if not dims: + return "" + + dim_css_map = { + k: " class='xr-has-index'" if k in coord_names else "" for k, v in dims.items() + } + + dims_li = "".join( + f"
<li><span{dim_css_map[dim]}>" f"{escape(dim)}</span>: {size}</li>"
+        for dim, size in dims.items()
+    )
+
+    return f"<ul class='xr-dim-list'>{dims_li}</ul>"
+
+
+def summarize_attrs(attrs):
+    attrs_dl = "".join(
+        f"<dt><span>{escape(k)} :</span></dt>" f"<dd>{escape(str(v))}</dd>"
+        for k, v in attrs.items()
+    )
+
+    return f"<dl class='xr-attrs'>{attrs_dl}</dl>"
+
+
+def _icon(icon_name):
+    # icon_name should be defined in xarray/static/html/icon-svg-inline.html
+    return (
+        "<svg class='icon xr-{0}'>"
+        "<use xlink:href='#{0}'>"
+        "</use>"
+        "</svg>".format(icon_name)
+    )
+
+
+def _summarize_coord_multiindex(name, coord):
+    preview = f"({', '.join(escape(l) for l in coord.level_names)})"
+    return summarize_variable(
+        name, coord, is_index=True, dtype="MultiIndex", preview=preview
+    )
+
+
+def summarize_coord(name, var):
+    is_index = name in var.dims
+    if is_index:
+        coord = var.variable.to_index_variable()
+        if coord.level_names is not None:
+            coords = {}
+            coords[name] = _summarize_coord_multiindex(name, coord)
+            for lname in coord.level_names:
+                var = coord.get_level_variable(lname)
+                coords[lname] = summarize_variable(lname, var)
+            return coords
+
+    return {name: summarize_variable(name, var, is_index)}
+
+
+def summarize_coords(variables):
+    coords = {}
+    for k, v in variables.items():
+        coords.update(**summarize_coord(k, v))
+
+    vars_li = "".join(f"<li class='xr-var-item'>{v}</li>" for v in coords.values())
+
+    return f"<ul class='xr-var-list'>{vars_li}</ul>
    " + + +def summarize_variable(name, var, is_index=False, dtype=None, preview=None): + variable = var.variable if hasattr(var, "variable") else var + + cssclass_idx = " class='xr-has-index'" if is_index else "" + dims_str = f"({', '.join(escape(dim) for dim in var.dims)})" + name = escape(name) + dtype = dtype or var.dtype + + # "unique" ids required to expand/collapse subsections + attrs_id = "attrs-" + str(uuid.uuid4()) + data_id = "data-" + str(uuid.uuid4()) + disabled = "" if len(var.attrs) else "disabled" + + preview = preview or escape(inline_variable_array_repr(variable, 35)) + attrs_ul = summarize_attrs(var.attrs) + data_repr = short_data_repr_html(variable) + + attrs_icon = _icon("icon-file-text2") + data_icon = _icon("icon-database") + + return ( + f"
<div class='xr-var-name'><span{cssclass_idx}>{name}</span></div>"
+        f"<div class='xr-var-dims'>{dims_str}</div>"
+        f"<div class='xr-var-dtype'>{dtype}</div>"
+        f"<div class='xr-var-preview xr-preview'>{preview}</div>"
+        f"<input id='{attrs_id}' class='xr-var-attrs-in' "
+        f"type='checkbox' {disabled}>"
+        f"<label for='{attrs_id}' title='Show/Hide attributes'>"
+        f"{attrs_icon}</label>"
+        f"<input id='{data_id}' class='xr-var-data-in' type='checkbox'>"
+        f"<label for='{data_id}' title='Show/Hide data repr'>"
+        f"{data_icon}</label>"
+        f"<div class='xr-var-attrs'>{attrs_ul}</div>"
+        f"<pre class='xr-var-data'>{data_repr}</pre>
    " + ) + + +def summarize_vars(variables): + vars_li = "".join( + f"
<li class='xr-var-item'>{summarize_variable(k, v)}</li>"
+        for k, v in variables.items()
+    )
+
+    return f"<ul class='xr-var-list'>{vars_li}</ul>
    " + + +def collapsible_section( + name, inline_details="", details="", n_items=None, enabled=True, collapsed=False +): + # "unique" id to expand/collapse the section + data_id = "section-" + str(uuid.uuid4()) + + has_items = n_items is not None and n_items + n_items_span = "" if n_items is None else f" ({n_items})" + enabled = "" if enabled and has_items else "disabled" + collapsed = "" if collapsed or not has_items else "checked" + tip = " title='Expand/collapse section'" if enabled else "" + + return ( + f"" + f"" + f"
    {inline_details}
    " + f"
    {details}
    " + ) + + +def _mapping_section(mapping, name, details_func, max_items_collapse, enabled=True): + n_items = len(mapping) + collapsed = n_items >= max_items_collapse + + return collapsible_section( + name, + details=details_func(mapping), + n_items=n_items, + enabled=enabled, + collapsed=collapsed, + ) + + +def dim_section(obj): + dim_list = format_dims(obj.dims, list(obj.coords)) + + return collapsible_section( + "Dimensions", inline_details=dim_list, enabled=False, collapsed=True + ) + + +def array_section(obj): + # "unique" id to expand/collapse the section + data_id = "section-" + str(uuid.uuid4()) + collapsed = "" + preview = escape(inline_variable_array_repr(obj.variable, max_width=70)) + data_repr = short_data_repr_html(obj) + data_icon = _icon("icon-database") + + return ( + "
    " + f"" + f"" + f"
    {preview}
    " + f"
    {data_repr}
    " + "
    " + ) + + +coord_section = partial( + _mapping_section, + name="Coordinates", + details_func=summarize_coords, + max_items_collapse=25, +) + + +datavar_section = partial( + _mapping_section, + name="Data variables", + details_func=summarize_vars, + max_items_collapse=15, +) + + +attr_section = partial( + _mapping_section, + name="Attributes", + details_func=summarize_attrs, + max_items_collapse=10, +) + + +def _obj_repr(header_components, sections): + header = f"
<div class='xr-header'>{''.join(h for h in header_components)}</div>"
+    sections = "".join(f"<li class='xr-section-item'>{s}</li>" for s in sections)
+
+    return (
+        "<div>"
+        f"{ICONS_SVG}<style>{CSS_STYLE}</style>"
+        "<div class='xr-wrap'>"
+        f"{header}"
+        f"<ul class='xr-sections'>{sections}</ul>"
+        "</div>"
+        "</div>
    " + ) + + +def array_repr(arr): + dims = OrderedDict((k, v) for k, v in zip(arr.dims, arr.shape)) + + obj_type = "xarray.{}".format(type(arr).__name__) + arr_name = "'{}'".format(arr.name) if getattr(arr, "name", None) else "" + coord_names = list(arr.coords) if hasattr(arr, "coords") else [] + + header_components = [ + "
<div class='xr-obj-type'>{}</div>".format(obj_type),
+        "<div class='xr-array-name'>{}</div>".format(arr_name),
+        format_dims(dims, coord_names),
+    ]
+
+    sections = [array_section(arr)]
+
+    if hasattr(arr, "coords"):
+        sections.append(coord_section(arr.coords))
+
+    sections.append(attr_section(arr.attrs))
+
+    return _obj_repr(header_components, sections)
+
+
+def dataset_repr(ds):
+    obj_type = "xarray.{}".format(type(ds).__name__)
+
+    header_components = [f"<div class='xr-obj-type'>{escape(obj_type)}</div>
    "] + + sections = [ + dim_section(ds), + coord_section(ds.coords), + datavar_section(ds.data_vars), + attr_section(ds.attrs), + ] + + return _obj_repr(header_components, sections) diff --git a/xarray/core/options.py b/xarray/core/options.py index 2f464a33fb1..72f9ad8e1fa 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -8,6 +8,7 @@ CMAP_SEQUENTIAL = "cmap_sequential" CMAP_DIVERGENT = "cmap_divergent" KEEP_ATTRS = "keep_attrs" +DISPLAY_STYLE = "display_style" OPTIONS = { @@ -19,9 +20,11 @@ CMAP_SEQUENTIAL: "viridis", CMAP_DIVERGENT: "RdBu_r", KEEP_ATTRS: "default", + DISPLAY_STYLE: "text", } _JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"]) +_DISPLAY_OPTIONS = frozenset(["text", "html"]) def _positive_integer(value): @@ -35,6 +38,7 @@ def _positive_integer(value): FILE_CACHE_MAXSIZE: _positive_integer, WARN_FOR_UNCLOSED_FILES: lambda value: isinstance(value, bool), KEEP_ATTRS: lambda choice: choice in [True, False, "default"], + DISPLAY_STYLE: _DISPLAY_OPTIONS.__contains__, } @@ -98,6 +102,9 @@ class set_options: attrs, ``False`` to always discard them, or ``'default'`` to use original logic that attrs should only be kept in unambiguous circumstances. Default: ``'default'``. + - ``display_style``: display style to use in jupyter for xarray objects. + Default: ``'text'``. Other options are ``'html'``. + You can use ``set_options`` either as a context manager: diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css new file mode 100644 index 00000000000..536b8ab6103 --- /dev/null +++ b/xarray/static/css/style.css @@ -0,0 +1,310 @@ +/* CSS stylesheet for displaying xarray objects in jupyterlab. + * + */ + +.xr-wrap { + min-width: 300px; + max-width: 700px; +} + +.xr-header { + padding-top: 6px; + padding-bottom: 6px; + margin-bottom: 4px; + border-bottom: solid 1px #ddd; +} + +.xr-header > div, +.xr-header > ul { + display: inline; + margin-top: 0; + margin-bottom: 0; +} + +.xr-obj-type, +.xr-array-name { + margin-left: 2px; + margin-right: 10px; +} + +.xr-obj-type { + color: #555; +} + +.xr-array-name { + color: #000; +} + +.xr-sections { + padding-left: 0 !important; + display: grid; + grid-template-columns: 150px auto auto 1fr 20px 20px; +} + +.xr-section-item { + display: contents; +} + +.xr-section-item input { + display: none; +} + +.xr-section-item input + label { + color: #ccc; +} + +.xr-section-item input:enabled + label { + cursor: pointer; + color: #555; +} + +.xr-section-item input:enabled + label:hover { + color: #000; +} + +.xr-section-summary { + grid-column: 1; + color: #555; + font-weight: 500; +} + +.xr-section-summary > span { + display: inline-block; + padding-left: 0.5em; +} + +.xr-section-summary-in:disabled + label { + color: #555; +} + +.xr-section-summary-in + label:before { + display: inline-block; + content: '►'; + font-size: 11px; + width: 15px; + text-align: center; +} + +.xr-section-summary-in:disabled + label:before { + color: #ccc; +} + +.xr-section-summary-in:checked + label:before { + content: '▼'; +} + +.xr-section-summary-in:checked + label > span { + display: none; +} + +.xr-section-summary, +.xr-section-inline-details { + padding-top: 4px; + padding-bottom: 4px; +} + +.xr-section-inline-details { + grid-column: 2 / -1; +} + +.xr-section-details { + display: none; + grid-column: 1 / -1; + margin-bottom: 5px; +} + +.xr-section-summary-in:checked ~ .xr-section-details { + display: contents; +} + +.xr-array-wrap { + grid-column: 1 / -1; + display: grid; + grid-template-columns: 20px auto; +} + 
+.xr-array-wrap > label { + grid-column: 1; + vertical-align: top; +} + +.xr-preview { + color: #888; +} + +.xr-array-preview, +.xr-array-data { + padding: 0 5px !important; + grid-column: 2; +} + +.xr-array-data, +.xr-array-in:checked ~ .xr-array-preview { + display: none; +} + +.xr-array-in:checked ~ .xr-array-data, +.xr-array-preview { + display: inline-block; +} + +.xr-dim-list { + display: inline-block !important; + list-style: none; + padding: 0 !important; + margin: 0; +} + +.xr-dim-list li { + display: inline-block; + padding: 0; + margin: 0; +} + +.xr-dim-list:before { + content: '('; +} + +.xr-dim-list:after { + content: ')'; +} + +.xr-dim-list li:not(:last-child):after { + content: ','; + padding-right: 5px; +} + +.xr-has-index { + font-weight: bold; +} + +.xr-var-list, +.xr-var-item { + display: contents; +} + +.xr-var-item > div, +.xr-var-item label, +.xr-var-item > .xr-var-name span { + background-color: #fcfcfc; + margin-bottom: 0; +} + +.xr-var-item > .xr-var-name:hover span { + padding-right: 5px; +} + +.xr-var-list > li:nth-child(odd) > div, +.xr-var-list > li:nth-child(odd) > label, +.xr-var-list > li:nth-child(odd) > .xr-var-name span { + background-color: #efefef; +} + +.xr-var-name { + grid-column: 1; +} + +.xr-var-dims { + grid-column: 2; +} + +.xr-var-dtype { + grid-column: 3; + text-align: right; + color: #555; +} + +.xr-var-preview { + grid-column: 4; +} + +.xr-var-name, +.xr-var-dims, +.xr-var-dtype, +.xr-preview, +.xr-attrs dt { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + padding-right: 10px; +} + +.xr-var-name:hover, +.xr-var-dims:hover, +.xr-var-dtype:hover, +.xr-attrs dt:hover { + overflow: visible; + width: auto; + z-index: 1; +} + +.xr-var-attrs, +.xr-var-data { + display: none; + background-color: #fff !important; + padding-bottom: 5px !important; +} + +.xr-var-attrs-in:checked ~ .xr-var-attrs, +.xr-var-data-in:checked ~ .xr-var-data { + display: block; +} + +.xr-var-data > table { + float: right; +} + +.xr-var-name span, +.xr-var-data, +.xr-attrs { + padding-left: 25px !important; +} + +.xr-attrs, +.xr-var-attrs, +.xr-var-data { + grid-column: 1 / -1; +} + +dl.xr-attrs { + padding: 0; + margin: 0; + display: grid; + grid-template-columns: 125px auto; +} + +.xr-attrs dt, dd { + padding: 0; + margin: 0; + float: left; + padding-right: 10px; + width: auto; +} + +.xr-attrs dt { + font-weight: normal; + grid-column: 1; +} + +.xr-attrs dt:hover span { + display: inline-block; + background: #fff; + padding-right: 10px; +} + +.xr-attrs dd { + grid-column: 2; + white-space: pre-wrap; + word-break: break-all; +} + +.xr-icon-database, +.xr-icon-file-text2 { + display: inline-block; + vertical-align: middle; + width: 1em; + height: 1.5em !important; + stroke-width: 0; + stroke: currentColor; + fill: currentColor; +} diff --git a/xarray/static/html/icons-svg-inline.html b/xarray/static/html/icons-svg-inline.html new file mode 100644 index 00000000000..c44f89c4304 --- /dev/null +++ b/xarray/static/html/icons-svg-inline.html @@ -0,0 +1,17 @@ + + + +Show/Hide data repr + + + + + +Show/Hide attributes + + + + + + + diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py new file mode 100644 index 00000000000..e7f54b22d06 --- /dev/null +++ b/xarray/tests/test_formatting_html.py @@ -0,0 +1,132 @@ +from distutils.version import LooseVersion + +import numpy as np +import pandas as pd +import pytest + +import xarray as xr +from xarray.core import formatting_html as fh + + +@pytest.fixture +def dataarray(): + return 
xr.DataArray(np.random.RandomState(0).randn(4, 6))
+
+
+@pytest.fixture
+def dask_dataarray(dataarray):
+    pytest.importorskip("dask")
+    return dataarray.chunk()
+
+
+@pytest.fixture
+def multiindex():
+    mindex = pd.MultiIndex.from_product(
+        [["a", "b"], [1, 2]], names=("level_1", "level_2")
+    )
+    return xr.Dataset({}, {"x": mindex})
+
+
+@pytest.fixture
+def dataset():
+    times = pd.date_range("2000-01-01", "2001-12-31", name="time")
+    annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28))
+
+    base = 10 + 15 * annual_cycle.reshape(-1, 1)
+    tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3)
+    tmax_values = base + 10 + 3 * np.random.randn(annual_cycle.size, 3)
+
+    return xr.Dataset(
+        {
+            "tmin": (("time", "location"), tmin_values),
+            "tmax": (("time", "location"), tmax_values),
+        },
+        {"time": times, "location": ["<IA>", "IN", "IL"]},
+        attrs={"description": "Test data."},
+    )
+
+
+def test_short_data_repr_html(dataarray):
+    data_repr = fh.short_data_repr_html(dataarray)
+    assert data_repr.startswith("array")
+
+
+def test_short_data_repr_html_dask(dask_dataarray):
+    import dask
+
+    if LooseVersion(dask.__version__) < "2.0.0":
+        assert not hasattr(dask_dataarray.data, "_repr_html_")
+        data_repr = fh.short_data_repr_html(dask_dataarray)
+        assert (
+            data_repr
+            == "dask.array&lt;xarray-&lt;this-array&gt;, shape=(4, 6), dtype=float64, chunksize=(4, 6)&gt;"
+        )
+    else:
+        assert hasattr(dask_dataarray.data, "_repr_html_")
+        data_repr = fh.short_data_repr_html(dask_dataarray)
+        assert data_repr == dask_dataarray.data._repr_html_()
+
+
+def test_format_dims_no_dims():
+    dims, coord_names = {}, []
+    formatted = fh.format_dims(dims, coord_names)
+    assert formatted == ""
+
+
+def test_format_dims_unsafe_dim_name():
+    dims, coord_names = {"<x>": 3, "y": 2}, []
+    formatted = fh.format_dims(dims, coord_names)
+    assert "&lt;x&gt;" in formatted
+
+
+def test_format_dims_non_index():
+    dims, coord_names = {"x": 3, "y": 2}, ["time"]
+    formatted = fh.format_dims(dims, coord_names)
+    assert "class='xr-has-index'" not in formatted
+
+
+def test_format_dims_index():
+    dims, coord_names = {"x": 3, "y": 2}, ["x"]
+    formatted = fh.format_dims(dims, coord_names)
+    assert "class='xr-has-index'" in formatted
+
+
+def test_summarize_attrs_with_unsafe_attr_name_and_value():
+    attrs = {"<x>": 3, "y": "<pd.DataFrame>"}
+    formatted = fh.summarize_attrs(attrs)
+    assert "
<dt><span>&lt;x&gt; :</span></dt>" in formatted
+    assert "<dt><span>y :</span></dt>" in formatted
+    assert "<dd>3</dd>" in formatted
+    assert "<dd>&lt;pd.DataFrame&gt;</dd>
    " in formatted + + +def test_repr_of_dataarray(dataarray): + formatted = fh.array_repr(dataarray) + assert "dim_0" in formatted + # has an expandable data section + assert formatted.count("class='xr-array-in' type='checkbox' >") == 1 + # coords and attrs don't have an items so they'll be be disabled and collapsed + assert ( + formatted.count("class='xr-section-summary-in' type='checkbox' disabled >") == 2 + ) + + +def test_summary_of_multiindex_coord(multiindex): + idx = multiindex.x.variable.to_index_variable() + formatted = fh._summarize_coord_multiindex("foo", idx) + assert "(level_1, level_2)" in formatted + assert "MultiIndex" in formatted + assert "foo" in formatted + + +def test_repr_of_multiindex(multiindex): + formatted = fh.dataset_repr(multiindex) + assert "(x)" in formatted + + +def test_repr_of_dataset(dataset): + formatted = fh.dataset_repr(dataset) + # coords, attrs, and data_vars are expanded + assert ( + formatted.count("class='xr-section-summary-in' type='checkbox' checked>") == 3 + ) diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py index 2aa77ecd6b3..f155acbf494 100644 --- a/xarray/tests/test_options.py +++ b/xarray/tests/test_options.py @@ -67,6 +67,16 @@ def test_nested_options(): assert OPTIONS["display_width"] == original +def test_display_style(): + original = "text" + assert OPTIONS["display_style"] == original + with pytest.raises(ValueError): + xarray.set_options(display_style="invalid_str") + with xarray.set_options(display_style="html"): + assert OPTIONS["display_style"] == "html" + assert OPTIONS["display_style"] == original + + def create_test_dataset_attrs(seed=0): ds = create_test_data(seed) ds.attrs = {"attr1": 5, "attr2": "history", "attr3": {"nested": "more_info"}} @@ -164,3 +174,30 @@ def test_merge_attr_retention(self): # option doesn't affect this result = merge([da1, da2]) assert result.attrs == original_attrs + + def test_display_style_text(self): + ds = create_test_dataset_attrs() + text = ds._repr_html_() + assert text.startswith("
    ")
    +        assert "'nested'" in text
    +        assert "<xarray.Dataset>" in text
    +
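# A sketch of the behavior the surrounding tests pin down (helper names as in
# this file): with the default display_style="text", _repr_html_ falls back to
# the escaped plain repr wrapped in <pre>, while "html" delegates to the new
# formatting_html module:
#
#     ds = create_test_dataset_attrs()
#     ds._repr_html_()        # startswith "<pre>", contains "&lt;xarray.Dataset&gt;"
#     with xarray.set_options(display_style="html"):
#         ds._repr_html_()    # startswith "<div>", the rich HTML repr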
    +    def test_display_style_html(self):
    +        ds = create_test_dataset_attrs()
    +        with xarray.set_options(display_style="html"):
    +            html = ds._repr_html_()
    +            assert html.startswith("
    ") + assert "'nested'" in html + + def test_display_dataarray_style_text(self): + da = create_test_dataarray_attrs() + text = da._repr_html_() + assert text.startswith("
    ")
    +        assert "<xarray.DataArray 'var1'" in text
    +
    +    def test_display_dataarray_style_html(self):
    +        da = create_test_dataarray_attrs()
    +        with xarray.set_options(display_style="html"):
    +            html = da._repr_html_()
    +            assert html.startswith("
    ") + assert "#x27;nested'" in html From bb0a5a2b1c71f7c2622543406ccc82ddbb290ece Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Thu, 24 Oct 2019 17:50:19 -0400 Subject: [PATCH 02/27] Escaping dtypes (#3444) * Escaping dtypes * Reformatting --- xarray/core/formatting_html.py | 2 +- xarray/tests/test_formatting_html.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index b03ecc12962..dbebbcf4fbe 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -96,7 +96,7 @@ def summarize_variable(name, var, is_index=False, dtype=None, preview=None): cssclass_idx = " class='xr-has-index'" if is_index else "" dims_str = f"({', '.join(escape(dim) for dim in var.dims)})" name = escape(name) - dtype = dtype or var.dtype + dtype = dtype or escape(str(var.dtype)) # "unique" ids required to expand/collapse subsections attrs_id = "attrs-" + str(uuid.uuid4()) diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index e7f54b22d06..fea24ff93f8 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -130,3 +130,5 @@ def test_repr_of_dataset(dataset): assert ( formatted.count("class='xr-section-summary-in' type='checkbox' checked>") == 3 ) + assert "<U4" in formatted + assert "<IA>" in formatted From 79b3cdd3822c79ad2ee267f4d5082cd91c7f714c Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 25 Oct 2019 11:15:46 -0400 Subject: [PATCH 03/27] change ALL_DIMS to equal ellipsis (#3418) * change ALL_DIMS to equal ... * changed references & added whatsnew * Update xarray/core/groupby.py Co-Authored-By: Deepak Cherian * Update xarray/core/groupby.py Co-Authored-By: Deepak Cherian * note in readme --- doc/examples/multidimensional-coords.rst | 2 +- doc/groupby.rst | 16 +++++++++++----- doc/whats-new.rst | 5 +++++ xarray/core/common.py | 4 ++-- xarray/core/dataset.py | 5 ++--- xarray/core/groupby.py | 16 ++++++++-------- xarray/core/variable.py | 2 +- xarray/tests/test_dask.py | 4 ++-- xarray/tests/test_dataarray.py | 14 +++++++------- xarray/tests/test_dataset.py | 13 ++++++------- xarray/tests/test_groupby.py | 6 +++--- xarray/tests/test_plot.py | 6 +++--- xarray/tests/test_sparse.py | 8 ++++---- 13 files changed, 55 insertions(+), 46 deletions(-) diff --git a/doc/examples/multidimensional-coords.rst b/doc/examples/multidimensional-coords.rst index a5084043977..55569b7662a 100644 --- a/doc/examples/multidimensional-coords.rst +++ b/doc/examples/multidimensional-coords.rst @@ -107,7 +107,7 @@ function to specify the output coordinates of the group. lat_center = np.arange(1, 90, 2) # group according to those bins and take the mean Tair_lat_mean = (ds.Tair.groupby_bins('xc', lat_bins, labels=lat_center) - .mean(xr.ALL_DIMS)) + .mean(...)) # plot the result @savefig xarray_multidimensional_coords_14_1.png width=5in Tair_lat_mean.plot(); diff --git a/doc/groupby.rst b/doc/groupby.rst index e1d88e289d2..52a27f4f160 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -116,7 +116,13 @@ dimensions *other than* the provided one: .. ipython:: python - ds.groupby('x').std(xr.ALL_DIMS) + ds.groupby('x').std(...) + +.. note:: + + We use an ellipsis (`...`) here to indicate we want to reduce over all + other dimensions + First and last ~~~~~~~~~~~~~~ @@ -127,7 +133,7 @@ values for group along the grouped dimension: .. 
ipython:: python - ds.groupby('letters').first(xr.ALL_DIMS) + ds.groupby('letters').first(...) By default, they skip missing values (control this with ``skipna``). @@ -142,7 +148,7 @@ coordinates. For example: .. ipython:: python - alt = arr.groupby('letters').mean(xr.ALL_DIMS) + alt = arr.groupby('letters').mean(...) alt ds.groupby('letters') - alt @@ -195,7 +201,7 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen 'lat': (['ny','nx'], [[10,10],[20,20]] ),}, dims=['ny','nx']) da - da.groupby('lon').sum(xr.ALL_DIMS) + da.groupby('lon').sum(...) da.groupby('lon').apply(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large @@ -213,4 +219,4 @@ applying your function, and then unstacking the result: .. ipython:: python stacked = da.stack(gridcell=['ny', 'nx']) - stacked.groupby('gridcell').sum(xr.ALL_DIMS).unstack('gridcell') + stacked.groupby('gridcell').sum(...).unstack('gridcell') diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 12bed8f332e..ac60994d35b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,11 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Changed `xr.ALL_DIMS` to equal python's `Ellipsis` (`...`), and changed internal usages to use + `...` directly. As before, you can use this to instruct a `groupby` operation + to reduce over all dimensions. While we have no plans to remove `xr.ALL_DIMS`, we suggest + using `...`. + By `Maximilian Roos `_ - Added integration tests against `pint `_. (:pull:`3238`) by `Justus Magin `_. diff --git a/xarray/core/common.py b/xarray/core/common.py index 1a8cf34ed39..d372115ea57 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -25,10 +25,10 @@ from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .rolling_exp import RollingExp -from .utils import Frozen, ReprObject, either_dict_or_kwargs +from .utils import Frozen, either_dict_or_kwargs # Used as a sentinel value to indicate a all dimensions -ALL_DIMS = ReprObject("") +ALL_DIMS = ... C = TypeVar("C") diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index eba580f84bd..55ac0bc6135 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -49,7 +49,6 @@ ) from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align from .common import ( - ALL_DIMS, DataWithCoords, ImplementsDatasetReduce, _contains_datetime_like_objects, @@ -4037,7 +4036,7 @@ def reduce( Dataset with this object's DataArrays replaced with new DataArrays of summarized data and the indicated dimension(s) removed. """ - if dim is None or dim is ALL_DIMS: + if dim is None or dim is ...: dims = set(self.dims) elif isinstance(dim, str) or not isinstance(dim, Iterable): dims = {dim} @@ -5002,7 +5001,7 @@ def quantile( if isinstance(dim, str): dims = {dim} - elif dim is None or dim is ALL_DIMS: + elif dim in [None, ...]: dims = set(self.dims) else: dims = set(dim) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 52eb17df18d..68bd28ddb12 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -7,7 +7,7 @@ from . 
import dtypes, duck_array_ops, nputils, ops from .arithmetic import SupportsArithmetic -from .common import ALL_DIMS, ImplementsArrayReduce, ImplementsDatasetReduce +from .common import ImplementsArrayReduce, ImplementsDatasetReduce from .concat import concat from .formatting import format_array_flat from .options import _get_keep_attrs @@ -712,7 +712,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): q : float in range of [0,1] (or sequence of floats) Quantile to compute, which must be between 0 and 1 inclusive. - dim : xarray.ALL_DIMS, str or sequence of str, optional + dim : `...`, str or sequence of str, optional Dimension(s) over which to apply quantile. Defaults to the grouped dimension. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} @@ -769,7 +769,7 @@ def reduce( Function which can be called in the form `func(x, axis=axis, **kwargs)` to return the result of collapsing an np.ndarray over an integer valued axis. - dim : xarray.ALL_DIMS, str or sequence of str, optional + dim : `...`, str or sequence of str, optional Dimension(s) over which to apply `func`. axis : int or sequence of int, optional Axis(es) over which to apply `func`. Only one of the 'dimension' @@ -794,9 +794,9 @@ def reduce( if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - if dim is not ALL_DIMS and dim not in self.dims: + if dim is not ... and dim not in self.dims: raise ValueError( - "cannot reduce over dimension %r. expected either xarray.ALL_DIMS to reduce over all dimensions or one or more of %r." + "cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r." % (dim, self.dims) ) @@ -867,7 +867,7 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): Function which can be called in the form `func(x, axis=axis, **kwargs)` to return the result of collapsing an np.ndarray over an integer valued axis. - dim : xarray.ALL_DIMS, str or sequence of str, optional + dim : `...`, str or sequence of str, optional Dimension(s) over which to apply `func`. axis : int or sequence of int, optional Axis(es) over which to apply `func`. Only one of the 'dimension' @@ -895,9 +895,9 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): def reduce_dataset(ds): return ds.reduce(func, dim, keep_attrs, **kwargs) - if dim is not ALL_DIMS and dim not in self.dims: + if dim is not ... and dim not in self.dims: raise ValueError( - "cannot reduce over dimension %r. expected either xarray.ALL_DIMS to reduce over all dimensions or one or more of %r." + "cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r." % (dim, self.dims) ) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 37672cd82d9..93ad1eafb97 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1450,7 +1450,7 @@ def reduce( Array with summarized data and the indicated dimension(s) removed. """ - if dim is common.ALL_DIMS: + if dim == ...: dim = None if dim is not None and axis is not None: raise ValueError("cannot supply both 'axis' and 'dim' arguments") diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index ae8f43cb66d..50517ae3c9c 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -435,8 +435,8 @@ def test_groupby(self): u = self.eager_array v = self.lazy_array - expected = u.groupby("x").mean(xr.ALL_DIMS) - actual = v.groupby("x").mean(xr.ALL_DIMS) + expected = u.groupby("x").mean(...) + actual = v.groupby("x").mean(...) 
self.assertLazyAndAllClose(expected, actual) def test_groupby_first(self): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index a3a2f55f6cc..b13527bc098 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -13,7 +13,7 @@ from xarray.coding.times import CFDatetimeCoder from xarray.convert import from_cdms2 from xarray.core import dtypes -from xarray.core.common import ALL_DIMS, full_like +from xarray.core.common import full_like from xarray.tests import ( LooseVersion, ReturnItem, @@ -2443,8 +2443,8 @@ def test_groupby_sum(self): "abc": Variable(["abc"], np.array(["a", "b", "c"])), } )["foo"] - assert_allclose(expected_sum_all, grouped.reduce(np.sum, dim=ALL_DIMS)) - assert_allclose(expected_sum_all, grouped.sum(ALL_DIMS)) + assert_allclose(expected_sum_all, grouped.reduce(np.sum, dim=...)) + assert_allclose(expected_sum_all, grouped.sum(...)) expected = DataArray( [ @@ -2456,7 +2456,7 @@ def test_groupby_sum(self): ) actual = array["y"].groupby("abc").apply(np.sum) assert_allclose(expected, actual) - actual = array["y"].groupby("abc").sum(ALL_DIMS) + actual = array["y"].groupby("abc").sum(...) assert_allclose(expected, actual) expected_sum_axis1 = Dataset( @@ -2590,9 +2590,9 @@ def test_groupby_math(self): assert_identical(expected, actual) grouped = array.groupby("abc") - expected_agg = (grouped.mean(ALL_DIMS) - np.arange(3)).rename(None) + expected_agg = (grouped.mean(...) - np.arange(3)).rename(None) actual = grouped - DataArray(range(3), [("abc", ["a", "b", "c"])]) - actual_agg = actual.groupby("abc").mean(ALL_DIMS) + actual_agg = actual.groupby("abc").mean(...) assert_allclose(expected_agg, actual_agg) with raises_regex(TypeError, "only support binary ops"): @@ -2698,7 +2698,7 @@ def test_groupby_multidim(self): ("lon", DataArray([5, 28, 23], coords=[("lon", [30.0, 40.0, 50.0])])), ("lat", DataArray([16, 40], coords=[("lat", [10.0, 20.0])])), ]: - actual_sum = array.groupby(dim).sum(ALL_DIMS) + actual_sum = array.groupby(dim).sum(...) assert_identical(expected_sum, actual_sum) def test_groupby_multidim_apply(self): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 006d6881b5a..b3ffdf68e3f 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -11,7 +11,6 @@ import xarray as xr from xarray import ( - ALL_DIMS, DataArray, Dataset, IndexVariable, @@ -3327,7 +3326,7 @@ def test_groupby_reduce(self): expected = data.mean("y") expected["yonly"] = expected["yonly"].variable.set_dims({"x": 3}) - actual = data.groupby("x").mean(ALL_DIMS) + actual = data.groupby("x").mean(...) assert_allclose(expected, actual) actual = data.groupby("x").mean("y") @@ -3336,12 +3335,12 @@ def test_groupby_reduce(self): letters = data["letters"] expected = Dataset( { - "xy": data["xy"].groupby(letters).mean(ALL_DIMS), + "xy": data["xy"].groupby(letters).mean(...), "xonly": (data["xonly"].mean().variable.set_dims({"letters": 2})), "yonly": data["yonly"].groupby(letters).mean(), } ) - actual = data.groupby("letters").mean(ALL_DIMS) + actual = data.groupby("letters").mean(...) assert_allclose(expected, actual) def test_groupby_math(self): @@ -3404,14 +3403,14 @@ def test_groupby_math_virtual(self): {"x": ("t", [1, 2, 3])}, {"t": pd.date_range("20100101", periods=3)} ) grouped = ds.groupby("t.day") - actual = grouped - grouped.mean(ALL_DIMS) + actual = grouped - grouped.mean(...) 
expected = Dataset({"x": ("t", [0, 0, 0])}, ds[["t", "t.day"]]) assert_identical(actual, expected) def test_groupby_nan(self): # nan should be excluded from groupby ds = Dataset({"foo": ("x", [1, 2, 3, 4])}, {"bar": ("x", [1, 1, 2, np.nan])}) - actual = ds.groupby("bar").mean(ALL_DIMS) + actual = ds.groupby("bar").mean(...) expected = Dataset({"foo": ("bar", [1.5, 3]), "bar": [1, 2]}) assert_identical(actual, expected) @@ -3421,7 +3420,7 @@ def test_groupby_order(self): for vn in ["a", "b", "c"]: ds[vn] = DataArray(np.arange(10), dims=["t"]) data_vars_ref = list(ds.data_vars.keys()) - ds = ds.groupby("t").mean(ALL_DIMS) + ds = ds.groupby("t").mean(...) data_vars = list(ds.data_vars.keys()) assert data_vars == data_vars_ref # coords are now at the end of the list, so the test below fails diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index be494c4ae2b..a6de41beb66 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -147,11 +147,11 @@ def test_da_groupby_quantile(): [("x", [1, 1, 1, 2, 2]), ("y", [0, 0, 1])], ) - actual_x = array.groupby("x").quantile(0, dim=xr.ALL_DIMS) + actual_x = array.groupby("x").quantile(0, dim=...) expected_x = xr.DataArray([1, 4], [("x", [1, 2])]) assert_identical(expected_x, actual_x) - actual_y = array.groupby("y").quantile(0, dim=xr.ALL_DIMS) + actual_y = array.groupby("y").quantile(0, dim=...) expected_y = xr.DataArray([1, 22], [("y", [0, 1])]) assert_identical(expected_y, actual_y) @@ -177,7 +177,7 @@ def test_da_groupby_quantile(): ) g = foo.groupby(foo.time.dt.month) - actual = g.quantile(0, dim=xr.ALL_DIMS) + actual = g.quantile(0, dim=...) expected = xr.DataArray( [ 0.0, diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 3ac45a9720f..7deabd46eae 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -417,7 +417,7 @@ def test_convenient_facetgrid_4d(self): def test_coord_with_interval(self): bins = [-1, 0, 1, 2] - self.darray.groupby_bins("dim_0", bins).mean(xr.ALL_DIMS).plot() + self.darray.groupby_bins("dim_0", bins).mean(...).plot() class TestPlot1D(PlotTestCase): @@ -502,7 +502,7 @@ def test_step(self): def test_coord_with_interval_step(self): bins = [-1, 0, 1, 2] - self.darray.groupby_bins("dim_0", bins).mean(xr.ALL_DIMS).plot.step() + self.darray.groupby_bins("dim_0", bins).mean(...).plot.step() assert len(plt.gca().lines[0].get_xdata()) == ((len(bins) - 1) * 2) @@ -544,7 +544,7 @@ def test_plot_nans(self): def test_hist_coord_with_interval(self): ( self.darray.groupby_bins("dim_0", [-1, 0, 1, 2]) - .mean(xr.ALL_DIMS) + .mean(...) .plot.hist(range=(-1, 2)) ) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index bd26b96f6d4..73c4b9b8c74 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -756,8 +756,8 @@ def test_dot(self): def test_groupby(self): x1 = self.ds_xr x2 = self.sp_xr - m1 = x1.groupby("x").mean(xr.ALL_DIMS) - m2 = x2.groupby("x").mean(xr.ALL_DIMS) + m1 = x1.groupby("x").mean(...) + m2 = x2.groupby("x").mean(...) assert isinstance(m2.data, sparse.SparseArray) assert np.allclose(m1.data, m2.data.todense()) @@ -772,8 +772,8 @@ def test_groupby_first(self): def test_groupby_bins(self): x1 = self.ds_xr x2 = self.sp_xr - m1 = x1.groupby_bins("x", bins=[0, 3, 7, 10]).sum(xr.ALL_DIMS) - m2 = x2.groupby_bins("x", bins=[0, 3, 7, 10]).sum(xr.ALL_DIMS) + m1 = x1.groupby_bins("x", bins=[0, 3, 7, 10]).sum(...) + m2 = x2.groupby_bins("x", bins=[0, 3, 7, 10]).sum(...) 
assert isinstance(m2.data, sparse.SparseArray) assert np.allclose(m1.data, m2.data.todense()) From 63cc85759ac25605c8398d904d055df5dc538b94 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 25 Oct 2019 17:40:46 +0200 Subject: [PATCH 04/27] add icomoon license (#3448) --- README.rst | 3 + licenses/ICOMOON_LICENSE | 395 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 398 insertions(+) create mode 100644 licenses/ICOMOON_LICENSE diff --git a/README.rst b/README.rst index 53f51392a1a..5ee7234f221 100644 --- a/README.rst +++ b/README.rst @@ -138,4 +138,7 @@ under a "3-clause BSD" license: xarray also bundles portions of CPython, which is available under the "Python Software Foundation License" in xarray/core/pycompat.py. +xarray uses icons from the icomoon package (free version), which is +available under the "CC BY 4.0" license. + The full text of these licenses are included in the licenses directory. diff --git a/licenses/ICOMOON_LICENSE b/licenses/ICOMOON_LICENSE new file mode 100644 index 00000000000..4ea99c213c5 --- /dev/null +++ b/licenses/ICOMOON_LICENSE @@ -0,0 +1,395 @@ +Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. 
Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. 
Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + j. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. 
Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. 
UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. 
No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public +licenses. Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. From fb0cf7b5fe56519a933ffcecbce9e9327fe236a6 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 25 Oct 2019 15:01:11 -0600 Subject: [PATCH 05/27] Another groupby.reduce bugfix. (#3403) * Another groupby.reduce bugfix. Fixes #3402 * Add whats-new. * Use is_scalar instead * bugfix * fix whats-new * Update xarray/core/groupby.py Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/whats-new.rst | 4 +++ xarray/core/groupby.py | 27 +++++++++------- xarray/tests/test_dataarray.py | 9 ------ xarray/tests/test_groupby.py | 56 +++++++++++++++++++++++++--------- 4 files changed, 61 insertions(+), 35 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ac60994d35b..dea110b5e46 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -55,6 +55,10 @@ Bug fixes - Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. By `Anderson Banihirwe `_. +- Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and + :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions. + (:issue:`3402`). By `Deepak Cherian `_ + Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 68bd28ddb12..62c055fed51 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -15,6 +15,7 @@ from .utils import ( either_dict_or_kwargs, hashable, + is_scalar, maybe_wrap_array, peek_at, safe_cast_to_index, @@ -22,6 +23,18 @@ from .variable import IndexVariable, Variable, as_variable +def check_reduce_dims(reduce_dims, dimensions): + + if reduce_dims is not ...: + if is_scalar(reduce_dims): + reduce_dims = [reduce_dims] + if any([dim not in dimensions for dim in reduce_dims]): + raise ValueError( + "cannot reduce over dimensions %r. expected either '...' to reduce over all dimensions or one or more of %r." + % (reduce_dims, dimensions) + ) + + def unique_value_groups(ar, sort=True): """Group an array by its unique values. 
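# A minimal usage sketch (not from the patch) of the helper added above; it
# assumes the patched xarray is installed so the new function is importable.
from xarray.core.groupby import check_reduce_dims

check_reduce_dims(..., ("x", "y"))  # ok: `...` means "reduce over all dimensions"
check_reduce_dims("x", ("x", "y"))  # ok: a scalar dim is wrapped into a list
try:
    check_reduce_dims(("x", "z"), ("x", "y"))
except ValueError as err:
    print(err)  # cannot reduce over dimensions ('x', 'z'). expected either '...' ...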
@@ -794,15 +807,11 @@ def reduce( if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - if dim is not ... and dim not in self.dims: - raise ValueError( - "cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r." - % (dim, self.dims) - ) - def reduce_array(ar): return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) + check_reduce_dims(dim, self.dims) + return self.apply(reduce_array, shortcut=shortcut) @@ -895,11 +904,7 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): def reduce_dataset(ds): return ds.reduce(func, dim, keep_attrs, **kwargs) - if dim is not ... and dim not in self.dims: - raise ValueError( - "cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r." - % (dim, self.dims) - ) + check_reduce_dims(dim, self.dims) return self.apply(reduce_dataset) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b13527bc098..101bb44660c 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2560,15 +2560,6 @@ def change_metadata(x): expected = change_metadata(expected) assert_equal(expected, actual) - def test_groupby_reduce_dimension_error(self): - array = self.make_groupby_example_array() - grouped = array.groupby("y") - with raises_regex(ValueError, "cannot reduce over dimension 'y'"): - grouped.mean() - - grouped = array.groupby("y", squeeze=False) - assert_identical(array, grouped.mean()) - def test_groupby_math(self): array = self.make_groupby_example_array() for squeeze in [True, False]: diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index a6de41beb66..d74d684dc54 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -5,7 +5,23 @@ import xarray as xr from xarray.core.groupby import _consolidate_slices -from . import assert_identical, raises_regex +from . 
import assert_allclose, assert_identical, raises_regex + + +@pytest.fixture +def dataset(): + ds = xr.Dataset( + {"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))}, + {"x": ["a", "b", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}, + ) + ds["boo"] = (("z", "y"), [["f", "g", "h", "j"]] * 2) + + return ds + + +@pytest.fixture +def array(dataset): + return dataset["foo"] def test_consolidate_slices(): @@ -21,25 +37,17 @@ def test_consolidate_slices(): _consolidate_slices([slice(3), 4]) -def test_groupby_dims_property(): - ds = xr.Dataset( - {"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))}, - {"x": ["a", "bcd", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}, - ) +def test_groupby_dims_property(dataset): + assert dataset.groupby("x").dims == dataset.isel(x=1).dims + assert dataset.groupby("y").dims == dataset.isel(y=1).dims - assert ds.groupby("x").dims == ds.isel(x=1).dims - assert ds.groupby("y").dims == ds.isel(y=1).dims - - stacked = ds.stack({"xy": ("x", "y")}) + stacked = dataset.stack({"xy": ("x", "y")}) assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims -def test_multi_index_groupby_apply(): +def test_multi_index_groupby_apply(dataset): # regression test for GH873 - ds = xr.Dataset( - {"foo": (("x", "y"), np.random.randn(3, 4))}, - {"x": ["a", "b", "c"], "y": [1, 2, 3, 4]}, - ) + ds = dataset.isel(z=1, drop=True)[["foo"]] doubled = 2 * ds group_doubled = ( ds.stack(space=["x", "y"]) @@ -276,6 +284,24 @@ def test_groupby_grouping_errors(): dataset.to_array().groupby(dataset.foo * np.nan) +def test_groupby_reduce_dimension_error(array): + grouped = array.groupby("y") + with raises_regex(ValueError, "cannot reduce over dimensions"): + grouped.mean() + + with raises_regex(ValueError, "cannot reduce over dimensions"): + grouped.mean("huh") + + with raises_regex(ValueError, "cannot reduce over dimensions"): + grouped.mean(("x", "y", "asd")) + + grouped = array.groupby("y", squeeze=False) + assert_identical(array, grouped.mean()) + + assert_identical(array.mean("x"), grouped.reduce(np.mean, "x")) + assert_allclose(array.mean(["x", "z"]), grouped.reduce(np.mean, ["x", "z"])) + + def test_groupby_bins_timeseries(): ds = xr.Dataset() ds["time"] = xr.DataArray( From 02288b4e0cb4e300e402d96bbbdba68db6eeb41f Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 28 Oct 2019 17:12:48 -0400 Subject: [PATCH 06/27] Allow ellipsis (...) 
in transpose (#3421) * infix_dims function * implement transpose with ellipsis * also infix in dataarray * check errors centrally, remove boilerplate from transpose methods * whatsnew * docs * remove old comments * generator->iterator * test for differently ordered dimensions --- doc/reshaping.rst | 4 +++- doc/whats-new.rst | 4 ++++ setup.cfg | 5 ++++- xarray/core/dataarray.py | 7 +------ xarray/core/dataset.py | 4 ++-- xarray/core/utils.py | 25 +++++++++++++++++++++++++ xarray/core/variable.py | 2 ++ xarray/tests/__init__.py | 3 +++ xarray/tests/test_dataarray.py | 4 ++++ xarray/tests/test_dataset.py | 27 +++++++++++++++++++++++++-- xarray/tests/test_utils.py | 24 ++++++++++++++++++++++++ xarray/tests/test_variable.py | 3 +++ 12 files changed, 100 insertions(+), 12 deletions(-) diff --git a/doc/reshaping.rst b/doc/reshaping.rst index 51202f9be41..455a24f9216 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -18,12 +18,14 @@ Reordering dimensions --------------------- To reorder dimensions on a :py:class:`~xarray.DataArray` or across all variables -on a :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.transpose`: +on a :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.transpose`. An +ellipsis (`...`) can be used to represent all other dimensions: .. ipython:: python ds = xr.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])}) ds.transpose('y', 'z', 'x') + ds.transpose(..., 'x') # equivalent ds.transpose() # reverses all dimensions Expand and squeeze dimensions diff --git a/doc/whats-new.rst b/doc/whats-new.rst index dea110b5e46..cced7276ff3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,10 @@ Breaking changes New Features ~~~~~~~~~~~~ +- :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`) + to represent all 'other' dimensions. For example, to move one dimension to the front, + use `.transpose('x', ...)`. (:pull:`3421`) + By `Maximilian Roos `_ - Changed `xr.ALL_DIMS` to equal python's `Ellipsis` (`...`), and changed internal usages to use `...` directly. As before, you can use this to instruct a `groupby` operation to reduce over all dimensions. While we have no plans to remove `xr.ALL_DIMS`, we suggest diff --git a/setup.cfg b/setup.cfg index eee8b2477b2..fec2ca6bbe4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -117,4 +117,7 @@ tag_prefix = v parentdir_prefix = xarray- [aliases] -test = pytest \ No newline at end of file +test = pytest + +[pytest-watch] +nobeep = True \ No newline at end of file diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 5fccb9236e8..33dcad13204 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1863,12 +1863,7 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra Dataset.transpose """ if dims: - if set(dims) ^ set(self.dims): - raise ValueError( - "arguments to transpose (%s) must be " - "permuted array dimensions (%s)" % (dims, tuple(self.dims)) - ) - + dims = tuple(utils.infix_dims(dims, self.dims)) variable = self.variable.transpose(*dims) if transpose_coords: coords: Dict[Hashable, Variable] = {} diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 55ac0bc6135..2a0464515c6 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3712,14 +3712,14 @@ def transpose(self, *dims: Hashable) -> "Dataset": DataArray.transpose """ if dims: - if set(dims) ^ set(self.dims): + if set(dims) ^ set(self.dims) and ... 
not in dims: raise ValueError( "arguments to transpose (%s) must be " "permuted dataset dimensions (%s)" % (dims, tuple(self.dims)) ) ds = self.copy() for name, var in self._variables.items(): - var_dims = tuple(dim for dim in dims if dim in var.dims) + var_dims = tuple(dim for dim in dims if dim in (var.dims + (...,))) ds._variables[name] = var.transpose(*var_dims) return ds diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 6befe0b5efc..492c595a887 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -10,6 +10,7 @@ AbstractSet, Any, Callable, + Collection, Container, Dict, Hashable, @@ -660,6 +661,30 @@ def __len__(self) -> int: return len(self._data) - num_hidden +def infix_dims(dims_supplied: Collection, dims_all: Collection) -> Iterator: + """ + Resolves a supplied list containing an ellipsis representing other items, to + a generator with the 'realized' list of all items + """ + if ... in dims_supplied: + if len(set(dims_all)) != len(dims_all): + raise ValueError("Cannot use ellipsis with repeated dims") + if len([d for d in dims_supplied if d == ...]) > 1: + raise ValueError("More than one ellipsis supplied") + other_dims = [d for d in dims_all if d not in dims_supplied] + for d in dims_supplied: + if d == ...: + yield from other_dims + else: + yield d + else: + if set(dims_supplied) ^ set(dims_all): + raise ValueError( + f"{dims_supplied} must be a permuted list of {dims_all}, unless `...` is included" + ) + yield from dims_supplied + + def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: """ Get a new dimension name based on new_dim, that is not used in dims. If the same name exists, we add an underscore(s) in the head. diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 93ad1eafb97..7d03fd58d39 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -25,6 +25,7 @@ OrderedSet, decode_numpy_dict_values, either_dict_or_kwargs, + infix_dims, ensure_us_time_resolution, ) @@ -1228,6 +1229,7 @@ def transpose(self, *dims) -> "Variable": """ if len(dims) == 0: dims = self.dims[::-1] + dims = tuple(infix_dims(dims, self.dims)) axes = self.get_axis_num(dims) if len(dims) < 2: # no need to transpose if only one dimension return self.copy(deep=False) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 88476e5e730..f85a33f7a3c 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -158,18 +158,21 @@ def source_ndarray(array): def assert_equal(a, b): + __tracebackhide__ = True xarray.testing.assert_equal(a, b) xarray.testing._assert_internal_invariants(a) xarray.testing._assert_internal_invariants(b) def assert_identical(a, b): + __tracebackhide__ = True xarray.testing.assert_identical(a, b) xarray.testing._assert_internal_invariants(a) xarray.testing._assert_internal_invariants(b) def assert_allclose(a, b, **kwargs): + __tracebackhide__ = True xarray.testing.assert_allclose(a, b, **kwargs) xarray.testing._assert_internal_invariants(a) xarray.testing._assert_internal_invariants(b) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 101bb44660c..ad474d533be 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2068,6 +2068,10 @@ def test_transpose(self): ) assert_equal(expected, actual) + # same as previous but with ellipsis + actual = da.transpose("z", ..., "x", transpose_coords=True) + assert_equal(expected, actual) + with pytest.raises(ValueError): da.transpose("x", "y") 
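# A quick sketch (not from the patch) of the resolution rule implemented by
# infix_dims above: `...` expands, in place, to every dimension that was not
# named explicitly; without `...` the supplied dims must be a full permutation.
from xarray.core.utils import infix_dims

print(list(infix_dims(["c", ...], ["a", "b", "c"])))  # ['c', 'a', 'b']
print(list(infix_dims([..., "b"], ["a", "b", "c"])))  # ['a', 'c', 'b']
try:
    list(infix_dims(["a", "b"], ["a", "b", "c"]))  # no `...`, not a permutation
except ValueError as err:
    print(err)
diff --git a/xarray/tests/test_dataset.py 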
b/xarray/tests/test_dataset.py index b3ffdf68e3f..647eb733adb 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4675,6 +4675,10 @@ def test_dataset_transpose(self): ) assert_identical(expected, actual) + actual = ds.transpose(...) + expected = ds + assert_identical(expected, actual) + actual = ds.transpose("x", "y") expected = ds.apply(lambda x: x.transpose("x", "y", transpose_coords=True)) assert_identical(expected, actual) @@ -4690,13 +4694,32 @@ def test_dataset_transpose(self): expected_dims = tuple(d for d in new_order if d in ds[k].dims) assert actual[k].dims == expected_dims - with raises_regex(ValueError, "arguments to transpose"): + # same as above but with ellipsis + new_order = ("dim2", "dim3", "dim1", "time") + actual = ds.transpose("dim2", "dim3", ...) + for k in ds.variables: + expected_dims = tuple(d for d in new_order if d in ds[k].dims) + assert actual[k].dims == expected_dims + + with raises_regex(ValueError, "permuted"): ds.transpose("dim1", "dim2", "dim3") - with raises_regex(ValueError, "arguments to transpose"): + with raises_regex(ValueError, "permuted"): ds.transpose("dim1", "dim2", "dim3", "time", "extra_dim") assert "T" not in dir(ds) + def test_dataset_ellipsis_transpose_different_ordered_vars(self): + # https://github.com/pydata/xarray/issues/1081#issuecomment-544350457 + ds = Dataset( + dict( + a=(("w", "x", "y", "z"), np.ones((2, 3, 4, 5))), + b=(("x", "w", "y", "z"), np.zeros((3, 2, 4, 5))), + ) + ) + result = ds.transpose(..., "z", "y") + assert list(result["a"].dims) == list("wxzy") + assert list(result["b"].dims) == list("xwzy") + def test_dataset_retains_period_index_on_transpose(self): ds = create_test_data() diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index c36e8a1775d..5bb9deaf240 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -275,3 +275,27 @@ def test_either_dict_or_kwargs(): with pytest.raises(ValueError, match=r"foo"): result = either_dict_or_kwargs(dict(a=1), dict(a=1), "foo") + + +@pytest.mark.parametrize( + ["supplied", "all_", "expected"], + [ + (list("abc"), list("abc"), list("abc")), + (["a", ..., "c"], list("abc"), list("abc")), + (["a", ...], list("abc"), list("abc")), + (["c", ...], list("abc"), list("cab")), + ([..., "b"], list("abc"), list("acb")), + ([...], list("abc"), list("abc")), + ], +) +def test_infix_dims(supplied, all_, expected): + result = list(utils.infix_dims(supplied, all_)) + assert result == expected + + +@pytest.mark.parametrize( + ["supplied", "all_"], [([..., ...], list("abc")), ([...], list("aac"))] +) +def test_infix_dims_errors(supplied, all_): + with pytest.raises(ValueError): + list(utils.infix_dims(supplied, all_)) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 78723eda013..528027ed149 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1280,6 +1280,9 @@ def test_transpose(self): w2 = Variable(["d", "b", "c", "a"], np.einsum("abcd->dbca", x)) assert w2.shape == (5, 3, 4, 2) assert_identical(w2, w.transpose("d", "b", "c", "a")) + assert_identical(w2, w.transpose("d", ..., "a")) + assert_identical(w2, w.transpose("d", "b", "c", ...)) + assert_identical(w2, w.transpose(..., "b", "c", "a")) assert_identical(w, w2.transpose("a", "b", "c", "d")) w3 = Variable(["b", "c", "d", "a"], np.einsum("abcd->bcda", x)) assert_identical(w, w3.transpose("a", "b", "c", "d")) From c955449d4d5c7ef6b2607af13df4abed778a4c61 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 28 Oct 
2019 17:46:40 -0600 Subject: [PATCH 07/27] Drop groups associated with nans in group variable (#3406) * Drop nans in grouped variable. * Add NaTs * whats-new * fix merge. * fix whats-new * fix test --- doc/whats-new.rst | 7 ++-- xarray/core/groupby.py | 7 ++++ xarray/tests/test_groupby.py | 80 ++++++++++++++++++++++++++++++++---- 3 files changed, 83 insertions(+), 11 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cced7276ff3..73618782460 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -55,15 +55,14 @@ Bug fixes ~~~~~~~~~ - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - -- Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. +- Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). + By `Deepak Cherian `_. +- Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. By `Anderson Banihirwe `_. - - Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions. (:issue:`3402`). By `Deepak Cherian `_ - Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 62c055fed51..c3f712b31ac 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -361,6 +361,13 @@ def __init__( group_indices = [slice(i, i + 1) for i in group_indices] unique_coord = group else: + if group.isnull().any(): + # drop any NaN valued groups. + # also drop obj values where group was NaN + # Use where instead of reindex to account for duplicate coordinate labels. + obj = obj.where(group.notnull(), drop=True) + group = group.dropna(group_dim) + # look through group to find the unique values unique_values, group_indices = unique_value_groups( safe_cast_to_index(group), sort=(bins is None) 
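# A small sketch (not from the patch) of the new behaviour: groups whose label
# is NaN are dropped, together with the corresponding values, instead of NaN
# showing up as a group of its own (this mirrors the tests below).
import numpy as np
import xarray as xr

da = xr.DataArray([1, 2, 3], [("x", [1, 2, 3])])
da["x1"] = ("x", [1.0, 1.0, np.nan])
print(da.groupby("x1").sum())  # one group, x1 == 1.0, with value 1 + 2 == 3
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index d74d684dc54..e2216547ac8 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -5,7 +5,7 @@ import xarray as xr from xarray.core.groupby import _consolidate_slices -from . import assert_allclose, assert_identical, raises_regex +from . 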
import assert_allclose, assert_equal, assert_identical, raises_regex @pytest.fixture @@ -48,14 +48,14 @@ def test_groupby_dims_property(dataset): def test_multi_index_groupby_apply(dataset): # regression test for GH873 ds = dataset.isel(z=1, drop=True)[["foo"]] - doubled = 2 * ds - group_doubled = ( + expected = 2 * ds + actual = ( ds.stack(space=["x", "y"]) .groupby("space") .apply(lambda x: 2 * x) .unstack("space") ) - assert doubled.equals(group_doubled) + assert_equal(expected, actual) def test_multi_index_groupby_sum(): @@ -66,7 +66,7 @@ def test_multi_index_groupby_sum(): ) expected = ds.sum("z") actual = ds.stack(space=["x", "y"]).groupby("space").sum("z").unstack("space") - assert expected.equals(actual) + assert_equal(expected, actual) def test_groupby_da_datetime(): @@ -86,7 +86,7 @@ def test_groupby_da_datetime(): expected = xr.DataArray( [3, 7], coords=dict(reference_date=reference_dates), dims="reference_date" ) - assert actual.equals(expected) + assert_equal(expected, actual) def test_groupby_duplicate_coordinate_labels(): @@ -94,7 +94,7 @@ def test_groupby_duplicate_coordinate_labels(): array = xr.DataArray([1, 2, 3], [("x", [1, 1, 2])]) expected = xr.DataArray([3, 3], [("x", [1, 2])]) actual = array.groupby("x").sum() - assert expected.equals(actual) + assert_equal(expected, actual) def test_groupby_input_mutation(): @@ -263,6 +263,72 @@ def test_groupby_repr_datetime(obj): assert actual == expected +def test_groupby_drops_nans(): + # GH2383 + # nan in 2D data variable (requires stacking) + ds = xr.Dataset( + { + "variable": (("lat", "lon", "time"), np.arange(60.0).reshape((4, 3, 5))), + "id": (("lat", "lon"), np.arange(12.0).reshape((4, 3))), + }, + coords={"lat": np.arange(4), "lon": np.arange(3), "time": np.arange(5)}, + ) + + ds["id"].values[0, 0] = np.nan + ds["id"].values[3, 0] = np.nan + ds["id"].values[-1, -1] = np.nan + + grouped = ds.groupby(ds.id) + + # non reduction operation + expected = ds.copy() + expected.variable.values[0, 0, :] = np.nan + expected.variable.values[-1, -1, :] = np.nan + expected.variable.values[3, 0, :] = np.nan + actual = grouped.apply(lambda x: x).transpose(*ds.variable.dims) + assert_identical(actual, expected) + + # reduction along grouped dimension + actual = grouped.mean() + stacked = ds.stack({"xy": ["lat", "lon"]}) + expected = ( + stacked.variable.where(stacked.id.notnull()).rename({"xy": "id"}).to_dataset() + ) + expected["id"] = stacked.id.values + assert_identical(actual, expected.dropna("id").transpose(*actual.dims)) + + # reduction operation along a different dimension + actual = grouped.mean("time") + expected = ds.mean("time").where(ds.id.notnull()) + assert_identical(actual, expected) + + # NaN in non-dimensional coordinate + array = xr.DataArray([1, 2, 3], [("x", [1, 2, 3])]) + array["x1"] = ("x", [1, 1, np.nan]) + expected = xr.DataArray(3, [("x1", [1])]) + actual = array.groupby("x1").sum() + assert_equal(expected, actual) + + # NaT in non-dimensional coordinate + array["t"] = ( + "x", + [ + np.datetime64("2001-01-01"), + np.datetime64("2001-01-01"), + np.datetime64("NaT"), + ], + ) + expected = xr.DataArray(3, [("t", [np.datetime64("2001-01-01")])]) + actual = array.groupby("t").sum() + assert_equal(expected, actual) + + # test for repeated coordinate labels + array = xr.DataArray([0, 1, 2, 4, 3, 4], [("x", [np.nan, 1, 1, np.nan, 2, np.nan])]) + expected = xr.DataArray([3, 3], [("x", [1, 2])]) + actual = array.groupby("x").sum() + assert_equal(expected, actual) + + def test_groupby_grouping_errors(): dataset = 
xr.Dataset({"foo": ("x", [1, 1, 1])}, {"x": [1, 2, 3]}) with raises_regex(ValueError, "None of the data falls within bins with edges"): From 43d07b7b1d389a4bfc95c920149f4caa78653e81 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 28 Oct 2019 23:47:27 -0400 Subject: [PATCH 08/27] jupyterlab dark theme (#3443) --- xarray/static/css/style.css | 43 +++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css index 536b8ab6103..7e382de3b5b 100644 --- a/xarray/static/css/style.css +++ b/xarray/static/css/style.css @@ -2,6 +2,17 @@ * */ +:root { + --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1)); + --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54)); + --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38)); + --xr-border-color: var(--jp-border-color2, #e0e0e0); + --xr-disabled-color: var(--jp-layout-color3, #bdbdbd); + --xr-background-color: var(--jp-layout-color0, white); + --xr-background-color-row-even: var(--jp-layout-color1, white); + --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee); +} + .xr-wrap { min-width: 300px; max-width: 700px; @@ -11,7 +22,7 @@ padding-top: 6px; padding-bottom: 6px; margin-bottom: 4px; - border-bottom: solid 1px #ddd; + border-bottom: solid 1px var(--xr-border-color); } .xr-header > div, @@ -28,11 +39,7 @@ } .xr-obj-type { - color: #555; -} - -.xr-array-name { - color: #000; + color: var(--xr-font-color2); } .xr-sections { @@ -50,21 +57,21 @@ } .xr-section-item input + label { - color: #ccc; + color: var(--xr-disabled-color); } .xr-section-item input:enabled + label { cursor: pointer; - color: #555; + color: var(--xr-font-color2); } .xr-section-item input:enabled + label:hover { - color: #000; + color: var(--xr-font-color0); } .xr-section-summary { grid-column: 1; - color: #555; + color: var(--xr-font-color2); font-weight: 500; } @@ -74,7 +81,7 @@ } .xr-section-summary-in:disabled + label { - color: #555; + color: var(--xr-font-color2); } .xr-section-summary-in + label:before { @@ -86,7 +93,7 @@ } .xr-section-summary-in:disabled + label:before { - color: #ccc; + color: var(--xr-disabled-color); } .xr-section-summary-in:checked + label:before { @@ -129,7 +136,7 @@ } .xr-preview { - color: #888; + color: var(--xr-font-color3); } .xr-array-preview, @@ -186,7 +193,7 @@ .xr-var-item > div, .xr-var-item label, .xr-var-item > .xr-var-name span { - background-color: #fcfcfc; + background-color: var(--xr-background-color-row-even); margin-bottom: 0; } @@ -197,7 +204,7 @@ .xr-var-list > li:nth-child(odd) > div, .xr-var-list > li:nth-child(odd) > label, .xr-var-list > li:nth-child(odd) > .xr-var-name span { - background-color: #efefef; + background-color: var(--xr-background-color-row-odd); } .xr-var-name { @@ -211,7 +218,7 @@ .xr-var-dtype { grid-column: 3; text-align: right; - color: #555; + color: var(--xr-font-color2); } .xr-var-preview { @@ -241,7 +248,7 @@ .xr-var-attrs, .xr-var-data { display: none; - background-color: #fff !important; + background-color: var(--xr-background-color) !important; padding-bottom: 5px !important; } @@ -288,7 +295,7 @@ dl.xr-attrs { .xr-attrs dt:hover span { display: inline-block; - background: #fff; + background: var(--xr-background-color); padding-right: 10px; } From 74ca69a3b7b53d2b8cc8c88ddaf0fe8c6c7bbf6c Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 29 Oct 2019 10:49:16 -0400 Subject: [PATCH 09/27] Remove deprecated 
behavior from dataset.drop docstring (#3451) * remove deprecated behavior from dataset.drop docstring * remove a few warnings too * actually keep original form but test for warnings --- xarray/core/dataset.py | 1 - xarray/tests/test_dataset.py | 18 ++++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2a0464515c6..3ca9dd14fae 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3542,7 +3542,6 @@ def drop( # noqa: F811 ---------- labels : hashable or iterable of hashables Name(s) of variables or index labels to drop. - If dim is not None, labels can be any array-like. dim : None or hashable, optional Dimension along which to drop index labels. By default (if ``dim is None``), drops variables rather than index labels. diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 647eb733adb..dfb3da89569 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2117,25 +2117,31 @@ def test_drop_variables(self): def test_drop_index_labels(self): data = Dataset({"A": (["x", "y"], np.random.randn(2, 3)), "x": ["a", "b"]}) - actual = data.drop(["a"], "x") + with pytest.warns(DeprecationWarning): + actual = data.drop(["a"], "x") expected = data.isel(x=[1]) assert_identical(expected, actual) - actual = data.drop(["a", "b"], "x") + with pytest.warns(DeprecationWarning): + actual = data.drop(["a", "b"], "x") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) with pytest.raises(KeyError): # not contained in axis - data.drop(["c"], dim="x") + with pytest.warns(DeprecationWarning): + data.drop(["c"], dim="x") - actual = data.drop(["c"], dim="x", errors="ignore") + with pytest.warns(DeprecationWarning): + actual = data.drop(["c"], dim="x", errors="ignore") assert_identical(data, actual) with pytest.raises(ValueError): - data.drop(["c"], dim="x", errors="wrong_value") + with pytest.warns(DeprecationWarning): + data.drop(["c"], dim="x", errors="wrong_value") - actual = data.drop(["a", "b", "c"], "x", errors="ignore") + with pytest.warns(DeprecationWarning): + actual = data.drop(["a", "b", "c"], "x", errors="ignore") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) From cb5eef1ad17e36626e2556bc2cfaf5c74aedf807 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 29 Oct 2019 11:30:54 -0400 Subject: [PATCH 10/27] Remove outdated code related to compatibility with netcdftime (#3450) * Remove code leftover from the netcdftime -> cftime transition * Add a what's new note * black formatting * Add more detail to what's new note * More minor edits to what's new note --- doc/whats-new.rst | 5 + xarray/coding/times.py | 43 +------ xarray/tests/__init__.py | 4 - xarray/tests/test_accessor_dt.py | 30 ++--- xarray/tests/test_cftimeindex.py | 10 +- xarray/tests/test_coding_times.py | 184 ++++++++++++------------------ xarray/tests/test_conventions.py | 10 +- xarray/tests/test_utils.py | 13 +-- 8 files changed, 100 insertions(+), 199 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 73618782460..82355a6bda4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,11 @@ Breaking changes ~~~~~~~~~~~~~~~~ - Minimum cftime version is now 1.0.3. By `Deepak Cherian `_. 
+- All leftover support for dates from non-standard calendars through netcdftime, the + module included in versions of netCDF4 prior to 1.4 that eventually became the + cftime package, has been removed in favor of relying solely on the standalone + cftime package (:pull:`3450`). By `Spencer Clark + `_. New Features ~~~~~~~~~~~~ diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 0174088064b..965ddd8f043 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -39,34 +39,6 @@ ) -def _import_cftime(): - """ - helper function handle the transition to netcdftime/cftime - as a stand-alone package - """ - try: - import cftime - except ImportError: - # in netCDF4 the num2date/date2num function are top-level api - try: - import netCDF4 as cftime - except ImportError: - raise ImportError("Failed to import cftime") - return cftime - - -def _require_standalone_cftime(): - """Raises an ImportError if the standalone cftime is not found""" - try: - import cftime # noqa: F401 - except ImportError: - raise ImportError( - "Decoding times with non-standard calendars " - "or outside the pandas.Timestamp-valid range " - "requires the standalone cftime package." - ) - - def _netcdf_to_numpy_timeunit(units): units = units.lower() if not units.endswith("s"): @@ -119,16 +91,11 @@ def _decode_cf_datetime_dtype(data, units, calendar, use_cftime): def _decode_datetime_with_cftime(num_dates, units, calendar): - cftime = _import_cftime() + import cftime - if cftime.__name__ == "cftime": - return np.asarray( - cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True) - ) - else: - # Must be using num2date from an old version of netCDF4 which - # does not have the only_use_cftime_datetimes option. - return np.asarray(cftime.num2date(num_dates, units, calendar)) + return np.asarray( + cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True) + ) def _decode_datetime_with_pandas(flat_num_dates, units, calendar): @@ -354,7 +321,7 @@ def _encode_datetime_with_cftime(dates, units, calendar): This method is more flexible than xarray's parsing using datetime64[ns] arrays but also slower because it loops over each element. """ - cftime = _import_cftime() + import cftime if np.issubdtype(dates.dtype, np.datetime64): # numpy's broken datetime conversion only works for us precision diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index f85a33f7a3c..6592360cdf2 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -78,10 +78,6 @@ def LooseVersion(vstring): requires_scipy_or_netCDF4 = pytest.mark.skipif( not has_scipy_or_netCDF4, reason="requires scipy or netCDF4" ) -has_cftime_or_netCDF4 = has_cftime or has_netCDF4 -requires_cftime_or_netCDF4 = pytest.mark.skipif( - not has_cftime_or_netCDF4, reason="requires cftime or netCDF4" -) try: import_seaborn() has_seaborn = True 
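# A short sketch (not from the patch) of the now-required standalone-cftime
# decoding path, mirroring the num2date call used in times.py above; it assumes
# the cftime package is installed.
import cftime
import numpy as np

dates = np.asarray(
    cftime.num2date([0, 1], "days since 2000-01-01", "noleap", only_use_cftime_datetimes=True)
)
print(dates)  # two cftime.DatetimeNoLeap values: 2000-01-01 and 2000-01-02
diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 0058747db71..5fe5b8c3f59 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -7,10 +7,8 @@ from . 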
import ( assert_array_equal, assert_equal, - has_cftime, - has_cftime_or_netCDF4, - has_dask, raises_regex, + requires_cftime, requires_dask, ) @@ -199,7 +197,7 @@ def times_3d(times): ) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] ) @@ -217,7 +215,7 @@ def test_field_access(data, field): assert_equal(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_cftime_strftime_access(data): """ compare cftime formatting against datetime formatting """ date_format = "%Y%m%d%H" @@ -232,8 +230,8 @@ def test_cftime_strftime_access(data): assert_equal(result, expected) -@pytest.mark.skipif(not has_dask, reason="dask not installed") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime +@requires_dask @pytest.mark.parametrize( "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] ) @@ -254,8 +252,8 @@ def test_dask_field_access_1d(data, field): assert_equal(result.compute(), expected) -@pytest.mark.skipif(not has_dask, reason="dask not installed") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime +@requires_dask @pytest.mark.parametrize( "field", ["year", "month", "day", "hour", "dayofyear", "dayofweek"] ) @@ -286,7 +284,7 @@ def cftime_date_type(calendar): return _all_cftime_date_types()[calendar] -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_seasons(cftime_date_type): dates = np.array([cftime_date_type(2000, month, 15) for month in range(1, 13)]) dates = xr.DataArray(dates) @@ -307,15 +305,3 @@ def test_seasons(cftime_date_type): seasons = xr.DataArray(seasons) assert_array_equal(seasons.values, dates.dt.season.values) - - -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime or netCDF4 not installed") -def test_dt_accessor_error_netCDF4(cftime_date_type): - da = xr.DataArray( - [cftime_date_type(1, 1, 1), cftime_date_type(2, 1, 1)], dims=["time"] - ) - if not has_cftime: - with pytest.raises(TypeError): - da.dt.month - else: - da.dt.month diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index e49dc72abdd..a8ee3c97042 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -15,7 +15,7 @@ ) from xarray.tests import assert_array_equal, assert_identical -from . import has_cftime, has_cftime_or_netCDF4, raises_regex, requires_cftime +from . 
import raises_regex, requires_cftime from .test_coding_times import ( _ALL_CALENDARS, _NON_STANDARD_CALENDARS, @@ -653,7 +653,7 @@ def test_indexing_in_dataframe_iloc(df, index): assert result.equals(expected) -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_concat_cftimeindex(date_type): da1 = xr.DataArray( [1.0, 2.0], coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]], dims=["time"] @@ -663,11 +663,7 @@ def test_concat_cftimeindex(date_type): ) da = xr.concat([da1, da2], dim="time") - if has_cftime: - assert isinstance(da.indexes["time"], CFTimeIndex) - else: - assert isinstance(da.indexes["time"], pd.Index) - assert not isinstance(da.indexes["time"], CFTimeIndex) + assert isinstance(da.indexes["time"], CFTimeIndex) @requires_cftime diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 021d76e2b11..d012fb36c35 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -8,7 +8,6 @@ from xarray import DataArray, Dataset, Variable, coding, decode_cf from xarray.coding.times import ( - _import_cftime, cftime_to_nptime, decode_cf_datetime, encode_cf_datetime, @@ -19,15 +18,7 @@ from xarray.core.common import contains_cftime_datetimes from xarray.testing import assert_equal -from . import ( - arm_xfail, - assert_array_equal, - has_cftime, - has_cftime_or_netCDF4, - has_dask, - requires_cftime, - requires_cftime_or_netCDF4, -) +from . import arm_xfail, assert_array_equal, has_cftime, requires_cftime, requires_dask _NON_STANDARD_CALENDARS_SET = { "noleap", @@ -79,10 +70,8 @@ def _all_cftime_date_types(): - try: - import cftime - except ImportError: - import netcdftime as cftime + import cftime + return { "noleap": cftime.DatetimeNoLeap, "365_day": cftime.DatetimeNoLeap, @@ -95,16 +84,14 @@ def _all_cftime_date_types(): } -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize(["num_dates", "units", "calendar"], _CF_DATETIME_TESTS) def test_cf_datetime(num_dates, units, calendar): - cftime = _import_cftime() - if cftime.__name__ == "cftime": - expected = cftime.num2date( - num_dates, units, calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(num_dates, units, calendar) + import cftime + + expected = cftime.num2date( + num_dates, units, calendar, only_use_cftime_datetimes=True + ) min_y = np.ravel(np.atleast_1d(expected))[np.nanargmin(num_dates)].year max_y = np.ravel(np.atleast_1d(expected))[np.nanargmax(num_dates)].year if min_y >= 1678 and max_y < 2262: @@ -138,15 +125,12 @@ def test_cf_datetime(num_dates, units, calendar): assert_array_equal(num_dates, np.around(encoded, 1)) -@requires_cftime_or_netCDF4 +@requires_cftime def test_decode_cf_datetime_overflow(): # checks for # https://github.com/pydata/pandas/issues/14068 # https://github.com/pydata/xarray/issues/975 - try: - from cftime import DatetimeGregorian - except ImportError: - from netcdftime import DatetimeGregorian + from cftime import DatetimeGregorian datetime = DatetimeGregorian units = "days since 2000-01-01 00:00:00" @@ -171,7 +155,7 @@ def test_decode_cf_datetime_non_standard_units(): assert_array_equal(actual, expected) -@requires_cftime_or_netCDF4 +@requires_cftime def test_decode_cf_datetime_non_iso_strings(): # datetime strings that are _almost_ ISO compliant but not quite, # but which cftime.num2date can still parse correctly @@ -190,10 +174,10 @@ def test_decode_cf_datetime_non_iso_strings(): assert (abs_diff 
<= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_decode_standard_calendar_inside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime units = "days since 0001-01-01" times = pd.date_range("2001-04-01-00", end="2001-04-30-23", freq="H") @@ -210,21 +194,18 @@ def test_decode_standard_calendar_inside_timestamp_range(calendar): assert (abs_diff <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_non_standard_calendar_inside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime + units = "days since 0001-01-01" times = pd.date_range("2001-04-01-00", end="2001-04-30-23", freq="H") non_standard_time = cftime.date2num(times.to_pydatetime(), units, calendar=calendar) - if cftime.__name__ == "cftime": - expected = cftime.num2date( - non_standard_time, units, calendar=calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(non_standard_time, units, calendar=calendar) - + expected = cftime.num2date( + non_standard_time, units, calendar=calendar, only_use_cftime_datetimes=True + ) expected_dtype = np.dtype("O") actual = coding.times.decode_cf_datetime( @@ -238,24 +219,19 @@ def test_decode_non_standard_calendar_inside_timestamp_range(calendar): assert (abs_diff <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) def test_decode_dates_outside_timestamp_range(calendar): + import cftime from datetime import datetime - cftime = _import_cftime() - units = "days since 0001-01-01" times = [datetime(1, 4, 1, h) for h in range(1, 5)] time = cftime.date2num(times, units, calendar=calendar) - if cftime.__name__ == "cftime": - expected = cftime.num2date( - time, units, calendar=calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(time, units, calendar=calendar) - + expected = cftime.num2date( + time, units, calendar=calendar, only_use_cftime_datetimes=True + ) expected_date_type = type(expected[0]) with warnings.catch_warnings(): @@ -269,7 +245,7 @@ def test_decode_dates_outside_timestamp_range(calendar): assert (abs_diff <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_decode_standard_calendar_single_element_inside_timestamp_range(calendar): units = "days since 0001-01-01" @@ -280,7 +256,7 @@ def test_decode_standard_calendar_single_element_inside_timestamp_range(calendar assert actual.dtype == np.dtype("M8[ns]") -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_non_standard_calendar_single_element_inside_timestamp_range(calendar): units = "days since 0001-01-01" @@ -291,10 +267,11 @@ def test_decode_non_standard_calendar_single_element_inside_timestamp_range(cale assert actual.dtype == np.dtype("O") -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_single_element_outside_timestamp_range(calendar): - cftime 
= _import_cftime() + import cftime + units = "days since 0001-01-01" for days in [1, 1470376]: for num_time in [days, [days], [[days]]]: @@ -304,20 +281,16 @@ def test_decode_single_element_outside_timestamp_range(calendar): num_time, units, calendar=calendar ) - if cftime.__name__ == "cftime": - expected = cftime.num2date( - days, units, calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(days, units, calendar) - + expected = cftime.num2date( + days, units, calendar, only_use_cftime_datetimes=True + ) assert isinstance(actual.item(), type(expected)) -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) def test_decode_standard_calendar_multidim_time_inside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime units = "days since 0001-01-01" times1 = pd.date_range("2001-04-01", end="2001-04-05", freq="D") @@ -343,10 +316,10 @@ def test_decode_standard_calendar_multidim_time_inside_timestamp_range(calendar) assert (abs_diff2 <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _NON_STANDARD_CALENDARS) def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(calendar): - cftime = _import_cftime() + import cftime units = "days since 0001-01-01" times1 = pd.date_range("2001-04-01", end="2001-04-05", freq="D") @@ -382,13 +355,12 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(calend assert (abs_diff2 <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) def test_decode_multidim_time_outside_timestamp_range(calendar): + import cftime from datetime import datetime - cftime = _import_cftime() - units = "days since 0001-01-01" times1 = [datetime(1, 4, day) for day in range(1, 6)] times2 = [datetime(1, 5, day) for day in range(1, 6)] @@ -398,16 +370,8 @@ def test_decode_multidim_time_outside_timestamp_range(calendar): mdim_time[:, 0] = time1 mdim_time[:, 1] = time2 - if cftime.__name__ == "cftime": - expected1 = cftime.num2date( - time1, units, calendar, only_use_cftime_datetimes=True - ) - expected2 = cftime.num2date( - time2, units, calendar, only_use_cftime_datetimes=True - ) - else: - expected1 = cftime.num2date(time1, units, calendar) - expected2 = cftime.num2date(time2, units, calendar) + expected1 = cftime.num2date(time1, units, calendar, only_use_cftime_datetimes=True) + expected2 = cftime.num2date(time2, units, calendar, only_use_cftime_datetimes=True) with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Unable to decode time axis") @@ -424,46 +388,38 @@ def test_decode_multidim_time_outside_timestamp_range(calendar): assert (abs_diff2 <= np.timedelta64(1, "s")).all() -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", ["360_day", "all_leap", "366_day"]) def test_decode_non_standard_calendar_single_element(calendar): - cftime = _import_cftime() + import cftime + units = "days since 0001-01-01" - try: - dt = cftime.netcdftime.datetime(2001, 2, 29) - except AttributeError: - # Must be using the standalone cftime library - dt = cftime.datetime(2001, 2, 29) + dt = cftime.datetime(2001, 2, 29) num_time = cftime.date2num(dt, units, calendar) actual = 
coding.times.decode_cf_datetime(num_time, units, calendar=calendar) - if cftime.__name__ == "cftime": - expected = np.asarray( - cftime.num2date(num_time, units, calendar, only_use_cftime_datetimes=True) - ) - else: - expected = np.asarray(cftime.num2date(num_time, units, calendar)) + expected = np.asarray( + cftime.num2date(num_time, units, calendar, only_use_cftime_datetimes=True) + ) assert actual.dtype == np.dtype("O") assert expected == actual -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_decode_360_day_calendar(): - cftime = _import_cftime() + import cftime + calendar = "360_day" # ensure leap year doesn't matter for year in [2010, 2011, 2012, 2013, 2014]: units = f"days since {year}-01-01" num_times = np.arange(100) - if cftime.__name__ == "cftime": - expected = cftime.num2date( - num_times, units, calendar, only_use_cftime_datetimes=True - ) - else: - expected = cftime.num2date(num_times, units, calendar) + expected = cftime.num2date( + num_times, units, calendar, only_use_cftime_datetimes=True + ) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") @@ -477,7 +433,7 @@ def test_decode_360_day_calendar(): @arm_xfail -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( ["num_dates", "units", "expected_list"], [ @@ -499,7 +455,7 @@ def test_cf_datetime_nan(num_dates, units, expected_list): assert_array_equal(expected, actual) -@requires_cftime_or_netCDF4 +@requires_cftime def test_decoded_cf_datetime_array_2d(): # regression test for GH1229 variable = Variable( @@ -548,7 +504,7 @@ def test_infer_datetime_units(dates, expected): ] -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "calendar", _NON_STANDARD_CALENDARS + ["gregorian", "proleptic_gregorian"] ) @@ -622,7 +578,7 @@ def test_infer_timedelta_units(deltas, expected): assert expected == coding.times.infer_timedelta_units(deltas) -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( ["date_args", "expected"], [ @@ -649,7 +605,7 @@ def test_decode_cf(calendar): ds[v].attrs["units"] = "days since 2001-01-01" ds[v].attrs["calendar"] = calendar - if not has_cftime_or_netCDF4 and calendar not in _STANDARD_CALENDARS: + if not has_cftime and calendar not in _STANDARD_CALENDARS: with pytest.raises(ValueError): ds = decode_cf(ds) else: @@ -703,7 +659,7 @@ def test_decode_cf_time_bounds(): _update_bounds_attributes(ds.variables) -@requires_cftime_or_netCDF4 +@requires_cftime def test_encode_time_bounds(): time = pd.date_range("2000-01-16", periods=1) @@ -749,7 +705,7 @@ def calendar(request): @pytest.fixture() def times(calendar): - cftime = _import_cftime() + import cftime return cftime.num2date( np.arange(4), @@ -779,24 +735,24 @@ def times_3d(times): ) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_contains_cftime_datetimes_1d(data): assert contains_cftime_datetimes(data.time) -@pytest.mark.skipif(not has_dask, reason="dask not installed") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime +@requires_dask def test_contains_cftime_datetimes_dask_1d(data): assert contains_cftime_datetimes(data.time.chunk()) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_contains_cftime_datetimes_3d(times_3d): assert 
contains_cftime_datetimes(times_3d) -@pytest.mark.skipif(not has_dask, reason="dask not installed") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime +@requires_dask def test_contains_cftime_datetimes_dask_3d(times_3d): assert contains_cftime_datetimes(times_3d.chunk()) @@ -806,13 +762,13 @@ def test_contains_cftime_datetimes_non_cftimes(non_cftime_data): assert not contains_cftime_datetimes(non_cftime_data) -@pytest.mark.skipif(not has_dask, reason="dask not installed") +@requires_dask @pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])]) def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data): assert not contains_cftime_datetimes(non_cftime_data.chunk()) -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("shape", [(24,), (8, 3), (2, 4, 3)]) def test_encode_cf_datetime_overflow(shape): # Test for fix to GH 2272 @@ -837,7 +793,7 @@ def test_encode_cf_datetime_pandas_min(): assert calendar == expected_calendar -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_time_units_with_timezone_roundtrip(calendar): # Regression test for GH 2649 expected_units = "days since 2000-01-01T00:00:00-05:00" diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 42b2a679347..09002e252b4 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -21,7 +21,7 @@ from . import ( assert_array_equal, raises_regex, - requires_cftime_or_netCDF4, + requires_cftime, requires_dask, requires_netCDF4, ) @@ -81,7 +81,7 @@ def test_decode_cf_with_conflicting_fill_missing_value(): assert_identical(actual, expected) -@requires_cftime_or_netCDF4 +@requires_cftime class TestEncodeCFVariable: def test_incompatible_attributes(self): invalid_vars = [ @@ -144,7 +144,7 @@ def test_string_object_warning(self): assert_identical(original, encoded) -@requires_cftime_or_netCDF4 +@requires_cftime class TestDecodeCF: def test_dataset(self): original = Dataset( @@ -226,7 +226,7 @@ def test_invalid_time_units_raises_eagerly(self): with raises_regex(ValueError, "unable to decode time"): decode_cf(ds) - @requires_cftime_or_netCDF4 + @requires_cftime def test_dataset_repr_with_netcdf4_datetimes(self): # regression test for #347 attrs = {"units": "days since 0001-01-01", "calendar": "noleap"} @@ -239,7 +239,7 @@ def test_dataset_repr_with_netcdf4_datetimes(self): ds = decode_cf(Dataset({"time": ("time", [0, 1], attrs)})) assert "(time) datetime64[ns]" in repr(ds) - @requires_cftime_or_netCDF4 + @requires_cftime def test_decode_cf_datetime_transition_to_invalid(self): # manually create dataset with not-decoded date from datetime import datetime diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 5bb9deaf240..af87b94393d 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -9,7 +9,7 @@ from xarray.core import duck_array_ops, utils from xarray.core.utils import either_dict_or_kwargs -from . import assert_array_equal, has_cftime, has_cftime_or_netCDF4, requires_dask +from . 
import assert_array_equal, requires_cftime, requires_dask from .test_coding_times import _all_cftime_date_types @@ -39,17 +39,12 @@ def test_safe_cast_to_index(): assert expected.dtype == actual.dtype -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_safe_cast_to_index_cftimeindex(): date_types = _all_cftime_date_types() for date_type in date_types.values(): dates = [date_type(1, 1, day) for day in range(1, 20)] - - if has_cftime: - expected = CFTimeIndex(dates) - else: - expected = pd.Index(dates) - + expected = CFTimeIndex(dates) actual = utils.safe_cast_to_index(np.array(dates)) assert_array_equal(expected, actual) assert expected.dtype == actual.dtype @@ -57,7 +52,7 @@ # Test that datetime.datetime objects are never used in a CFTimeIndex -@pytest.mark.skipif(not has_cftime_or_netCDF4, reason="cftime not installed") +@requires_cftime def test_safe_cast_to_index_datetime_datetime(): dates = [datetime(1, 1, day) for day in range(1, 20)] From 278d2e6af6abd933dd1d43ac3ae70bc306412ae1 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 29 Oct 2019 11:34:33 -0400 Subject: [PATCH 11/27] upgrade black version to 19.10b0 (#3456) --- xarray/backends/api.py | 2 +- xarray/core/alignment.py | 2 +- xarray/core/combine.py | 2 +- xarray/core/computation.py | 8 ++++---- xarray/core/concat.py | 4 ++-- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 2 +- xarray/core/groupby.py | 6 +++--- xarray/core/indexing.py | 4 ++-- xarray/core/merge.py | 4 ++-- xarray/core/variable.py | 4 ++-- xarray/plot/plot.py | 8 ++++---- xarray/tests/test_cftime_offsets.py | 2 +- xarray/tests/test_dataarray.py | 8 ++++---- xarray/tests/test_dataset.py | 6 +++--- 15 files changed, 32 insertions(+), 32 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 199516116b0..d23594fc675 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -677,7 +677,7 @@ def open_dataarray( "then select the variable you want." 
) else: - data_array, = dataset.data_vars.values() + (data_array,) = dataset.data_vars.values() data_array._file_obj = dataset._file_obj diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 1a33cb955c3..41ff5a3b32d 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -252,7 +252,7 @@ def align( if not indexes and len(objects) == 1: # fast path for the trivial case - obj, = objects + (obj,) = objects return (obj.copy(deep=copy),) all_indexes = defaultdict(list) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 19c327ec597..3308dcef285 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -954,7 +954,7 @@ def _auto_concat( "supply the ``concat_dim`` argument " "explicitly" ) - dim, = concat_dims + (dim,) = concat_dims return concat( datasets, dim=dim, diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 1393d76f283..2ab2ab78416 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -145,7 +145,7 @@ def result_name(objects: list) -> Any: names = {getattr(obj, "name", _DEFAULT_NAME) for obj in objects} names.discard(_DEFAULT_NAME) if len(names) == 1: - name, = names + (name,) = names else: name = None return name @@ -187,7 +187,7 @@ def build_output_coords( if len(coords_list) == 1 and not exclude_dims: # we can skip the expensive merge - unpacked_coords, = coords_list + (unpacked_coords,) = coords_list merged_vars = dict(unpacked_coords.variables) else: # TODO: save these merged indexes, instead of re-computing them later @@ -237,7 +237,7 @@ def apply_dataarray_vfunc( for variable, coords in zip(result_var, result_coords) ) else: - coords, = result_coords + (coords,) = result_coords out = DataArray(result_var, coords, name=name, fastpath=True) return out @@ -384,7 +384,7 @@ def apply_dataset_vfunc( if signature.num_outputs > 1: out = tuple(_fast_dataset(*args) for args in zip(result_vars, list_of_coords)) else: - coord_vars, = list_of_coords + (coord_vars,) = list_of_coords out = _fast_dataset(result_vars, coord_vars) if keep_attrs and isinstance(first_obj, Dataset): diff --git a/xarray/core/concat.py b/xarray/core/concat.py index bcab136de8d..0d19990bdd0 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -148,10 +148,10 @@ def _calc_concat_dim_coord(dim): dim = dim_name elif not isinstance(dim, DataArray): coord = as_variable(dim).to_index_variable() - dim, = coord.dims + (dim,) = coord.dims else: coord = dim - dim, = coord.dims + (dim,) = coord.dims return dim, coord diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 33dcad13204..0c220acaee0 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -616,7 +616,7 @@ def _level_coords(self) -> Dict[Hashable, Hashable]: if var.ndim == 1 and isinstance(var, IndexVariable): level_names = var.level_names if level_names is not None: - dim, = var.dims + (dim,) = var.dims level_coords.update({lname: dim for lname in level_names}) return level_coords diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3ca9dd14fae..05d9772cb7a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4066,7 +4066,7 @@ def reduce( if len(reduce_dims) == 1: # unpack dimensions for the benefit of functions # like np.argmin which can't handle tuple arguments - reduce_dims, = reduce_dims + (reduce_dims,) = reduce_dims elif len(reduce_dims) == var.ndim: # prefer to aggregate over axis=None rather than # axis=(0, 1) if they will be equivalent, because diff --git a/xarray/core/groupby.py 
b/xarray/core/groupby.py index c3f712b31ac..353566eb345 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -321,7 +321,7 @@ def __init__( raise ValueError("`group` must have a name") group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) - group_dim, = group.dims + (group_dim,) = group.dims expected_size = obj.sizes[group_dim] if group.size != expected_size: @@ -470,7 +470,7 @@ def _infer_concat_args(self, applied_example): else: coord = self._unique_coord positions = None - dim, = coord.dims + (dim,) = coord.dims if isinstance(coord, _DummyGroup): coord = None return coord, dim, positions @@ -644,7 +644,7 @@ def _concat_shortcut(self, applied, dim, positions=None): def _restore_dim_order(self, stacked): def lookup_order(dimension): if dimension == self._group.name: - dimension, = self._group.dims + (dimension,) = self._group.dims if dimension in self._obj.dims: axis = self._obj.get_axis_num(dimension) else: diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index b9809a8d2b9..f48c9e72af1 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -212,7 +212,7 @@ def get_dim_indexers(data_obj, indexers): level_indexers = defaultdict(dict) dim_indexers = {} for key, label in indexers.items(): - dim, = data_obj[key].dims + (dim,) = data_obj[key].dims if key != dim: # assume here multi-index level indexer level_indexers[dim][key] = label @@ -1368,7 +1368,7 @@ def __getitem__( if isinstance(key, tuple) and len(key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) - key, = key + (key,) = key if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional return NumpyIndexingAdapter(self.array.values)[indexer] diff --git a/xarray/core/merge.py b/xarray/core/merge.py index db5ef9531df..389ceb155f7 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -277,7 +277,7 @@ def append_all(variables, indexes): def collect_from_coordinates( - list_of_coords: "List[Coordinates]" + list_of_coords: "List[Coordinates]", ) -> Dict[Hashable, List[MergeElement]]: """Collect variables and indexes to be merged from Coordinate objects.""" grouped: Dict[Hashable, List[Tuple[Variable, pd.Index]]] = {} @@ -320,7 +320,7 @@ def merge_coordinates_without_align( def determine_coords( - list_of_mappings: Iterable["DatasetLike"] + list_of_mappings: Iterable["DatasetLike"], ) -> Tuple[Set[Hashable], Set[Hashable]]: """Given a list of dicts with xarray object values, identify coordinates. diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 7d03fd58d39..b7abdc7c462 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1526,7 +1526,7 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): along the given dimension. """ if not isinstance(dim, str): - dim, = dim.dims + (dim,) = dim.dims # can't do this lazily: we need to loop through variables at least # twice @@ -1996,7 +1996,7 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): arrays, if possible. 
""" if not isinstance(dim, str): - dim, = dim.dims + (dim,) = dim.dims variables = list(variables) first_var = variables[0] diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index a288f195e32..ca68f617144 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -83,8 +83,8 @@ def _infer_line_data(darray, x, y, hue): ) else: - xdim, = darray[xname].dims - huedim, = darray[huename].dims + (xdim,) = darray[xname].dims + (huedim,) = darray[huename].dims yplt = darray.transpose(xdim, huedim) else: @@ -102,8 +102,8 @@ def _infer_line_data(darray, x, y, hue): ) else: - ydim, = darray[yname].dims - huedim, = darray[huename].dims + (ydim,) = darray[yname].dims + (huedim,) = darray[huename].dims xplt = darray.transpose(ydim, huedim) huelabel = label_from_attrs(darray[huename]) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 142769dbbe7..343e059f53c 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1187,5 +1187,5 @@ def test_dayofyear_after_cftime_range(freq): def test_cftime_range_standard_calendar_refers_to_gregorian(): from cftime import DatetimeGregorian - result, = cftime_range("2000", periods=1) + (result,) = cftime_range("2000", periods=1) assert isinstance(result, DatetimeGregorian) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ad474d533be..4b3ffdc021a 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3125,11 +3125,11 @@ def test_align_copy(self): # Trivial align - 1 element x = DataArray([1, 2, 3], coords=[("a", [1, 2, 3])]) - x2, = align(x, copy=False) + (x2,) = align(x, copy=False) assert_identical(x, x2) assert source_ndarray(x2.data) is source_ndarray(x.data) - x2, = align(x, copy=True) + (x2,) = align(x, copy=True) assert_identical(x, x2) assert source_ndarray(x2.data) is not source_ndarray(x.data) @@ -3214,7 +3214,7 @@ def test_align_indexes(self): assert_identical(expected_x2, x2) assert_identical(expected_y2, y2) - x2, = align(x, join="outer", indexes={"a": [-2, 7, 10, -1]}) + (x2,) = align(x, join="outer", indexes={"a": [-2, 7, 10, -1]}) expected_x2 = DataArray([3, np.nan, 2, 1], coords=[("a", [-2, 7, 10, -1])]) assert_identical(expected_x2, x2) @@ -3293,7 +3293,7 @@ def test_broadcast_arrays_nocopy(self): assert source_ndarray(x2.data) is source_ndarray(x.data) # single-element broadcast (trivial case) - x2, = broadcast(x) + (x2,) = broadcast(x) assert_identical(x, x2) assert source_ndarray(x2.data) is source_ndarray(x.data) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index dfb3da89569..eab6040e17e 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1945,7 +1945,7 @@ def test_align_nocopy(self): def test_align_indexes(self): x = Dataset({"foo": DataArray([1, 2, 3], dims="x", coords=[("x", [1, 2, 3])])}) - x2, = align(x, indexes={"x": [2, 3, 1]}) + (x2,) = align(x, indexes={"x": [2, 3, 1]}) expected_x2 = Dataset( {"foo": DataArray([2, 3, 1], dims="x", coords={"x": [2, 3, 1]})} ) @@ -1973,7 +1973,7 @@ def test_broadcast(self): }, {"c": ("x", [4])}, ) - actual, = broadcast(ds) + (actual,) = broadcast(ds) assert_identical(expected, actual) ds_x = Dataset({"foo": ("x", [1])}) @@ -1995,7 +1995,7 @@ def test_broadcast_nocopy(self): x = Dataset({"foo": (("x", "y"), [[1, 1]])}) y = Dataset({"bar": ("y", [2, 3])}) - actual_x, = broadcast(x) + (actual_x,) = broadcast(x) assert_identical(x, actual_x) assert source_ndarray(actual_x["foo"].data) is 
source_ndarray(x["foo"].data) From 80e4e8973b968c9856052c93c0dc1e3162682f8e Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 29 Oct 2019 11:37:48 -0400 Subject: [PATCH 12/27] Merge stable into master (#3457) * Typo correction in docs (#3387) * Update terminology.rst (#3455) Fixed broken link --- doc/data-structures.rst | 4 ++-- doc/terminology.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/data-structures.rst b/doc/data-structures.rst index f7b34036a03..d5567f4863e 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -411,7 +411,7 @@ Any variables using that dimension are dropped: As an alternate to dictionary-like modifications, you can use :py:meth:`~xarray.Dataset.assign` and :py:meth:`~xarray.Dataset.assign_coords`. -These methods return a new dataset with additional (or replaced) or values: +These methods return a new dataset with additional (or replaced) values: .. ipython:: python @@ -420,7 +420,7 @@ These methods return a new dataset with additional (or replaced) or values: There is also the :py:meth:`~xarray.Dataset.pipe` method that allows you to use a method call with an external function (e.g., ``ds.pipe(func)``) instead of simply calling it (e.g., ``func(ds)``). This allows you to write pipelines for -transforming you data (using "method chaining") instead of writing hard to +transforming your data (using "method chaining") instead of writing hard to follow nested function calls: .. ipython:: python diff --git a/doc/terminology.rst b/doc/terminology.rst index 4ee56190d5f..d1265e4da9d 100644 --- a/doc/terminology.rst +++ b/doc/terminology.rst @@ -15,7 +15,7 @@ Terminology ---- -**Variable:** A `NetCDF-like variable `_ consisting of dimensions, data, and attributes which describe a single array. The main functional difference between variables and numpy arrays is that numerical operations on variables implement array broadcasting by dimension name. Each ``DataArray`` has an underlying variable that can be accessed via ``arr.variable``. However, a variable is not fully described outside of either a ``Dataset`` or a ``DataArray``. +**Variable:** A `NetCDF-like variable `_ consisting of dimensions, data, and attributes which describe a single array. The main functional difference between variables and numpy arrays is that numerical operations on variables implement array broadcasting by dimension name. Each ``DataArray`` has an underlying variable that can be accessed via ``arr.variable``. However, a variable is not fully described outside of either a ``Dataset`` or a ``DataArray``. .. note:: @@ -39,4 +39,4 @@ Terminology ---- -**Index:** An *index* is a data structure optimized for efficient selecting and slicing of an associated array. Xarray creates indexes for dimension coordinates so that operations along dimensions are fast, while non-dimension coordinates are not indexed. Under the hood, indexes are implemented as :py:class:`pandas.Index` objects. The index associated with dimension name ``x`` can be retrieved by ``arr.indexes[x]``. By construction, ``len(arr.dims) == len(arr.indexes)`` \ No newline at end of file +**Index:** An *index* is a data structure optimized for efficient selecting and slicing of an associated array. Xarray creates indexes for dimension coordinates so that operations along dimensions are fast, while non-dimension coordinates are not indexed. Under the hood, indexes are implemented as :py:class:`pandas.Index` objects. 
The index associated with dimension name ``x`` can be retrieved by ``arr.indexes[x]``. By construction, ``len(arr.dims) == len(arr.indexes)`` From 4d5237ba2d56c316cbc12b25572164afdbaef541 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 29 Oct 2019 20:12:50 +0100 Subject: [PATCH 13/27] enable xr.ALL_DIMS in xr.dot (#3424) * enable xr.ALL_DIMS in xr.dot * trailing whitespace * move whats new to other ellipsis work * xr.ALL_DIMS -> Ellipsis --- doc/whats-new.rst | 3 +++ xarray/core/computation.py | 20 +++++++++++++++----- xarray/core/dataarray.py | 6 +++--- xarray/tests/test_computation.py | 17 +++++++++++++++++ xarray/tests/test_dataarray.py | 10 ++++++++++ 5 files changed, 48 insertions(+), 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 82355a6bda4..6bcf4b61436 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,9 @@ New Features to reduce over all dimensions. While we have no plans to remove `xr.ALL_DIMS`, we suggest using `...`. By `Maximilian Roos `_ +- :py:func:`~xarray.dot`, and :py:func:`~xarray.DataArray.dot` now support the + `dims=...` option to sum over the union of dimensions of all input arrays + (:issue:`3423`) by `Mathias Hauser `_. - Added integration tests against `pint `_. (:pull:`3238`) by `Justus Magin `_. diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 2ab2ab78416..2c87f378762 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1055,9 +1055,9 @@ def dot(*arrays, dims=None, **kwargs): ---------- arrays: DataArray (or Variable) objects Arrays to compute. - dims: str or tuple of strings, optional - Which dimensions to sum over. - If not speciified, then all the common dimensions are summed over. + dims: '...', str or tuple of strings, optional + Which dimensions to sum over. Ellipsis ('...') sums over all dimensions. + If not specified, then all the common dimensions are summed over. **kwargs: dict Additional keyword arguments passed to numpy.einsum or dask.array.einsum @@ -1070,7 +1070,7 @@ def dot(*arrays, dims=None, **kwargs): -------- >>> import numpy as np - >>> import xarray as xp + >>> import xarray as xr >>> da_a = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=['a', 'b']) >>> da_b = xr.DataArray(np.arange(3 * 2 * 2).reshape(3, 2, 2), ... dims=['a', 'b', 'c']) @@ -1117,6 +1117,14 @@ def dot(*arrays, dims=None, **kwargs): [273, 446, 619]]) Dimensions without coordinates: a, d + >>> xr.dot(da_a, da_b) + + array([110, 125]) + Dimensions without coordinates: c + + >>> xr.dot(da_a, da_b, dims=...) + + array(235) """ from .dataarray import DataArray from .variable import Variable @@ -1141,7 +1149,9 @@ def dot(*arrays, dims=None, **kwargs): einsum_axes = "abcdefghijklmnopqrstuvwxyz" dim_map = {d: einsum_axes[i] for i, d in enumerate(all_dims)} - if dims is None: + if dims is ...: + dims = all_dims + elif dims is None: # find dimensions that occur more than one times dim_counts = Counter() for arr in arrays: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 0c220acaee0..62890f9cefa 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2742,9 +2742,9 @@ def dot( ---------- other : DataArray The other array with which the dot product is performed. - dims: hashable or sequence of hashables, optional - Along which dimensions to be summed over. Default all the common - dimensions are summed over. + dims: '...', hashable or sequence of hashables, optional + Which dimensions to sum over. Ellipsis ('...') sums over all dimensions. 
+ If not specified, then all the common dimensions are summed over. Returns ------- diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 383427b479b..1f2634cc9b0 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -998,6 +998,23 @@ def test_dot(use_dask): assert actual.dims == ("b",) assert (actual.data == np.zeros(actual.shape)).all() + # Ellipsis (...) sums over all dimensions + actual = xr.dot(da_a, da_b, dims=...) + assert actual.dims == () + assert (actual.data == np.einsum("ij,ijk->", a, b)).all() + + actual = xr.dot(da_a, da_b, da_c, dims=...) + assert actual.dims == () + assert (actual.data == np.einsum("ij,ijk,kl-> ", a, b, c)).all() + + actual = xr.dot(da_a, dims=...) + assert actual.dims == () + assert (actual.data == np.einsum("ij-> ", a)).all() + + actual = xr.dot(da_a.sel(a=[]), da_a.sel(a=[]), dims=...) + assert actual.dims == () + assert (actual.data == np.zeros(actual.shape)).all() + # Invalid cases if not use_dask: with pytest.raises(TypeError): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 4b3ffdc021a..5114d13b0dc 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3925,6 +3925,16 @@ def test_dot(self): expected = DataArray(expected_vals, coords=[x, j], dims=["x", "j"]) assert_equal(expected, actual) + # Ellipsis: all dims are shared + actual = da.dot(da, dims=...) + expected = da.dot(da) + assert_equal(expected, actual) + + # Ellipsis: not all dims are shared + actual = da.dot(dm, dims=...) + expected = da.dot(dm, dims=("j", "x", "y", "z")) + assert_equal(expected, actual) + with pytest.raises(NotImplementedError): da.dot(dm.to_dataset(name="dm")) with pytest.raises(TypeError): From 11049f568e09c9f0c56c9fb453d9ae9089f5fa5b Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Tue, 29 Oct 2019 17:36:35 -0400 Subject: [PATCH 14/27] Cleanup whatsnew (#3462) --- doc/whats-new.rst | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6bcf4b61436..3ab8618a85a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,22 +37,11 @@ New Features - Changed `xr.ALL_DIMS` to equal python's `Ellipsis` (`...`), and changed internal usages to use `...` directly. As before, you can use this to instruct a `groupby` operation to reduce over all dimensions. While we have no plans to remove `xr.ALL_DIMS`, we suggest - using `...`. + using `...`. (:pull:`3418`) By `Maximilian Roos `_ - :py:func:`~xarray.dot`, and :py:func:`~xarray.DataArray.dot` now support the `dims=...` option to sum over the union of dimensions of all input arrays (:issue:`3423`) by `Mathias Hauser `_. -- Added integration tests against `pint `_. - (:pull:`3238`) by `Justus Magin `_. - - .. note:: - - At the moment of writing, these tests *as well as the ability to use pint in general* - require `a highly experimental version of pint - `_ (install with - ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head)``. - Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken. - - Added new :py:meth:`Dataset._repr_html_` and :py:meth:`DataArray._repr_html_` to improve representation of objects in jupyter. By default this feature is turned off for now. Enable it with :py:meth:`xarray.set_options(display_style="html")`. 
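As a quick orientation for readers of the changelog hunk above, here is a minimal sketch of the two features it describes; the array contents and variable names are illustrative only:

import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=["a", "b"])

# dims=... contracts over the union of all input dimensions, so this
# is equivalent to (da * da).sum(); by default only dimensions shared
# by all inputs are summed over
total = xr.dot(da, da, dims=...)

# opt in to the new notebook repr; the plain-text repr stays the default
xr.set_options(display_style="html")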
@@ -84,6 +73,17 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Added integration tests against `pint `_. + (:pull:`3238`) by `Justus Magin `_. + + .. note:: + + At the moment of writing, these tests *as well as the ability to use pint in general* + require `a highly experimental version of pint + `_ (install with + ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head)``. + Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken. + - Use Python 3.6 idioms throughout the codebase. (:pull:3419) By `Maximilian Roos `_ From 092d300db7576de2aa96316de42ee6bf293d9855 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Wed, 30 Oct 2019 02:08:13 +0100 Subject: [PATCH 15/27] unpin cftime (#3463) * unpin cftime * Update ci/requirements/py36-min-all-deps.yml Co-Authored-By: Spencer Clark * do not pin to patch version * add note on cftime=1.0.4 --- ci/requirements/py36-min-all-deps.yml | 2 +- ci/requirements/py36.yml | 2 +- ci/requirements/py37-windows.yml | 2 +- ci/requirements/py37.yml | 2 +- doc/whats-new.rst | 5 +++++ 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index c99ae39e5d9..3f10a158f91 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -13,7 +13,7 @@ dependencies: - cartopy=0.17 - cdms2=3.1 - cfgrib=0.9 - - cftime=1.0.3 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken + - cftime=1.0 - coveralls - dask=1.2 - distributed=1.27 diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml index 6e27cea2ffe..f9847ef6da5 100644 --- a/ci/requirements/py36.yml +++ b/ci/requirements/py36.yml @@ -9,7 +9,7 @@ dependencies: - cartopy - cdms2 - cfgrib - - cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken + - cftime - coveralls - dask - distributed diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml index 7027fc11ab7..111cd96c30c 100644 --- a/ci/requirements/py37-windows.yml +++ b/ci/requirements/py37-windows.yml @@ -9,7 +9,7 @@ dependencies: - cartopy # - cdms2 # Not available on Windows # - cfgrib # Causes Python interpreter crash on Windows - - cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken + - cftime - coveralls - dask - distributed diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml index a4c974c0176..d816019dd65 100644 --- a/ci/requirements/py37.yml +++ b/ci/requirements/py37.yml @@ -9,7 +9,7 @@ dependencies: - cartopy - cdms2 - cfgrib - - cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken + - cftime - coveralls - dask - distributed diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3ab8618a85a..443be29ac23 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,11 @@ Breaking changes ~~~~~~~~~~~~~~~~ - Minimum cftime version is now 1.0.3. By `Deepak Cherian `_. + + .. note:: + + cftime version 1.0.4 is broken (`cftime/126 `_), use version 1.0.4.2 instead. 
+ - All leftover support for dates from non-standard calendars through netcdftime, the module included in versions of netCDF4 prior to 1.4 that eventually became the cftime package, has been removed in favor of relying solely on the standalone From f115ad155067727882b683ca6fa7c231621dc965 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Wed, 30 Oct 2019 14:28:51 +0000 Subject: [PATCH 16/27] Hypothesis tests for roundtrip to & from pandas (#3285) * Move hypothesis deadline configuration to conftest.py * Add simple roundtrip test for xarray-pandas-xarray * Test roundtrip pd.Series->DataArray->Series * Test roundtrip DataFrame->DataArray->DataFrame * Test roundtrip Dataset->Dataframe->Dataset * Relax to allow 0 entries in each dataset var * Relax to allow empty string names * Add print_blob to config * Extra half-roundtrip from pandas series to xarray * Extra half roundtrip from pandas dataframe to Xarray * Redesign strategy for generating datasets with 1D variables Following suggestions from @Zac-HD * Make pep8 happy * Autoformat test file * Skip hypothesis tests if hypothesis not available * Don't require hypothesis for conftest file * Mark failing test as xfail --- properties/conftest.py | 8 +++ properties/test_encode_decode.py | 7 +-- properties/test_pandas_roundtrip.py | 97 +++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 6 deletions(-) create mode 100644 properties/conftest.py create mode 100644 properties/test_pandas_roundtrip.py diff --git a/properties/conftest.py b/properties/conftest.py new file mode 100644 index 00000000000..0a66d92ebc6 --- /dev/null +++ b/properties/conftest.py @@ -0,0 +1,8 @@ +try: + from hypothesis import settings +except ImportError: + pass +else: + # Run for a while - arrays are a bigger search space than usual + settings.register_profile("ci", deadline=None, print_blob=True) + settings.load_profile("ci") diff --git a/properties/test_encode_decode.py b/properties/test_encode_decode.py index 011e7a922d1..221083e16a1 100644 --- a/properties/test_encode_decode.py +++ b/properties/test_encode_decode.py @@ -10,15 +10,10 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import given, settings +from hypothesis import given import xarray as xr -# Run for a while - arrays are a bigger search space than usual -settings.register_profile("ci", deadline=None) -settings.load_profile("ci") - - an_array = npst.arrays( dtype=st.one_of( npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes() diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py new file mode 100644 index 00000000000..a8005d319d6 --- /dev/null +++ b/properties/test_pandas_roundtrip.py @@ -0,0 +1,97 @@ +""" +Property-based tests for roundtripping between xarray and pandas objects. 
+""" +import pytest + +pytest.importorskip("hypothesis") + +from functools import partial +import hypothesis.extra.numpy as npst +import hypothesis.extra.pandas as pdst +import hypothesis.strategies as st +from hypothesis import given + +import numpy as np +import pandas as pd +import xarray as xr + +numeric_dtypes = st.one_of( + npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes() +) + +numeric_series = numeric_dtypes.flatmap(lambda dt: pdst.series(dtype=dt)) + +an_array = npst.arrays( + dtype=numeric_dtypes, + shape=npst.array_shapes(max_dims=2), # can only convert 1D/2D to pandas +) + + +@st.composite +def datasets_1d_vars(draw): + """Generate datasets with only 1D variables + + Suitable for converting to pandas dataframes. + """ + # Generate an index for the dataset + idx = draw(pdst.indexes(dtype="u8", min_size=0, max_size=100)) + + # Generate 1-3 variables, 1D with the same length as the index + vars_strategy = st.dictionaries( + keys=st.text(), + values=npst.arrays(dtype=numeric_dtypes, shape=len(idx)).map( + partial(xr.Variable, ("rows",)) + ), + min_size=1, + max_size=3, + ) + return xr.Dataset(draw(vars_strategy), coords={"rows": idx}) + + +@given(st.data(), an_array) +def test_roundtrip_dataarray(data, arr): + names = data.draw( + st.lists(st.text(), min_size=arr.ndim, max_size=arr.ndim, unique=True).map( + tuple + ) + ) + coords = {name: np.arange(n) for (name, n) in zip(names, arr.shape)} + original = xr.DataArray(arr, dims=names, coords=coords) + roundtripped = xr.DataArray(original.to_pandas()) + xr.testing.assert_identical(original, roundtripped) + + +@given(datasets_1d_vars()) +def test_roundtrip_dataset(dataset): + df = dataset.to_dataframe() + assert isinstance(df, pd.DataFrame) + roundtripped = xr.Dataset(df) + xr.testing.assert_identical(dataset, roundtripped) + + +@given(numeric_series, st.text()) +def test_roundtrip_pandas_series(ser, ix_name): + # Need to name the index, otherwise Xarray calls it 'dim_0'. + ser.index.name = ix_name + arr = xr.DataArray(ser) + roundtripped = arr.to_pandas() + pd.testing.assert_series_equal(ser, roundtripped) + xr.testing.assert_identical(arr, roundtripped.to_xarray()) + + +# Dataframes with columns of all the same dtype - for roundtrip to DataArray +numeric_homogeneous_dataframe = numeric_dtypes.flatmap( + lambda dt: pdst.data_frames(columns=pdst.columns(["a", "b", "c"], dtype=dt)) +) + + +@pytest.mark.xfail +@given(numeric_homogeneous_dataframe) +def test_roundtrip_pandas_dataframe(df): + # Need to name the indexes, otherwise Xarray names them 'dim_0', 'dim_1'. + df.index.name = "rows" + df.columns.name = "cols" + arr = xr.DataArray(df) + roundtripped = arr.to_pandas() + pd.testing.assert_frame_equal(df, roundtripped) + xr.testing.assert_identical(arr, roundtripped.to_xarray()) From 59f88f776f290f216531d074b6e73a50a9f7c37c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20LALANDE?= Date: Wed, 30 Oct 2019 15:51:39 +0100 Subject: [PATCH 17/27] Fix leap year condition in monthly means example (#3464) * Typo correction in docs (#3387) * Update terminology.rst (#3455) Fixed broken link * Error in leap year? I've tried this script; however, it adds +1 to all months of the leap years. It sounds like an error, or I am wrong? So I wrote the condition "and month == 2" line 86 so that only the month of February gets +1. 
* Fix leap year (pydata#3464) * Update doc/whats-new.rst Co-Authored-By: Deepak Cherian --- doc/examples/monthly-means.rst | 2 +- doc/whats-new.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/examples/monthly-means.rst b/doc/examples/monthly-means.rst index 7cc47eb2847..7d620f1bca3 100644 --- a/doc/examples/monthly-means.rst +++ b/doc/examples/monthly-means.rst @@ -83,7 +83,7 @@ the ``calendar.month_range`` function. for i, (month, year) in enumerate(zip(time.month, time.year)): month_length[i] = cal_days[month] - if leap_year(year, calendar=calendar): + if leap_year(year, calendar=calendar) and month == 2: month_length[i] += 1 return month_length diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 443be29ac23..8f98a3860b2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -67,7 +67,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ - +- Fix leap year condition in example (http://xarray.pydata.org/en/stable/examples/monthly-means.html) by `Mickaël Lalande `_. - Fix the documentation of :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` and explicitly state that a datetime-like dimension is required. (:pull:`3400`) From c0af5e7bdca537038a68d660a3d8320d6b0c9592 Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 30 Oct 2019 18:46:33 +0100 Subject: [PATCH 18/27] Fix integrate docs (#3469) * rename the coord parameter of `Dataset.integrate` * add an example to `Dataset.integrate` * refer to an actual parameter in the note * don't just make y the same as x with an offset so now the results are different depending on integration with x or y * show the repr of the example dataset * update whats-new.rst --- doc/whats-new.rst | 4 ++++ xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 30 +++++++++++++++++++++++++++++- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8f98a3860b2..36ba0681ea2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -74,6 +74,10 @@ Documentation By `Justus Magin `_. - Update the terminology page to address multidimensional coordinates. (:pull:`3410`) By `Jon Thielen `_. +- Fix the documentation of :py:meth:`Dataset.integrate` and + :py:meth:`DataArray.integrate` and add an example to + :py:meth:`Dataset.integrate`. (:pull:`3469`) + By `Justus Magin `_. Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 62890f9cefa..502d88f4f1f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2995,7 +2995,7 @@ def integrate( """ integrate the array with the trapezoidal rule. .. note:: - This feature is limited to simple cartesian geometry, i.e. coord + This feature is limited to simple cartesian geometry, i.e. dim must be one dimensional. Parameters diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 05d9772cb7a..31efcb1d591 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5165,7 +5165,7 @@ def integrate(self, coord, datetime_unit=None): Parameters ---------- - dim: str, or a sequence of str + coord: str, or a sequence of str Coordinate(s) used for the integration. datetime_unit Can be specify the unit if datetime coordinate is used. One of @@ -5180,6 +5180,34 @@ def integrate(self, coord, datetime_unit=None): -------- DataArray.integrate numpy.trapz: corresponding numpy function + + Examples + -------- + >>> ds = xr.Dataset( + ... data_vars={"a": ("x", [5, 5, 6, 6]), "b": ("x", [1, 2, 1, 0])}, + ... coords={"x": [0, 1, 2, 3], "y": ("x", [1, 7, 3, 5])}, + ... 
) + >>> ds + + Dimensions: (x: 4) + Coordinates: + * x (x) int64 0 1 2 3 + y (x) int64 1 7 3 5 + Data variables: + a (x) int64 5 5 6 6 + b (x) int64 1 2 1 0 + >>> ds.integrate("x") + + Dimensions: () + Data variables: + a float64 16.5 + b float64 3.5 + >>> ds.integrate("y") + + Dimensions: () + Data variables: + a float64 20.0 + b float64 4.0 """ if not isinstance(coord, (list, tuple)): coord = (coord,) From 96cc2bc62b33801e189dd954c6e2f335db745b66 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Wed, 30 Oct 2019 21:24:16 -0400 Subject: [PATCH 19/27] fix test suite warnings re `drop` (#3460) --- xarray/tests/test_duck_array_ops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index eb073a14aae..9df2f167cf2 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -440,7 +440,9 @@ def test_argmin_max(dim_num, dtype, contains_nan, dask, func, skipna, aggdim): **{aggdim: getattr(da, "arg" + func)(dim=aggdim, skipna=skipna).compute()} ) expected = getattr(da, func)(dim=aggdim, skipna=skipna) - assert_allclose(actual.drop(actual.coords), expected.drop(expected.coords)) + assert_allclose( + actual.drop(list(actual.coords)), expected.drop(list(expected.coords)) + ) def test_argmin_max_error(): From 96e57d00c1a7060f571befa708406146a7257fb5 Mon Sep 17 00:00:00 2001 From: Brian Rose Date: Thu, 31 Oct 2019 10:28:15 -0400 Subject: [PATCH 20/27] Fix typo in docstring (#3474) --- xarray/core/computation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 2c87f378762..bb5ab07d8dd 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -884,7 +884,7 @@ def apply_ufunc( Plain scalars, numpy arrays and a mix of these with xarray objects is also supported: - >>> magnitude(4, 5) + >>> magnitude(3, 4) 5.0 >>> magnitude(3, np.array([0, 4])) array([3., 5.]) From 8fbe1f8409a0c0e4ce88a0c30bfc668b16c6903c Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 31 Oct 2019 11:52:01 -0400 Subject: [PATCH 21/27] Type check sentinel values (#3472) * type check sentinel values, using Enum pattern * Code review * Code review * Code review * Code review --- xarray/core/dataarray.py | 18 +++++++---------- xarray/core/dataset.py | 43 +++++++++++++++++++--------------------- xarray/core/utils.py | 9 +++++++++ 3 files changed, 36 insertions(+), 34 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 502d88f4f1f..82be3989b27 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -53,7 +53,7 @@ from .formatting import format_item from .indexes import Indexes, default_indexes from .options import OPTIONS -from .utils import ReprObject, _check_inplace, either_dict_or_kwargs +from .utils import Default, ReprObject, _default, _check_inplace, either_dict_or_kwargs from .variable import ( IndexVariable, Variable, @@ -270,8 +270,6 @@ class DataArray(AbstractArray, DataWithCoords): _coarsen_cls = rolling.DataArrayCoarsen _resample_cls = resample.DataArrayResample - __default = ReprObject("") - dt = property(DatetimeAccessor) def __init__( @@ -387,18 +385,18 @@ def _replace( self, variable: Variable = None, coords=None, - name: Optional[Hashable] = __default, + name: Union[Hashable, None, Default] = _default, ) -> "DataArray": if variable is None: variable = 
self.variable if coords is None: coords = self._coords - if name is self.__default: + if name is _default: name = self.name return type(self)(variable, coords, name=name, fastpath=True) def _replace_maybe_drop_dims( - self, variable: Variable, name: Optional[Hashable] = __default + self, variable: Variable, name: Union[Hashable, None, Default] = _default ) -> "DataArray": if variable.dims == self.dims and variable.shape == self.shape: coords = self._coords.copy() @@ -438,7 +436,7 @@ def _to_temp_dataset(self) -> Dataset: return self._to_dataset_whole(name=_THIS_ARRAY, shallow_copy=False) def _from_temp_dataset( - self, dataset: Dataset, name: Hashable = __default + self, dataset: Dataset, name: Hashable = _default ) -> "DataArray": variable = dataset._variables.pop(_THIS_ARRAY) coords = dataset._variables @@ -2450,13 +2448,11 @@ def identical(self, other: "DataArray") -> bool: except (TypeError, AttributeError): return False - __default_name = object() - def _result_name(self, other: Any = None) -> Optional[Hashable]: # use the same naming heuristics as pandas: # https://github.com/ContinuumIO/blaze/issues/458#issuecomment-51936356 - other_name = getattr(other, "name", self.__default_name) - if other_name is self.__default_name or other_name == self.name: + other_name = getattr(other, "name", _default) + if other_name is _default or other_name == self.name: return self.name else: return None diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 31efcb1d591..6e94d35df40 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -70,8 +70,10 @@ from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .utils import ( + Default, Frozen, SortedKeysDict, + _default, _check_inplace, decode_numpy_dict_values, either_dict_or_kwargs, @@ -856,23 +858,18 @@ def _construct_direct( obj._accessors = None return obj - __default = object() - @classmethod def _from_vars_and_coord_names(cls, variables, coord_names, attrs=None): return cls._construct_direct(variables, coord_names, attrs=attrs) - # TODO(shoyer): renable type checking on this signature when pytype has a - # good way to handle defaulting arguments to a sentinel value: - # https://github.com/python/mypy/issues/1803 - def _replace( # type: ignore + def _replace( self, variables: Dict[Hashable, Variable] = None, coord_names: Set[Hashable] = None, dims: Dict[Any, int] = None, - attrs: Optional[Dict[Hashable, Any]] = __default, - indexes: Optional[Dict[Any, pd.Index]] = __default, - encoding: Optional[dict] = __default, + attrs: Union[Dict[Hashable, Any], None, Default] = _default, + indexes: Union[Dict[Any, pd.Index], None, Default] = _default, + encoding: Union[dict, None, Default] = _default, inplace: bool = False, ) -> "Dataset": """Fastpath constructor for internal use. 
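The ``_default`` sentinel threaded through these signatures is the Enum idiom added to ``xarray/core/utils.py`` further down in this patch. A minimal self-contained sketch of the pattern (the ``describe`` function is illustrative, not part of the patch):

from enum import Enum
from typing import Union


class Default(Enum):
    token = 0


_default = Default.token


def describe(name: Union[str, None, Default] = _default) -> str:
    # an `is` check distinguishes "argument omitted" from an explicit None
    if name is _default:
        return "kept as-is"
    return f"replaced with {name!r}"

Because the sentinel has its own narrow type, it can appear in annotations and be checked by mypy, which the bare ``object()`` sentinel it replaces could not.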
@@ -890,12 +887,12 @@ def _replace( # type: ignore self._coord_names = coord_names if dims is not None: self._dims = dims - if attrs is not self.__default: - self._attrs = attrs - if indexes is not self.__default: - self._indexes = indexes - if encoding is not self.__default: - self._encoding = encoding + if attrs is not _default: + self._attrs = attrs # type: ignore # FIXME need mypy 0.750 + if indexes is not _default: + self._indexes = indexes # type: ignore # FIXME need mypy 0.750 + if encoding is not _default: + self._encoding = encoding # type: ignore # FIXME need mypy 0.750 obj = self else: if variables is None: @@ -904,23 +901,23 @@ def _replace( # type: ignore coord_names = self._coord_names.copy() if dims is None: dims = self._dims.copy() - if attrs is self.__default: + if attrs is _default: attrs = copy.copy(self._attrs) - if indexes is self.__default: + if indexes is _default: indexes = copy.copy(self._indexes) - if encoding is self.__default: + if encoding is _default: encoding = copy.copy(self._encoding) obj = self._construct_direct( variables, coord_names, dims, attrs, indexes, encoding ) return obj - def _replace_with_new_dims( # type: ignore + def _replace_with_new_dims( self, variables: Dict[Hashable, Variable], coord_names: set = None, - attrs: Optional[Dict[Hashable, Any]] = __default, - indexes: Dict[Hashable, pd.Index] = __default, + attrs: Union[Dict[Hashable, Any], None, Default] = _default, + indexes: Union[Dict[Hashable, pd.Index], None, Default] = _default, inplace: bool = False, ) -> "Dataset": """Replace variables with recalculated dimensions.""" @@ -929,12 +926,12 @@ def _replace_with_new_dims( # type: ignore variables, coord_names, dims, attrs, indexes, inplace=inplace ) - def _replace_vars_and_dims( # type: ignore + def _replace_vars_and_dims( self, variables: Dict[Hashable, Variable], coord_names: set = None, dims: Dict[Hashable, int] = None, - attrs: Dict[Hashable, Any] = __default, + attrs: Union[Dict[Hashable, Any], None, Default] = _default, inplace: bool = False, ) -> "Dataset": """Deprecated version of _replace_with_new_dims(). diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 492c595a887..6681375c18e 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -6,6 +6,7 @@ import os.path import re import warnings +from enum import Enum from typing import ( AbstractSet, Any, @@ -701,3 +702,11 @@ def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: while new_dim in dims: new_dim = "_" + str(new_dim) return new_dim + + +# Singleton type, as per https://github.com/python/typing/pull/240 +class Default(Enum): + token = 0 + + +_default = Default.token From 53c5199423a29854e3f9c1f5d6d3658b8ea95049 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 31 Oct 2019 09:52:11 -0600 Subject: [PATCH 22/27] __dask_tokenize__ (#3446) * Implement __dask_tokenize__ * Fix window test * Code review * Test change in IndexVariable --- doc/whats-new.rst | 20 ++++++-- xarray/core/dataarray.py | 3 ++ xarray/core/dataset.py | 3 ++ xarray/core/variable.py | 9 ++++ xarray/tests/test_dask.py | 94 +++++++++++++++++++++++++++++++++++++ xarray/tests/test_sparse.py | 22 ++++++++- 6 files changed, 146 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 36ba0681ea2..47e2e58e988 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,17 +21,20 @@ v0.14.1 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ -- Minimum cftime version is now 1.0.3. By `Deepak Cherian `_. 
+- Broken compatibility with cftime < 1.0.3. + By `Deepak Cherian `_. .. note:: - cftime version 1.0.4 is broken (`cftime/126 `_), use version 1.0.4.2 instead. + cftime version 1.0.4 is broken + (`cftime/126 `_); + please use version 1.0.4.2 instead. - All leftover support for dates from non-standard calendars through netcdftime, the module included in versions of netCDF4 prior to 1.4 that eventually became the cftime package, has been removed in favor of relying solely on the standalone - cftime package (:pull:`3450`). By `Spencer Clark - `_. + cftime package (:pull:`3450`). + By `Spencer Clark `_. New Features ~~~~~~~~~~~~ @@ -52,6 +55,14 @@ New Features for now. Enable it with :py:meth:`xarray.set_options(display_style="html")`. (:pull:`3425`) by `Benoit Bovy `_ and `Julia Signell `_. +- Implement `dask deterministic hashing + `_ + for xarray objects. Note that xarray objects with a dask.array backend already used + deterministic hashing in previous releases; this change implements it when whole + xarray objects are embedded in a dask graph, e.g. when :meth:`DataArray.map` is + invoked. (:issue:`3378`, :pull:`3446`) + By `Deepak Cherian `_ and + `Guido Imperiale `_. Bug fixes ~~~~~~~~~ @@ -96,6 +107,7 @@ Internal Changes - Use Python 3.6 idioms throughout the codebase. (:pull:3419) By `Maximilian Roos `_ + .. _whats-new.0.14.0: v0.14.0 (14 Oct 2019) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 82be3989b27..b61f83bcb1c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -752,6 +752,9 @@ def reset_coords( dataset[self.name] = self.variable return dataset + def __dask_tokenize__(self): + return (type(self), self._variable, self._coords, self._name) + def __dask_graph__(self): return self._to_temp_dataset().__dask_graph__() diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6e94d35df40..2b89051e84e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -651,6 +651,9 @@ def load(self, **kwargs) -> "Dataset": return self + def __dask_tokenize__(self): + return (type(self), self._variables, self._coord_names, self._attrs) + def __dask_graph__(self): graphs = {k: v.__dask_graph__() for k, v in self.variables.items()} graphs = {k: v for k, v in graphs.items() if v is not None} diff --git a/xarray/core/variable.py b/xarray/core/variable.py index b7abdc7c462..117ab85ae65 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -390,6 +390,11 @@ def compute(self, **kwargs): new = self.copy(deep=False) return new.load(**kwargs) + def __dask_tokenize__(self): + # Use v.data, instead of v._data, in order to cope with the wrappers + # around NetCDF and the like + return type(self), self._dims, self.data, self._attrs + def __dask_graph__(self): if isinstance(self._data, dask_array_type): return self._data.__dask_graph__() @@ -1963,6 +1968,10 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): if not isinstance(self._data, PandasIndexAdapter): self._data = PandasIndexAdapter(self._data) + def __dask_tokenize__(self): + # Don't waste time converting pd.Index to np.ndarray + return (type(self), self._dims, self._data.array, self._attrs) + def load(self): # data is already loaded into memory for IndexVariable return self diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 50517ae3c9c..c4323d1d317 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1,5 +1,6 @@ import operator import pickle +import sys from contextlib import suppress from 
distutils.version import LooseVersion from textwrap import dedent @@ -21,12 +22,16 @@ assert_frame_equal, assert_identical, raises_regex, + requires_scipy_or_netCDF4, ) +from .test_backends import create_tmp_file dask = pytest.importorskip("dask") da = pytest.importorskip("dask.array") dd = pytest.importorskip("dask.dataframe") +ON_WINDOWS = sys.platform == "win32" + class CountingScheduler: """ Simple dask scheduler counting the number of computes. @@ -1135,3 +1140,92 @@ def test_make_meta(map_ds): for variable in map_ds.data_vars: assert variable in meta.data_vars assert meta.data_vars[variable].shape == (0,) * meta.data_vars[variable].ndim + + +@pytest.mark.parametrize( + "obj", [make_da(), make_da().compute(), make_ds(), make_ds().compute()] +) +@pytest.mark.parametrize( + "transform", + [ + lambda x: x.reset_coords(), + lambda x: x.reset_coords(drop=True), + lambda x: x.isel(x=1), + lambda x: x.attrs.update(new_attrs=1), + lambda x: x.assign_coords(cxy=1), + lambda x: x.rename({"x": "xnew"}), + lambda x: x.rename({"cxy": "cxynew"}), + ], +) +def test_token_changes_on_transform(obj, transform): + with raise_if_dask_computes(): + assert dask.base.tokenize(obj) != dask.base.tokenize(transform(obj)) + + +@pytest.mark.parametrize( + "obj", [make_da(), make_da().compute(), make_ds(), make_ds().compute()] +) +def test_token_changes_when_data_changes(obj): + with raise_if_dask_computes(): + t1 = dask.base.tokenize(obj) + + # Change data_var + if isinstance(obj, DataArray): + obj *= 2 + else: + obj["a"] *= 2 + with raise_if_dask_computes(): + t2 = dask.base.tokenize(obj) + assert t2 != t1 + + # Change non-index coord + obj.coords["ndcoord"] *= 2 + with raise_if_dask_computes(): + t3 = dask.base.tokenize(obj) + assert t3 != t2 + + # Change IndexVariable + obj.coords["x"] *= 2 + with raise_if_dask_computes(): + t4 = dask.base.tokenize(obj) + assert t4 != t3 + + +@pytest.mark.parametrize("obj", [make_da().compute(), make_ds().compute()]) +def test_token_changes_when_buffer_changes(obj): + with raise_if_dask_computes(): + t1 = dask.base.tokenize(obj) + + if isinstance(obj, DataArray): + obj[0, 0] = 123 + else: + obj["a"][0, 0] = 123 + with raise_if_dask_computes(): + t2 = dask.base.tokenize(obj) + assert t2 != t1 + + obj.coords["ndcoord"][0] = 123 + with raise_if_dask_computes(): + t3 = dask.base.tokenize(obj) + assert t3 != t2 + + +@pytest.mark.parametrize( + "transform", + [lambda x: x, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)], +) +@pytest.mark.parametrize("obj", [make_da(), make_ds(), make_ds().variables["a"]]) +def test_token_identical(obj, transform): + with raise_if_dask_computes(): + assert dask.base.tokenize(obj) == dask.base.tokenize(transform(obj)) + assert dask.base.tokenize(obj.compute()) == dask.base.tokenize( + transform(obj.compute()) + ) + + +@requires_scipy_or_netCDF4 +def test_normalize_token_with_backend(map_ds): + with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as tmp_file: + map_ds.to_netcdf(tmp_file) + read = xr.open_dataset(tmp_file) + assert not dask.base.tokenize(map_ds) == dask.base.tokenize(read) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 73c4b9b8c74..8e2d4b8e064 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -11,7 +11,7 @@ from xarray.core.npcompat import IS_NEP18_ACTIVE from xarray.core.pycompat import sparse_array_type -from . import assert_equal, assert_identical +from . 
import assert_equal, assert_identical, requires_dask param = pytest.param xfail = pytest.mark.xfail @@ -849,3 +849,23 @@ def test_chunk(): dsc = ds.chunk(2) assert dsc.chunks == {"dim_0": (2, 2)} assert_identical(dsc, ds) + + +@requires_dask +def test_dask_token(): + import dask + + s = sparse.COO.from_numpy(np.array([0, 0, 1, 2])) + a = DataArray(s) + t1 = dask.base.tokenize(a) + t2 = dask.base.tokenize(a) + t3 = dask.base.tokenize(a + 1) + assert t1 == t2 + assert t3 != t2 + assert isinstance(a.data, sparse.COO) + + ac = a.chunk(2) + t4 = dask.base.tokenize(ac) + t5 = dask.base.tokenize(ac + 1) + assert t4 != t5 + assert isinstance(ac.data._meta, sparse.COO) From 46c4931a140fd39991620b483d347bee9ee66afe Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sat, 2 Nov 2019 16:33:33 -0400 Subject: [PATCH 23/27] python 3.8 tests (#3477) * python 3.8 tests * whatsnew * Update doc/whats-new.rst Co-Authored-By: crusaderky * Update doc/whats-new.rst Co-Authored-By: crusaderky --- azure-pipelines.yml | 2 ++ ci/requirements/py38.yml | 15 +++++++++++++++ doc/whats-new.rst | 8 ++++++-- 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 ci/requirements/py38.yml diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c7f9de73cf4..90de0705a27 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -18,6 +18,8 @@ jobs: conda_env: py36 py37: conda_env: py37 + py38: + conda_env: py38 py37-upstream-dev: conda_env: py37 upstream_dev: true diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml new file mode 100644 index 00000000000..9698e3efecf --- /dev/null +++ b/ci/requirements/py38.yml @@ -0,0 +1,15 @@ +name: xarray-tests +channels: + - conda-forge +dependencies: + - python=3.8 + - pip + - pip: + - coveralls + - dask + - distributed + - numpy + - pandas + - pytest + - pytest-cov + - pytest-env diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 47e2e58e988..c117382f23f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -78,7 +78,8 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- Fix leap year condition in example (http://xarray.pydata.org/en/stable/examples/monthly-means.html) by `Mickaël Lalande `_. +- Fix leap year condition in example (http://xarray.pydata.org/en/stable/examples/monthly-means.html) + by `Mickaël Lalande `_. - Fix the documentation of :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` and explicitly state that a datetime-like dimension is required. (:pull:`3400`) @@ -104,7 +105,10 @@ Internal Changes ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head)``. Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken. -- Use Python 3.6 idioms throughout the codebase. (:pull:3419) +- Use Python 3.6 idioms throughout the codebase. (:pull:`3419`) + By `Maximilian Roos `_ + +- Run basic CI tests on Python 3.8. (:pull:`3477`) By `Maximilian Roos `_ From b649846b9ceef0db8631e7148f5ee9415bdd4621 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 5 Nov 2019 02:19:51 +0000 Subject: [PATCH 24/27] Propagate indexes in DataArray binary operations. (#3481) * Propagate indexes in DataArray binary operations. Works by propagating indexes in DataArray._replace. xref #2227. Tests pass! * remove commented code. 
* fix roll --- xarray/core/dataarray.py | 8 +++++--- xarray/core/dataset.py | 2 ++ xarray/core/groupby.py | 1 + xarray/core/indexes.py | 3 +++ xarray/tests/test_dataarray.py | 11 +++++++++++ xarray/tests/test_dataset.py | 8 ++++++++ 6 files changed, 30 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b61f83bcb1c..35ee90fb5c8 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -386,6 +386,7 @@ def _replace( variable: Variable = None, coords=None, name: Union[Hashable, None, Default] = _default, + indexes=None, ) -> "DataArray": if variable is None: variable = self.variable @@ -393,7 +394,7 @@ def _replace( coords = self._coords if name is _default: name = self.name - return type(self)(variable, coords, name=name, fastpath=True) + return type(self)(variable, coords, name=name, fastpath=True, indexes=indexes) def _replace_maybe_drop_dims( self, variable: Variable, name: Union[Hashable, None, Default] = _default @@ -440,7 +441,8 @@ def _from_temp_dataset( ) -> "DataArray": variable = dataset._variables.pop(_THIS_ARRAY) coords = dataset._variables - return self._replace(variable, coords, name) + indexes = dataset._indexes + return self._replace(variable, coords, name, indexes=indexes) def _to_dataset_split(self, dim: Hashable) -> Dataset: def subset(dim, label): @@ -2506,7 +2508,7 @@ def func(self, other): coords, indexes = self.coords._merge_raw(other_coords) name = self._result_name(other) - return self._replace(variable, coords, name) + return self._replace(variable, coords, name, indexes=indexes) return func diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2b89051e84e..978242e5f6b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4891,6 +4891,8 @@ def roll(self, shifts=None, roll_coords=None, **shifts_kwargs): (dim,) = self.variables[k].dims if dim in shifts: indexes[k] = roll_index(v, shifts[dim]) + else: + indexes[k] = v else: indexes = dict(self.indexes) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 353566eb345..209ac14184b 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -529,6 +529,7 @@ def _maybe_unstack(self, obj): for dim in self._inserted_dims: if dim in obj.coords: del obj.coords[dim] + del obj.indexes[dim] return obj def fillna(self, value): diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index a96fbccbeee..1574f4f18df 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -35,6 +35,9 @@ def __contains__(self, key): def __getitem__(self, key): return self._indexes[key] + def __delitem__(self, key): + del self._indexes[key] + def __repr__(self): return formatting.indexes_repr(self) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5114d13b0dc..2c823b0c20a 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3953,6 +3953,17 @@ def test_matmul(self): expected = da.dot(da) assert_identical(result, expected) + def test_binary_op_propagate_indexes(self): + # regression test for GH2227 + self.dv["x"] = np.arange(self.dv.sizes["x"]) + expected = self.dv.indexes["x"] + + actual = (self.dv * 10).indexes["x"] + assert expected is actual + + actual = (self.dv > 10).indexes["x"] + assert expected is actual + def test_binary_op_join_setting(self): dim = "x" align_type = "outer" diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index eab6040e17e..b9fa20fab26 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ 
-4951,6 +4951,14 @@ def test_filter_by_attrs(self): ) assert not bool(new_ds.data_vars) + def test_binary_op_propagate_indexes(self): + ds = Dataset( + {"d1": DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]})} + ) + expected = ds.indexes["x"] + actual = (ds * 2).indexes["x"] + assert expected is actual + def test_binary_op_join_setting(self): # arithmetic_join applies to data array coordinates missing_2 = xr.Dataset({"x": [0, 1]}) From af28c6b02fac08494f5d9ae2718d68a084d93949 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 5 Nov 2019 15:41:13 +0000 Subject: [PATCH 25/27] Optimize dask array equality checks. (#3453) * Optimize dask array equality checks. Dask arrays with the same graph have the same name. We can use this to quickly compare dask-backed variables without computing. Fixes #3068 and #3311 * better docstring * review suggestions. * add concat test * update whats new * Add identity check to lazy_array_equiv * pep8 * bugfix. --- doc/whats-new.rst | 3 + xarray/core/concat.py | 56 ++++++++++++------ xarray/core/duck_array_ops.py | 62 ++++++++++++++----- xarray/core/merge.py | 19 ++++-- xarray/core/variable.py | 14 +++-- xarray/tests/test_dask.py | 108 +++++++++++++++++++++++++++++++++- 6 files changed, 217 insertions(+), 45 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c117382f23f..dcaab011e67 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -70,6 +70,9 @@ Bug fixes but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). By `Deepak Cherian `_. +- Use dask names to compare dask objects prior to comparing values after computation. + (:issue:`3068`, :issue:`3311`, :issue:`3454`, :pull:`3453`). + By `Deepak Cherian `_. - Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. By `Anderson Banihirwe `_. - Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 0d19990bdd0..c26153eb0d8 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -2,6 +2,7 @@ from . import dtypes, utils from .alignment import align +from .duck_array_ops import lazy_array_equiv from .merge import _VALID_COMPAT, unique_variable from .variable import IndexVariable, Variable, as_variable from .variable import concat as concat_vars @@ -189,26 +190,43 @@ def process_subset_opt(opt, subset): # all nonindexes that are not the same in each dataset for k in getattr(datasets[0], subset): if k not in concat_over: - # Compare the variable of all datasets vs. the one - # of the first dataset. Perform the minimum amount of - # loads in order to avoid multiple loads from disk - # while keeping the RAM footprint low. - v_lhs = datasets[0].variables[k].load() - # We'll need to know later on if variables are equal. - computed = [] - for ds_rhs in datasets[1:]: - v_rhs = ds_rhs.variables[k].compute() - computed.append(v_rhs) - if not getattr(v_lhs, compat)(v_rhs): - concat_over.add(k) - equals[k] = False - # computed variables are not to be re-computed - # again in the future - for ds, v in zip(datasets[1:], computed): - ds.variables[k].data = v.data + equals[k] = None + variables = [ds.variables[k] for ds in datasets] + # first check without comparing values i.e. no computes + for var in variables[1:]: + equals[k] = getattr(variables[0], compat)( + var, equiv=lazy_array_equiv + ) + if equals[k] is not True: + # exit early if we know these are not equal or that + # equality cannot be determined i.e. 
one or all of
+                            # the variables wrap a numpy array
                             break
-                else:
-                    equals[k] = True
+
+                if equals[k] is False:
+                    concat_over.add(k)
+
+                elif equals[k] is None:
+                    # Compare the variable of all datasets vs. the one
+                    # of the first dataset. Perform the minimum amount of
+                    # loads in order to avoid multiple loads from disk
+                    # while keeping the RAM footprint low.
+                    v_lhs = datasets[0].variables[k].load()
+                    # We'll need to know later on if variables are equal.
+                    computed = []
+                    for ds_rhs in datasets[1:]:
+                        v_rhs = ds_rhs.variables[k].compute()
+                        computed.append(v_rhs)
+                        if not getattr(v_lhs, compat)(v_rhs):
+                            concat_over.add(k)
+                            equals[k] = False
+                            # computed variables are not to be re-computed
+                            # again in the future
+                            for ds, v in zip(datasets[1:], computed):
+                                ds.variables[k].data = v.data
+                            break
+                    else:
+                        equals[k] = True

         elif opt == "all":
             concat_over.update(
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index d943788c434..71e79335c3d 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -174,14 +174,42 @@ def as_shared_dtype(scalars_or_arrays):
     return [x.astype(out_type, copy=False) for x in arrays]


-def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8):
-    """Like np.allclose, but also allows values to be NaN in both arrays
+def lazy_array_equiv(arr1, arr2):
+    """Like array_equal, but doesn't actually compare values.
+    Returns True when arr1 and arr2 are identical or their dask names are equal.
+    Returns False when shapes are not equal.
+    Returns None when equality cannot be determined: one or both of arr1, arr2
+    are numpy arrays, or their dask names are not equal.
     """
+    if arr1 is arr2:
+        return True
     arr1 = asarray(arr1)
     arr2 = asarray(arr2)
     if arr1.shape != arr2.shape:
         return False
-    return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all())
+    if (
+        dask_array
+        and isinstance(arr1, dask_array.Array)
+        and isinstance(arr2, dask_array.Array)
+    ):
+        # GH3068
+        if arr1.name == arr2.name:
+            return True
+        else:
+            return None
+    return None
+
+
+def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8):
+    """Like np.allclose, but also allows values to be NaN in both arrays
+    """
+    arr1 = asarray(arr1)
+    arr2 = asarray(arr2)
+    lazy_equiv = lazy_array_equiv(arr1, arr2)
+    if lazy_equiv is None:
+        return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all())
+    else:
+        return lazy_equiv


 def array_equiv(arr1, arr2):
@@ -189,12 +217,14 @@ def array_equiv(arr1, arr2):
     """
     arr1 = asarray(arr1)
     arr2 = asarray(arr2)
-    if arr1.shape != arr2.shape:
-        return False
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore", "In the future, 'NAT == x'")
-        flag_array = (arr1 == arr2) | (isnull(arr1) & isnull(arr2))
-    return bool(flag_array.all())
+    lazy_equiv = lazy_array_equiv(arr1, arr2)
+    if lazy_equiv is None:
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", "In the future, 'NAT == x'")
+            flag_array = (arr1 == arr2) | (isnull(arr1) & isnull(arr2))
+        return bool(flag_array.all())
+    else:
+        return lazy_equiv


 def array_notnull_equiv(arr1, arr2):
@@ -203,12 +233,14 @@ def array_notnull_equiv(arr1, arr2):
     """
     arr1 = asarray(arr1)
     arr2 = asarray(arr2)
-    if arr1.shape != arr2.shape:
-        return False
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore", "In the future, 'NAT == x'")
-        flag_array = (arr1 == arr2) | isnull(arr1) | isnull(arr2)
-    return bool(flag_array.all())
+    lazy_equiv = lazy_array_equiv(arr1, arr2)
+    if lazy_equiv is None:
+        with warnings.catch_warnings():
+
warnings.filterwarnings("ignore", "In the future, 'NAT == x'") + flag_array = (arr1 == arr2) | isnull(arr1) | isnull(arr2) + return bool(flag_array.all()) + else: + return lazy_equiv def count(data, axis=None): diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 389ceb155f7..daf0c3b059f 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -19,6 +19,7 @@ from . import dtypes, pdcompat from .alignment import deep_align +from .duck_array_ops import lazy_array_equiv from .utils import Frozen, dict_equiv from .variable import Variable, as_variable, assert_unique_multiindex_level_names @@ -123,16 +124,24 @@ def unique_variable( combine_method = "fillna" if equals is None: - out = out.compute() + # first check without comparing values i.e. no computes for var in variables[1:]: - equals = getattr(out, compat)(var) - if not equals: + equals = getattr(out, compat)(var, equiv=lazy_array_equiv) + if equals is not True: break + if equals is None: + # now compare values with minimum number of computes + out = out.compute() + for var in variables[1:]: + equals = getattr(out, compat)(var) + if not equals: + break + if not equals: raise MergeError( - "conflicting values for variable {!r} on objects to be combined. " - "You can skip this check by specifying compat='override'.".format(name) + f"conflicting values for variable {name!r} on objects to be combined. " + "You can skip this check by specifying compat='override'." ) if combine_method: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 117ab85ae65..916df75b3e0 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1236,7 +1236,9 @@ def transpose(self, *dims) -> "Variable": dims = self.dims[::-1] dims = tuple(infix_dims(dims, self.dims)) axes = self.get_axis_num(dims) - if len(dims) < 2: # no need to transpose if only one dimension + if len(dims) < 2 or dims == self.dims: + # no need to transpose if only one dimension + # or dims are in same order return self.copy(deep=False) data = as_indexable(self._data).transpose(axes) @@ -1595,22 +1597,24 @@ def broadcast_equals(self, other, equiv=duck_array_ops.array_equiv): return False return self.equals(other, equiv=equiv) - def identical(self, other): + def identical(self, other, equiv=duck_array_ops.array_equiv): """Like equals, but also checks attributes. """ try: - return utils.dict_equiv(self.attrs, other.attrs) and self.equals(other) + return utils.dict_equiv(self.attrs, other.attrs) and self.equals( + other, equiv=equiv + ) except (TypeError, AttributeError): return False - def no_conflicts(self, other): + def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): """True if the intersection of two Variable's non-null data is equal; otherwise false. Variables can thus still be equal if there are locations where either, or both, contain NaN values. """ - return self.broadcast_equals(other, equiv=duck_array_ops.array_notnull_equiv) + return self.broadcast_equals(other, equiv=equiv) def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): """Compute the qth quantile of the data along the specified dimension. 
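The optimization above hinges on dask's deterministic tokenization: building the same graph twice yields the same ``.name``, so equal names guarantee equal values while unequal names prove nothing about them. A minimal standalone sketch of that tri-state idea (illustrative only, assuming dask is installed; it is not part of the patch):

    import dask.array as da

    x = da.zeros((10, 10), chunks=2)
    y = da.zeros((10, 10), chunks=2)  # same graph spec -> same deterministic name
    z = x + 1                         # different graph -> different name

    assert x.name == y.name  # equal without computing a single chunk
    assert x.name != z.name  # undecided: values could still match after compute

    # This is why lazy_array_equiv returns None (rather than False) when names
    # differ: callers such as concat and merge fall back to a real element-wise
    # comparison, computing only when necessary.

The tests below exercise exactly this behaviour through ``raise_if_dask_computes``.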
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index c4323d1d317..34115b29b23 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -24,6 +24,7 @@ raises_regex, requires_scipy_or_netCDF4, ) +from ..core.duck_array_ops import lazy_array_equiv from .test_backends import create_tmp_file dask = pytest.importorskip("dask") @@ -428,7 +429,53 @@ def test_concat_loads_variables(self): out.compute() assert kernel_call_count == 24 - # Finally, test that riginals are unaltered + # Finally, test that originals are unaltered + assert ds1["d"].data is d1 + assert ds1["c"].data is c1 + assert ds2["d"].data is d2 + assert ds2["c"].data is c2 + assert ds3["d"].data is d3 + assert ds3["c"].data is c3 + + # now check that concat() is correctly using dask name equality to skip loads + out = xr.concat( + [ds1, ds1, ds1], dim="n", data_vars="different", coords="different" + ) + assert kernel_call_count == 24 + # variables are not loaded in the output + assert isinstance(out["d"].data, dask.array.Array) + assert isinstance(out["c"].data, dask.array.Array) + + out = xr.concat( + [ds1, ds1, ds1], dim="n", data_vars=[], coords=[], compat="identical" + ) + assert kernel_call_count == 24 + # variables are not loaded in the output + assert isinstance(out["d"].data, dask.array.Array) + assert isinstance(out["c"].data, dask.array.Array) + + out = xr.concat( + [ds1, ds2.compute(), ds3], + dim="n", + data_vars="all", + coords="different", + compat="identical", + ) + # c1,c3 must be computed for comparison since c2 is numpy; + # d2 is computed too + assert kernel_call_count == 28 + + out = xr.concat( + [ds1, ds2.compute(), ds3], + dim="n", + data_vars="all", + coords="all", + compat="identical", + ) + # no extra computes + assert kernel_call_count == 30 + + # Finally, test that originals are unaltered assert ds1["d"].data is d1 assert ds1["c"].data is c1 assert ds2["d"].data is d2 @@ -1142,6 +1189,19 @@ def test_make_meta(map_ds): assert meta.data_vars[variable].shape == (0,) * meta.data_vars[variable].ndim +def test_identical_coords_no_computes(): + lons2 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) + a = xr.DataArray( + da.zeros((10, 10), chunks=2), dims=("y", "x"), coords={"lons": lons2} + ) + b = xr.DataArray( + da.zeros((10, 10), chunks=2), dims=("y", "x"), coords={"lons": lons2} + ) + with raise_if_dask_computes(): + c = a + b + assert_identical(c, a) + + @pytest.mark.parametrize( "obj", [make_da(), make_da().compute(), make_ds(), make_ds().compute()] ) @@ -1229,3 +1289,49 @@ def test_normalize_token_with_backend(map_ds): map_ds.to_netcdf(tmp_file) read = xr.open_dataset(tmp_file) assert not dask.base.tokenize(map_ds) == dask.base.tokenize(read) + + +@pytest.mark.parametrize( + "compat", ["broadcast_equals", "equals", "identical", "no_conflicts"] +) +def test_lazy_array_equiv_variables(compat): + var1 = xr.Variable(("y", "x"), da.zeros((10, 10), chunks=2)) + var2 = xr.Variable(("y", "x"), da.zeros((10, 10), chunks=2)) + var3 = xr.Variable(("y", "x"), da.zeros((20, 10), chunks=2)) + + with raise_if_dask_computes(): + assert getattr(var1, compat)(var2, equiv=lazy_array_equiv) + # values are actually equal, but we don't know that till we compute, return None + with raise_if_dask_computes(): + assert getattr(var1, compat)(var2 / 2, equiv=lazy_array_equiv) is None + + # shapes are not equal, return False without computes + with raise_if_dask_computes(): + assert getattr(var1, compat)(var3, equiv=lazy_array_equiv) is False + + # if one or both arrays are numpy, 
return None + assert getattr(var1, compat)(var2.compute(), equiv=lazy_array_equiv) is None + assert ( + getattr(var1.compute(), compat)(var2.compute(), equiv=lazy_array_equiv) is None + ) + + with raise_if_dask_computes(): + assert getattr(var1, compat)(var2.transpose("y", "x")) + + +@pytest.mark.parametrize( + "compat", ["broadcast_equals", "equals", "identical", "no_conflicts"] +) +def test_lazy_array_equiv_merge(compat): + da1 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) + da2 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) + da3 = xr.DataArray(da.ones((20, 10), chunks=2), dims=("y", "x")) + + with raise_if_dask_computes(): + xr.merge([da1, da2], compat=compat) + # shapes are not equal; no computes necessary + with raise_if_dask_computes(max_computes=0): + with pytest.raises(ValueError): + xr.merge([da1, da3], compat=compat) + with raise_if_dask_computes(max_computes=2): + xr.merge([da1, da2 / 2], compat=compat) From 4dce93f134e8296ea730104b46ce3372b90304ac Mon Sep 17 00:00:00 2001 From: barronh Date: Tue, 5 Nov 2019 10:42:34 -0500 Subject: [PATCH 26/27] uamiv test using only raw uamiv variables (#3485) * uamiv test using only raw uamiv variables Previous test relied on CF generated metadata, but this test is more robust. * uamiv test using only raw uamiv variables Previous test relied on CF generated metadata, but this test is more robust. * uamiv test using only raw uamiv variables Previous test relied on CF generated metadata, but this test is more robust. * uamiv test using only raw uamiv variables --- xarray/tests/test_backends.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 4bdebe73050..9b000b82b03 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3400,20 +3400,17 @@ def test_uamiv_format_read(self): actual = camxfile.variables["O3"] assert_allclose(expected, actual) - data = np.array(["2002-06-03"], "datetime64[ns]") + data = np.array([[[2002154, 0]]], dtype="i") expected = xr.Variable( - ("TSTEP",), + ("TSTEP", "VAR", "DATE-TIME"), data, dict( - bounds="time_bounds", - long_name=( - "synthesized time coordinate " - + "from SDATE, STIME, STEP " - + "global attributes" - ), + long_name="TFLAG".ljust(16), + var_desc="TFLAG".ljust(80), + units="DATE-TIME".ljust(16), ), ) - actual = camxfile.variables["time"] + actual = camxfile.variables["TFLAG"] assert_allclose(expected, actual) camxfile.close() @@ -3439,18 +3436,15 @@ def test_uamiv_format_mfread(self): actual = camxfile.variables["O3"] assert_allclose(expected, actual) - data1 = np.array(["2002-06-03"], "datetime64[ns]") - data = np.concatenate([data1] * 2, axis=0) + data = np.array([[[2002154, 0]]], dtype="i").repeat(2, 0) attrs = dict( - bounds="time_bounds", - long_name=( - "synthesized time coordinate " - + "from SDATE, STIME, STEP " - + "global attributes" - ), + long_name="TFLAG".ljust(16), + var_desc="TFLAG".ljust(80), + units="DATE-TIME".ljust(16), ) - expected = xr.Variable(("TSTEP",), data, attrs) - actual = camxfile.variables["time"] + dims = ("TSTEP", "VAR", "DATE-TIME") + expected = xr.Variable(dims, data, attrs) + actual = camxfile.variables["TFLAG"] assert_allclose(expected, actual) camxfile.close() From 0e8debfe28286b5fe1f3d27e8dcc8466a62aca6d Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 7 Nov 2019 15:13:50 -0500 Subject: [PATCH 27/27] drop_vars; deprecate drop for 
variables (#3475)

* Deprecate drop for vars, in favor of drop_vars
* docs tweaks
* handle scalars as vars
* allow warning in old whatsnew
* add drop_sel, adjust deprecations based on comments
* whatsnew
* docs
* old-whatsnew
* docstring
* pendingdeprecationwarning
* whatsnew
* whatsnew
* move units tests to drop_sel
* is_scalar (but retain isinstance for mypy)
---
 doc/data-structures.rst             |   4 +-
 doc/indexing.rst                    |   6 +-
 doc/whats-new.rst                   |   7 ++
 xarray/core/concat.py               |   2 +-
 xarray/core/dataarray.py            |  78 ++++++++----
 xarray/core/dataset.py              | 180 +++++++++++++++-------------
 xarray/core/groupby.py              |   2 +-
 xarray/core/merge.py                |   2 +-
 xarray/core/resample.py             |   6 +-
 xarray/tests/test_backends.py       |   8 +-
 xarray/tests/test_dask.py           |  10 +-
 xarray/tests/test_dataarray.py      |  45 +++----
 xarray/tests/test_dataset.py        | 115 +++++++++++-------
 xarray/tests/test_duck_array_ops.py |   3 +-
 xarray/tests/test_interp.py         |   2 +-
 xarray/tests/test_plot.py           |   6 +-
 xarray/tests/test_units.py          |   6 +-
 17 files changed, 286 insertions(+), 196 deletions(-)

diff --git a/doc/data-structures.rst b/doc/data-structures.rst
index d5567f4863e..93cdc7e9765 100644
--- a/doc/data-structures.rst
+++ b/doc/data-structures.rst
@@ -393,14 +393,14 @@ methods (like pandas) for transforming datasets into new objects.

 For removing variables, you can select and drop an explicit list of
 variables by indexing with a list of names or using the
-:py:meth:`~xarray.Dataset.drop` methods to return a new ``Dataset``. These
+:py:meth:`~xarray.Dataset.drop_vars` methods to return a new ``Dataset``. These
 operations keep around coordinates:

 .. ipython:: python

     ds[['temperature']]
     ds[['temperature', 'temperature_double']]
-    ds.drop('temperature')
+    ds.drop_vars('temperature')

 To remove a dimension, you can use :py:meth:`~xarray.Dataset.drop_dims` method.
 Any variables using that dimension are dropped:
diff --git a/doc/indexing.rst b/doc/indexing.rst
index 9ee8f1dddf8..ace960689a8 100644
--- a/doc/indexing.rst
+++ b/doc/indexing.rst
@@ -232,14 +232,14 @@ Using indexing to *assign* values to a subset of dataset (e.g.,
 Dropping labels and dimensions
 ------------------------------

-The :py:meth:`~xarray.Dataset.drop` method returns a new object with the listed
+The :py:meth:`~xarray.Dataset.drop_sel` method returns a new object with the listed
 index labels along a dimension dropped:

 .. ipython:: python

-    ds.drop(space=['IN', 'IL'])
+    ds.drop_sel(space=['IN', 'IL'])

-``drop`` is both a ``Dataset`` and ``DataArray`` method.
+``drop_sel`` is both a ``Dataset`` and ``DataArray`` method.

 Use :py:meth:`~xarray.Dataset.drop_dims` to drop a full dimension from a Dataset.
 Any variables with these dimensions are also dropped:
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index dcaab011e67..0906058469d 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -38,6 +38,12 @@ Breaking changes

 New Features
 ~~~~~~~~~~~~
+- :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` have been added for dropping labels.
+  :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for
+  dropping variables (including coordinates). The existing ``drop`` methods remain as a backward compatible
+  option for dropping either labels or variables, but using the more specific methods is encouraged.
+  (:pull:`3475`)
+  By `Maximilian Roos `_
 - :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`)
   to represent all 'other' dimensions. For example, to move one dimension to the front,
   use `.transpose('x', ...)`. (:pull:`3421`)
@@ -3752,6 +3758,7 @@ Enhancements
   explicitly listed variables or index labels:

   .. ipython:: python
+      :okwarning:

       # drop variables
       ds = xray.Dataset({'x': 0, 'y': 1})
diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index c26153eb0d8..5b4fc078236 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -388,7 +388,7 @@ def ensure_common_dims(vars):
     result = result.set_coords(coord_names)
     result.encoding = result_encoding

-    result = result.drop(unlabeled_dims, errors="ignore")
+    result = result.drop_vars(unlabeled_dims, errors="ignore")

     if coord is not None:
         # add concat dimension last to ensure that its in the final Dataset
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 35ee90fb5c8..d2d37871ee9 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -16,7 +16,6 @@
     TypeVar,
     Union,
     cast,
-    overload,
 )

 import numpy as np
@@ -53,7 +52,7 @@
 from .formatting import format_item
 from .indexes import Indexes, default_indexes
 from .options import OPTIONS
-from .utils import Default, ReprObject, _default, _check_inplace, either_dict_or_kwargs
+from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs
 from .variable import (
     IndexVariable,
     Variable,
@@ -249,7 +248,7 @@ class DataArray(AbstractArray, DataWithCoords):
         Dictionary for holding arbitrary metadata.
     """

-    _accessors: Optional[Dict[str, Any]]
+    _accessors: Optional[Dict[str, Any]]  # noqa
     _coords: Dict[Any, Variable]
     _indexes: Optional[Dict[Hashable, pd.Index]]
     _name: Optional[Hashable]
@@ -1890,41 +1889,72 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra
     def T(self) -> "DataArray":
         return self.transpose()

-    # Drop coords
-    @overload
-    def drop(
-        self, labels: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise"
+    def drop_vars(
+        self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise"
     ) -> "DataArray":
-        ...
+        """Drop variables from this DataArray.
+
+        Parameters
+        ----------
+        names : hashable or iterable of hashables
+            Name(s) of variables to drop.
+        errors: {'raise', 'ignore'}, optional
+            If 'raise' (default), raises a ValueError if any of the variables
+            passed are not in the DataArray. If 'ignore', any given names that are in the
+            DataArray are dropped and no error is raised.
+
+        Returns
+        -------
+        dropped : DataArray
+
+        """
+        ds = self._to_temp_dataset().drop_vars(names, errors=errors)
+        return self._from_temp_dataset(ds)

-    # Drop index labels along dimension
-    @overload  # noqa: F811
     def drop(
-        self, labels: Any, dim: Hashable, *, errors: str = "raise"  # array-like
+        self,
+        labels: Mapping = None,
+        dim: Hashable = None,
+        *,
+        errors: str = "raise",
+        **labels_kwargs,
     ) -> "DataArray":
-        ...
+        """Backward compatible method based on `drop_vars` and `drop_sel`.

-    def drop(self, labels, dim=None, *, errors="raise"):  # noqa: F811
-        """Drop coordinates or index labels from this DataArray.
+        Using either `drop_vars` or `drop_sel` is encouraged.
+        """
+        ds = self._to_temp_dataset().drop(labels, dim, errors=errors)
+        return self._from_temp_dataset(ds)
+
+    def drop_sel(
+        self,
+        labels: Mapping[Hashable, Any] = None,
+        *,
+        errors: str = "raise",
+        **labels_kwargs,
+    ) -> "DataArray":
+        """Drop index labels from this DataArray.

         Parameters
         ----------
-        labels : hashable or sequence of hashables
-            Name(s) of coordinates or index labels to drop.
-            If dim is not None, labels can be any array-like.
-        dim : hashable, optional
-            Dimension along which to drop index labels. By default (if
-            ``dim is None``), drops coordinates rather than index labels.
+        labels : Mapping[Hashable, Any]
+            Index labels to drop
         errors: {'raise', 'ignore'}, optional
             If 'raise' (default), raises a ValueError error if
-            any of the coordinates or index labels passed are not
-            in the array. If 'ignore', any given labels that are in the
-            array are dropped and no error is raised.
+            any of the index labels passed are not
+            in the dataset. If 'ignore', any given labels that are in the
+            dataset are dropped and no error is raised.
+        **labels_kwargs : {dim: label, ...}, optional
+            The keyword arguments form of ``dim`` and ``labels``.
+
         Returns
         -------
         dropped : DataArray
         """
-        ds = self._to_temp_dataset().drop(labels, dim, errors=errors)
+        if labels_kwargs or isinstance(labels, dict):
+            labels = either_dict_or_kwargs(labels, labels_kwargs, "drop")
+
+        ds = self._to_temp_dataset().drop_sel(labels, errors=errors)
         return self._from_temp_dataset(ds)

     def dropna(
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 978242e5f6b..2cadc90334c 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -25,7 +25,6 @@
     TypeVar,
     Union,
     cast,
-    overload,
 )

 import numpy as np
@@ -80,6 +79,7 @@
     hashable,
     is_dict_like,
     is_list_like,
+    is_scalar,
     maybe_wrap_array,
 )
 from .variable import IndexVariable, Variable, as_variable, broadcast_variables
@@ -3519,39 +3519,98 @@ def _assert_all_in_dataset(
                 "cannot be found in this dataset"
             )

-    # Drop variables
-    @overload  # noqa: F811
-    def drop(
-        self, labels: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise"
+    def drop_vars(
+        self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise"
     ) -> "Dataset":
-        ...
+        """Drop variables from this dataset.

-    # Drop index labels along dimension
-    @overload  # noqa: F811
-    def drop(
-        self, labels: Any, dim: Hashable, *, errors: str = "raise"  # array-like
-    ) -> "Dataset":
-        ...
+        Parameters
+        ----------
+        names : hashable or iterable of hashables
+            Name(s) of variables to drop.
+        errors: {'raise', 'ignore'}, optional
+            If 'raise' (default), raises a ValueError if any of the variables
+            passed are not in the dataset. If 'ignore', any given names that are in the
+            dataset are dropped and no error is raised.

-    def drop(  # noqa: F811
-        self, labels=None, dim=None, *, errors="raise", **labels_kwargs
-    ):
-        """Drop variables or index labels from this dataset.
+        Returns
+        -------
+        dropped : Dataset
+
+        """
+        # the Iterable check is required for mypy
+        if is_scalar(names) or not isinstance(names, Iterable):
+            names = {names}
+        else:
+            names = set(names)
+        if errors == "raise":
+            self._assert_all_in_dataset(names)
+
+        variables = {k: v for k, v in self._variables.items() if k not in names}
+        coord_names = {k for k in self._coord_names if k in variables}
+        indexes = {k: v for k, v in self.indexes.items() if k not in names}
+        return self._replace_with_new_dims(
+            variables, coord_names=coord_names, indexes=indexes
+        )
+
+    def drop(self, labels=None, dim=None, *, errors="raise", **labels_kwargs):
+        """Backward compatible method based on `drop_vars` and `drop_sel`.
+
+        Using either `drop_vars` or `drop_sel` is encouraged.
+        """
+        if errors not in ["raise", "ignore"]:
+            raise ValueError('errors must be either "raise" or "ignore"')
+
+        if is_dict_like(labels) and not isinstance(labels, dict):
+            warnings.warn(
+                "dropping coordinates using `drop` is deprecated; use drop_vars.",
+                FutureWarning,
+                stacklevel=2,
+            )
+            return self.drop_vars(labels, errors=errors)
+
+        if labels_kwargs or isinstance(labels, dict):
+            if dim is not None:
+                raise ValueError("cannot specify dim and dict-like arguments.")
+            labels = either_dict_or_kwargs(labels, labels_kwargs, "drop")
+
+        if dim is None and (is_list_like(labels) or is_scalar(labels)):
+            warnings.warn(
+                "dropping variables using `drop` will be deprecated; using drop_vars is encouraged.",
+                PendingDeprecationWarning,
+                stacklevel=2,
+            )
+            return self.drop_vars(labels, errors=errors)
+        if dim is not None:
+            warnings.warn(
+                "dropping labels using list-like labels is deprecated; using "
+                "dict-like arguments with `drop_sel`, e.g. `ds.drop_sel(dim=[labels])`.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            return self.drop_sel({dim: labels}, errors=errors, **labels_kwargs)
+
+        warnings.warn(
+            "dropping labels using `drop` will be deprecated; using drop_sel is encouraged.",
+            PendingDeprecationWarning,
+            stacklevel=2,
+        )
+        return self.drop_sel(labels, errors=errors)
+
+    def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs):
+        """Drop index labels from this dataset.

         Parameters
         ----------
-        labels : hashable or iterable of hashables
-            Name(s) of variables or index labels to drop.
-        dim : None or hashable, optional
-            Dimension along which to drop index labels. By default (if
-            ``dim is None``), drops variables rather than index labels.
+        labels : Mapping[Hashable, Any]
+            Index labels to drop
         errors: {'raise', 'ignore'}, optional
             If 'raise' (default), raises a ValueError error if
-            any of the variable or index labels passed are not
+            any of the index labels passed are not
             in the dataset. If 'ignore', any given labels that are in the
             dataset are dropped and no error is raised.
         **labels_kwargs : {dim: label, ...}, optional
-            The keyword arguments form of ``dim`` and ``labels``.
+ The keyword arguments form of ``dim`` and ``labels` Returns ------- @@ -3562,7 +3621,7 @@ def drop( # noqa: F811 >>> data = np.random.randn(2, 3) >>> labels = ['a', 'b', 'c'] >>> ds = xr.Dataset({'A': (['x', 'y'], data), 'y': labels}) - >>> ds.drop(y=['a', 'c']) + >>> ds.drop_sel(y=['a', 'c']) Dimensions: (x: 2, y: 1) Coordinates: @@ -3570,7 +3629,7 @@ def drop( # noqa: F811 Dimensions without coordinates: x Data variables: A (x, y) float64 -0.3454 0.1734 - >>> ds.drop(y='b') + >>> ds.drop_sel(y='b') Dimensions: (x: 2, y: 2) Coordinates: @@ -3582,61 +3641,22 @@ def drop( # noqa: F811 if errors not in ["raise", "ignore"]: raise ValueError('errors must be either "raise" or "ignore"') - if is_dict_like(labels) and not isinstance(labels, dict): - warnings.warn( - "dropping coordinates using key values of dict-like labels is " - "deprecated; use drop_vars or a list of coordinates.", - FutureWarning, - stacklevel=2, - ) - if dim is not None and is_list_like(labels): - warnings.warn( - "dropping dimensions using list-like labels is deprecated; use " - "dict-like arguments.", - DeprecationWarning, - stacklevel=2, - ) + labels = either_dict_or_kwargs(labels, labels_kwargs, "drop") - if labels_kwargs or isinstance(labels, dict): - labels_kwargs = either_dict_or_kwargs(labels, labels_kwargs, "drop") - if dim is not None: - raise ValueError("cannot specify dim and dict-like arguments.") - ds = self - for dim, labels in labels_kwargs.items(): - ds = ds._drop_labels(labels, dim, errors=errors) - return ds - elif dim is None: - if isinstance(labels, str) or not isinstance(labels, Iterable): - labels = {labels} - else: - labels = set(labels) - return self._drop_vars(labels, errors=errors) - else: - return self._drop_labels(labels, dim, errors=errors) - - def _drop_labels(self, labels=None, dim=None, errors="raise"): - # Don't cast to set, as it would harm performance when labels - # is a large numpy array - if utils.is_scalar(labels): - labels = [labels] - labels = np.asarray(labels) - try: - index = self.indexes[dim] - except KeyError: - raise ValueError("dimension %r does not have coordinate labels" % dim) - new_index = index.drop(labels, errors=errors) - return self.loc[{dim: new_index}] - - def _drop_vars(self, names: set, errors: str = "raise") -> "Dataset": - if errors == "raise": - self._assert_all_in_dataset(names) - - variables = {k: v for k, v in self._variables.items() if k not in names} - coord_names = {k for k in self._coord_names if k in variables} - indexes = {k: v for k, v in self.indexes.items() if k not in names} - return self._replace_with_new_dims( - variables, coord_names=coord_names, indexes=indexes - ) + ds = self + for dim, labels_for_dim in labels.items(): + # Don't cast to set, as it would harm performance when labels + # is a large numpy array + if utils.is_scalar(labels_for_dim): + labels_for_dim = [labels_for_dim] + labels_for_dim = np.asarray(labels_for_dim) + try: + index = self.indexes[dim] + except KeyError: + raise ValueError("dimension %r does not have coordinate labels" % dim) + new_index = index.drop(labels_for_dim, errors=errors) + ds = ds.loc[{dim: new_index}] + return ds def drop_dims( self, drop_dims: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" @@ -3679,7 +3699,7 @@ def drop_dims( ) drop_vars = {k for k, v in self._variables.items() if set(v.dims) & drop_dims} - return self._drop_vars(drop_vars) + return self.drop_vars(drop_vars) def transpose(self, *dims: Hashable) -> "Dataset": """Return a new Dataset object with all array dimensions 
transposed. diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 209ac14184b..c8906e34737 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -775,7 +775,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): ) if np.asarray(q, dtype=np.float64).ndim == 0: - out = out.drop("quantile") + out = out.drop_vars("quantile") return out def reduce( diff --git a/xarray/core/merge.py b/xarray/core/merge.py index daf0c3b059f..10c7804d718 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -859,6 +859,6 @@ def dataset_update_method( if c not in value.dims and c in dataset.coords ] if coord_names: - other[key] = value.drop(coord_names) + other[key] = value.drop_vars(coord_names) return merge_core([dataset, other], priority_arg=1, indexes=dataset.indexes) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 998964273be..2cb1bd55e19 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -47,7 +47,7 @@ def _upsample(self, method, *args, **kwargs): if k == self._dim: continue if self._dim in v.dims: - self._obj = self._obj.drop(k) + self._obj = self._obj.drop_vars(k) if method == "asfreq": return self.mean(self._dim) @@ -146,7 +146,7 @@ def _interpolate(self, kind="linear"): dummy = self._obj.copy() for k, v in self._obj.coords.items(): if k != self._dim and self._dim in v.dims: - dummy = dummy.drop(k) + dummy = dummy.drop_vars(k) return dummy.interp( assume_sorted=True, method=kind, @@ -218,7 +218,7 @@ def apply(self, func, shortcut=False, args=(), **kwargs): # dimension, then we need to do so before we can rename the proxy # dimension we used. if self._dim in combined.coords: - combined = combined.drop(self._dim) + combined = combined.drop_vars(self._dim) if self._resample_dim in combined.dims: combined = combined.rename({self._resample_dim: self._dim}) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 9b000b82b03..de3a7eadab0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -800,7 +800,7 @@ def equals_latlon(obj): assert "coordinates" not in ds["lat"].attrs assert "coordinates" not in ds["lon"].attrs - modified = original.drop(["temp", "precip"]) + modified = original.drop_vars(["temp", "precip"]) with self.roundtrip(modified) as actual: assert_identical(actual, modified) with create_tmp_file() as tmp_file: @@ -2177,7 +2177,7 @@ def test_cross_engine_read_write_netcdf4(self): # Drop dim3, because its labels include strings. These appear to be # not properly read with python-netCDF4, which converts them into # unicode instead of leaving them as bytes. 
- data = create_test_data().drop("dim3") + data = create_test_data().drop_vars("dim3") data.attrs["foo"] = "bar" valid_engines = ["netcdf4", "h5netcdf"] for write_engine in valid_engines: @@ -2344,7 +2344,7 @@ def test_open_twice(self): def test_open_fileobj(self): # open in-memory datasets instead of local file paths - expected = create_test_data().drop("dim3") + expected = create_test_data().drop_vars("dim3") expected.attrs["foo"] = "bar" with create_tmp_file() as tmp_file: expected.to_netcdf(tmp_file, engine="h5netcdf") @@ -4190,7 +4190,7 @@ def test_open_dataarray_options(self): with create_tmp_file() as tmp: data.to_netcdf(tmp) - expected = data.drop("y") + expected = data.drop_vars("y") with open_dataarray(tmp, drop_variables=["y"]) as loaded: assert_identical(expected, loaded) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 34115b29b23..fa8ae9991d7 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1129,11 +1129,11 @@ def test_map_blocks_to_array(map_ds): [ lambda x: x, lambda x: x.to_dataset(), - lambda x: x.drop("x"), + lambda x: x.drop_vars("x"), lambda x: x.expand_dims(k=[1, 2, 3]), lambda x: x.assign_coords(new_coord=("y", x.y * 2)), lambda x: x.astype(np.int32), - # TODO: [lambda x: x.isel(x=1).drop("x"), map_da], + # TODO: [lambda x: x.isel(x=1).drop_vars("x"), map_da], ], ) def test_map_blocks_da_transformations(func, map_da): @@ -1147,9 +1147,9 @@ def test_map_blocks_da_transformations(func, map_da): "func", [ lambda x: x, - lambda x: x.drop("cxy"), - lambda x: x.drop("a"), - lambda x: x.drop("x"), + lambda x: x.drop_vars("cxy"), + lambda x: x.drop_vars("a"), + lambda x: x.drop_vars("x"), lambda x: x.expand_dims(k=[1, 2, 3]), lambda x: x.rename({"a": "new1", "b": "new2"}), # TODO: [lambda x: x.isel(x=1)], diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2c823b0c20a..acfe684d220 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -906,7 +906,7 @@ def test_sel_dataarray(self): assert_array_equal(actual, da.isel(x=[0, 1, 2])) assert "new_dim" in actual.dims assert "new_dim" in actual.coords - assert_equal(actual["new_dim"].drop("x"), ind["new_dim"]) + assert_equal(actual["new_dim"].drop_vars("x"), ind["new_dim"]) def test_sel_invalid_slice(self): array = DataArray(np.arange(10), [("x", np.arange(10))]) @@ -1660,7 +1660,7 @@ def test_expand_dims_with_greater_dim_size(self): coords=expected_coords, dims=list(expected_coords.keys()), attrs={"key": "entry"}, - ).drop(["y", "dim_0"]) + ).drop_vars(["y", "dim_0"]) assert_identical(expected, actual) # Test with kwargs instead of passing dict to dim arg. 
@@ -1677,7 +1677,7 @@ def test_expand_dims_with_greater_dim_size(self): }, dims=["dim_1", "x", "dim_0"], attrs={"key": "entry"}, - ).drop("dim_0") + ).drop_vars("dim_0") assert_identical(other_way_expected, other_way) def test_set_index(self): @@ -1993,7 +1993,7 @@ def test_stack_unstack(self): ) pd.util.testing.assert_index_equal(a, b) - actual = orig.stack(z=["x", "y"]).unstack("z").drop(["x", "y"]) + actual = orig.stack(z=["x", "y"]).unstack("z").drop_vars(["x", "y"]) assert_identical(orig, actual) dims = ["a", "b", "c", "d", "e"] @@ -2001,11 +2001,11 @@ def test_stack_unstack(self): stacked = orig.stack(ab=["a", "b"], cd=["c", "d"]) unstacked = stacked.unstack(["ab", "cd"]) - roundtripped = unstacked.drop(["a", "b", "c", "d"]).transpose(*dims) + roundtripped = unstacked.drop_vars(["a", "b", "c", "d"]).transpose(*dims) assert_identical(orig, roundtripped) unstacked = stacked.unstack() - roundtripped = unstacked.drop(["a", "b", "c", "d"]).transpose(*dims) + roundtripped = unstacked.drop_vars(["a", "b", "c", "d"]).transpose(*dims) assert_identical(orig, roundtripped) def test_stack_unstack_decreasing_coordinate(self): @@ -2109,40 +2109,43 @@ def test_drop_coordinates(self): expected = DataArray(np.random.randn(2, 3), dims=["x", "y"]) arr = expected.copy() arr.coords["z"] = 2 - actual = arr.drop("z") + actual = arr.drop_vars("z") assert_identical(expected, actual) with pytest.raises(ValueError): - arr.drop("not found") + arr.drop_vars("not found") - actual = expected.drop("not found", errors="ignore") + actual = expected.drop_vars("not found", errors="ignore") assert_identical(actual, expected) with raises_regex(ValueError, "cannot be found"): - arr.drop("w") + arr.drop_vars("w") - actual = expected.drop("w", errors="ignore") + actual = expected.drop_vars("w", errors="ignore") assert_identical(actual, expected) renamed = arr.rename("foo") with raises_regex(ValueError, "cannot be found"): - renamed.drop("foo") + renamed.drop_vars("foo") - actual = renamed.drop("foo", errors="ignore") + actual = renamed.drop_vars("foo", errors="ignore") assert_identical(actual, renamed) def test_drop_index_labels(self): arr = DataArray(np.random.randn(2, 3), coords={"y": [0, 1, 2]}, dims=["x", "y"]) - actual = arr.drop([0, 1], dim="y") + actual = arr.drop_sel(y=[0, 1]) expected = arr[:, 2:] assert_identical(actual, expected) with raises_regex((KeyError, ValueError), "not .* in axis"): - actual = arr.drop([0, 1, 3], dim="y") + actual = arr.drop_sel(y=[0, 1, 3]) - actual = arr.drop([0, 1, 3], dim="y", errors="ignore") + actual = arr.drop_sel(y=[0, 1, 3], errors="ignore") assert_identical(actual, expected) + with pytest.warns(DeprecationWarning): + arr.drop([0, 1, 3], dim="y", errors="ignore") + def test_dropna(self): x = np.random.randn(4, 4) x[::2, 0] = np.nan @@ -3360,7 +3363,7 @@ def test_to_pandas(self): da = DataArray(np.random.randn(*shape), dims=dims) with warnings.catch_warnings(): warnings.filterwarnings("ignore", r"\W*Panel is deprecated") - roundtripped = DataArray(da.to_pandas()).drop(dims) + roundtripped = DataArray(da.to_pandas()).drop_vars(dims) assert_identical(da, roundtripped) with raises_regex(ValueError, "cannot convert"): @@ -3411,11 +3414,13 @@ def test_to_and_from_series(self): assert_array_equal(expected.index.values, actual.index.values) assert "foo" == actual.name # test roundtrip - assert_identical(self.dv, DataArray.from_series(actual).drop(["x", "y"])) + assert_identical(self.dv, DataArray.from_series(actual).drop_vars(["x", "y"])) # test name is None actual.name = None expected_da 
= self.dv.rename(None) - assert_identical(expected_da, DataArray.from_series(actual).drop(["x", "y"])) + assert_identical( + expected_da, DataArray.from_series(actual).drop_vars(["x", "y"]) + ) @requires_sparse def test_from_series_sparse(self): @@ -3478,7 +3483,7 @@ def test_to_and_from_dict(self): # and the most bare bones representation still roundtrips d = {"name": "foo", "dims": ("x", "y"), "data": array.values} - assert_identical(array.drop("x"), DataArray.from_dict(d)) + assert_identical(array.drop_vars("x"), DataArray.from_dict(d)) # missing a dims in the coords d = { diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b9fa20fab26..50e78c9f685 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -322,7 +322,7 @@ def __repr__(self): def test_info(self): ds = create_test_data(seed=123) - ds = ds.drop("dim3") # string type prints differently in PY2 vs PY3 + ds = ds.drop_vars("dim3") # string type prints differently in PY2 vs PY3 ds.attrs["unicode_attr"] = "ba®" ds.attrs["string_attr"] = "bar" @@ -509,7 +509,9 @@ def test_constructor_compat(self): {"c": (("x", "y"), np.zeros((2, 3))), "x": [0, 1]}, ) - actual = Dataset({"a": original["a"][:, 0], "b": original["a"][0].drop("x")}) + actual = Dataset( + {"a": original["a"][:, 0], "b": original["a"][0].drop_vars("x")} + ) assert_identical(expected, actual) data = {"x": DataArray(0, coords={"y": 3}), "y": ("z", [1, 1, 1])} @@ -775,9 +777,9 @@ def test_coords_set(self): one_coord.reset_coords("x") actual = all_coords.reset_coords("zzz", drop=True) - expected = all_coords.drop("zzz") + expected = all_coords.drop_vars("zzz") assert_identical(expected, actual) - expected = two_coords.drop("zzz") + expected = two_coords.drop_vars("zzz") assert_identical(expected, actual) def test_coords_to_dataset(self): @@ -954,7 +956,7 @@ def test_dask_is_lazy(self): ds.fillna(0) ds.rename({"dim1": "foobar"}) ds.set_coords("var1") - ds.drop("var1") + ds.drop_vars("var1") def test_isel(self): data = create_test_data() @@ -1097,7 +1099,7 @@ def test_isel_fancy(self): actual = data.isel(dim1=stations["dim1s"], dim2=stations["dim2s"]) assert "station" in actual.coords assert "station" in actual.dims - assert_identical(actual["station"].drop(["dim2"]), stations["station"]) + assert_identical(actual["station"].drop_vars(["dim2"]), stations["station"]) with raises_regex(ValueError, "conflicting values for "): data.isel( @@ -1123,7 +1125,7 @@ def test_isel_fancy(self): assert "dim2" in actual.coords assert "a" in actual["dim2"].dims - assert_identical(actual["a"].drop(["dim2"]), stations["a"]) + assert_identical(actual["a"].drop_vars(["dim2"]), stations["a"]) assert_identical(actual["b"], stations["b"]) expected_var1 = data["var1"].variable[ stations["dim1s"].variable, stations["dim2s"].variable @@ -1132,7 +1134,7 @@ def test_isel_fancy(self): stations["dim1s"].variable, stations["dim2s"].variable ] expected_var3 = data["var3"].variable[slice(None), stations["dim1s"].variable] - assert_equal(actual["a"].drop("dim2"), stations["a"]) + assert_equal(actual["a"].drop_vars("dim2"), stations["a"]) assert_array_equal(actual["var1"], expected_var1) assert_array_equal(actual["var2"], expected_var2) assert_array_equal(actual["var3"], expected_var3) @@ -1200,7 +1202,7 @@ def test_isel_dataarray(self): indexing_da = indexing_da < 3 actual = data.isel(dim2=indexing_da) assert_identical( - actual["dim2"].drop("non_dim").drop("non_dim2"), data["dim2"][:2] + actual["dim2"].drop_vars("non_dim").drop_vars("non_dim2"), 
data["dim2"][:2] ) assert_identical(actual["non_dim"], indexing_da["non_dim"][:2]) assert_identical(actual["non_dim2"], indexing_da["non_dim2"]) @@ -1286,8 +1288,10 @@ def test_sel_dataarray(self): expected = data.isel(dim2=[0, 1, 2]).rename({"dim2": "new_dim"}) assert "new_dim" in actual.dims assert "new_dim" in actual.coords - assert_equal(actual.drop("new_dim").drop("dim2"), expected.drop("new_dim")) - assert_equal(actual["new_dim"].drop("dim2"), ind["new_dim"]) + assert_equal( + actual.drop_vars("new_dim").drop_vars("dim2"), expected.drop_vars("new_dim") + ) + assert_equal(actual["new_dim"].drop_vars("dim2"), ind["new_dim"]) # with conflicted coordinate (silently ignored) ind = DataArray( @@ -1304,10 +1308,12 @@ def test_sel_dataarray(self): coords={"new_dim": ["a", "b", "c"], "dim2": 3}, ) actual = data.sel(dim2=ind) - assert_equal(actual["new_dim"].drop("dim2"), ind["new_dim"].drop("dim2")) + assert_equal( + actual["new_dim"].drop_vars("dim2"), ind["new_dim"].drop_vars("dim2") + ) expected = data.isel(dim2=[0, 1, 2]) expected["dim2"] = (("new_dim"), expected["dim2"].values) - assert_equal(actual["dim2"].drop("new_dim"), expected["dim2"]) + assert_equal(actual["dim2"].drop_vars("new_dim"), expected["dim2"]) assert actual["var1"].dims == ("dim1", "new_dim") # with non-dimensional coordinate @@ -1322,7 +1328,7 @@ def test_sel_dataarray(self): ) actual = data.sel(dim2=ind) expected = data.isel(dim2=[0, 1, 2]) - assert_equal(actual.drop("new_dim"), expected) + assert_equal(actual.drop_vars("new_dim"), expected) assert np.allclose(actual["new_dim"].values, ind["new_dim"].values) def test_sel_dataarray_mindex(self): @@ -1554,8 +1560,8 @@ def test_sel_fancy(self): expected_ary = data["foo"][[0, 1, 2], [0, 2, 1]] actual = data.sel(x=idx_x, y=idx_y) assert_array_equal(expected_ary, actual["foo"]) - assert_identical(actual["a"].drop("x"), idx_x["a"]) - assert_identical(actual["b"].drop("y"), idx_y["b"]) + assert_identical(actual["a"].drop_vars("x"), idx_x["a"]) + assert_identical(actual["b"].drop_vars("y"), idx_y["b"]) with pytest.raises(KeyError): data.sel(x=[2.5], y=[2.0], method="pad", tolerance=1e-3) @@ -2094,36 +2100,50 @@ def test_variable_indexing(self): def test_drop_variables(self): data = create_test_data() - assert_identical(data, data.drop([])) + assert_identical(data, data.drop_vars([])) expected = Dataset({k: data[k] for k in data.variables if k != "time"}) - actual = data.drop("time") + actual = data.drop_vars("time") assert_identical(expected, actual) - actual = data.drop(["time"]) + actual = data.drop_vars(["time"]) assert_identical(expected, actual) with raises_regex(ValueError, "cannot be found"): - data.drop("not_found_here") + data.drop_vars("not_found_here") + + actual = data.drop_vars("not_found_here", errors="ignore") + assert_identical(data, actual) + + actual = data.drop_vars(["not_found_here"], errors="ignore") + assert_identical(data, actual) + + actual = data.drop_vars(["time", "not_found_here"], errors="ignore") + assert_identical(expected, actual) + + # deprecated approach with `drop` works (straight copy paste from above) - actual = data.drop("not_found_here", errors="ignore") + with pytest.warns(PendingDeprecationWarning): + actual = data.drop("not_found_here", errors="ignore") assert_identical(data, actual) - actual = data.drop(["not_found_here"], errors="ignore") + with pytest.warns(PendingDeprecationWarning): + actual = data.drop(["not_found_here"], errors="ignore") assert_identical(data, actual) - actual = data.drop(["time", "not_found_here"], 
errors="ignore") + with pytest.warns(PendingDeprecationWarning): + actual = data.drop(["time", "not_found_here"], errors="ignore") assert_identical(expected, actual) def test_drop_index_labels(self): data = Dataset({"A": (["x", "y"], np.random.randn(2, 3)), "x": ["a", "b"]}) with pytest.warns(DeprecationWarning): - actual = data.drop(["a"], "x") + actual = data.drop(["a"], dim="x") expected = data.isel(x=[1]) assert_identical(expected, actual) with pytest.warns(DeprecationWarning): - actual = data.drop(["a", "b"], "x") + actual = data.drop(["a", "b"], dim="x") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) @@ -2147,30 +2167,30 @@ def test_drop_index_labels(self): # DataArrays as labels are a nasty corner case as they are not # Iterable[Hashable] - DataArray.__iter__ yields scalar DataArrays. - actual = data.drop(DataArray(["a", "b", "c"]), "x", errors="ignore") + actual = data.drop_sel(x=DataArray(["a", "b", "c"]), errors="ignore") expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) + with pytest.warns(DeprecationWarning): + data.drop(DataArray(["a", "b", "c"]), dim="x", errors="ignore") + assert_identical(expected, actual) with raises_regex(ValueError, "does not have coordinate labels"): - data.drop(1, "y") + data.drop_sel(y=1) def test_drop_labels_by_keyword(self): - # Tests for #2910: Support for a additional `drop()` API. data = Dataset( {"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)} ) # Basic functionality. assert len(data.coords["x"]) == 2 - # In the future, this will break. with pytest.warns(DeprecationWarning): ds1 = data.drop(["a"], dim="x") - ds2 = data.drop(x="a") - ds3 = data.drop(x=["a"]) - ds4 = data.drop(x=["a", "b"]) - ds5 = data.drop(x=["a", "b"], y=range(0, 6, 2)) + ds2 = data.drop_sel(x="a") + ds3 = data.drop_sel(x=["a"]) + ds4 = data.drop_sel(x=["a", "b"]) + ds5 = data.drop_sel(x=["a", "b"], y=range(0, 6, 2)) - # In the future, this will result in different behavior. arr = DataArray(range(3), dims=["c"]) with pytest.warns(FutureWarning): data.drop(arr.coords) @@ -2187,10 +2207,11 @@ def test_drop_labels_by_keyword(self): # Error handling if user tries both approaches. 
with pytest.raises(ValueError): data.drop(labels=["a"], x="a") - with pytest.raises(ValueError): - data.drop(dim="x", x="a") with pytest.raises(ValueError): data.drop(labels=["a"], dim="x", x="a") + warnings.filterwarnings("ignore", r"\W*drop") + with pytest.raises(ValueError): + data.drop(dim="x", x="a") def test_drop_dims(self): data = xr.Dataset( @@ -2203,15 +2224,15 @@ def test_drop_dims(self): ) actual = data.drop_dims("x") - expected = data.drop(["A", "B", "x"]) + expected = data.drop_vars(["A", "B", "x"]) assert_identical(expected, actual) actual = data.drop_dims("y") - expected = data.drop("A") + expected = data.drop_vars("A") assert_identical(expected, actual) actual = data.drop_dims(["x", "y"]) - expected = data.drop(["A", "B", "x"]) + expected = data.drop_vars(["A", "B", "x"]) assert_identical(expected, actual) with pytest.raises((ValueError, KeyError)): @@ -2230,7 +2251,7 @@ def test_drop_dims(self): actual = data.drop_dims("z", errors="wrong_value") actual = data.drop_dims(["x", "y", "z"], errors="ignore") - expected = data.drop(["A", "B", "x"]) + expected = data.drop_vars(["A", "B", "x"]) assert_identical(expected, actual) def test_copy(self): @@ -2571,7 +2592,7 @@ def test_expand_dims_mixed_int_and_coords(self): original["x"].values * np.ones([4, 3, 3]), coords=dict(d=range(4), e=["l", "m", "n"], a=np.linspace(0, 1, 3)), dims=["d", "e", "a"], - ).drop("d"), + ).drop_vars("d"), "y": xr.DataArray( original["y"].values * np.ones([4, 3, 4, 3]), coords=dict( @@ -2581,7 +2602,7 @@ def test_expand_dims_mixed_int_and_coords(self): a=np.linspace(0, 1, 3), ), dims=["d", "e", "b", "a"], - ).drop("d"), + ).drop_vars("d"), }, coords={"c": np.linspace(0, 1, 5)}, ) @@ -3059,7 +3080,7 @@ def test_setitem_with_coords(self): np.arange(10), dims="dim3", coords={"numbers": ("dim3", np.arange(10))} ) expected = ds.copy() - expected["var3"] = other.drop("numbers") + expected["var3"] = other.drop_vars("numbers") actual = ds.copy() actual["var3"] = other assert_identical(expected, actual) @@ -4504,7 +4525,9 @@ def test_apply(self): actual = data.apply(lambda x: x.mean(keep_attrs=True), keep_attrs=True) assert_identical(expected, actual) - assert_identical(data.apply(lambda x: x, keep_attrs=True), data.drop("time")) + assert_identical( + data.apply(lambda x: x, keep_attrs=True), data.drop_vars("time") + ) def scale(x, multiple=1): return multiple * x @@ -4514,7 +4537,7 @@ def scale(x, multiple=1): assert_identical(actual["numbers"], data["numbers"]) actual = data.apply(np.asarray) - expected = data.drop("time") # time is not used on a data var + expected = data.drop_vars("time") # time is not used on a data var assert_equal(expected, actual) def make_example_math_dataset(self): @@ -4616,7 +4639,7 @@ def test_dataset_math_auto_align(self): assert_identical(expected, actual) actual = ds.isel(y=slice(1)) + ds.isel(y=slice(1, None)) - expected = 2 * ds.drop(ds.y, dim="y") + expected = 2 * ds.drop_sel(y=ds.y) assert_equal(actual, expected) actual = ds + ds[["bar"]] diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 9df2f167cf2..f678af2fec5 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -441,7 +441,8 @@ def test_argmin_max(dim_num, dtype, contains_nan, dask, func, skipna, aggdim): ) expected = getattr(da, func)(dim=aggdim, skipna=skipna) assert_allclose( - actual.drop(list(actual.coords)), expected.drop(list(expected.coords)) + actual.drop_vars(list(actual.coords)), + expected.drop_vars(list(expected.coords)), ) diff 
--git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index b9dc9a71acc..b93325d7eab 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -553,7 +553,7 @@ def test_datetime_single_string(): actual = da.interp(time="2000-01-01T12:00") expected = xr.DataArray(0.5) - assert_allclose(actual.drop("time"), expected) + assert_allclose(actual.drop_vars("time"), expected) @requires_cftime diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 7deabd46eae..6e283ea01da 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -1837,7 +1837,11 @@ def test_default_labels(self): assert substring_in_axes(self.darray.name, ax) def test_test_empty_cell(self): - g = self.darray.isel(row=1).drop("row").plot(col="col", hue="hue", col_wrap=2) + g = ( + self.darray.isel(row=1) + .drop_vars("row") + .plot(col="col", hue="hue", col_wrap=2) + ) bottomright = g.axes[-1, -1] assert not bottomright.has_data() assert not bottomright.get_visible() diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 9d14104bb50..80063f8b4bc 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1093,7 +1093,7 @@ def test_content_manipulation(self, func, dtype): "func", ( pytest.param( - method("drop", labels=np.array([1, 5]), dim="x"), + method("drop_sel", labels=dict(x=np.array([1, 5]))), marks=pytest.mark.xfail( reason="selecting using incompatible units does not raise" ), @@ -1128,9 +1128,9 @@ def test_content_manipulation_with_units(self, func, unit, error, dtype): expected = attach_units( func(strip_units(data_array), **stripped_kwargs), - {"data": quantity.units if func.name == "drop" else unit, "x": x.units}, + {"data": quantity.units if func.name == "drop_sel" else unit, "x": x.units}, ) - if error is not None and func.name == "drop": + if error is not None and func.name == "drop_sel": with pytest.raises(error): func(data_array, **kwargs) else:
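Taken together, patch 27 splits the overloaded ``drop`` into two narrowly scoped methods. A minimal usage sketch of the resulting API (hypothetical data; the calls mirror the docstring examples above):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"A": (("x", "y"), np.random.randn(2, 3))},
        coords={"x": ["a", "b"], "y": [10, 20, 30]},
    )

    ds.drop_vars("A")            # drop a variable or coordinate by name
    ds.drop_sel(x="a", y=[10])   # drop index labels along one or more dimensions

    ds.drop("A")  # still works, but now emits PendingDeprecationWarning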