From 35c1b5bdd6086c5a9bcdde723eba285b9dd34ab1 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 15 Mar 2021 20:06:18 +0100 Subject: [PATCH 01/16] add tests for most keep_attrs values for apply_ufunc for variables --- xarray/tests/test_computation.py | 62 ++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 4890536a5d7..33ac0573769 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -563,6 +563,68 @@ def add(a, b, keep_attrs): assert_identical(actual.x.attrs, a.x.attrs) +@pytest.mark.parametrize( + ["strategy", "attrs", "expected", "error"], + ( + pytest.param( + False, + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="False", + ), + pytest.param( + True, + [{"a": 1}, {"a": 2}, {"a": 3}], + {"a": 1}, + False, + id="True", + ), + pytest.param( + "override", + [{"a": 1}, {"a": 2}, {"a": 3}], + {"a": 1}, + False, + id="override", + ), + pytest.param( + "drop", + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="drop", + ), + pytest.param( + "drop_conflicts", + [{"a": 1, "b": 2}, {"b": 1, "c": 3}, {"c": 3, "d": 4}], + {"a": 1, "c": 3, "d": 4}, + False, + id="drop_conflicts", + ), + pytest.param( + "no_conflicts", + [{"a": 1}, {"b": 2}, {"b": 3}], + None, + True, + id="no_conflicts", + ), + ), +) +def test_keep_attrs_strategies_variable(strategy, attrs, expected, error): + a = xr.Variable("x", [0, 1], attrs=attrs[0]) + b = xr.Variable("x", [0, 1], attrs=attrs[1]) + c = xr.Variable("x", [0, 1], attrs=attrs[2]) + + if error: + with pytest.raises(xr.MergeError): + apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) + else: + expected = xr.Variable("x", [0, 3], attrs=expected) + actual = apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) + + assert_identical(actual, expected) + + def test_dataset_join(): ds0 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) ds1 = xr.Dataset({"a": ("x", [99, 3]), "x": [1, 2]}) From c13b080ef4495048f8322b345b9986a90e462e57 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 15 Mar 2021 22:48:10 +0100 Subject: [PATCH 02/16] implement keep_attrs for apply_variable_ufunc --- xarray/core/computation.py | 27 +++++++++++++++++++++------ xarray/tests/test_computation.py | 7 +++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 027bf29000a..a3484c40be4 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -27,8 +27,8 @@ from . import dtypes, duck_array_ops, utils from .alignment import align, deep_align -from .merge import merge_coordinates_without_align -from .options import OPTIONS +from .merge import merge_attrs, merge_coordinates_without_align +from .options import OPTIONS, _get_keep_attrs from .pycompat import is_duck_dask_array from .utils import is_dict_like from .variable import Variable @@ -50,6 +50,11 @@ def _first_of_type(args, kind): raise ValueError("This should be unreachable.") +def _all_of_type(args, kind): + """Return all objects of type 'kind'""" + return [arg for arg in args if isinstance(arg, kind)] + + class _UFuncSignature: """Core dimensions signature for a given function. @@ -615,8 +620,6 @@ def apply_variable_ufunc( """Apply a ndarray level function over Variable and/or ndarray objects.""" from .variable import Variable, as_compatible_data - first_obj = _first_of_type(args, Variable) - dim_sizes = unified_dim_sizes( (a for a in args if hasattr(a, "dims")), exclude_dims=exclude_dims ) @@ -736,6 +739,19 @@ def func(*arrays): ) ) + objs = _all_of_type(args, Variable) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + + if isinstance(keep_attrs, bool): + combine_attrs = "override" if keep_attrs else "drop" + else: + combine_attrs = keep_attrs + attrs = merge_attrs( + [obj.attrs for obj in objs], + combine_attrs=combine_attrs, + ) + output = [] for dims, data in zip(output_dims, result_data): data = as_compatible_data(data) @@ -758,8 +774,7 @@ def func(*arrays): ) ) - if keep_attrs: - var.attrs.update(first_obj.attrs) + var.attrs = attrs output.append(var) if signature.num_outputs == 1: diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 33ac0573769..27bca649ef4 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -566,6 +566,13 @@ def add(a, b, keep_attrs): @pytest.mark.parametrize( ["strategy", "attrs", "expected", "error"], ( + pytest.param( + None, + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="default", + ), pytest.param( False, [{"a": 1}, {"a": 2}, {"a": 3}], From 2ee70767fb671e38bdfcc54cf87f1da5da6ecb62 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 15 Mar 2021 22:52:47 +0100 Subject: [PATCH 03/16] move the keep_attrs preprocessing to apply_ufunc --- xarray/core/computation.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index a3484c40be4..b8e2ed822aa 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -740,16 +740,9 @@ def func(*arrays): ) objs = _all_of_type(args, Variable) - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - - if isinstance(keep_attrs, bool): - combine_attrs = "override" if keep_attrs else "drop" - else: - combine_attrs = keep_attrs attrs = merge_attrs( [obj.attrs for obj in objs], - combine_attrs=combine_attrs, + combine_attrs=keep_attrs, ) output = [] @@ -816,7 +809,7 @@ def apply_ufunc( join: str = "exact", dataset_join: str = "exact", dataset_fill_value: object = _NO_FILL_VALUE, - keep_attrs: bool = False, + keep_attrs: Union[bool, str, None] = False, kwargs: Mapping = None, dask: str = "forbidden", output_dtypes: Sequence = None, @@ -1106,6 +1099,12 @@ def earth_mover_distance(first_samples, if kwargs: func = functools.partial(func, **kwargs) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + + if isinstance(keep_attrs, bool): + keep_attrs = "override" if keep_attrs else "drop" + variables_vfunc = functools.partial( apply_variable_ufunc, func, From 68e09ec3b9bc8e99a5814bff3d9eb45afca0bedd Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 15 Mar 2021 23:33:02 +0100 Subject: [PATCH 04/16] implement keep_attrs for DataArray objects --- xarray/core/computation.py | 22 +++++----- xarray/tests/test_computation.py | 69 ++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 10 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index b8e2ed822aa..e8b309f1c9b 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -265,11 +265,14 @@ def apply_dataarray_vfunc( args, join=join, copy=False, exclude=exclude_dims, raise_on_invalid=False ) - if keep_attrs: + objs = _all_of_type(args, DataArray) + + # TODO: fix the default value, now that keep_attrs always is a str + if keep_attrs in (False, "drop"): + name = result_name(args) + else: first_obj = _first_of_type(args, DataArray) name = first_obj.name - else: - name = result_name(args) result_coords = build_output_coords(args, signature, exclude_dims) data_vars = [getattr(a, "variable", a) for a in args] @@ -284,13 +287,12 @@ def apply_dataarray_vfunc( (coords,) = result_coords out = DataArray(result_var, coords, name=name, fastpath=True) - if keep_attrs: - if isinstance(out, tuple): - for da in out: - # This is adding attrs in place - da._copy_attrs_from(first_obj) - else: - out._copy_attrs_from(first_obj) + attrs = merge_attrs([x.attrs for x in objs], combine_attrs=keep_attrs) + if isinstance(out, tuple): + for da in out: + da.attrs = attrs + else: + out.attrs = attrs return out diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 27bca649ef4..bd61f4fe71c 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -632,6 +632,75 @@ def test_keep_attrs_strategies_variable(strategy, attrs, expected, error): assert_identical(actual, expected) +@pytest.mark.parametrize( + ["strategy", "attrs", "expected", "error"], + ( + pytest.param( + None, + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="default", + ), + pytest.param( + False, + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="False", + ), + pytest.param( + True, + [{"a": 1}, {"a": 2}, {"a": 3}], + {"a": 1}, + False, + id="True", + ), + pytest.param( + "override", + [{"a": 1}, {"a": 2}, {"a": 3}], + {"a": 1}, + False, + id="override", + ), + pytest.param( + "drop", + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="drop", + ), + pytest.param( + "drop_conflicts", + [{"a": 1, "b": 2}, {"b": 1, "c": 3}, {"c": 3, "d": 4}], + {"a": 1, "c": 3, "d": 4}, + False, + id="drop_conflicts", + ), + pytest.param( + "no_conflicts", + [{"a": 1}, {"b": 2}, {"b": 3}], + None, + True, + id="no_conflicts", + ), + ), +) +def test_keep_attrs_strategies_dataarray(strategy, attrs, expected, error): + a = xr.DataArray(dims="x", data=[0, 1], attrs=attrs[0]) + b = xr.DataArray(dims="x", data=[0, 1], attrs=attrs[1]) + c = xr.DataArray(dims="x", data=[0, 1], attrs=attrs[2]) + + if error: + with pytest.raises(xr.MergeError): + apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) + else: + expected = xr.DataArray(dims="x", data=[0, 3], attrs=expected) + actual = apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) + + assert_identical(actual, expected) + + def test_dataset_join(): ds0 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) ds1 = xr.Dataset({"a": ("x", [99, 3]), "x": [1, 2]}) From b0a30545199055b5a210ee78a1e7b47c0d660ce9 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 15 Mar 2021 23:39:48 +0100 Subject: [PATCH 05/16] implement keep_attrs for Dataset --- xarray/core/computation.py | 17 ++++---- xarray/tests/test_computation.py | 69 ++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 9 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index e8b309f1c9b..531f166e124 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -421,8 +421,7 @@ def apply_dataset_vfunc( "dataset_fill_value argument." ) - if keep_attrs: - first_obj = _first_of_type(args, Dataset) + objs = _all_of_type(args, Dataset) if len(args) > 1: args = deep_align( @@ -442,13 +441,13 @@ def apply_dataset_vfunc( (coord_vars,) = list_of_coords out = _fast_dataset(result_vars, coord_vars) - if keep_attrs: - if isinstance(out, tuple): - for ds in out: - # This is adding attrs in place - ds._copy_attrs_from(first_obj) - else: - out._copy_attrs_from(first_obj) + attrs = merge_attrs([x.attrs for x in objs], combine_attrs=keep_attrs) + if isinstance(out, tuple): + for ds in out: + ds.attrs = attrs + else: + out.attrs = attrs + return out diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index bd61f4fe71c..9db0e063a92 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -701,6 +701,75 @@ def test_keep_attrs_strategies_dataarray(strategy, attrs, expected, error): assert_identical(actual, expected) +@pytest.mark.parametrize( + ["strategy", "attrs", "expected", "error"], + ( + pytest.param( + None, + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="default", + ), + pytest.param( + False, + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="False", + ), + pytest.param( + True, + [{"a": 1}, {"a": 2}, {"a": 3}], + {"a": 1}, + False, + id="True", + ), + pytest.param( + "override", + [{"a": 1}, {"a": 2}, {"a": 3}], + {"a": 1}, + False, + id="override", + ), + pytest.param( + "drop", + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="drop", + ), + pytest.param( + "drop_conflicts", + [{"a": 1, "b": 2}, {"b": 1, "c": 3}, {"c": 3, "d": 4}], + {"a": 1, "c": 3, "d": 4}, + False, + id="drop_conflicts", + ), + pytest.param( + "no_conflicts", + [{"a": 1}, {"b": 2}, {"b": 3}], + None, + True, + id="no_conflicts", + ), + ), +) +def test_keep_attrs_strategies_dataset(strategy, attrs, expected, error): + a = xr.Dataset({"a": ("x", [0, 1])}, attrs=attrs[0]) + b = xr.Dataset({"a": ("x", [0, 1])}, attrs=attrs[1]) + c = xr.Dataset({"a": ("x", [0, 1])}, attrs=attrs[2]) + + if error: + with pytest.raises(xr.MergeError): + apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) + else: + expected = xr.Dataset({"a": ("x", [0, 3])}, attrs=expected) + actual = apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) + + assert_identical(actual, expected) + + def test_dataset_join(): ds0 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) ds1 = xr.Dataset({"a": ("x", [99, 3]), "x": [1, 2]}) From da1a23901619b2568fceef801e441a022aa59794 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 15 Mar 2021 23:48:28 +0100 Subject: [PATCH 06/16] don't include the definition of the assert func in the traceback --- xarray/tests/test_computation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 9db0e063a92..48ac75d8bcf 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -29,6 +29,7 @@ def assert_identical(a, b): """ A version of this function which accepts numpy arrays """ + __tracebackhide__ = True from xarray.testing import assert_identical as assert_identical_ if hasattr(a, "identical"): From 92d6ce58f12a80471d991e91ad3a02687e075b90 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 15 Mar 2021 23:49:15 +0100 Subject: [PATCH 07/16] check that keep_attrs is applied recursively --- xarray/tests/test_computation.py | 148 +++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 48ac75d8bcf..4c3e12ea4c3 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -702,6 +702,77 @@ def test_keep_attrs_strategies_dataarray(strategy, attrs, expected, error): assert_identical(actual, expected) +@pytest.mark.parametrize( + ["strategy", "attrs", "expected", "error"], + ( + pytest.param( + None, + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="default", + ), + pytest.param( + False, + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="False", + ), + pytest.param( + True, + [{"a": 1}, {"a": 2}, {"a": 3}], + {"a": 1}, + False, + id="True", + ), + pytest.param( + "override", + [{"a": 1}, {"a": 2}, {"a": 3}], + {"a": 1}, + False, + id="override", + ), + pytest.param( + "drop", + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="drop", + ), + pytest.param( + "drop_conflicts", + [{"a": 1, "b": 2}, {"b": 1, "c": 3}, {"c": 3, "d": 4}], + {"a": 1, "c": 3, "d": 4}, + False, + id="drop_conflicts", + ), + pytest.param( + "no_conflicts", + [{"a": 1}, {"b": 2}, {"b": 3}], + None, + True, + id="no_conflicts", + ), + ), +) +def test_keep_attrs_strategies_dataarray_variables(strategy, attrs, expected, error): + a = xr.DataArray(dims="x", data=[0, 1], coords={"u": ("x", [0, 1], attrs[0])}) + b = xr.DataArray(dims="x", data=[0, 1], coords={"u": ("x", [0, 1], attrs[1])}) + c = xr.DataArray(dims="x", data=[0, 1], coords={"u": ("x", [0, 1], attrs[2])}) + + if error: + with pytest.raises(xr.MergeError): + apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) + else: + expected = xr.DataArray( + dims="x", data=[0, 3], coords={"u": ("x", [0, 1], expected)} + ) + actual = apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) + + assert_identical(actual, expected) + + @pytest.mark.parametrize( ["strategy", "attrs", "expected", "error"], ( @@ -771,6 +842,83 @@ def test_keep_attrs_strategies_dataset(strategy, attrs, expected, error): assert_identical(actual, expected) +@pytest.mark.parametrize( + ["strategy", "attrs", "expected", "error"], + ( + pytest.param( + None, + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="default", + ), + pytest.param( + False, + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="False", + ), + pytest.param( + True, + [{"a": 1}, {"a": 2}, {"a": 3}], + {"a": 1}, + False, + id="True", + ), + pytest.param( + "override", + [{"a": 1}, {"a": 2}, {"a": 3}], + {"a": 1}, + False, + id="override", + ), + pytest.param( + "drop", + [{"a": 1}, {"a": 2}, {"a": 3}], + {}, + False, + id="drop", + ), + pytest.param( + "drop_conflicts", + [{"a": 1, "b": 2}, {"b": 1, "c": 3}, {"c": 3, "d": 4}], + {"a": 1, "c": 3, "d": 4}, + False, + id="drop_conflicts", + ), + pytest.param( + "no_conflicts", + [{"a": 1}, {"b": 2}, {"b": 3}], + None, + True, + id="no_conflicts", + ), + ), +) +def test_keep_attrs_strategies_dataset_variables(strategy, attrs, expected, error): + a = xr.Dataset( + {"a": ("x", [0, 1], attrs[0])}, coords={"u": ("x", [0, 1], attrs[0])} + ) + b = xr.Dataset( + {"a": ("x", [0, 1], attrs[1])}, coords={"u": ("x", [0, 1], attrs[1])} + ) + c = xr.Dataset( + {"a": ("x", [0, 1], attrs[2])}, coords={"u": ("x", [0, 1], attrs[2])} + ) + + if error: + with pytest.raises(xr.MergeError): + apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) + else: + expected = xr.Dataset( + {"a": ("x", [0, 3], expected)}, coords={"u": ("x", [0, 1], expected)} + ) + actual = apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) + + assert_identical(actual, expected) + + def test_dataset_join(): ds0 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) ds1 = xr.Dataset({"a": ("x", [99, 3]), "x": [1, 2]}) From 8fccdd17af77058386bc6cf7450313aa58705626 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 15 Mar 2021 23:52:23 +0100 Subject: [PATCH 08/16] skip the recursion tests --- xarray/tests/test_computation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 4c3e12ea4c3..731cf9182d3 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -702,6 +702,7 @@ def test_keep_attrs_strategies_dataarray(strategy, attrs, expected, error): assert_identical(actual, expected) +@pytest.mark.skip(reason="not implemented, yet") @pytest.mark.parametrize( ["strategy", "attrs", "expected", "error"], ( @@ -842,6 +843,7 @@ def test_keep_attrs_strategies_dataset(strategy, attrs, expected, error): assert_identical(actual, expected) +@pytest.mark.skip(reason="not implemented, yet") @pytest.mark.parametrize( ["strategy", "attrs", "expected", "error"], ( From 5a16addbd89de9cd9f4f3feef44db892196af4e0 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sun, 28 Mar 2021 22:20:44 +0200 Subject: [PATCH 09/16] don't skip the variable tests --- xarray/tests/test_computation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 731cf9182d3..4c3e12ea4c3 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -702,7 +702,6 @@ def test_keep_attrs_strategies_dataarray(strategy, attrs, expected, error): assert_identical(actual, expected) -@pytest.mark.skip(reason="not implemented, yet") @pytest.mark.parametrize( ["strategy", "attrs", "expected", "error"], ( @@ -843,7 +842,6 @@ def test_keep_attrs_strategies_dataset(strategy, attrs, expected, error): assert_identical(actual, expected) -@pytest.mark.skip(reason="not implemented, yet") @pytest.mark.parametrize( ["strategy", "attrs", "expected", "error"], ( From d95cdf103556e948f2ed56b932c4645c7791fbec Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 5 May 2021 19:12:02 +0200 Subject: [PATCH 10/16] add a combine_attrs parameter to merge_coordinates_without_align --- xarray/core/merge.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index ec3c9b0f065..f950d44c04e 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -313,6 +313,7 @@ def merge_coordinates_without_align( objects: "List[Coordinates]", prioritized: Mapping[Hashable, MergeElement] = None, exclude_dims: AbstractSet = frozenset(), + combine_attrs: str = "override", ) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]: """Merge variables/indexes from coordinates without automatic alignments. @@ -334,7 +335,7 @@ def merge_coordinates_without_align( else: filtered = collected - return merge_collected(filtered, prioritized) + return merge_collected(filtered, prioritized, combine_attrs=combine_attrs) def determine_coords( From 6cdfd0935810bb711bc0695596938f07bd05690d Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 5 May 2021 19:13:27 +0200 Subject: [PATCH 11/16] propagate keep_attrs --- xarray/core/computation.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index be85936e154..24cfb7d0a14 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -207,7 +207,10 @@ def _get_coords_list(args) -> List["Coordinates"]: def build_output_coords( - args: list, signature: _UFuncSignature, exclude_dims: AbstractSet = frozenset() + args: list, + signature: _UFuncSignature, + exclude_dims: AbstractSet = frozenset(), + combine_attrs: str = "override", ) -> "List[Dict[Any, Variable]]": """Build output coordinates for an operation. @@ -235,7 +238,7 @@ def build_output_coords( else: # TODO: save these merged indexes, instead of re-computing them later merged_vars, unused_indexes = merge_coordinates_without_align( - coords_list, exclude_dims=exclude_dims + coords_list, exclude_dims=exclude_dims, combine_attrs=combine_attrs ) output_coords = [] @@ -253,7 +256,12 @@ def build_output_coords( def apply_dataarray_vfunc( - func, *args, signature, join="inner", exclude_dims=frozenset(), keep_attrs=False + func, + *args, + signature, + join="inner", + exclude_dims=frozenset(), + keep_attrs="override", ): """Apply a variable level function over DataArray, Variable and/or ndarray objects. @@ -273,7 +281,9 @@ def apply_dataarray_vfunc( else: first_obj = _first_of_type(args, DataArray) name = first_obj.name - result_coords = build_output_coords(args, signature, exclude_dims) + result_coords = build_output_coords( + args, signature, exclude_dims, combine_attrs=keep_attrs + ) data_vars = [getattr(a, "variable", a) for a in args] result_var = func(*data_vars) @@ -428,7 +438,9 @@ def apply_dataset_vfunc( args, join=join, copy=False, exclude=exclude_dims, raise_on_invalid=False ) - list_of_coords = build_output_coords(args, signature, exclude_dims) + list_of_coords = build_output_coords( + args, signature, exclude_dims, combine_attrs=keep_attrs + ) args = [getattr(arg, "data_vars", arg) for arg in args] result_vars = apply_dict_of_variables_vfunc( From a38dffbaa6372961582833813d3e27553ef35f37 Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 5 May 2021 23:49:29 +0200 Subject: [PATCH 12/16] update whats-new.rst [skip-ci] --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 59857c67bf7..86d36b27fa1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,9 @@ New Features - apply ``combine_attrs`` on data variables and coordinate variables when concatenating and merging datasets and dataarrays (:pull:`4902`). By `Justus Magin `_. +- allow passing ``combine_attrs`` strategy names to the ``keep_attrs`` parameter of + :py:func:`apply_ufunc` (:pull:`5041`) + By `Justus Magin `_. - Add :py:meth:`Dataset.to_pandas` (:pull:`5247`) By `Giacomo Caria `_. - Add :py:meth:`DataArray.plot.surface` which wraps matplotlib's `plot_surface` to make From b257ad72bc249afabf3b3dfbc2d3c52b5c94eac1 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 8 May 2021 17:58:19 +0200 Subject: [PATCH 13/16] change the default --- xarray/core/computation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 24cfb7d0a14..9d3bbca8a9b 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -822,7 +822,7 @@ def apply_ufunc( join: str = "exact", dataset_join: str = "exact", dataset_fill_value: object = _NO_FILL_VALUE, - keep_attrs: Union[bool, str, None] = False, + keep_attrs: Union[bool, str] = None, kwargs: Mapping = None, dask: str = "forbidden", output_dtypes: Sequence = None, From bef9064c7ab582ba00a72f49b0ca9f10bc3b976f Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 8 May 2021 18:38:20 +0200 Subject: [PATCH 14/16] change the default values to make sure keep_attrs is always a str --- xarray/core/computation.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 9d3bbca8a9b..ebd5ac231a9 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -275,8 +275,7 @@ def apply_dataarray_vfunc( objs = _all_of_type(args, DataArray) - # TODO: fix the default value, now that keep_attrs always is a str - if keep_attrs in (False, "drop"): + if keep_attrs == "drop": name = result_name(args) else: first_obj = _first_of_type(args, DataArray) @@ -417,7 +416,7 @@ def apply_dataset_vfunc( dataset_join="exact", fill_value=_NO_FILL_VALUE, exclude_dims=frozenset(), - keep_attrs=False, + keep_attrs="override", ): """Apply a variable level function over Dataset, dict of DataArray, DataArray, Variable and/or ndarray objects. @@ -627,7 +626,7 @@ def apply_variable_ufunc( dask="forbidden", output_dtypes=None, vectorize=False, - keep_attrs=False, + keep_attrs="override", dask_gufunc_kwargs=None, ): """Apply a ndarray level function over Variable and/or ndarray objects.""" From 9c0704463adfb9c3b1795d2b5d8a4bc3600f1b45 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 8 May 2021 20:23:31 +0200 Subject: [PATCH 15/16] test data, dims and coords separately --- xarray/tests/test_computation.py | 60 ++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 72ef2d5bf0d..cbfa61a4482 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -702,6 +702,7 @@ def test_keep_attrs_strategies_dataarray(strategy, attrs, expected, error): assert_identical(actual, expected) +@pytest.mark.parametrize("variant", ("dim", "coord")) @pytest.mark.parametrize( ["strategy", "attrs", "expected", "error"], ( @@ -756,17 +757,41 @@ def test_keep_attrs_strategies_dataarray(strategy, attrs, expected, error): ), ), ) -def test_keep_attrs_strategies_dataarray_variables(strategy, attrs, expected, error): - a = xr.DataArray(dims="x", data=[0, 1], coords={"u": ("x", [0, 1], attrs[0])}) - b = xr.DataArray(dims="x", data=[0, 1], coords={"u": ("x", [0, 1], attrs[1])}) - c = xr.DataArray(dims="x", data=[0, 1], coords={"u": ("x", [0, 1], attrs[2])}) +def test_keep_attrs_strategies_dataarray_variables( + variant, strategy, attrs, expected, error +): + compute_attrs = { + "dim": lambda attrs, default: (attrs, default), + "coord": lambda attrs, default: (default, attrs), + }.get(variant) + + dim_attrs, coord_attrs = compute_attrs(attrs, [{}, {}, {}]) + + a = xr.DataArray( + dims="x", + data=[0, 1], + coords={"x": ("x", [0, 1], dim_attrs[0]), "u": ("x", [0, 1], coord_attrs[0])}, + ) + b = xr.DataArray( + dims="x", + data=[0, 1], + coords={"x": ("x", [0, 1], dim_attrs[1]), "u": ("x", [0, 1], coord_attrs[1])}, + ) + c = xr.DataArray( + dims="x", + data=[0, 1], + coords={"x": ("x", [0, 1], dim_attrs[2]), "u": ("x", [0, 1], coord_attrs[2])}, + ) if error: with pytest.raises(xr.MergeError): apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) else: + dim_attrs, coord_attrs = compute_attrs(expected, {}) expected = xr.DataArray( - dims="x", data=[0, 3], coords={"u": ("x", [0, 1], expected)} + dims="x", + data=[0, 3], + coords={"x": ("x", [0, 1], dim_attrs), "u": ("x", [0, 1], coord_attrs)}, ) actual = apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) @@ -842,6 +867,7 @@ def test_keep_attrs_strategies_dataset(strategy, attrs, expected, error): assert_identical(actual, expected) +@pytest.mark.parametrize("variant", ("data", "dim", "coord")) @pytest.mark.parametrize( ["strategy", "attrs", "expected", "error"], ( @@ -896,23 +922,37 @@ def test_keep_attrs_strategies_dataset(strategy, attrs, expected, error): ), ), ) -def test_keep_attrs_strategies_dataset_variables(strategy, attrs, expected, error): +def test_keep_attrs_strategies_dataset_variables( + variant, strategy, attrs, expected, error +): + compute_attrs = { + "data": lambda attrs, default: (attrs, default, default), + "dim": lambda attrs, default: (default, attrs, default), + "coord": lambda attrs, default: (default, default, attrs), + }.get(variant) + data_attrs, dim_attrs, coord_attrs = compute_attrs(attrs, [{}, {}, {}]) + a = xr.Dataset( - {"a": ("x", [0, 1], attrs[0])}, coords={"u": ("x", [0, 1], attrs[0])} + {"a": ("x", [], data_attrs[0])}, + coords={"x": ("x", [], dim_attrs[0]), "u": ("x", [], coord_attrs[0])}, ) b = xr.Dataset( - {"a": ("x", [0, 1], attrs[1])}, coords={"u": ("x", [0, 1], attrs[1])} + {"a": ("x", [], data_attrs[1])}, + coords={"x": ("x", [], dim_attrs[1]), "u": ("x", [], coord_attrs[1])}, ) c = xr.Dataset( - {"a": ("x", [0, 1], attrs[2])}, coords={"u": ("x", [0, 1], attrs[2])} + {"a": ("x", [], data_attrs[2])}, + coords={"x": ("x", [], dim_attrs[2]), "u": ("x", [], coord_attrs[2])}, ) if error: with pytest.raises(xr.MergeError): apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) else: + data_attrs, dim_attrs, coord_attrs = compute_attrs(expected, {}) expected = xr.Dataset( - {"a": ("x", [0, 3], expected)}, coords={"u": ("x", [0, 1], expected)} + {"a": ("x", [], data_attrs)}, + coords={"x": ("x", [], dim_attrs), "u": ("x", [], coord_attrs)}, ) actual = apply_ufunc(lambda *args: sum(args), a, b, c, keep_attrs=strategy) From 551b77bf48c58026173520087739f38c34f8e9ee Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 8 May 2021 20:46:31 +0200 Subject: [PATCH 16/16] move the whats-new.rst entry [skip-ci] --- doc/whats-new.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 832c6a51455..926385abd2c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,7 +21,9 @@ v0.18.1 (unreleased) New Features ~~~~~~~~~~~~ - +- allow passing ``combine_attrs`` strategy names to the ``keep_attrs`` parameter of + :py:func:`apply_ufunc` (:pull:`5041`) + By `Justus Magin `_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -67,9 +69,6 @@ New Features - apply ``combine_attrs`` on data variables and coordinate variables when concatenating and merging datasets and dataarrays (:pull:`4902`). By `Justus Magin `_. -- allow passing ``combine_attrs`` strategy names to the ``keep_attrs`` parameter of - :py:func:`apply_ufunc` (:pull:`5041`) - By `Justus Magin `_. - Add :py:meth:`Dataset.to_pandas` (:pull:`5247`) By `Giacomo Caria `_. - Add :py:meth:`DataArray.plot.surface` which wraps matplotlib's `plot_surface` to make