From 28b857ed38eda07bbe2d632ad7acf06224815a01 Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 12 Feb 2021 23:28:54 +0100 Subject: [PATCH 01/24] enable the tests --- xarray/tests/test_concat.py | 1 - xarray/tests/test_merge.py | 1 - 2 files changed, 2 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index beed48a35fc..7b13a5dfa3b 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -313,7 +313,6 @@ def test_concat_combine_attrs_kwarg( assert_identical(actual, expected) - @pytest.mark.skip(reason="not implemented, yet (see #4827)") @pytest.mark.parametrize( "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception", [ diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 27e2b10dcbc..d6f389aab39 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -123,7 +123,6 @@ def test_merge_arrays_attrs( expected.attrs = expected_attrs assert_identical(actual, expected) - @pytest.mark.skip(reason="not implemented, yet (see #4827)") @pytest.mark.parametrize( "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception", [ From 4ab0013f8deaea7bbf52ba0c2fec45ec5596599f Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 13 Feb 2021 00:07:23 +0100 Subject: [PATCH 02/24] clear all attrs first --- xarray/tests/test_merge.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index d6f389aab39..d8fd05d0c26 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -166,6 +166,10 @@ def test_merge_arrays_attrs_variables( ): """check that combine_attrs is used on data variables and coords""" data = create_test_data() + for var in data.variables.values(): + var.attrs.clear() + data.attrs.clear() + data1 = data.copy() data1.var1.attrs = attrs1 data1.dim1.attrs = attrs1 From c53c71ad05e72fc9bab8724b2a83403cbf5d1d50 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 13 Feb 2021 00:11:31 +0100 Subject: [PATCH 03/24] implement the merging of variable attrs when merging --- xarray/core/merge.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 14beeff3db5..c5fccbfba8b 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -164,6 +164,7 @@ def merge_collected( grouped: Dict[Hashable, List[MergeElement]], prioritized: Mapping[Hashable, MergeElement] = None, compat: str = "minimal", + combine_attrs="override", ) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]: """Merge dicts of variables, while resolving conflicts appropriately. @@ -233,6 +234,11 @@ def merge_collected( # we drop conflicting coordinates) raise + attrs = [var.attrs for var in variables] + merged_vars[name].attrs = merge_attrs( + attrs, combine_attrs=combine_attrs + ) + return merged_vars, merged_indexes @@ -613,7 +619,9 @@ def merge_core( collected = collect_variables_and_indexes(aligned) prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) - variables, out_indexes = merge_collected(collected, prioritized, compat=compat) + variables, out_indexes = merge_collected( + collected, prioritized, compat=compat, combine_attrs=combine_attrs + ) assert_unique_multiindex_level_names(variables) dims = calculate_dimensions(variables) From a0a22655c37284fd0ff6aa5e75773bff6da39abb Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 13 Feb 2021 00:42:35 +0100 Subject: [PATCH 04/24] implement the merging of variable attrs when concatenating --- xarray/core/concat.py | 4 +-- xarray/core/variable.py | 67 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 61 insertions(+), 10 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 7a958eb1404..9eca99918d4 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -508,7 +508,7 @@ def ensure_common_dims(vars): vars = ensure_common_dims([ds[k].variable for ds in datasets]) except KeyError: raise ValueError("%r is not present in all datasets." % k) - combined = concat_vars(vars, dim, positions) + combined = concat_vars(vars, dim, positions, combine_attrs=combine_attrs) assert isinstance(combined, Variable) result_vars[k] = combined elif k in result_vars: @@ -572,7 +572,7 @@ def _dataarray_concat( positions, fill_value=fill_value, join=join, - combine_attrs="drop", + combine_attrs=combine_attrs, ) merged_attrs = merge_attrs([da.attrs for da in arrays], combine_attrs) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 45553eb9b1e..68a2455c9ef 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1758,7 +1758,14 @@ def reduce( return Variable(dims, data, attrs=attrs) @classmethod - def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): + def concat( + cls, + variables, + dim="concat_dim", + positions=None, + shortcut=False, + combine_attrs="override", + ): """Concatenate variables along a new or existing dimension. Parameters @@ -1781,6 +1788,18 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): This option is used internally to speed-up groupby operations. If `shortcut` is True, some checks of internal consistency between arrays to concatenate are skipped. + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, default: "override" + String indicating how to combine attrs of the objects being merged: + + - "drop": empty attrs on returned Dataset. + - "identical": all attrs must be the same on every object. + - "no_conflicts": attrs from all objects are combined, any that have + the same name must also have the same value. + - "drop_conflicts": attrs from all objects are combined, any that have + the same name but different values are dropped. + - "override": skip comparing and copy attrs from the first dataset to + the result. Returns ------- @@ -1788,6 +1807,8 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): Concatenated Variable formed by stacking all the supplied variables along the given dimension. """ + from .merge import merge_attrs + if not isinstance(dim, str): (dim,) = dim.dims @@ -1812,7 +1833,9 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): dims = (dim,) + first_var.dims data = duck_array_ops.stack(arrays, axis=axis) - attrs = dict(first_var.attrs) + attrs = merge_attrs( + [var.attrs for var in variables], combine_attrs=combine_attrs + ) encoding = dict(first_var.encoding) if not shortcut: for var in variables: @@ -2554,12 +2577,21 @@ def __setitem__(self, key, value): raise TypeError("%s values cannot be modified" % type(self).__name__) @classmethod - def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): + def concat( + cls, + variables, + dim="concat_dim", + positions=None, + shortcut=False, + combine_attrs="override", + ): """Specialized version of Variable.concat for IndexVariable objects. This exists because we want to avoid converting Index objects to NumPy arrays, if possible. """ + from .merge import merge_attrs + if not isinstance(dim, str): (dim,) = dim.dims @@ -2586,12 +2618,13 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): # keep as str if possible as pandas.Index uses object (converts to numpy array) data = maybe_coerce_to_str(data, variables) - attrs = dict(first_var.attrs) + attrs = merge_attrs( + [var.attrs for var in variables], combine_attrs=combine_attrs + ) if not shortcut: for var in variables: if var.dims != first_var.dims: raise ValueError("inconsistent dimensions") - utils.remove_incompatible_items(attrs, var.attrs) return cls(first_var.dims, data, attrs) @@ -2765,7 +2798,13 @@ def _broadcast_compat_data(self, other): return self_data, other_data, dims -def concat(variables, dim="concat_dim", positions=None, shortcut=False): +def concat( + variables, + dim="concat_dim", + positions=None, + shortcut=False, + combine_attrs="override", +): """Concatenate variables along a new or existing dimension. Parameters @@ -2788,6 +2827,18 @@ def concat(variables, dim="concat_dim", positions=None, shortcut=False): This option is used internally to speed-up groupby operations. If `shortcut` is True, some checks of internal consistency between arrays to concatenate are skipped. + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, default: "override" + String indicating how to combine attrs of the objects being merged: + + - "drop": empty attrs on returned Dataset. + - "identical": all attrs must be the same on every object. + - "no_conflicts": attrs from all objects are combined, any that have + the same name must also have the same value. + - "drop_conflicts": attrs from all objects are combined, any that have + the same name but different values are dropped. + - "override": skip comparing and copy attrs from the first dataset to + the result. Returns ------- @@ -2797,9 +2848,9 @@ def concat(variables, dim="concat_dim", positions=None, shortcut=False): """ variables = list(variables) if all(isinstance(v, IndexVariable) for v in variables): - return IndexVariable.concat(variables, dim, positions, shortcut) + return IndexVariable.concat(variables, dim, positions, shortcut, combine_attrs) else: - return Variable.concat(variables, dim, positions, shortcut) + return Variable.concat(variables, dim, positions, shortcut, combine_attrs) def assert_unique_multiindex_level_names(variables): From 9af2c3f821cb6cc914ca19e4f7f7e872725391dd Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 13 Feb 2021 01:22:08 +0100 Subject: [PATCH 05/24] add tests for combine_attrs on variables with combine_* --- xarray/tests/test_combine.py | 145 ++++++++++++++++++++++++++++++++++- 1 file changed, 144 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 522b98cf864..7cc5e2d2a4a 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -4,7 +4,14 @@ import numpy as np import pytest -from xarray import DataArray, Dataset, combine_by_coords, combine_nested, concat +from xarray import ( + DataArray, + Dataset, + MergeError, + combine_by_coords, + combine_nested, + concat, +) from xarray.core import dtypes from xarray.core.combine import ( _check_shape_tile_ids, @@ -743,6 +750,142 @@ def test_combine_nested_combine_attrs_drop_conflicts(self): ) assert_identical(expected, actual) + @pytest.mark.parametrize( + "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception", + [ + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 1, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + False, + ), + ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), + ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 4, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + True, + ), + ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), + ( + "override", + {"a": 1, "b": 2}, + {"a": 4, "b": 5, "c": 3}, + {"a": 1, "b": 2}, + False, + ), + ( + "drop_conflicts", + {"a": 1, "b": 2, "c": 3}, + {"b": 1, "c": 3, "d": 4}, + {"a": 1, "c": 3, "d": 4}, + False, + ), + ], + ) + def test_combine_nested_combine_attrs_variables( + self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception + ): + """check that combine_attrs is used on data variables and coords""" + data1 = Dataset( + { + "a": ("x", [1, 2], attrs1), + "b": ("x", [3, -1], attrs1), + "x": ("x", [0, 1], attrs1), + } + ) + data2 = Dataset( + { + "a": ("x", [2, 3], attrs2), + "b": ("x", [-2, 1], attrs2), + "x": ("x", [2, 3], attrs2), + } + ) + + if expect_exception: + with raises_regex(MergeError, "combine_attrs"): + combine_by_coords([data1, data2], combine_attrs=combine_attrs) + else: + actual = combine_by_coords([data1, data2], combine_attrs=combine_attrs) + expected = Dataset( + { + "a": ("x", [1, 2, 2, 3], expected_attrs), + "b": ("x", [3, -1, -2, 1], expected_attrs), + }, + {"x": ("x", [0, 1, 2, 3], expected_attrs)}, + ) + + assert_identical(actual, expected) + + @pytest.mark.parametrize( + "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception", + [ + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 1, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + False, + ), + ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), + ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 4, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + True, + ), + ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), + ( + "override", + {"a": 1, "b": 2}, + {"a": 4, "b": 5, "c": 3}, + {"a": 1, "b": 2}, + False, + ), + ( + "drop_conflicts", + {"a": 1, "b": 2, "c": 3}, + {"b": 1, "c": 3, "d": 4}, + {"a": 1, "c": 3, "d": 4}, + False, + ), + ], + ) + def test_combine_by_coords_combine_attrs_variables( + self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception + ): + """check that combine_attrs is used on data variables and coords""" + data1 = Dataset( + {"x": ("a", [0], attrs1), "y": ("a", [0], attrs1), "a": ("a", [0], attrs1)} + ) + data2 = Dataset( + {"x": ("a", [1], attrs2), "y": ("a", [1], attrs2), "a": ("a", [1], attrs2)} + ) + + if expect_exception: + with raises_regex(MergeError, "combine_attrs"): + combine_by_coords([data1, data2], combine_attrs=combine_attrs) + else: + actual = combine_by_coords([data1, data2], combine_attrs=combine_attrs) + expected = Dataset( + { + "x": ("a", [0, 1], expected_attrs), + "y": ("a", [0, 1], expected_attrs), + "a": ("a", [0, 1], expected_attrs), + } + ) + + assert_identical(actual, expected) + def test_infer_order_from_coords(self): data = create_test_data() objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] From 78f9ef9b64af0427ed25cd9fcb5d1e83c64fa907 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 13 Feb 2021 01:41:32 +0100 Subject: [PATCH 06/24] modify tests in test_combine which have wrong expectation about attrs --- xarray/tests/test_combine.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 7cc5e2d2a4a..b0af9cff755 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -26,6 +26,13 @@ from .test_dataset import create_test_data +def clear_attrs(ds): + for var in ds.variables.values(): + var.attrs.clear() + ds.attrs.clear() + return ds + + def assert_combined_tile_ids_equal(dict1, dict2): assert len(dict1) == len(dict2) for k, v in dict1.items(): @@ -478,7 +485,8 @@ def test_concat_name_symmetry(self): assert_identical(x_first, y_first) def test_concat_one_dim_merge_another(self): - data = create_test_data() + data = clear_attrs(create_test_data()) + data1 = data.copy(deep=True) data2 = data.copy(deep=True) @@ -504,7 +512,7 @@ def test_auto_combine_2d(self): assert_equal(result, expected) def test_auto_combine_2d_combine_attrs_kwarg(self): - ds = create_test_data + ds = lambda x: clear_attrs(create_test_data(x)) partway1 = concat([ds(0), ds(3)], dim="dim1") partway2 = concat([ds(1), ds(4)], dim="dim1") From 77d0208987ebb472a9ee20ec417ed3a83cc13c4f Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 13 Feb 2021 01:44:00 +0100 Subject: [PATCH 07/24] move the attrs preserving into the try / except --- xarray/core/merge.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index c5fccbfba8b..c00ba45942f 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -228,17 +228,15 @@ def merge_collected( variables = [variable for variable, _ in elements_list] try: merged_vars[name] = unique_variable(name, variables, compat) + merged_vars[name].attrs = merge_attrs( + [var.attrs for var in variables], combine_attrs=combine_attrs + ) except MergeError: if compat != "minimal": # we need more than "minimal" compatibility (for which # we drop conflicting coordinates) raise - attrs = [var.attrs for var in variables] - merged_vars[name].attrs = merge_attrs( - attrs, combine_attrs=combine_attrs - ) - return merged_vars, merged_indexes From b234ddfd3264126ba487472d6f389a0dbe502e1a Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 13 Feb 2021 02:07:32 +0100 Subject: [PATCH 08/24] clear variable attrs where necessary --- xarray/tests/test_merge.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index d8fd05d0c26..780aecd77ba 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -31,12 +31,16 @@ def test_broadcast_dimension_size(self): class TestMergeFunction: def test_merge_arrays(self): data = create_test_data() + for var in data.variables.values(): + var.attrs.clear() actual = xr.merge([data.var1, data.var2]) expected = data[["var1", "var2"]] assert_identical(actual, expected) def test_merge_datasets(self): data = create_test_data() + for var in data.variables.values(): + var.attrs.clear() actual = xr.merge([data[["var1"]], data[["var2"]]]) expected = data[["var1", "var2"]] @@ -60,6 +64,8 @@ def test_merge_arrays_attrs_default(self): data.var2.attrs = var2_attrs actual = xr.merge([data.var1, data.var2]) expected = data[["var1", "var2"]] + for var in expected.variables.values(): + var.attrs.clear() expected.attrs = expected_attrs assert_identical(actual, expected) From 0cbf1a47e2a90dcfb1499970c23ca16b5d99650c Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 13 Feb 2021 02:07:45 +0100 Subject: [PATCH 09/24] rewrite the main object merge_attrs tests --- xarray/tests/test_merge.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 780aecd77ba..653d9b4268a 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -117,16 +117,15 @@ def test_merge_arrays_attrs_default(self): def test_merge_arrays_attrs( self, combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception ): - data = create_test_data() - data.var1.attrs = var1_attrs - data.var2.attrs = var2_attrs + data1 = xr.Dataset(attrs=var1_attrs) + data2 = xr.Dataset(attrs=var2_attrs) if expect_exception: with raises_regex(MergeError, "combine_attrs"): - actual = xr.merge([data.var1, data.var2], combine_attrs=combine_attrs) + actual = xr.merge([data1, data2], combine_attrs=combine_attrs) else: - actual = xr.merge([data.var1, data.var2], combine_attrs=combine_attrs) - expected = data[["var1", "var2"]] - expected.attrs = expected_attrs + actual = xr.merge([data1, data2], combine_attrs=combine_attrs) + expected = xr.Dataset(attrs=expected_attrs) + assert_identical(actual, expected) @pytest.mark.parametrize( From 71d605a4554ccb75514f50409b33fab71f22fa34 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 13 Feb 2021 14:53:27 +0100 Subject: [PATCH 10/24] clear the variable attrs first --- xarray/tests/test_merge.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 653d9b4268a..9fb87ffa93c 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -262,6 +262,8 @@ def test_merge_no_conflicts_single_var(self): def test_merge_no_conflicts_multi_var(self): data = create_test_data() + for var in data.variables.values(): + var.attrs.clear() data1 = data.copy(deep=True) data2 = data.copy(deep=True) From fb1fcb90cd22c82d0818aa51e90fcadfdd680436 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 13 Feb 2021 14:53:51 +0100 Subject: [PATCH 11/24] add combine_attrs="no_conflict" --- xarray/tests/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 9fb87ffa93c..2df55c6a94f 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -282,7 +282,7 @@ def test_merge_no_conflicts_multi_var(self): def test_merge_no_conflicts_preserve_attrs(self): data = xr.Dataset({"x": ([], 0, {"foo": "bar"})}) - actual = xr.merge([data, data]) + actual = xr.merge([data, data], combine_attrs="no_conflicts") assert_identical(data, actual) def test_merge_no_conflicts_broadcast(self): From 3ee536d67c9180c6694b08b22c169ab2f419ada1 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 27 Feb 2021 23:28:36 +0100 Subject: [PATCH 12/24] add a entry to whats-new.rst --- doc/whats-new.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index eed4e16eb62..f9131d5f097 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,7 +22,8 @@ v0.17.1 (unreleased) New Features ~~~~~~~~~~~~ - +- apply ``combine_attrs`` recursively (:pull:`4902`). + By `Justus Magin `_. Breaking changes ~~~~~~~~~~~~~~~~ From abacdc9fe7136d84d848fe9db4081b81deee73f5 Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 20 Apr 2021 00:05:39 +0200 Subject: [PATCH 13/24] Update doc/whats-new.rst Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 370ba319526..275836128f4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,7 +22,8 @@ v0.17.1 (unreleased) New Features ~~~~~~~~~~~~ -- apply ``combine_attrs`` recursively (:pull:`4902`). +- apply ``combine_attrs`` on data variables and coordinate variables when concatenating + and merging datasets and dataarrays (:pull:`4902`). By `Justus Magin `_. - Add :py:meth:`Dataset.query` and :py:meth:`DataArray.query` which enable indexing From b4b64c16701d86bc372f38ea5f502cd086e4bb2c Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 5 Apr 2021 21:19:51 +0200 Subject: [PATCH 14/24] dedent a hidden test --- xarray/tests/test_combine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 85fd3b20b6e..0d751773b05 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -680,8 +680,8 @@ def test_combine_by_coords(self): with raises_regex(ValueError, "Every dimension needs a coordinate"): combine_by_coords(objs) - def test_empty_input(self): - assert_identical(Dataset(), combine_by_coords([])) + def test_empty_input(self): + assert_identical(Dataset(), combine_by_coords([])) @pytest.mark.parametrize( "join, expected", From cd3fd0ef72cc766c09627c92214b4e794d91d704 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 20 Apr 2021 00:09:40 +0200 Subject: [PATCH 15/24] fix whats-new.rst --- doc/whats-new.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 275836128f4..0d260785394 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,10 +22,9 @@ v0.17.1 (unreleased) New Features ~~~~~~~~~~~~ -- apply ``combine_attrs`` on data variables and coordinate variables when concatenating +- apply ``combine_attrs`` on data variables and coordinate variables when concatenating and merging datasets and dataarrays (:pull:`4902`). By `Justus Magin `_. - - Add :py:meth:`Dataset.query` and :py:meth:`DataArray.query` which enable indexing of datasets and data arrays by evaluating query expressions against the values of the data variables (:pull:`4984`). By `Alistair Miles `_. From 8ca5e442ae9b7b2f61a4e8ad68dc32ebbf19134f Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 20 Apr 2021 15:42:58 +0200 Subject: [PATCH 16/24] conditionally exclude attrs --- xarray/tests/test_dataset.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index fb15f15df1a..561b23b594b 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -62,7 +62,7 @@ ] -def create_test_data(seed=None): +def create_test_data(seed=None, add_attrs=True): rs = np.random.RandomState(seed) _vars = { "var1": ["dim1", "dim2"], @@ -77,7 +77,9 @@ def create_test_data(seed=None): obj["dim3"] = ("dim3", list("abcdefghij")) for v, dims in sorted(_vars.items()): data = rs.normal(size=tuple(_dims[d] for d in dims)) - obj[v] = (dims, data, {"foo": "variable"}) + obj[v] = (dims, data) + if add_attrs: + obj[v].attrs = {"foo": "variable"} obj.coords["numbers"] = ( "dim3", np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3], dtype="int64"), From 34d6615bfdfb16111a66c0601bd061cfd04435b4 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 20 Apr 2021 15:58:07 +0200 Subject: [PATCH 17/24] use add_attrs=False or construct manually instead of clearing attrs --- xarray/tests/test_combine.py | 11 ++------- xarray/tests/test_merge.py | 46 ++++++++++++++---------------------- 2 files changed, 20 insertions(+), 37 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 0d751773b05..d3248a03975 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -27,13 +27,6 @@ from .test_dataset import create_test_data -def clear_attrs(ds): - for var in ds.variables.values(): - var.attrs.clear() - ds.attrs.clear() - return ds - - def assert_combined_tile_ids_equal(dict1, dict2): assert len(dict1) == len(dict2) for k, v in dict1.items(): @@ -486,7 +479,7 @@ def test_concat_name_symmetry(self): assert_identical(x_first, y_first) def test_concat_one_dim_merge_another(self): - data = clear_attrs(create_test_data()) + data = create_test_data(add_attrs=False) data1 = data.copy(deep=True) data2 = data.copy(deep=True) @@ -513,7 +506,7 @@ def test_auto_combine_2d(self): assert_equal(result, expected) def test_auto_combine_2d_combine_attrs_kwarg(self): - ds = lambda x: clear_attrs(create_test_data(x)) + ds = lambda x: create_test_data(x, add_attrs=False) partway1 = concat([ds(0), ds(3)], dim="dim1") partway2 = concat([ds(1), ds(4)], dim="dim1") diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 182933796b0..df880050b0b 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -30,17 +30,14 @@ def test_broadcast_dimension_size(self): class TestMergeFunction: def test_merge_arrays(self): - data = create_test_data() - for var in data.variables.values(): - var.attrs.clear() + data = create_test_data(add_attrs=False) + actual = xr.merge([data.var1, data.var2]) expected = data[["var1", "var2"]] assert_identical(actual, expected) def test_merge_datasets(self): - data = create_test_data() - for var in data.variables.values(): - var.attrs.clear() + data = create_test_data(add_attrs=False) actual = xr.merge([data[["var1"]], data[["var2"]]]) expected = data[["var1", "var2"]] @@ -59,14 +56,13 @@ def test_merge_arrays_attrs_default(self): var2_attrs = {"a": 1, "c": 3} expected_attrs = {} - data = create_test_data() + data = create_test_data(add_attrs=False) + expected = data[["var1", "var2"]].copy() + expected.attrs = expected_attrs + data.var1.attrs = var1_attrs data.var2.attrs = var2_attrs actual = xr.merge([data.var1, data.var2]) - expected = data[["var1", "var2"]] - for var in expected.variables.values(): - var.attrs.clear() - expected.attrs = expected_attrs assert_identical(actual, expected) @pytest.mark.parametrize( @@ -170,26 +166,22 @@ def test_merge_arrays_attrs_variables( self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception ): """check that combine_attrs is used on data variables and coords""" - data = create_test_data() - for var in data.variables.values(): - var.attrs.clear() - data.attrs.clear() - - data1 = data.copy() - data1.var1.attrs = attrs1 - data1.dim1.attrs = attrs1 - data2 = data.copy() - data2.var1.attrs = attrs2 - data2.dim1.attrs = attrs2 + data1 = xr.Dataset( + {"var1": ("dim1", [], attrs1)}, coords={"dim1": ("dim1", [], attrs1)} + ) + data2 = xr.Dataset( + {"var1": ("dim1", [], attrs2)}, coords={"dim1": ("dim1", [], attrs2)} + ) if expect_exception: with raises_regex(MergeError, "combine_attrs"): actual = xr.merge([data1, data2], combine_attrs=combine_attrs) else: actual = xr.merge([data1, data2], combine_attrs=combine_attrs) - expected = data.copy() - expected.var1.attrs = expected_attrs - expected.dim1.attrs = expected_attrs + expected = xr.Dataset( + {"var1": ("dim1", [], expected_attrs)}, + coords={"dim1": ("dim1", [], expected_attrs)}, + ) assert_identical(actual, expected) @@ -261,9 +253,7 @@ def test_merge_no_conflicts_single_var(self): xr.merge([ds1, ds3], compat="no_conflicts") def test_merge_no_conflicts_multi_var(self): - data = create_test_data() - for var in data.variables.values(): - var.attrs.clear() + data = create_test_data(add_attrs=False) data1 = data.copy(deep=True) data2 = data.copy(deep=True) From 8ca3aeb3beee196b2fd798b156cd958393a025c5 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 20 Apr 2021 16:03:35 +0200 Subject: [PATCH 18/24] also merge attrs for indexed variables --- xarray/core/merge.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 5d6709e360b..236adcd231d 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -223,6 +223,10 @@ def merge_collected( % (name, variable.attrs, other_variable.attrs) ) merged_vars[name] = variable + merged_vars[name].attrs = merge_attrs( + [var.attrs for var, _ in indexed_elements], + combine_attrs=combine_attrs, + ) merged_indexes[name] = index else: variables = [variable for variable, _ in elements_list] From 6e06dc44639cbd9e9cced3dd8668607d3f99daf5 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 20 Apr 2021 16:07:24 +0200 Subject: [PATCH 19/24] use pytest.raises instead of raises_regex --- xarray/tests/test_merge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index df880050b0b..dedeb08c25b 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -116,7 +116,7 @@ def test_merge_arrays_attrs( data1 = xr.Dataset(attrs=var1_attrs) data2 = xr.Dataset(attrs=var2_attrs) if expect_exception: - with raises_regex(MergeError, "combine_attrs"): + with pytest.raises(MergeError, match="combine_attrs"): actual = xr.merge([data1, data2], combine_attrs=combine_attrs) else: actual = xr.merge([data1, data2], combine_attrs=combine_attrs) @@ -174,7 +174,7 @@ def test_merge_arrays_attrs_variables( ) if expect_exception: - with raises_regex(MergeError, "combine_attrs"): + with pytest.raises(MergeError, match="combine_attrs"): actual = xr.merge([data1, data2], combine_attrs=combine_attrs) else: actual = xr.merge([data1, data2], combine_attrs=combine_attrs) From cc08b532d92f6716b08d9cf7c44b1a11fed81e70 Mon Sep 17 00:00:00 2001 From: Keewis Date: Thu, 29 Apr 2021 23:38:07 +0200 Subject: [PATCH 20/24] switch the default for merge's combine_attrs to override --- xarray/core/merge.py | 4 ++-- xarray/tests/test_merge.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 236adcd231d..ec3c9b0f065 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -659,7 +659,7 @@ def merge( compat: str = "no_conflicts", join: str = "outer", fill_value: object = dtypes.NA, - combine_attrs: str = "drop", + combine_attrs: str = "override", ) -> "Dataset": """Merge any number of xarray objects into a single Dataset as variables. @@ -698,7 +698,7 @@ def merge( variable names to fill values. Use a data array's name to refer to its values. combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ - "override"}, default: "drop" + "override"}, default: "override" String indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index cab5d3a7743..680c2a3a679 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -53,10 +53,12 @@ def test_merge_dataarray_unnamed(self): def test_merge_arrays_attrs_default(self): var1_attrs = {"a": 1, "b": 2} var2_attrs = {"a": 1, "c": 3} - expected_attrs = {} + expected_attrs = {"a": 1, "b": 2} data = create_test_data(add_attrs=False) expected = data[["var1", "var2"]].copy() + expected.var1.attrs = var1_attrs + expected.var2.attrs = var2_attrs expected.attrs = expected_attrs data.var1.attrs = var1_attrs From dffda43911d73725ac8af99aebaaa1aef5d1c3e9 Mon Sep 17 00:00:00 2001 From: Keewis Date: Thu, 29 Apr 2021 23:53:08 +0200 Subject: [PATCH 21/24] use pytest.raises --- xarray/tests/test_combine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index bb0e9c63994..3b6aaec60f2 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -820,7 +820,7 @@ def test_combine_nested_combine_attrs_variables( ) if expect_exception: - with raises_regex(MergeError, "combine_attrs"): + with pytest.raises(MergeError, match="combine_attrs"): combine_by_coords([data1, data2], combine_attrs=combine_attrs) else: actual = combine_by_coords([data1, data2], combine_attrs=combine_attrs) @@ -884,7 +884,7 @@ def test_combine_by_coords_combine_attrs_variables( ) if expect_exception: - with raises_regex(MergeError, "combine_attrs"): + with pytest.raises(MergeError, match="combine_attrs"): combine_by_coords([data1, data2], combine_attrs=combine_attrs) else: actual = combine_by_coords([data1, data2], combine_attrs=combine_attrs) From db0fc56594e678639f486043748794e2ee73fa70 Mon Sep 17 00:00:00 2001 From: Keewis Date: Thu, 29 Apr 2021 23:55:51 +0200 Subject: [PATCH 22/24] update whats-new.rst [skip-ci] --- doc/whats-new.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0cee2fa84d2..3f0b286251f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,7 +25,6 @@ New Features - apply ``combine_attrs`` on data variables and coordinate variables when concatenating and merging datasets and dataarrays (:pull:`4902`). By `Justus Magin `_. - - Add ``safe_chunks`` option to :py:meth:`Dataset.to_zarr` which allows overriding checks made to ensure Dask and Zarr chunk compatibility (:issue:`5056`). By `Ryan Abernathey `_ @@ -112,6 +111,9 @@ Breaking changes ``ds.coarsen(...).mean(keep_attrs=False)`` instead of ``ds.coarsen(..., keep_attrs=False).mean()``. Further, coarsen now keeps attributes per default (:pull:`5227`). By `Mathias Hauser `_. +- switch the default of :py:func`merge`'s ``combine_attrs`` parameter to ``"override"`` + (:pull:`4902`) + By `Justus Magin `_. Deprecations ~~~~~~~~~~~~ From a1f1dda60a009d2ec4df3cd4e687b9e47c10c8b5 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 1 May 2021 21:43:11 +0200 Subject: [PATCH 23/24] fix whats-new.rst [skip-ci] --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3f0b286251f..f1877c177f9 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -111,7 +111,7 @@ Breaking changes ``ds.coarsen(...).mean(keep_attrs=False)`` instead of ``ds.coarsen(..., keep_attrs=False).mean()``. Further, coarsen now keeps attributes per default (:pull:`5227`). By `Mathias Hauser `_. -- switch the default of :py:func`merge`'s ``combine_attrs`` parameter to ``"override"`` +- switch the default of the :py:func`merge` ``combine_attrs`` parameter to ``"override"`` (:pull:`4902`) By `Justus Magin `_. From 065e757c5926be33a5d417d19cfe68118dd4d6d8 Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 5 May 2021 18:05:31 +0200 Subject: [PATCH 24/24] provide more context for the change of the default value [skip-ci] --- doc/whats-new.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bb6310f6082..59857c67bf7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -146,8 +146,9 @@ Breaking changes ``ds.coarsen(...).mean(keep_attrs=False)`` instead of ``ds.coarsen(..., keep_attrs=False).mean()``. Further, coarsen now keeps attributes per default (:pull:`5227`). By `Mathias Hauser `_. -- switch the default of the :py:func`merge` ``combine_attrs`` parameter to ``"override"`` - (:pull:`4902`) +- switch the default of the :py:func:`merge` ``combine_attrs`` parameter to + ``"override"``. This will keep the current behavior for merging the ``attrs`` of + variables but stop dropping the ``attrs`` of the main objects (:pull:`4902`). By `Justus Magin `_. Deprecations