From 767a6687e6c2d0124b47b3c392b210202f16e7ec Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 19 Jan 2021 02:02:12 +0100 Subject: [PATCH 01/22] add the drop_conflicts option for merging attrs --- xarray/core/merge.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index d29a9e1ff02..69c75fe4124 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -513,6 +513,14 @@ def merge_attrs(variable_attrs, combine_attrs): "the same. Merging %s with %s" % (str(result), str(attrs)) ) return result + elif combine_attrs == "drop_conflicts": + result = dict(variable_attrs[0]) + for attrs in variable_attrs[1:]: + try: + result = compat_dict_union(result, attrs) + except ValueError: + continue + return result elif combine_attrs == "identical": result = dict(variable_attrs[0]) for attrs in variable_attrs[1:]: From 909c5cd79c31129eec947a00327e2626965bd32c Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 19 Jan 2021 02:11:28 +0100 Subject: [PATCH 02/22] mention drop_conflicts in a few docstrings changed functions: concat, merge, combine_by_coords, combine_nested, merge_core --- xarray/core/combine.py | 12 ++++++++---- xarray/core/concat.py | 6 ++++-- xarray/core/merge.py | 9 ++++++--- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 86ed1870302..573247937b7 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -412,14 +412,16 @@ def combine_nested( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \ - default: "drop" + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, default: "drop" String indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. + - "drop_conflicts": attrs from all objects are combined, any that have + the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. @@ -625,14 +627,16 @@ def combine_by_coords( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \ - default: "drop" + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, default: "drop" String indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. + - "drop_conflicts": attrs from all objects are combined, any that have + the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 5cda5aa903c..7a958eb1404 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -142,14 +142,16 @@ def concat( - "override": if indexes are of same size, rewrite indexes to be those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \ - default: "override" + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, default: "override" String indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. + - "drop_conflicts": attrs from all objects are combined, any that have + the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 69c75fe4124..1332dccfdd1 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -564,7 +564,8 @@ def merge_core( Compatibility checks to use when merging variables. join : {"outer", "inner", "left", "right"}, optional How to combine objects with different indexes. - combine_attrs : {"drop", "identical", "no_conflicts", "override"}, optional + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, optional How to combine attributes of objects priority_arg : int, optional Optional argument in `objects` that takes precedence over the others. @@ -676,14 +677,16 @@ def merge( Value to use for newly missing values. If a dict-like, maps variable names to fill values. Use a data array's name to refer to its values. - combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \ - default: "drop" + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, default: "drop" String indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. + - "drop_conflicts": attrs from all objects are combined, any that have + the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. From d35a213027638e3ab436890ed45b1a32c5286f2e Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 19 Jan 2021 02:26:57 +0100 Subject: [PATCH 03/22] add a test for merge_attrs --- xarray/tests/test_merge.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 34b138e1f6a..6e50c847d38 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -116,6 +116,31 @@ def test_merge_attrs_override_copy(self): ds3.attrs["x"] = 2 assert ds1.x == 0 + @pytest.mark.parametrize( + ["combine_attrs", "expected"], + ( + pytest.param("drop", {}, id="drop"), + pytest.param("override", {"x": 0, "y": 1}, id="override"), + pytest.param("no_conflicts", None, id="no_conflicts"), + pytest.param("identical", None, id="identical"), + pytest.param("drop_conflicts", {"y": 1, "z": 2}, id="drop_conflicts"), + ), + ) + def test_merge_attrs(self, combine_attrs, expected): + variable_attrs = ( + {"x": 0, "y": 1}, + {"x": 1, "y": 1}, + {"y": 1, "z": 2}, + ) + + if expected is None: + with pytest.raises(MergeError): + merge.merge_attrs(variable_attrs, combine_attrs) + return + + actual = merge.merge_attrs(variable_attrs, combine_attrs) + assert actual == expected + def test_merge_dicts_simple(self): actual = xr.merge([{"foo": 0}, {"bar": "one"}, {"baz": 3.5}]) expected = xr.Dataset({"foo": 0, "bar": "one", "baz": 3.5}) From 7f9481c26f4640fb3c5f51cc5717d419cce49c65 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 19 Jan 2021 03:03:43 +0100 Subject: [PATCH 04/22] rewrite the drop_conflicts algorithm --- xarray/core/merge.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 1332dccfdd1..793140d09f8 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -20,7 +20,7 @@ from . import dtypes, pdcompat from .alignment import deep_align from .duck_array_ops import lazy_array_equiv -from .utils import Frozen, compat_dict_union, dict_equiv +from .utils import Frozen, compat_dict_intersection, compat_dict_union, dict_equiv from .variable import Variable, as_variable, assert_unique_multiindex_level_names if TYPE_CHECKING: @@ -514,10 +514,13 @@ def merge_attrs(variable_attrs, combine_attrs): ) return result elif combine_attrs == "drop_conflicts": - result = dict(variable_attrs[0]) - for attrs in variable_attrs[1:]: + result = {} + for attrs in variable_attrs: + result.update( + {key: value for key, value in attrs.items() if key not in result} + ) try: - result = compat_dict_union(result, attrs) + result = compat_dict_intersection(result, attrs) except ValueError: continue return result From 1930d680aa723a0193f45f0108e4a5a81c1666bf Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 19 Jan 2021 22:54:44 +0100 Subject: [PATCH 05/22] verify that drop_conflicts works correctly when passed to concat --- xarray/tests/test_concat.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 7416cab13ed..2db6ec47a67 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -280,6 +280,24 @@ def test_concat_combine_attrs_kwarg(self): actual = concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) assert_identical(actual, expected[combine_attrs]) + ds1 = Dataset( + {"a": ("x", [0], {"b": 41, "c": 43})}, + coords={"x": [0]}, + attrs={"a": 41, "b": 42, "c": 43}, + ) + ds2 = Dataset( + {"a": ("x", [0], {"a": 41, "b": 42, "c": 43})}, + coords={"x": [1]}, + attrs={"b": 41, "c": 43, "d": 44}, + ) + actual = concat([ds1, ds2], dim="x", combine_attrs="drop_conflicts") + expected = Dataset( + {"a": ("x", [0, 0], {"a": 41, "c": 43})}, + coords={"x": [0, 1]}, + attrs={"a": 41, "c": 43, "d": 44}, + ) + assert_identical(actual, expected) + def test_concat_promote_shape(self): # mixed dims within variables objs = [Dataset({}, {"x": 0}), Dataset({"x": [1]})] From a14a4d90df70c5bf80e041273f324b33573a11b2 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 19 Jan 2021 23:15:23 +0100 Subject: [PATCH 06/22] add a test for combine_nested with combine_attrs="drop_conflicts" --- xarray/tests/test_combine.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 109b78f05a9..522b98cf864 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -732,6 +732,17 @@ def test_combine_coords_combine_attrs_identical(self): objs, concat_dim="x", join="outer", combine_attrs="identical" ) + def test_combine_nested_combine_attrs_drop_conflicts(self): + objs = [ + Dataset({"x": [0], "y": [0]}, attrs={"a": 1, "b": 2, "c": 3}), + Dataset({"x": [1], "y": [1]}, attrs={"a": 1, "b": 0, "d": 3}), + ] + expected = Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1, "c": 3, "d": 3}) + actual = combine_nested( + objs, concat_dim="x", join="outer", combine_attrs="drop_conflicts" + ) + assert_identical(expected, actual) + def test_infer_order_from_coords(self): data = create_test_data() objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))] From edc8428db5ad0ff5cf58f3ef0b9832b9c86eff7f Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 19 Jan 2021 23:44:53 +0100 Subject: [PATCH 07/22] remove test_merge_attrs in favor of test_merge_arrays_attrs --- xarray/tests/test_merge.py | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 6e50c847d38..28f51aa0621 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -92,6 +92,13 @@ def test_merge_arrays_attrs_default(self): {"a": 1, "b": 2}, False, ), + ( + "drop_conflicts", + {"a": 1, "b": 2}, + {"b": 1, "c": 3}, + {"a": 1, "c": 3}, + False, + ), ], ) def test_merge_arrays_attrs( @@ -116,31 +123,6 @@ def test_merge_attrs_override_copy(self): ds3.attrs["x"] = 2 assert ds1.x == 0 - @pytest.mark.parametrize( - ["combine_attrs", "expected"], - ( - pytest.param("drop", {}, id="drop"), - pytest.param("override", {"x": 0, "y": 1}, id="override"), - pytest.param("no_conflicts", None, id="no_conflicts"), - pytest.param("identical", None, id="identical"), - pytest.param("drop_conflicts", {"y": 1, "z": 2}, id="drop_conflicts"), - ), - ) - def test_merge_attrs(self, combine_attrs, expected): - variable_attrs = ( - {"x": 0, "y": 1}, - {"x": 1, "y": 1}, - {"y": 1, "z": 2}, - ) - - if expected is None: - with pytest.raises(MergeError): - merge.merge_attrs(variable_attrs, combine_attrs) - return - - actual = merge.merge_attrs(variable_attrs, combine_attrs) - assert actual == expected - def test_merge_dicts_simple(self): actual = xr.merge([{"foo": 0}, {"bar": "one"}, {"baz": 3.5}]) expected = xr.Dataset({"foo": 0, "bar": "one", "baz": 3.5}) From 9344b930227a5fd7f9d952e6b50beb4997508664 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 19 Jan 2021 23:52:01 +0100 Subject: [PATCH 08/22] use a dict comprehension instead of a dict intersection --- xarray/core/merge.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 793140d09f8..4c467c14f4c 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -20,7 +20,7 @@ from . import dtypes, pdcompat from .alignment import deep_align from .duck_array_ops import lazy_array_equiv -from .utils import Frozen, compat_dict_intersection, compat_dict_union, dict_equiv +from .utils import Frozen, compat_dict_union, dict_equiv from .variable import Variable, as_variable, assert_unique_multiindex_level_names if TYPE_CHECKING: @@ -519,10 +519,11 @@ def merge_attrs(variable_attrs, combine_attrs): result.update( {key: value for key, value in attrs.items() if key not in result} ) - try: - result = compat_dict_intersection(result, attrs) - except ValueError: - continue + result = { + key: value + for key, value in result.items() + if attrs.get(key, value) == value + } return result elif combine_attrs == "identical": result = dict(variable_attrs[0]) From 1791111a033211e816ccf62b9cf1ad6c3237bd35 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 23 Jan 2021 23:32:19 +0100 Subject: [PATCH 09/22] reorganize the drop_conflicts concat test --- xarray/tests/test_concat.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 2db6ec47a67..5f11a0bd5a0 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -281,18 +281,18 @@ def test_concat_combine_attrs_kwarg(self): assert_identical(actual, expected[combine_attrs]) ds1 = Dataset( - {"a": ("x", [0], {"b": 41, "c": 43})}, + {"a": ("x", [0], {"b": 1, "c": 43, "d": 44})}, coords={"x": [0]}, - attrs={"a": 41, "b": 42, "c": 43}, + attrs={"b": 1, "c": 43, "d": 44}, ) ds2 = Dataset( {"a": ("x", [0], {"a": 41, "b": 42, "c": 43})}, coords={"x": [1]}, - attrs={"b": 41, "c": 43, "d": 44}, + attrs={"a": 41, "b": 42, "c": 43}, ) actual = concat([ds1, ds2], dim="x", combine_attrs="drop_conflicts") expected = Dataset( - {"a": ("x", [0, 0], {"a": 41, "c": 43})}, + {"a": ("x", [0, 0], {"a": 41, "c": 43, "d": 44})}, coords={"x": [0, 1]}, attrs={"a": 41, "c": 43, "d": 44}, ) From 2ec123f8d8ca8ccc903ba3a5126f1131692301a8 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 23 Jan 2021 23:33:03 +0100 Subject: [PATCH 10/22] use different data for the drop_conflicts merge test --- xarray/tests/test_merge.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 28f51aa0621..deb6c99e4c4 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -94,9 +94,9 @@ def test_merge_arrays_attrs_default(self): ), ( "drop_conflicts", - {"a": 1, "b": 2}, - {"b": 1, "c": 3}, - {"a": 1, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + {"b": 1, "c": 3, "d": 4}, + {"a": 1, "c": 3, "d": 4}, False, ), ], From fad94a0a1953e979a1b120974563d6139e28127d Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 23 Jan 2021 23:33:53 +0100 Subject: [PATCH 11/22] add another test to make sure attrs on variables is also merged --- xarray/tests/test_merge.py | 57 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index deb6c99e4c4..a5007a9f66c 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -116,6 +116,63 @@ def test_merge_arrays_attrs( expected.attrs = expected_attrs assert_identical(actual, expected) + @pytest.mark.parametrize( + "combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception", + [ + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 1, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + False, + ), + ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), + ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 4, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + False, + ), + ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, False), + ( + "override", + {"a": 1, "b": 2}, + {"a": 4, "b": 5, "c": 3}, + {"a": 1, "b": 2}, + False, + ), + ( + "drop_conflicts", + {"a": 1, "b": 2, "c": 3}, + {"b": 1, "c": 3, "d": 4}, + {"a": 1, "c": 3, "d": 4}, + False, + ), + ], + ) + def test_merge_arrays_attrs_variables( + self, combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception + ): + data = create_test_data() + data1 = data.copy() + data1.var1.attrs = var1_attrs + data2 = data.copy() + data2.var1.attrs = var2_attrs + + if expect_exception: + with raises_regex(MergeError, "combine_attrs"): + actual = xr.merge([data1, data2], combine_attrs=combine_attrs) + else: + actual = xr.merge([data1, data2], combine_attrs=combine_attrs) + expected = data.copy() + expected.var1.attrs = expected_attrs + + assert_identical(actual, expected) + def test_merge_attrs_override_copy(self): ds1 = xr.Dataset(attrs={"x": 0}) ds2 = xr.Dataset(attrs={"x": 1}) From 86c7c5f7cfebeb118a12c0d1d6d559cffe4638ce Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 23 Jan 2021 23:56:49 +0100 Subject: [PATCH 12/22] correctly set expect_exception --- xarray/tests/test_merge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index a5007a9f66c..a438533d3ff 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -133,11 +133,11 @@ def test_merge_arrays_attrs( {"a": 1, "b": 2}, {"a": 4, "c": 3}, {"a": 1, "b": 2, "c": 3}, - False, + True, ), ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), - ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), ( "override", {"a": 1, "b": 2}, From 353119857d14c93b68f84a511bd21de80f3fce3a Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 23 Jan 2021 23:57:04 +0100 Subject: [PATCH 13/22] skip the combine_attrs merge variables test --- xarray/tests/test_merge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index a438533d3ff..49806ffe2ee 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -116,6 +116,7 @@ def test_merge_arrays_attrs( expected.attrs = expected_attrs assert_identical(actual, expected) + @pytest.mark.skip(reason="not implemented, yet (see #4827)") @pytest.mark.parametrize( "combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception", [ From be5ad6a0a2d03eabef56a94d92b7a6d82624f3d0 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 23 Jan 2021 23:58:32 +0100 Subject: [PATCH 14/22] rewrite the concat combine_attrs test and use different data --- xarray/tests/test_concat.py | 140 ++++++++++++++++++++++++++---------- 1 file changed, 104 insertions(+), 36 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 5f11a0bd5a0..99f9cde2e84 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -258,45 +258,113 @@ def test_concat_join_kwarg(self): ) assert_identical(actual, expected) - def test_concat_combine_attrs_kwarg(self): - ds1 = Dataset({"a": ("x", [0])}, coords={"x": [0]}, attrs={"b": 42}) - ds2 = Dataset({"a": ("x", [0])}, coords={"x": [1]}, attrs={"b": 42, "c": 43}) - - expected = {} - expected["drop"] = Dataset({"a": ("x", [0, 0])}, {"x": [0, 1]}) - expected["no_conflicts"] = Dataset( - {"a": ("x", [0, 0])}, {"x": [0, 1]}, {"b": 42, "c": 43} - ) - expected["override"] = Dataset({"a": ("x", [0, 0])}, {"x": [0, 1]}, {"b": 42}) - - with raises_regex(ValueError, "combine_attrs='identical'"): - actual = concat([ds1, ds2], dim="x", combine_attrs="identical") - with raises_regex(ValueError, "combine_attrs='no_conflicts'"): - ds3 = ds2.copy(deep=True) - ds3.attrs["b"] = 44 - actual = concat([ds1, ds3], dim="x", combine_attrs="no_conflicts") + @pytest.mark.parametrize( + "combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception", + [ + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 1, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + False, + ), + ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), + ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 4, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + True, + ), + ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), + ( + "override", + {"a": 1, "b": 2}, + {"a": 4, "b": 5, "c": 3}, + {"a": 1, "b": 2}, + False, + ), + ( + "drop_conflicts", + {"a": 41, "b": 42, "c": 43}, + {"b": 2, "c": 43, "d": 44}, + {"a": 41, "c": 43, "d": 44}, + False, + ), + ], + ) + def test_concat_combine_attrs_kwarg( + self, combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception + ): + ds1 = Dataset({"a": ("x", [0])}, coords={"x": [0]}, attrs=var1_attrs) + ds2 = Dataset({"a": ("x", [0])}, coords={"x": [1]}, attrs=var2_attrs) + + if expect_exception: + with pytest.raises(ValueError, match=f"combine_attrs='{combine_attrs}'"): + concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) + else: + actual = concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) + expected = Dataset( + {"a": ("x", [0, 0])}, {"x": [0, 1]}, attrs=expected_attrs + ) - for combine_attrs in expected: + assert_identical(actual, expected) + + @pytest.mark.parametrize( + "combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception", + [ + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 1, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + False, + ), + ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False), + ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False), + ( + "no_conflicts", + {"a": 1, "b": 2}, + {"a": 4, "c": 3}, + {"a": 1, "b": 2, "c": 3}, + True, + ), + ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False), + ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True), + ( + "override", + {"a": 1, "b": 2}, + {"a": 4, "b": 5, "c": 3}, + {"a": 1, "b": 2}, + False, + ), + ( + "drop_conflicts", + {"a": 41, "b": 42, "c": 43}, + {"b": 2, "c": 43, "d": 44}, + {"a": 41, "c": 43, "d": 44}, + False, + ), + ], + ) + def test_concat_combine_attrs_kwarg_variables( + self, combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception + ): + ds1 = Dataset({"a": ("x", [0], var1_attrs)}, coords={"x": [0]}) + ds2 = Dataset({"a": ("x", [0], var2_attrs)}, coords={"x": [1]}) + + if expect_exception: + with pytest.raises(ValueError, match=f"combine_attrs='{combine_attrs}'"): + concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) + else: actual = concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) - assert_identical(actual, expected[combine_attrs]) + expected = Dataset({"a": ("x", [0, 0], expected_attrs)}, {"x": [0, 1]}) - ds1 = Dataset( - {"a": ("x", [0], {"b": 1, "c": 43, "d": 44})}, - coords={"x": [0]}, - attrs={"b": 1, "c": 43, "d": 44}, - ) - ds2 = Dataset( - {"a": ("x", [0], {"a": 41, "b": 42, "c": 43})}, - coords={"x": [1]}, - attrs={"a": 41, "b": 42, "c": 43}, - ) - actual = concat([ds1, ds2], dim="x", combine_attrs="drop_conflicts") - expected = Dataset( - {"a": ("x", [0, 0], {"a": 41, "c": 43, "d": 44})}, - coords={"x": [0, 1]}, - attrs={"a": 41, "c": 43, "d": 44}, - ) - assert_identical(actual, expected) + assert_identical(actual, expected) def test_concat_promote_shape(self): # mixed dims within variables From 5a6b55197f097fa0466074345ce19a1e2b02f46c Mon Sep 17 00:00:00 2001 From: Keewis Date: Sat, 23 Jan 2021 23:59:29 +0100 Subject: [PATCH 15/22] also skip the concat combine_attrs variables test --- xarray/tests/test_concat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 99f9cde2e84..361ffb633cb 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -313,6 +313,7 @@ def test_concat_combine_attrs_kwarg( assert_identical(actual, expected) + @pytest.mark.skip(reason="not implemented, yet (see #4827)") @pytest.mark.parametrize( "combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception", [ From af7260323d2cb41fc13f7e202dd84c4528a65df1 Mon Sep 17 00:00:00 2001 From: Keewis Date: Sun, 24 Jan 2021 13:56:00 +0100 Subject: [PATCH 16/22] use utils.equivalent instead of == so ndarray objects can be compared --- xarray/core/merge.py | 4 ++-- xarray/tests/test_merge.py | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 4c467c14f4c..50513a9a5a0 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -20,7 +20,7 @@ from . import dtypes, pdcompat from .alignment import deep_align from .duck_array_ops import lazy_array_equiv -from .utils import Frozen, compat_dict_union, dict_equiv +from .utils import Frozen, compat_dict_union, dict_equiv, equivalent from .variable import Variable, as_variable, assert_unique_multiindex_level_names if TYPE_CHECKING: @@ -522,7 +522,7 @@ def merge_attrs(variable_attrs, combine_attrs): result = { key: value for key, value in result.items() - if attrs.get(key, value) == value + if equivalent(attrs.get(key, value), value) } return result elif combine_attrs == "identical": diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 49806ffe2ee..e5480bac9d3 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -99,6 +99,13 @@ def test_merge_arrays_attrs_default(self): {"a": 1, "c": 3, "d": 4}, False, ), + ( + "drop_conflicts", + {"a": 1, "b": np.array([2]), "c": np.array([3])}, + {"b": 1, "c": np.array([3]), "d": 4}, + {"a": 1, "c": np.array([3]), "d": 4}, + False, + ), ], ) def test_merge_arrays_attrs( From dbc510a12855bffc55265bba9f41a46e1d52988e Mon Sep 17 00:00:00 2001 From: Keewis Date: Sun, 24 Jan 2021 15:38:23 +0100 Subject: [PATCH 17/22] make sure conflicting keys cannot be added back by other objects --- xarray/core/merge.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 50513a9a5a0..5f8cfd03a9c 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -515,15 +515,21 @@ def merge_attrs(variable_attrs, combine_attrs): return result elif combine_attrs == "drop_conflicts": result = {} + dropped_keys = set() for attrs in variable_attrs: result.update( - {key: value for key, value in attrs.items() if key not in result} + { + key: value + for key, value in attrs.items() + if key not in result and key not in dropped_keys + } ) result = { key: value for key, value in result.items() if equivalent(attrs.get(key, value), value) } + dropped_keys |= {key for key in attrs if key not in result} return result elif combine_attrs == "identical": result = dict(variable_attrs[0]) From 344d8225e6d4d13e01f09226156bc71d887a22bd Mon Sep 17 00:00:00 2001 From: Keewis Date: Sun, 24 Jan 2021 15:50:05 +0100 Subject: [PATCH 18/22] don't compare the value with itself if key is not in attrs --- xarray/core/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 5f8cfd03a9c..14beeff3db5 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -527,7 +527,7 @@ def merge_attrs(variable_attrs, combine_attrs): result = { key: value for key, value in result.items() - if equivalent(attrs.get(key, value), value) + if key not in attrs or equivalent(attrs[key], value) } dropped_keys |= {key for key in attrs if key not in result} return result From 6ed88f72758fd9ee0ab86725bfa663ffa9b46add Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 5 Feb 2021 02:30:35 +0100 Subject: [PATCH 19/22] also check the coordinates --- xarray/tests/test_concat.py | 13 ++++++++----- xarray/tests/test_merge.py | 11 +++++++---- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 361ffb633cb..be085be04d6 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -315,7 +315,7 @@ def test_concat_combine_attrs_kwarg( @pytest.mark.skip(reason="not implemented, yet (see #4827)") @pytest.mark.parametrize( - "combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception", + "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception", [ ( "no_conflicts", @@ -353,17 +353,20 @@ def test_concat_combine_attrs_kwarg( ], ) def test_concat_combine_attrs_kwarg_variables( - self, combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception + self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception ): - ds1 = Dataset({"a": ("x", [0], var1_attrs)}, coords={"x": [0]}) - ds2 = Dataset({"a": ("x", [0], var2_attrs)}, coords={"x": [1]}) + ds1 = Dataset({"a": ("x", [0], attrs1)}, coords={"x": ("x", [0], attrs1)}) + ds2 = Dataset({"a": ("x", [0], attrs2)}, coords={"x": ("x", [1], attrs2)}) if expect_exception: with pytest.raises(ValueError, match=f"combine_attrs='{combine_attrs}'"): concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) else: actual = concat([ds1, ds2], dim="x", combine_attrs=combine_attrs) - expected = Dataset({"a": ("x", [0, 0], expected_attrs)}, {"x": [0, 1]}) + expected = Dataset( + {"a": ("x", [0, 0], expected_attrs)}, + {"x": ("x", [0, 1], expected_attrs)}, + ) assert_identical(actual, expected) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index e5480bac9d3..afe664bbbbe 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -125,7 +125,7 @@ def test_merge_arrays_attrs( @pytest.mark.skip(reason="not implemented, yet (see #4827)") @pytest.mark.parametrize( - "combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception", + "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception", [ ( "no_conflicts", @@ -163,13 +163,15 @@ def test_merge_arrays_attrs( ], ) def test_merge_arrays_attrs_variables( - self, combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception + self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception ): data = create_test_data() data1 = data.copy() - data1.var1.attrs = var1_attrs + data1.var1.attrs = attrs1 + data1.dim1.attrs = attrs1 data2 = data.copy() - data2.var1.attrs = var2_attrs + data2.var1.attrs = attrs2 + data2.dim1.attrs = attrs2 if expect_exception: with raises_regex(MergeError, "combine_attrs"): @@ -178,6 +180,7 @@ def test_merge_arrays_attrs_variables( actual = xr.merge([data1, data2], combine_attrs=combine_attrs) expected = data.copy() expected.var1.attrs = expected_attrs + expected.dim1.attrs = expected_attrs assert_identical(actual, expected) From f43c0bec9528d7740ddd98189451899bcb9cae4b Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 5 Feb 2021 02:30:50 +0100 Subject: [PATCH 20/22] add docstrings to explain the purpose of the tests --- xarray/tests/test_concat.py | 1 + xarray/tests/test_merge.py | 1 + 2 files changed, 2 insertions(+) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index be085be04d6..beed48a35fc 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -355,6 +355,7 @@ def test_concat_combine_attrs_kwarg( def test_concat_combine_attrs_kwarg_variables( self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception ): + """check that combine_attrs is used on data variables and coords""" ds1 = Dataset({"a": ("x", [0], attrs1)}, coords={"x": ("x", [0], attrs1)}) ds2 = Dataset({"a": ("x", [0], attrs2)}, coords={"x": ("x", [1], attrs2)}) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index afe664bbbbe..5ce306ece18 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -165,6 +165,7 @@ def test_merge_arrays_attrs( def test_merge_arrays_attrs_variables( self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception ): + """check that combine_attrs is used on data variables and coords""" data = create_test_data() data1 = data.copy() data1.var1.attrs = attrs1 From ac3c102eb47df2d74e388c35ff436af9e45e2c70 Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 5 Feb 2021 23:35:24 +0100 Subject: [PATCH 21/22] update whats-new.rst --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 16b0cbf4ea1..dc0e12cd7c4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,6 +47,9 @@ New Features ~~~~~~~~~~~~ - Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with large number of variables. By `Deepak Cherian `_ +- add ``"drop_conflicts"`` to the strategies supported by the ``combine_attrs`` kwarg + (:issue:`4749`, :pull:`4827`). + By `Justus Magin `_. Bug fixes ~~~~~~~~~ From ea24b32bf936b120188dba5fcbe3aa5d5c084ef2 Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 5 Feb 2021 23:39:50 +0100 Subject: [PATCH 22/22] add a test for combine_attrs="drop_conflicts" with more than two attrs --- xarray/tests/test_merge.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 5ce306ece18..27e2b10dcbc 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -192,6 +192,15 @@ def test_merge_attrs_override_copy(self): ds3.attrs["x"] = 2 assert ds1.x == 0 + def test_merge_attrs_drop_conflicts(self): + ds1 = xr.Dataset(attrs={"a": 0, "b": 0, "c": 0}) + ds2 = xr.Dataset(attrs={"b": 0, "c": 1, "d": 0}) + ds3 = xr.Dataset(attrs={"a": 0, "b": 1, "c": 0, "e": 0}) + + actual = xr.merge([ds1, ds2, ds3], combine_attrs="drop_conflicts") + expected = xr.Dataset(attrs={"a": 0, "d": 0, "e": 0}) + assert_identical(actual, expected) + def test_merge_dicts_simple(self): actual = xr.merge([{"foo": 0}, {"bar": "one"}, {"baz": 3.5}]) expected = xr.Dataset({"foo": 0, "bar": "one", "baz": 3.5})