From e84cc97b6d72a1e55128de80df3955c6402cf025 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 27 Oct 2019 20:22:13 -0600 Subject: [PATCH 1/8] Optimize dask array equality checks. Dask arrays with the same graph have the same name. We can use this to quickly compare dask-backed variables without computing. Fixes #3068 and #3311 --- xarray/core/concat.py | 16 ++++++++++++++ xarray/core/duck_array_ops.py | 41 +++++++++++++++++++++++++++++++++++ xarray/core/merge.py | 17 +++++++++++---- xarray/core/variable.py | 14 +++++++----- xarray/tests/test_dask.py | 40 ++++++++++++++++++++++++++++++++++ 5 files changed, 119 insertions(+), 9 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index bcab136de8d..01da928c29d 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -2,6 +2,7 @@ from . import dtypes, utils from .alignment import align +from .duck_array_ops import lazy_array_equiv from .merge import _VALID_COMPAT, unique_variable from .variable import IndexVariable, Variable, as_variable from .variable import concat as concat_vars @@ -189,6 +190,21 @@ def process_subset_opt(opt, subset): # all nonindexes that are not the same in each dataset for k in getattr(datasets[0], subset): if k not in concat_over: + equals[k] = None + variables = [ds.variables[k] for ds in datasets] + # first check without comparing values i.e. no computes + for var in variables[1:]: + equals[k] = getattr(variables[0], compat)( + var, equiv=lazy_array_equiv + ) + if not equals[k]: + break + + if equals[k] is not None: + if equals[k] is False: + concat_over.add(k) + continue + # Compare the variable of all datasets vs. the one # of the first dataset. Perform the minimum amount of # loads in order to avoid multiple loads from disk diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index d943788c434..c2b72eb08e1 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -181,9 +181,34 @@ def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): arr2 = asarray(arr2) if arr1.shape != arr2.shape: return False + if ( + dask_array + and isinstance(arr1, dask_array.Array) + and isinstance(arr2, dask_array.Array) + ): + # GH3068 + if arr1.name == arr2.name: + return True return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all()) +def lazy_array_equiv(arr1, arr2): + """Like array_equal, but doesn't actually compare values + """ + arr1 = asarray(arr1) + arr2 = asarray(arr2) + if arr1.shape != arr2.shape: + return False + if ( + dask_array + and isinstance(arr1, dask_array.Array) + and isinstance(arr2, dask_array.Array) + ): + # GH3068 + if arr1.name == arr2.name: + return True + + def array_equiv(arr1, arr2): """Like np.array_equal, but also allows values to be NaN in both arrays """ @@ -191,6 +216,14 @@ def array_equiv(arr1, arr2): arr2 = asarray(arr2) if arr1.shape != arr2.shape: return False + if ( + dask_array + and isinstance(arr1, dask_array.Array) + and isinstance(arr2, dask_array.Array) + ): + # GH3068 + if arr1.name == arr2.name: + return True with warnings.catch_warnings(): warnings.filterwarnings("ignore", "In the future, 'NAT == x'") flag_array = (arr1 == arr2) | (isnull(arr1) & isnull(arr2)) @@ -205,6 +238,14 @@ def array_notnull_equiv(arr1, arr2): arr2 = asarray(arr2) if arr1.shape != arr2.shape: return False + if ( + dask_array + and isinstance(arr1, dask_array.Array) + and isinstance(arr2, dask_array.Array) + ): + # GH3068 + if arr1.name == arr2.name: + return True with warnings.catch_warnings(): 
warnings.filterwarnings("ignore", "In the future, 'NAT == x'") flag_array = (arr1 == arr2) | isnull(arr1) | isnull(arr2) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index db5ef9531df..21426b8bd37 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -19,6 +19,7 @@ from . import dtypes, pdcompat from .alignment import deep_align +from .duck_array_ops import lazy_array_equiv from .utils import Frozen, dict_equiv from .variable import Variable, as_variable, assert_unique_multiindex_level_names @@ -123,16 +124,24 @@ def unique_variable( combine_method = "fillna" if equals is None: - out = out.compute() + # first check without comparing values i.e. no computes for var in variables[1:]: - equals = getattr(out, compat)(var) + equals = getattr(out, compat)(var, equiv=lazy_array_equiv) if not equals: break + # now compare values with minimum number of computes + if not equals: + out = out.compute() + for var in variables[1:]: + equals = getattr(out, compat)(var) + if not equals: + break + if not equals: raise MergeError( - "conflicting values for variable {!r} on objects to be combined. " - "You can skip this check by specifying compat='override'.".format(name) + f"conflicting values for variable {name!r} on objects to be combined. " + "You can skip this check by specifying compat='override'." ) if combine_method: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 93ad1eafb97..82c041ecd05 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1229,7 +1229,9 @@ def transpose(self, *dims) -> "Variable": if len(dims) == 0: dims = self.dims[::-1] axes = self.get_axis_num(dims) - if len(dims) < 2: # no need to transpose if only one dimension + if len(dims) < 2 or dims == self.dims: + # no need to transpose if only one dimension + # or dims are in same order return self.copy(deep=False) data = as_indexable(self._data).transpose(axes) @@ -1588,22 +1590,24 @@ def broadcast_equals(self, other, equiv=duck_array_ops.array_equiv): return False return self.equals(other, equiv=equiv) - def identical(self, other): + def identical(self, other, equiv=duck_array_ops.array_equiv): """Like equals, but also checks attributes. """ try: - return utils.dict_equiv(self.attrs, other.attrs) and self.equals(other) + return utils.dict_equiv(self.attrs, other.attrs) and self.equals( + other, equiv=equiv + ) except (TypeError, AttributeError): return False - def no_conflicts(self, other): + def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): """True if the intersection of two Variable's non-null data is equal; otherwise false. Variables can thus still be equal if there are locations where either, or both, contain NaN values. """ - return self.broadcast_equals(other, equiv=duck_array_ops.array_notnull_equiv) + return self.broadcast_equals(other, equiv=equiv) def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): """Compute the qth quantile of the data along the specified dimension. 
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 50517ae3c9c..bfda579644f 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -22,6 +22,7 @@ assert_identical, raises_regex, ) +from ..core.duck_array_ops import lazy_array_equiv dask = pytest.importorskip("dask") da = pytest.importorskip("dask.array") @@ -1135,3 +1136,42 @@ def test_make_meta(map_ds): for variable in map_ds.data_vars: assert variable in meta.data_vars assert meta.data_vars[variable].shape == (0,) * meta.data_vars[variable].ndim + + +def test_identical_coords_no_computes(): + lons2 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) + a = xr.DataArray( + da.zeros((10, 10), chunks=2), dims=("y", "x"), coords={"lons": lons2} + ) + b = xr.DataArray( + da.zeros((10, 10), chunks=2), dims=("y", "x"), coords={"lons": lons2} + ) + with raise_if_dask_computes(): + c = a + b + assert_identical(c, a) + + +def test_lazy_array_equiv(): + lons1 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) + lons2 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) + var1 = lons1.variable + var2 = lons2.variable + with raise_if_dask_computes(): + lons1.equals(lons2) + with raise_if_dask_computes(): + var1.equals(var2 / 2, equiv=lazy_array_equiv) + assert var1.equals(var2.compute(), equiv=lazy_array_equiv) is None + assert var1.compute().equals(var2.compute(), equiv=lazy_array_equiv) is None + + with raise_if_dask_computes(): + assert lons1.equals(lons1.transpose("y", "x")) + + with raise_if_dask_computes(): + for compat in [ + "broadcast_equals", + "equals", + "override", + "identical", + "no_conflicts", + ]: + xr.merge([lons1, lons2], compat=compat) From 8739dddb12cd026d9073f60c01b1c3b81c80a072 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 28 Oct 2019 08:32:15 -0600 Subject: [PATCH 2/8] better docstring --- xarray/core/duck_array_ops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index c2b72eb08e1..7c0b730814e 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -193,7 +193,9 @@ def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): def lazy_array_equiv(arr1, arr2): - """Like array_equal, but doesn't actually compare values + """Like array_equal, but doesn't actually compare values. + Returns True or False when equality can be determined without computing. + Returns None when equality cannot determined (e.g. one or both of arr1, arr2 are numpy arrays) """ arr1 = asarray(arr1) arr2 = asarray(arr2) From 4a66e7c38bfe6f67599b1d46a7452d372384e347 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 28 Oct 2019 10:19:32 -0600 Subject: [PATCH 3/8] review suggestions. --- xarray/core/duck_array_ops.py | 73 ++++++++++++++--------------------- 1 file changed, 28 insertions(+), 45 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 7c0b730814e..9432ccf1904 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -174,8 +174,10 @@ def as_shared_dtype(scalars_or_arrays): return [x.astype(out_type, copy=False) for x in arrays] -def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): - """Like np.allclose, but also allows values to be NaN in both arrays +def lazy_array_equiv(arr1, arr2): + """Like array_equal, but doesn't actually compare values. + Returns True or False when equality can be determined without computing. + Returns None when equality cannot determined (e.g. 
one or both of arr1, arr2 are numpy arrays) """ arr1 = asarray(arr1) arr2 = asarray(arr2) @@ -189,26 +191,19 @@ def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): # GH3068 if arr1.name == arr2.name: return True - return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all()) + return None -def lazy_array_equiv(arr1, arr2): - """Like array_equal, but doesn't actually compare values. - Returns True or False when equality can be determined without computing. - Returns None when equality cannot determined (e.g. one or both of arr1, arr2 are numpy arrays) +def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): + """Like np.allclose, but also allows values to be NaN in both arrays """ arr1 = asarray(arr1) arr2 = asarray(arr2) - if arr1.shape != arr2.shape: - return False - if ( - dask_array - and isinstance(arr1, dask_array.Array) - and isinstance(arr2, dask_array.Array) - ): - # GH3068 - if arr1.name == arr2.name: - return True + lazy_equiv = lazy_array_equiv(arr1, arr2) + if lazy_equiv is None: + return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all()) + else: + return lazy_equiv def array_equiv(arr1, arr2): @@ -216,20 +211,14 @@ def array_equiv(arr1, arr2): """ arr1 = asarray(arr1) arr2 = asarray(arr2) - if arr1.shape != arr2.shape: - return False - if ( - dask_array - and isinstance(arr1, dask_array.Array) - and isinstance(arr2, dask_array.Array) - ): - # GH3068 - if arr1.name == arr2.name: - return True - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "In the future, 'NAT == x'") - flag_array = (arr1 == arr2) | (isnull(arr1) & isnull(arr2)) - return bool(flag_array.all()) + lazy_equiv = lazy_array_equiv(arr1, arr2) + if lazy_equiv is None: + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "In the future, 'NAT == x'") + flag_array = (arr1 == arr2) | (isnull(arr1) & isnull(arr2)) + return bool(flag_array.all()) + else: + return lazy_equiv def array_notnull_equiv(arr1, arr2): @@ -238,20 +227,14 @@ def array_notnull_equiv(arr1, arr2): """ arr1 = asarray(arr1) arr2 = asarray(arr2) - if arr1.shape != arr2.shape: - return False - if ( - dask_array - and isinstance(arr1, dask_array.Array) - and isinstance(arr2, dask_array.Array) - ): - # GH3068 - if arr1.name == arr2.name: - return True - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "In the future, 'NAT == x'") - flag_array = (arr1 == arr2) | isnull(arr1) | isnull(arr2) - return bool(flag_array.all()) + lazy_equiv = lazy_array_equiv(arr1, arr2) + if lazy_equiv is None: + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "In the future, 'NAT == x'") + flag_array = (arr1 == arr2) | isnull(arr1) | isnull(arr2) + return bool(flag_array.all()) + else: + return lazy_equiv def count(data, axis=None): From e99148e130e7c3601366b92189cf69bb4c06d6aa Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 29 Oct 2019 09:17:58 -0600 Subject: [PATCH 4/8] add concat test --- xarray/tests/test_dask.py | 48 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index bfda579644f..cde9faa44b7 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -424,7 +424,53 @@ def test_concat_loads_variables(self): out.compute() assert kernel_call_count == 24 - # Finally, test that riginals are unaltered + # Finally, test that originals are unaltered + assert ds1["d"].data is d1 + assert ds1["c"].data is c1 + assert ds2["d"].data is d2 + assert 
ds2["c"].data is c2 + assert ds3["d"].data is d3 + assert ds3["c"].data is c3 + + # now check that concat() is correctly using dask name equality to skip loads + out = xr.concat( + [ds1, ds1, ds1], dim="n", data_vars="different", coords="different" + ) + assert kernel_call_count == 24 + # variables are not loaded in the output + assert isinstance(out["d"].data, dask.array.Array) + assert isinstance(out["c"].data, dask.array.Array) + + out = xr.concat( + [ds1, ds1, ds1], dim="n", data_vars=[], coords=[], compat="identical" + ) + assert kernel_call_count == 24 + # variables are not loaded in the output + assert isinstance(out["d"].data, dask.array.Array) + assert isinstance(out["c"].data, dask.array.Array) + + out = xr.concat( + [ds1, ds2.compute(), ds3], + dim="n", + data_vars="all", + coords="different", + compat="identical", + ) + # c1,c3 must be computed for comparison since c2 is numpy; + # d2 is computed too + assert kernel_call_count == 28 + + out = xr.concat( + [ds1, ds2.compute(), ds3], + dim="n", + data_vars="all", + coords="all", + compat="identical", + ) + # no extra computes + assert kernel_call_count == 30 + + # Finally, test that originals are unaltered assert ds1["d"].data is d1 assert ds1["c"].data is c1 assert ds2["d"].data is d2 From 5e742e4eb8cbe0ce73d2981466fc1c2715e65dc0 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 29 Oct 2019 09:22:15 -0600 Subject: [PATCH 5/8] update whats new --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 73618782460..19a3648c16e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,6 +57,9 @@ Bug fixes but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). By `Deepak Cherian `_. +- Use dask names to compare dask objects prior to comparing values after computation. + (:issue:`3068`, :issue:`3311`, :issue:`3454`, :pull:`3453`). + By `Deepak Cherian `_. - Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. By `Anderson Banihirwe `_. - Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and From 53c0f4e283d880277b1ac17b38f88ea75a834e82 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 29 Oct 2019 09:25:27 -0600 Subject: [PATCH 6/8] Add identity check to lazy_array_equiv --- xarray/core/duck_array_ops.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 9432ccf1904..d95f043b6f3 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -179,6 +179,8 @@ def lazy_array_equiv(arr1, arr2): Returns True or False when equality can be determined without computing. Returns None when equality cannot determined (e.g. 
one or both of arr1, arr2 are numpy arrays) """ + if arr1 is arr2: + return True arr1 = asarray(arr1) arr2 = asarray(arr2) if arr1.shape != arr2.shape: From 4ee296310cd31882c98a03fadbf954c2c39c89d6 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 31 Oct 2019 11:27:05 -0600 Subject: [PATCH 7/8] pep8 --- xarray/tests/test_dask.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index a90b162057f..90e7ad2e224 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1227,7 +1227,7 @@ def test_lazy_array_equiv(): ]: xr.merge([lons1, lons2], compat=compat) - + @pytest.mark.parametrize( "obj", [make_da(), make_da().compute(), make_ds(), make_ds().compute()] ) @@ -1314,4 +1314,4 @@ def test_normalize_token_with_backend(map_ds): with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as tmp_file: map_ds.to_netcdf(tmp_file) read = xr.open_dataset(tmp_file) - assert not dask.base.tokenize(map_ds) == dask.base.tokenize(read) \ No newline at end of file + assert not dask.base.tokenize(map_ds) == dask.base.tokenize(read) From 0711eb04fee8bd51862cf49f9bfc99cbc865dce8 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 31 Oct 2019 21:18:58 -0600 Subject: [PATCH 8/8] bugfix. --- xarray/core/concat.py | 54 +++++++++++++------------- xarray/core/duck_array_ops.py | 8 +++- xarray/core/merge.py | 6 +-- xarray/tests/test_dask.py | 72 ++++++++++++++++++++++------------- 4 files changed, 83 insertions(+), 57 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 13218fb3f13..c26153eb0d8 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -197,34 +197,36 @@ def process_subset_opt(opt, subset): equals[k] = getattr(variables[0], compat)( var, equiv=lazy_array_equiv ) - if not equals[k]: + if equals[k] is not True: + # exit early if we know these are not equal or that + # equality cannot be determined i.e. one or all of + # the variables wraps a numpy array break - if equals[k] is not None: - if equals[k] is False: - concat_over.add(k) - continue - - # Compare the variable of all datasets vs. the one - # of the first dataset. Perform the minimum amount of - # loads in order to avoid multiple loads from disk - # while keeping the RAM footprint low. - v_lhs = datasets[0].variables[k].load() - # We'll need to know later on if variables are equal. - computed = [] - for ds_rhs in datasets[1:]: - v_rhs = ds_rhs.variables[k].compute() - computed.append(v_rhs) - if not getattr(v_lhs, compat)(v_rhs): - concat_over.add(k) - equals[k] = False - # computed variables are not to be re-computed - # again in the future - for ds, v in zip(datasets[1:], computed): - ds.variables[k].data = v.data - break - else: - equals[k] = True + if equals[k] is False: + concat_over.add(k) + + elif equals[k] is None: + # Compare the variable of all datasets vs. the one + # of the first dataset. Perform the minimum amount of + # loads in order to avoid multiple loads from disk + # while keeping the RAM footprint low. + v_lhs = datasets[0].variables[k].load() + # We'll need to know later on if variables are equal. 
+ computed = [] + for ds_rhs in datasets[1:]: + v_rhs = ds_rhs.variables[k].compute() + computed.append(v_rhs) + if not getattr(v_lhs, compat)(v_rhs): + concat_over.add(k) + equals[k] = False + # computed variables are not to be re-computed + # again in the future + for ds, v in zip(datasets[1:], computed): + ds.variables[k].data = v.data + break + else: + equals[k] = True elif opt == "all": concat_over.update( diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index d95f043b6f3..71e79335c3d 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -176,8 +176,10 @@ def as_shared_dtype(scalars_or_arrays): def lazy_array_equiv(arr1, arr2): """Like array_equal, but doesn't actually compare values. - Returns True or False when equality can be determined without computing. - Returns None when equality cannot determined (e.g. one or both of arr1, arr2 are numpy arrays) + Returns True when arr1, arr2 identical or their dask names are equal. + Returns False when shapes are not equal. + Returns None when equality cannot determined: one or both of arr1, arr2 are numpy arrays; + or their dask names are not equal """ if arr1 is arr2: return True @@ -193,6 +195,8 @@ def lazy_array_equiv(arr1, arr2): # GH3068 if arr1.name == arr2.name: return True + else: + return None return None diff --git a/xarray/core/merge.py b/xarray/core/merge.py index b5fa85575d0..daf0c3b059f 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -127,11 +127,11 @@ def unique_variable( # first check without comparing values i.e. no computes for var in variables[1:]: equals = getattr(out, compat)(var, equiv=lazy_array_equiv) - if not equals: + if equals is not True: break - # now compare values with minimum number of computes - if not equals: + if equals is None: + # now compare values with minimum number of computes out = out.compute() for var in variables[1:]: equals = getattr(out, compat)(var) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 90e7ad2e224..34115b29b23 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1202,32 +1202,6 @@ def test_identical_coords_no_computes(): assert_identical(c, a) -def test_lazy_array_equiv(): - lons1 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) - lons2 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) - var1 = lons1.variable - var2 = lons2.variable - with raise_if_dask_computes(): - lons1.equals(lons2) - with raise_if_dask_computes(): - var1.equals(var2 / 2, equiv=lazy_array_equiv) - assert var1.equals(var2.compute(), equiv=lazy_array_equiv) is None - assert var1.compute().equals(var2.compute(), equiv=lazy_array_equiv) is None - - with raise_if_dask_computes(): - assert lons1.equals(lons1.transpose("y", "x")) - - with raise_if_dask_computes(): - for compat in [ - "broadcast_equals", - "equals", - "override", - "identical", - "no_conflicts", - ]: - xr.merge([lons1, lons2], compat=compat) - - @pytest.mark.parametrize( "obj", [make_da(), make_da().compute(), make_ds(), make_ds().compute()] ) @@ -1315,3 +1289,49 @@ def test_normalize_token_with_backend(map_ds): map_ds.to_netcdf(tmp_file) read = xr.open_dataset(tmp_file) assert not dask.base.tokenize(map_ds) == dask.base.tokenize(read) + + +@pytest.mark.parametrize( + "compat", ["broadcast_equals", "equals", "identical", "no_conflicts"] +) +def test_lazy_array_equiv_variables(compat): + var1 = xr.Variable(("y", "x"), da.zeros((10, 10), chunks=2)) + var2 = xr.Variable(("y", "x"), da.zeros((10, 10), chunks=2)) + var3 
= xr.Variable(("y", "x"), da.zeros((20, 10), chunks=2)) + + with raise_if_dask_computes(): + assert getattr(var1, compat)(var2, equiv=lazy_array_equiv) + # values are actually equal, but we don't know that till we compute, return None + with raise_if_dask_computes(): + assert getattr(var1, compat)(var2 / 2, equiv=lazy_array_equiv) is None + + # shapes are not equal, return False without computes + with raise_if_dask_computes(): + assert getattr(var1, compat)(var3, equiv=lazy_array_equiv) is False + + # if one or both arrays are numpy, return None + assert getattr(var1, compat)(var2.compute(), equiv=lazy_array_equiv) is None + assert ( + getattr(var1.compute(), compat)(var2.compute(), equiv=lazy_array_equiv) is None + ) + + with raise_if_dask_computes(): + assert getattr(var1, compat)(var2.transpose("y", "x")) + + +@pytest.mark.parametrize( + "compat", ["broadcast_equals", "equals", "identical", "no_conflicts"] +) +def test_lazy_array_equiv_merge(compat): + da1 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) + da2 = xr.DataArray(da.zeros((10, 10), chunks=2), dims=("y", "x")) + da3 = xr.DataArray(da.ones((20, 10), chunks=2), dims=("y", "x")) + + with raise_if_dask_computes(): + xr.merge([da1, da2], compat=compat) + # shapes are not equal; no computes necessary + with raise_if_dask_computes(max_computes=0): + with pytest.raises(ValueError): + xr.merge([da1, da3], compat=compat) + with raise_if_dask_computes(max_computes=2): + xr.merge([da1, da2 / 2], compat=compat)