Skip to content

Commit

Permalink
Combine by coords dataarray bugfix (#5834)
Browse files Browse the repository at this point in the history
* fixed bug

* added tests + reorganised slightly

* clarified logic for dealing with mixed sets of objects

* removed commented out old code

* recorded bugfix in whatsnew

* Update doc/whats-new.rst

Co-authored-by: Mathias Hauser <mathause@users.noreply.github.com>

* Update xarray/core/combine.py

Co-authored-by: Mathias Hauser <mathause@users.noreply.github.com>

* removed pointless renaming

* update tests to look for capitalized error message

* clarified return type in docstring

* added test for combining two dataarrays with the same name

* Update xarray/tests/test_combine.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* Update doc/whats-new.rst

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>

* added examples to docstrings

* correct docstring example

* re-trigger CI

* Update xarray/core/combine.py

Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>

Co-authored-by: Mathias Hauser <mathause@users.noreply.github.com>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
  • Loading branch information
4 people authored Oct 29, 2021
1 parent 1d94b1e commit 867646f
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 38 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ Bug fixes
- Fixed performance bug where ``cftime`` import attempted within various core operations if ``cftime`` not
installed (:pull:`5640`).
By `Luke Sewell <https://github.com/lusewell>`_
- Fixed bug when combining named DataArrays using :py:func:`combine_by_coords`. (:pull:`5834`).
By `Tom Nicholas <https://github.com/TomNicholas>`_.
- When a custom engine was used in :py:func:`~xarray.open_dataset` the engine
wasn't initialized properly, causing missing argument errors or inconsistent
method signatures. (:pull:`5684`)
Expand Down
104 changes: 79 additions & 25 deletions xarray/core/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ def combine_by_coords(
Attempt to auto-magically combine the given datasets (or data arrays)
into one by using dimension coordinates.
This method attempts to combine a group of datasets along any number of
This function attempts to combine a group of datasets along any number of
dimensions into a single entity by inspecting coords and metadata and using
a combination of concat and merge.
Expand Down Expand Up @@ -765,6 +765,8 @@ def combine_by_coords(
Returns
-------
combined : xarray.Dataset or xarray.DataArray
Will return a Dataset unless all the inputs are unnamed DataArrays, in which case a
DataArray will be returned.
See also
--------
Expand Down Expand Up @@ -870,6 +872,50 @@ def combine_by_coords(
Data variables:
temperature (y, x) float64 10.98 14.3 12.06 nan ... 18.89 10.44 8.293
precipitation (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176
You can also combine DataArray objects, but the behaviour will differ depending on
whether or not the DataArrays are named. If all DataArrays are named then they will
be promoted to Datasets before combining, and then the resultant Dataset will be
returned, e.g.
>>> named_da1 = xr.DataArray(
... name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x"
... )
>>> named_da1
<xarray.DataArray 'a' (x: 2)>
array([1., 2.])
Coordinates:
* x (x) int64 0 1
>>> named_da2 = xr.DataArray(
... name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x"
... )
>>> named_da2
<xarray.DataArray 'a' (x: 2)>
array([3., 4.])
Coordinates:
* x (x) int64 2 3
>>> xr.combine_by_coords([named_da1, named_da2])
<xarray.Dataset>
Dimensions: (x: 4)
Coordinates:
* x (x) int64 0 1 2 3
Data variables:
a (x) float64 1.0 2.0 3.0 4.0
If all the DataArrays are unnamed, a single DataArray will be returned, e.g.
>>> unnamed_da1 = xr.DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
>>> unnamed_da2 = xr.DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")
>>> xr.combine_by_coords([unnamed_da1, unnamed_da2])
<xarray.DataArray (x: 4)>
array([1., 2., 3., 4.])
Coordinates:
* x (x) int64 0 1 2 3
Finally, if you attempt to combine a mix of unnamed DataArrays with either named
DataArrays or Datasets, a ValueError will be raised (as this is an ambiguous operation).
"""

# TODO remove after version 0.21, see PR4696
Expand All @@ -883,33 +929,41 @@ def combine_by_coords(
if not data_objects:
return Dataset()

mixed_arrays_and_datasets = any(
objs_are_unnamed_dataarrays = [
isinstance(data_object, DataArray) and data_object.name is None
for data_object in data_objects
) and any(isinstance(data_object, Dataset) for data_object in data_objects)
if mixed_arrays_and_datasets:
raise ValueError("Can't automatically combine datasets with unnamed arrays.")

all_unnamed_data_arrays = all(
isinstance(data_object, DataArray) and data_object.name is None
for data_object in data_objects
)
if all_unnamed_data_arrays:
unnamed_arrays = data_objects
temp_datasets = [data_array._to_temp_dataset() for data_array in unnamed_arrays]

combined_temp_dataset = _combine_single_variable_hypercube(
temp_datasets,
fill_value=fill_value,
data_vars=data_vars,
coords=coords,
compat=compat,
join=join,
combine_attrs=combine_attrs,
)
return DataArray()._from_temp_dataset(combined_temp_dataset)

]
if any(objs_are_unnamed_dataarrays):
if all(objs_are_unnamed_dataarrays):
# Combine into a single larger DataArray
temp_datasets = [
unnamed_dataarray._to_temp_dataset()
for unnamed_dataarray in data_objects
]

combined_temp_dataset = _combine_single_variable_hypercube(
temp_datasets,
fill_value=fill_value,
data_vars=data_vars,
coords=coords,
compat=compat,
join=join,
combine_attrs=combine_attrs,
)
return DataArray()._from_temp_dataset(combined_temp_dataset)
else:
# Must be a mix of unnamed dataarrays with either named dataarrays or with datasets
# Can't combine these as we wouldn't know whether to merge or concatenate the arrays
raise ValueError(
"Can't automatically combine unnamed DataArrays with either named DataArrays or Datasets."
)
else:
# Promote any named DataArrays to single-variable Datasets to simplify combining
data_objects = [
obj.to_dataset() if isinstance(obj, DataArray) else obj
for obj in data_objects
]

# Group by data vars
sorted_datasets = sorted(data_objects, key=vars_as_keys)
grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
Expand Down
71 changes: 58 additions & 13 deletions xarray/tests/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
combine_by_coords,
combine_nested,
concat,
merge,
)
from xarray.core import dtypes
from xarray.core.combine import (
Expand Down Expand Up @@ -688,7 +689,7 @@ def test_nested_combine_mixed_datasets_arrays(self):
combine_nested(objs, "x")


class TestCombineAuto:
class TestCombineDatasetsbyCoords:
def test_combine_by_coords(self):
objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
actual = combine_by_coords(objs)
Expand Down Expand Up @@ -730,17 +731,6 @@ def test_combine_by_coords(self):
def test_empty_input(self):
assert_identical(Dataset(), combine_by_coords([]))

def test_combine_coords_mixed_datasets_arrays(self):
objs = [
DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})),
Dataset({"x": [2, 3]}),
]
with pytest.raises(
ValueError,
match=r"Can't automatically combine datasets with unnamed arrays.",
):
combine_by_coords(objs)

@pytest.mark.parametrize(
"join, expected",
[
Expand Down Expand Up @@ -1044,7 +1034,35 @@ def test_combine_by_coords_incomplete_hypercube(self):
with pytest.raises(ValueError):
combine_by_coords([x1, x2, x3], fill_value=None)

def test_combine_by_coords_unnamed_arrays(self):

class TestCombineMixedObjectsbyCoords:
def test_combine_by_coords_mixed_unnamed_dataarrays(self):
named_da = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
unnamed_da = DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")

with pytest.raises(
ValueError, match="Can't automatically combine unnamed DataArrays with"
):
combine_by_coords([named_da, unnamed_da])

da = DataArray([0, 1], dims="x", coords=({"x": [0, 1]}))
ds = Dataset({"x": [2, 3]})
with pytest.raises(
ValueError,
match="Can't automatically combine unnamed DataArrays with",
):
combine_by_coords([da, ds])

def test_combine_coords_mixed_datasets_named_dataarrays(self):
da = DataArray(name="a", data=[4, 5], dims="x", coords=({"x": [0, 1]}))
ds = Dataset({"b": ("x", [2, 3])})
actual = combine_by_coords([da, ds])
expected = Dataset(
{"a": ("x", [4, 5]), "b": ("x", [2, 3])}, coords={"x": ("x", [0, 1])}
)
assert_identical(expected, actual)

def test_combine_by_coords_all_unnamed_dataarrays(self):
unnamed_array = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")

actual = combine_by_coords([unnamed_array])
Expand All @@ -1060,6 +1078,33 @@ def test_combine_by_coords_unnamed_arrays(self):
)
assert_identical(expected, actual)

def test_combine_by_coords_all_named_dataarrays(self):
named_da = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")

actual = combine_by_coords([named_da])
expected = named_da.to_dataset()
assert_identical(expected, actual)

named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
named_da2 = DataArray(name="b", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")

actual = combine_by_coords([named_da1, named_da2])
expected = Dataset(
{
"a": DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x"),
"b": DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x"),
}
)
assert_identical(expected, actual)

def test_combine_by_coords_all_dataarrays_with_the_same_name(self):
named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
named_da2 = DataArray(name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")

actual = combine_by_coords([named_da1, named_da2])
expected = merge([named_da1, named_da2])
assert_identical(expected, actual)


@requires_cftime
def test_combine_by_coords_distant_cftime_dates():
Expand Down

0 comments on commit 867646f

Please sign in to comment.