From 2633b1e5cbc21cd414e588571ab7be26df8e8dae Mon Sep 17 00:00:00 2001 From: Aslak Grinsted Date: Fri, 3 May 2019 10:12:34 +0200 Subject: [PATCH 1/4] DOC: Avoid downloading .tif file (#2919) * Avoid downloading .tif file Simplify by using that rasterio can read directly from http. This also removes imports of requests and os. * let rasterio handle the http * pep 8 * pep8 --- doc/gallery/plot_rasterio.py | 13 ++----------- doc/gallery/plot_rasterio_rgb.py | 13 ++----------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/doc/gallery/plot_rasterio.py b/doc/gallery/plot_rasterio.py index 98801990af3..82d5ce61284 100644 --- a/doc/gallery/plot_rasterio.py +++ b/doc/gallery/plot_rasterio.py @@ -16,9 +16,6 @@ original map projection (see :ref:`recipes.rasterio_rgb`). """ -import os -import urllib.request - import cartopy.crs as ccrs import matplotlib.pyplot as plt import numpy as np @@ -26,12 +23,9 @@ import xarray as xr -# Download the file from rasterio's repository -url = 'https://github.com/mapbox/rasterio/raw/master/tests/data/RGB.byte.tif' -urllib.request.urlretrieve(url, 'RGB.byte.tif') - # Read the data -da = xr.open_rasterio('RGB.byte.tif') +url = 'https://github.com/mapbox/rasterio/raw/master/tests/data/RGB.byte.tif' +da = xr.open_rasterio(url) # Compute the lon/lat coordinates with rasterio.warp.transform ny, nx = len(da['y']), len(da['x']) @@ -54,6 +48,3 @@ cmap='Greys_r', add_colorbar=False) ax.coastlines('10m', color='r') plt.show() - -# Delete the file -os.remove('RGB.byte.tif') diff --git a/doc/gallery/plot_rasterio_rgb.py b/doc/gallery/plot_rasterio_rgb.py index 2733bf149e5..23a56d5a291 100644 --- a/doc/gallery/plot_rasterio_rgb.py +++ b/doc/gallery/plot_rasterio_rgb.py @@ -13,20 +13,14 @@ transformation. 
""" -import os -import urllib.request - import cartopy.crs as ccrs import matplotlib.pyplot as plt import xarray as xr -# Download the file from rasterio's repository -url = 'https://github.com/mapbox/rasterio/raw/master/tests/data/RGB.byte.tif' -urllib.request.urlretrieve(url, 'RGB.byte.tif') - # Read the data -da = xr.open_rasterio('RGB.byte.tif') +url = 'https://github.com/mapbox/rasterio/raw/master/tests/data/RGB.byte.tif' +da = xr.open_rasterio(url) # The data is in UTM projection. We have to set it manually until # https://github.com/SciTools/cartopy/issues/813 is implemented @@ -37,6 +31,3 @@ da.plot.imshow(ax=ax, rgb='band', transform=crs) ax.coastlines('10m', color='r') plt.show() - -# Delete the file -os.remove('RGB.byte.tif') From dd99b7d7d8576eefcef4507ae9eb36a144b60adf Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 3 May 2019 16:14:27 -0400 Subject: [PATCH 2/4] List formatting in docs (#2939) * list formatting in docs * pep8 --- xarray/core/groupby.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index e8e2f1b08d4..4c98ea90389 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -519,6 +519,7 @@ def apply(self, func, shortcut=False, args=(), **kwargs): Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how to stack together the array. The rule is: + 1. If the dimension along which the group coordinate is defined is still in the first grouped array after applying `func`, then stack over this dimension. @@ -661,6 +662,7 @@ def apply(self, func, args=(), **kwargs): Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how to stack together the datasets. The rule is: + 1. If the dimension along which the group coordinate is defined is still in the first grouped item after applying `func`, then stack over this dimension. 
From ccd0b047ea8ca89c68ab6cfa942557e676e7d402 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 3 May 2019 23:15:54 -0700 Subject: [PATCH 3/4] Attempt to fix py35 build on Travis (#2925) * Attempt to fix py35 build on Travis This build is currently install NumPy 1.11, which isn't supported by xarray. Maybe adding minimum numpy and pandas versions will help. * fix pandas version * remove py35 build entirely * xfail failing test --- .travis.yml | 1 - ci/requirements-py35.yml | 23 ----------------------- ci/requirements-py36-dask-dev.yml | 4 ++-- ci/requirements-py36-hypothesis.yml | 4 ++-- ci/requirements-py36-pandas-dev.yml | 2 +- ci/requirements-py36-rasterio.yml | 4 ++-- ci/requirements-py36-windows.yml | 5 ++--- ci/requirements-py36-zarr-dev.yml | 4 ++-- ci/requirements-py36.yml | 4 ++-- ci/requirements-py37-windows.yml | 4 ++-- ci/requirements-py37.yml | 4 ++-- xarray/tests/test_dataarray.py | 1 + 12 files changed, 18 insertions(+), 42 deletions(-) delete mode 100644 ci/requirements-py35.yml diff --git a/.travis.yml b/.travis.yml index 212ddb77daa..155c0271b30 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,6 @@ matrix: fast_finish: true include: - env: CONDA_ENV=py35-min - - env: CONDA_ENV=py35 - env: CONDA_ENV=py36 - env: CONDA_ENV=py37 - env: diff --git a/ci/requirements-py35.yml b/ci/requirements-py35.yml deleted file mode 100644 index a71434865cc..00000000000 --- a/ci/requirements-py35.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: test_env -channels: - - conda-forge -dependencies: - - python=3.5 - - cftime - - dask=0.16 - - h5py - - h5netcdf - - matplotlib=1.5 - - netcdf4 - - pytest - - pytest-cov - - pytest-env - - coveralls - - flake8 - - numpy - - pandas - - scipy - - seaborn - - toolz - - rasterio - - zarr diff --git a/ci/requirements-py36-dask-dev.yml b/ci/requirements-py36-dask-dev.yml index 32d01765439..29603a59f7e 100644 --- a/ci/requirements-py36-dask-dev.yml +++ b/ci/requirements-py36-dask-dev.yml @@ -13,8 +13,8 @@ dependencies: - 
pytest-env - coveralls - flake8 - - numpy - - pandas + - numpy>=1.12 + - pandas>=0.19 - scipy - seaborn - toolz diff --git a/ci/requirements-py36-hypothesis.yml b/ci/requirements-py36-hypothesis.yml index 8066a53b6bc..495f81c9d3a 100644 --- a/ci/requirements-py36-hypothesis.yml +++ b/ci/requirements-py36-hypothesis.yml @@ -15,8 +15,8 @@ dependencies: - coveralls - hypothesis - flake8 - - numpy - - pandas + - numpy>=1.12 + - pandas>=0.19 - scipy - seaborn - toolz diff --git a/ci/requirements-py36-pandas-dev.yml b/ci/requirements-py36-pandas-dev.yml index bc0e5d0de09..05d2c11486c 100644 --- a/ci/requirements-py36-pandas-dev.yml +++ b/ci/requirements-py36-pandas-dev.yml @@ -16,7 +16,7 @@ dependencies: - pytest-env - coveralls - flake8 - - numpy + - numpy>=1.12 - scipy - toolz - pip: diff --git a/ci/requirements-py36-rasterio.yml b/ci/requirements-py36-rasterio.yml index e5ef1d29777..7307ed60d9a 100644 --- a/ci/requirements-py36-rasterio.yml +++ b/ci/requirements-py36-rasterio.yml @@ -14,8 +14,8 @@ dependencies: - pytest-cov - pytest-env - coveralls - - numpy - - pandas + - numpy>=1.12 + - pandas>=0.19 - scipy - seaborn - toolz diff --git a/ci/requirements-py36-windows.yml b/ci/requirements-py36-windows.yml index b139d5c78ca..22d917e332c 100644 --- a/ci/requirements-py36-windows.yml +++ b/ci/requirements-py36-windows.yml @@ -12,11 +12,10 @@ dependencies: - netcdf4 - pytest - pytest-env - - numpy - - pandas + - numpy>=1.12 + - pandas>=0.19 - scipy - seaborn - toolz - rasterio - zarr - diff --git a/ci/requirements-py36-zarr-dev.yml b/ci/requirements-py36-zarr-dev.yml index 94bdc50fbfe..2dbdf172b6c 100644 --- a/ci/requirements-py36-zarr-dev.yml +++ b/ci/requirements-py36-zarr-dev.yml @@ -12,8 +12,8 @@ dependencies: - pytest-env - coveralls - flake8 - - numpy - - pandas + - numpy>=1.12 + - pandas>=0.19 - scipy - seaborn - toolz diff --git a/ci/requirements-py36.yml b/ci/requirements-py36.yml index 7a3f0f53223..03242426a36 100644 --- a/ci/requirements-py36.yml +++ 
b/ci/requirements-py36.yml @@ -15,8 +15,8 @@ dependencies: - pytest-env - coveralls - pycodestyle - - numpy - - pandas + - numpy>=1.12 + - pandas>=0.19 - scipy - seaborn - toolz diff --git a/ci/requirements-py37-windows.yml b/ci/requirements-py37-windows.yml index fb4b97cde7c..1ad310a12e0 100644 --- a/ci/requirements-py37-windows.yml +++ b/ci/requirements-py37-windows.yml @@ -13,8 +13,8 @@ dependencies: - netcdf4 - pytest - pytest-env - - numpy - - pandas + - numpy>=1.12 + - pandas>=0.19 - scipy - seaborn - toolz diff --git a/ci/requirements-py37.yml b/ci/requirements-py37.yml index 4f4d2b1728b..0cece4ed6dd 100644 --- a/ci/requirements-py37.yml +++ b/ci/requirements-py37.yml @@ -16,8 +16,8 @@ dependencies: - pytest-env - coveralls - pycodestyle - - numpy - - pandas + - numpy>=1.12 + - pandas>=0.19 - scipy - seaborn - toolz diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 58eb6a1d6dc..a8655bbbf8c 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3594,6 +3594,7 @@ def test_rolling_wrapped_bottleneck(da, name, center, min_periods): @pytest.mark.parametrize('center', (True, False, None)) @pytest.mark.parametrize('min_periods', (1, None)) @pytest.mark.parametrize('window', (7, 8)) +@pytest.mark.xfail(reason='https://github.com/pydata/xarray/issues/2940') def test_rolling_wrapped_dask(da_dask, name, center, min_periods, window): pytest.importorskip('dask.array') # dask version From 5aaa6547cd14a713f89dfc7c22643d86fce87916 Mon Sep 17 00:00:00 2001 From: Zach Griffith Date: Sat, 4 May 2019 19:20:54 -0500 Subject: [PATCH 4/4] [WIP] Custom fill value for reindex, align, and merge operations (#2920) * add fill_value option to align and reindex functions * add fill_value tests for reindex and align * add fill_value option for merge functions * add tests for fill_value merge implementation * implement and test fill_value option in dataaarray reindex methods * fix PEP 8 issue * move function signature onto 
function * Add fill_value enhancement note --- doc/whats-new.rst | 3 +++ xarray/core/alignment.py | 41 ++++++++++++++--------------- xarray/core/dataarray.py | 22 ++++++++++------ xarray/core/dataset.py | 25 +++++++++++------- xarray/core/merge.py | 27 +++++++++++++------ xarray/tests/test_dataarray.py | 24 +++++++++++++++++ xarray/tests/test_dataset.py | 48 ++++++++++++++++++++++++++++++++++ xarray/tests/test_merge.py | 17 +++++++++++- 8 files changed, 159 insertions(+), 48 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 27709a09e7a..b88d10ffc23 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,6 +21,9 @@ v0.12.2 (unreleased) Enhancements ~~~~~~~~~~~~ +- Add ``fill_value`` argument for reindex, align, and merge operations + to enable custom fill values. (:issue:`2876`) + By `Zach Griffith `_. - Character arrays' character dimension name decoding and encoding handled by ``var.encoding['char_dim_name']`` (:issue:`2895`) By `James McCreight `_. diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 642be735e9b..295f69a2afc 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from . import utils +from . import utils, dtypes from .indexing import get_indexer_nd from .utils import is_dict_like, is_full_slice from .variable import IndexVariable, Variable @@ -31,20 +31,17 @@ def _get_joiner(join): raise ValueError('invalid value for join: %s' % join) -_DEFAULT_EXCLUDE = frozenset() # type: frozenset - - -def align(*objects, **kwargs): - """align(*objects, join='inner', copy=True, indexes=None, - exclude=frozenset()) - +def align(*objects, join='inner', copy=True, indexes=None, exclude=frozenset(), + fill_value=dtypes.NA): + """ Given any number of Dataset and/or DataArray objects, returns new objects with aligned indexes and dimension sizes. 
Array from the aligned objects are suitable as input to mathematical operators, because along each dimension they have the same index and size. - Missing values (if ``join != 'inner'``) are filled with NaN. + Missing values (if ``join != 'inner'``) are filled with ``fill_value``. + The default fill value is NaN. Parameters ---------- @@ -65,11 +62,13 @@ def align(*objects, **kwargs): ``copy=False`` and reindexing is unnecessary, or can be performed with only slice operations, then the output may share memory with the input. In either case, new xarray objects are always returned. - exclude : sequence of str, optional - Dimensions that must be excluded from alignment indexes : dict-like, optional Any indexes explicitly provided with the `indexes` argument should be used in preference to the aligned indexes. + exclude : sequence of str, optional + Dimensions that must be excluded from alignment + fill_value : scalar, optional + Value to use for newly missing values Returns ------- @@ -82,15 +81,8 @@ def align(*objects, **kwargs): If any dimensions without labels on the arguments have different sizes, or a different size than the size of the aligned dimension labels. 
""" - join = kwargs.pop('join', 'inner') - copy = kwargs.pop('copy', True) - indexes = kwargs.pop('indexes', None) - exclude = kwargs.pop('exclude', _DEFAULT_EXCLUDE) if indexes is None: indexes = {} - if kwargs: - raise TypeError('align() got unexpected keyword arguments: %s' - % list(kwargs)) if not indexes and len(objects) == 1: # fast path for the trivial case @@ -162,7 +154,8 @@ def align(*objects, **kwargs): # fast path for no reindexing necessary new_obj = obj.copy(deep=copy) else: - new_obj = obj.reindex(copy=copy, **valid_indexers) + new_obj = obj.reindex(copy=copy, fill_value=fill_value, + **valid_indexers) new_obj.encoding = obj.encoding result.append(new_obj) @@ -170,7 +163,8 @@ def align(*objects, **kwargs): def deep_align(objects, join='inner', copy=True, indexes=None, - exclude=frozenset(), raise_on_invalid=True): + exclude=frozenset(), raise_on_invalid=True, + fill_value=dtypes.NA): """Align objects for merging, recursing into dictionary values. This function is not public API. @@ -214,7 +208,7 @@ def is_alignable(obj): out.append(variables) aligned = align(*targets, join=join, copy=copy, indexes=indexes, - exclude=exclude) + exclude=exclude, fill_value=fill_value) for position, key, aligned_obj in zip(positions, keys, aligned): if key is no_key: @@ -270,6 +264,7 @@ def reindex_variables( method: Optional[str] = None, tolerance: Any = None, copy: bool = True, + fill_value: Optional[Any] = dtypes.NA, ) -> 'Tuple[OrderedDict[Any, Variable], OrderedDict[Any, pd.Index]]': """Conform a dictionary of aligned variables onto a new set of variables, filling in missing values with NaN. @@ -305,6 +300,8 @@ def reindex_variables( ``copy=False`` and reindexing is unnecessary, or can be performed with only slice operations, then the output may share memory with the input. In either case, new xarray objects are always returned. 
+ fill_value : scalar, optional + Value to use for newly missing values Returns ------- @@ -380,7 +377,7 @@ def reindex_variables( needs_masking = any(d in masked_dims for d in var.dims) if needs_masking: - new_var = var._getitem_with_mask(key) + new_var = var._getitem_with_mask(key, fill_value=fill_value) elif all(is_full_slice(k) for k in key): # no reindexing necessary # here we need to manually deal with copying data, since diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 39e9fc048e3..15e2e00dc21 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -879,9 +879,10 @@ def sel_points(self, dim='points', method=None, tolerance=None, dim=dim, method=method, tolerance=tolerance, **indexers) return self._from_temp_dataset(ds) - def reindex_like(self, other, method=None, tolerance=None, copy=True): - """Conform this object onto the indexes of another object, filling - in missing values with NaN. + def reindex_like(self, other, method=None, tolerance=None, copy=True, + fill_value=dtypes.NA): + """Conform this object onto the indexes of another object, filling in + missing values with ``fill_value``. The default fill value is NaN. Parameters ---------- @@ -910,6 +911,8 @@ def reindex_like(self, other, method=None, tolerance=None, copy=True): ``copy=False`` and reindexing is unnecessary, or can be performed with only slice operations, then the output may share memory with the input. In either case, a new xarray object is always returned. 
+ fill_value : scalar, optional + Value to use for newly missing values Returns ------- @@ -924,12 +927,12 @@ def reindex_like(self, other, method=None, tolerance=None, copy=True): """ indexers = reindex_like_indexers(self, other) return self.reindex(method=method, tolerance=tolerance, copy=copy, - **indexers) + fill_value=fill_value, **indexers) def reindex(self, indexers=None, method=None, tolerance=None, copy=True, - **indexers_kwargs): - """Conform this object onto a new set of indexes, filling in - missing values with NaN. + fill_value=dtypes.NA, **indexers_kwargs): + """Conform this object onto a new set of indexes, filling in + missing values with ``fill_value``. The default fill value is NaN. Parameters ---------- @@ -956,6 +959,8 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True, Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + fill_value : scalar, optional + Value to use for newly missing values **indexers_kwarg : {dim: indexer, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. 
@@ -974,7 +979,8 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True, indexers = either_dict_or_kwargs( indexers, indexers_kwargs, 'reindex') ds = self._to_temp_dataset().reindex( - indexers=indexers, method=method, tolerance=tolerance, copy=copy) + indexers=indexers, method=method, tolerance=tolerance, copy=copy, + fill_value=fill_value) return self._from_temp_dataset(ds) def interp(self, coords=None, method='linear', assume_sorted=False, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0f9f68d3106..79a42b303c2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1932,9 +1932,10 @@ def sel_points(self, dim='points', method=None, tolerance=None, ) return self.isel_points(dim=dim, **pos_indexers) - def reindex_like(self, other, method=None, tolerance=None, copy=True): - """Conform this object onto the indexes of another object, filling - in missing values with NaN. + def reindex_like(self, other, method=None, tolerance=None, copy=True, + fill_value=dtypes.NA): + """Conform this object onto the indexes of another object, filling in + missing values with ``fill_value``. The default fill value is NaN. Parameters ---------- @@ -1963,6 +1964,8 @@ def reindex_like(self, other, method=None, tolerance=None, copy=True): ``copy=False`` and reindexing is unnecessary, or can be performed with only slice operations, then the output may share memory with the input. In either case, a new xarray object is always returned. 
+ fill_value : scalar, optional + Value to use for newly missing values Returns ------- @@ -1977,12 +1980,12 @@ def reindex_like(self, other, method=None, tolerance=None, copy=True): """ indexers = alignment.reindex_like_indexers(self, other) return self.reindex(indexers=indexers, method=method, copy=copy, - tolerance=tolerance) + fill_value=fill_value, tolerance=tolerance) def reindex(self, indexers=None, method=None, tolerance=None, copy=True, - **indexers_kwargs): + fill_value=dtypes.NA, **indexers_kwargs): """Conform this object onto a new set of indexes, filling in - missing values with NaN. + missing values with ``fill_value``. The default fill value is NaN. Parameters ---------- @@ -2010,6 +2013,8 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True, ``copy=False`` and reindexing is unnecessary, or can be performed with only slice operations, then the output may share memory with the input. In either case, a new xarray object is always returned. + fill_value : scalar, optional + Value to use for newly missing values **indexers_kwarg : {dim: indexer, ...}, optional Keyword arguments in the same form as ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -2034,7 +2039,7 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True, variables, indexes = alignment.reindex_variables( self.variables, self.sizes, self.indexes, indexers, method, - tolerance, copy=copy) + tolerance, copy=copy, fill_value=fill_value) coord_names = set(self._coord_names) coord_names.update(indexers) return self._replace_with_new_dims( @@ -2752,7 +2757,7 @@ def update(self, other, inplace=None): inplace=inplace) def merge(self, other, inplace=None, overwrite_vars=frozenset(), - compat='no_conflicts', join='outer'): + compat='no_conflicts', join='outer', fill_value=dtypes.NA): """Merge the arrays of two datasets into a single dataset. 
This method generally not allow for overriding data, with the exception @@ -2790,6 +2795,8 @@ def merge(self, other, inplace=None, overwrite_vars=frozenset(), - 'left': use indexes from ``self`` - 'right': use indexes from ``other`` - 'exact': error instead of aligning non-equal indexes + fill_value : scalar, optional + Value to use for newly missing values Returns ------- @@ -2804,7 +2811,7 @@ def merge(self, other, inplace=None, overwrite_vars=frozenset(), inplace = _check_inplace(inplace) variables, coord_names, dims = dataset_merge_method( self, other, overwrite_vars=overwrite_vars, compat=compat, - join=join) + join=join, fill_value=fill_value) return self._replace_vars_and_dims(variables, coord_names, dims, inplace=inplace) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 363fdfc2337..421ac39ebd8 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -4,6 +4,7 @@ import pandas as pd +from . import dtypes from .alignment import deep_align from .pycompat import TYPE_CHECKING from .utils import Frozen @@ -349,7 +350,7 @@ def expand_and_merge_variables(objs, priority_arg=None): def merge_coords(objs, compat='minimal', join='outer', priority_arg=None, - indexes=None): + indexes=None, fill_value=dtypes.NA): """Merge coordinate variables. See merge_core below for argument descriptions. 
This works similarly to @@ -358,7 +359,8 @@ def merge_coords(objs, compat='minimal', join='outer', priority_arg=None, """ _assert_compat_valid(compat) coerced = coerce_pandas_values(objs) - aligned = deep_align(coerced, join=join, copy=False, indexes=indexes) + aligned = deep_align(coerced, join=join, copy=False, indexes=indexes, + fill_value=fill_value) expanded = expand_variable_dicts(aligned) priority_vars = _get_priority_vars(aligned, priority_arg, compat=compat) variables = merge_variables(expanded, priority_vars, compat=compat) @@ -404,7 +406,8 @@ def merge_core(objs, join='outer', priority_arg=None, explicit_coords=None, - indexes=None): + indexes=None, + fill_value=dtypes.NA): """Core logic for merging labeled objects. This is not public API. @@ -423,6 +426,8 @@ def merge_core(objs, An explicit list of variables from `objs` that are coordinates. indexes : dict, optional Dictionary with values given by pandas.Index objects. + fill_value : scalar, optional + Value to use for newly missing values Returns ------- @@ -442,7 +447,8 @@ def merge_core(objs, _assert_compat_valid(compat) coerced = coerce_pandas_values(objs) - aligned = deep_align(coerced, join=join, copy=False, indexes=indexes) + aligned = deep_align(coerced, join=join, copy=False, indexes=indexes, + fill_value=fill_value) expanded = expand_variable_dicts(aligned) coord_names, noncoord_names = determine_coords(coerced) @@ -470,7 +476,7 @@ def merge_core(objs, return variables, coord_names, dict(dims) -def merge(objects, compat='no_conflicts', join='outer'): +def merge(objects, compat='no_conflicts', join='outer', fill_value=dtypes.NA): """Merge any number of xarray objects into a single Dataset as variables. Parameters @@ -492,6 +498,8 @@ def merge(objects, compat='no_conflicts', join='outer'): of all non-null values. join : {'outer', 'inner', 'left', 'right', 'exact'}, optional How to combine objects with different indexes. 
+ fill_value : scalar, optional + Value to use for newly missing values Returns ------- @@ -529,7 +537,8 @@ def merge(objects, compat='no_conflicts', join='outer'): obj.to_dataset() if isinstance(obj, DataArray) else obj for obj in objects] - variables, coord_names, dims = merge_core(dict_like_objects, compat, join) + variables, coord_names, dims = merge_core(dict_like_objects, compat, join, + fill_value=fill_value) # TODO: don't always recompute indexes merged = Dataset._construct_direct( variables, coord_names, dims, indexes=None) @@ -537,7 +546,8 @@ def merge(objects, compat='no_conflicts', join='outer'): return merged -def dataset_merge_method(dataset, other, overwrite_vars, compat, join): +def dataset_merge_method(dataset, other, overwrite_vars, compat, join, + fill_value=dtypes.NA): """Guts of the Dataset.merge method.""" # we are locked into supporting overwrite_vars for the Dataset.merge @@ -565,7 +575,8 @@ def dataset_merge_method(dataset, other, overwrite_vars, compat, join): objs = [dataset, other_no_overwrite, other_overwrite] priority_arg = 2 - return merge_core(objs, compat, join, priority_arg=priority_arg) + return merge_core(objs, compat, join, priority_arg=priority_arg, + fill_value=fill_value) def dataset_update_method(dataset, other): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index a8655bbbf8c..ab6a5eb3626 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1259,6 +1259,18 @@ def test_reindex_like_no_index(self): ValueError, 'different size for unlabeled'): foo.reindex_like(bar) + @pytest.mark.parametrize('fill_value', [dtypes.NA, 2, 2.0]) + def test_reindex_like_fill_value(self, fill_value): + foo = DataArray([10, 20], dims='y', coords={'y': [0, 1]}) + bar = DataArray([10, 20, 30], dims='y', coords={'y': [0, 1, 2]}) + if fill_value == dtypes.NA: + # if we supply the default, we expect the missing value for a + # float array + fill_value = np.nan + actual = foo.reindex_like(bar, 
fill_value=fill_value) + expected = DataArray([10, 20, fill_value], coords=[('y', [0, 1, 2])]) + assert_identical(expected, actual) + @pytest.mark.filterwarnings('ignore:Indexer has dimensions') def test_reindex_regressions(self): # regression test for #279 @@ -1286,6 +1298,18 @@ def test_reindex_method(self): expected = DataArray([10, 20, np.nan], coords=[('y', y)]) assert_identical(expected, actual) + @pytest.mark.parametrize('fill_value', [dtypes.NA, 2, 2.0]) + def test_reindex_fill_value(self, fill_value): + x = DataArray([10, 20], dims='y', coords={'y': [0, 1]}) + y = [0, 1, 2] + if fill_value == dtypes.NA: + # if we supply the default, we expect the missing value for a + # float array + fill_value = np.nan + actual = x.reindex(y=y, fill_value=fill_value) + expected = DataArray([10, 20, fill_value], coords=[('y', y)]) + assert_identical(expected, actual) + def test_rename(self): renamed = self.dv.rename('bar') assert_identical( diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 3ace80f5eea..207bb5d992d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1619,6 +1619,54 @@ def test_reindex_method(self): actual = ds.reindex_like(alt, method='pad') assert_identical(expected, actual) + @pytest.mark.parametrize('fill_value', [dtypes.NA, 2, 2.0]) + def test_reindex_fill_value(self, fill_value): + ds = Dataset({'x': ('y', [10, 20]), 'y': [0, 1]}) + y = [0, 1, 2] + actual = ds.reindex(y=y, fill_value=fill_value) + if fill_value == dtypes.NA: + # if we supply the default, we expect the missing value for a + # float array + fill_value = np.nan + expected = Dataset({'x': ('y', [10, 20, fill_value]), 'y': y}) + assert_identical(expected, actual) + + @pytest.mark.parametrize('fill_value', [dtypes.NA, 2, 2.0]) + def test_reindex_like_fill_value(self, fill_value): + ds = Dataset({'x': ('y', [10, 20]), 'y': [0, 1]}) + y = [0, 1, 2] + alt = Dataset({'y': y}) + actual = ds.reindex_like(alt, fill_value=fill_value) + if 
fill_value == dtypes.NA: + # if we supply the default, we expect the missing value for a + # float array + fill_value = np.nan + expected = Dataset({'x': ('y', [10, 20, fill_value]), 'y': y}) + assert_identical(expected, actual) + + @pytest.mark.parametrize('fill_value', [dtypes.NA, 2, 2.0]) + def test_align_fill_value(self, fill_value): + x = Dataset({'foo': DataArray([1, 2], dims=['x'], + coords={'x': [1, 2]})}) + y = Dataset({'bar': DataArray([1, 2], dims=['x'], + coords={'x': [1, 3]})}) + x2, y2 = align(x, y, join='outer', fill_value=fill_value) + if fill_value == dtypes.NA: + # if we supply the default, we expect the missing value for a + # float array + fill_value = np.nan + + expected_x2 = Dataset( + {'foo': DataArray([1, 2, fill_value], + dims=['x'], + coords={'x': [1, 2, 3]})}) + expected_y2 = Dataset( + {'bar': DataArray([1, fill_value, 2], + dims=['x'], + coords={'x': [1, 2, 3]})}) + assert_identical(expected_x2, x2) + assert_identical(expected_y2, y2) + def test_align(self): left = create_test_data() right = left.copy(deep=True) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 4f26d616ce7..9c043f4dcfb 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -2,7 +2,7 @@ import pytest import xarray as xr -from xarray.core import merge +from xarray.core import merge, dtypes from . 
import raises_regex from .test_dataset import create_test_data @@ -213,6 +213,21 @@ def test_merge_auto_align(self): assert expected.identical(ds1.merge(ds2, join='inner')) assert expected.identical(ds2.merge(ds1, join='inner')) + @pytest.mark.parametrize('fill_value', [dtypes.NA, 2, 2.0]) + def test_merge_fill_value(self, fill_value): + ds1 = xr.Dataset({'a': ('x', [1, 2]), 'x': [0, 1]}) + ds2 = xr.Dataset({'b': ('x', [3, 4]), 'x': [1, 2]}) + if fill_value == dtypes.NA: + # if we supply the default, we expect the missing value for a + # float array + fill_value = np.nan + expected = xr.Dataset({'a': ('x', [1, 2, fill_value]), + 'b': ('x', [fill_value, 3, 4])}, + {'x': [0, 1, 2]}) + assert expected.identical(ds1.merge(ds2, fill_value=fill_value)) + assert expected.identical(ds2.merge(ds1, fill_value=fill_value)) + assert expected.identical(xr.merge([ds1, ds2], fill_value=fill_value)) + def test_merge_no_conflicts(self): ds1 = xr.Dataset({'a': ('x', [1, 2]), 'x': [0, 1]}) ds2 = xr.Dataset({'a': ('x', [2, 3]), 'x': [1, 2]})