From b51683f157ca6421ef58d527422a1a88c9ac67f3 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Sun, 29 Sep 2019 07:50:21 -0600 Subject: [PATCH] Documentation improvements (#3328) * Add examples for full_like, zeros_like, ones_like * Add examples for xr.align * Add examples for xr.merge * Update xr.where docstring * Update xr.dot docstring * Update xarray/core/common.py Co-Authored-By: Deepak Cherian * Update xarray/core/common.py Co-Authored-By: Deepak Cherian * Update xr.combine_by_coords docstring * Apply black formatting only * More black formatting * Remove unnecessary pandas bits * Fix indentation issues * Update assign and pipe * Update `Dataset.reindex` with examples * Update `Dataset.fillna` with examples * Address styling issues * Update docstring Co-Authored-By: Deepak Cherian --- doc/whats-new.rst | 5 +- xarray/core/alignment.py | 130 +++++++++++++++++++++ xarray/core/combine.py | 109 +++++++++++++++--- xarray/core/common.py | 221 +++++++++++++++++++++++++++++++++++- xarray/core/computation.py | 98 ++++++++++++++-- xarray/core/dataset.py | 227 +++++++++++++++++++++++++++++++++++++ xarray/core/merge.py | 150 ++++++++++++++++++++++-- 7 files changed, 899 insertions(+), 41 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 13ea55e1c4b..760ce66ca04 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -32,7 +32,10 @@ Documentation ~~~~~~~~~~~~~ - Add examples for :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims`. By `Justus Magin `_. - +- Add examples for :py:meth:`align`, :py:meth:`merge`, :py:meth:`combine_by_coords`, + :py:meth:`full_like`, :py:meth:`zeros_like`, :py:meth:`ones_like`, :py:meth:`Dataset.pipe`, + :py:meth:`Dataset.assign`, :py:meth:`Dataset.reindex`, :py:meth:`Dataset.fillna`. + By `Anderson Banihirwe `_. - Fixed documentation to clean up an unwanted file created in ``ipython`` example (:pull:`3353`). By `Gregory Gundersen `_. 
diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index d63718500bc..c26b879d839 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -116,6 +116,136 @@ def align( ValueError If any dimensions without labels on the arguments have different sizes, or a different size than the size of the aligned dimension labels. + + Examples + -------- + + >>> import xarray as xr + >>> x = xr.DataArray([[25, 35], [10, 24]], dims=('lat', 'lon'), + ... coords={'lat': [35., 40.], 'lon': [100., 120.]}) + >>> y = xr.DataArray([[20, 5], [7, 13]], dims=('lat', 'lon'), + ... coords={'lat': [35., 42.], 'lon': [100., 120.]}) + + >>> x + + array([[25, 35], + [10, 24]]) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + + >>> y + + array([[20, 5], + [ 7, 13]]) + Coordinates: + * lat (lat) float64 35.0 42.0 + * lon (lon) float64 100.0 120.0 + + >>> a, b = xr.align(x, y) + >>> a + + array([[25, 35]]) + Coordinates: + * lat (lat) float64 35.0 + * lon (lon) float64 100.0 120.0 + >>> b + + array([[20, 5]]) + Coordinates: + * lat (lat) float64 35.0 + * lon (lon) float64 100.0 120.0 + + >>> a, b = xr.align(x, y, join='outer') + >>> a + + array([[25., 35.], + [10., 24.], + [nan, nan]]) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 + >>> b + + array([[20., 5.], + [nan, nan], + [ 7., 13.]]) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 + + >>> a, b = xr.align(x, y, join='outer', fill_value=-999) + >>> a + + array([[ 25, 35], + [ 10, 24], + [-999, -999]]) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 + >>> b + + array([[ 20, 5], + [-999, -999], + [ 7, 13]]) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 + + >>> a, b = xr.align(x, y, join='left') + >>> a + + array([[25, 35], + [10, 24]]) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + >>> b + + 
array([[20., 5.], + [nan, nan]]) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + + >>> a, b = xr.align(x, y, join='right') + >>> a + + array([[25., 35.], + [nan, nan]]) + Coordinates: + * lat (lat) float64 35.0 42.0 + * lon (lon) float64 100.0 120.0 + >>> b + + array([[20, 5], + [ 7, 13]]) + Coordinates: + * lat (lat) float64 35.0 42.0 + * lon (lon) float64 100.0 120.0 + + >>> a, b = xr.align(x, y, join='exact') + Traceback (most recent call last): + ... + "indexes along dimension {!r} are not equal".format(dim) + ValueError: indexes along dimension 'lat' are not equal + + >>> a, b = xr.align(x, y, join='override') + >>> a + + array([[25, 35], + [10, 24]]) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + >>> b + + array([[20, 5], + [ 7, 13]]) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + """ if indexes is None: indexes = {} diff --git a/xarray/core/combine.py b/xarray/core/combine.py index be7fd86555c..38befd5698f 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -393,7 +393,7 @@ def combine_nested( -------- A common task is collecting data from a parallelized simulation in which - each processor wrote out to a separate file. A domain which was decomposed + each process wrote out to a separate file. A domain which was decomposed into 4 parts, 2 each along both the x and y axes, requires organising the datasets into a doubly-nested list, e.g: @@ -505,8 +505,7 @@ def combine_by_coords( ---------- datasets : sequence of xarray.Dataset Dataset objects to combine. - compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts', 'override'}, optional + compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts: @@ -520,9 +519,21 @@ def combine_by_coords( of all non-null values. 
- 'override': skip comparing and pick variable from first dataset data_vars : {'minimal', 'different', 'all' or list of str}, optional - Details are in the documentation of concat + These data variables will be concatenated together: + + * 'minimal': Only data variables in which the dimension already + appears are included. + * 'different': Data variables which are not equal (ignoring + attributes) across all datasets are also concatenated (as well as + all for which dimension already appears). Beware: this option may + load the data payload of data variables into memory if they are not + already loaded. + * 'all': All data variables will be concatenated. + * list of str: The listed data variables will be concatenated, in + addition to the 'minimal' data variables. + If objects are DataArrays, `data_vars` must be 'all'. coords : {'minimal', 'different', 'all' or list of str}, optional - Details are in the documentation of concat + As per the 'data_vars' kwarg, but for coordinate variables. fill_value : scalar, optional Value to use for newly missing values join : {'outer', 'inner', 'left', 'right', 'exact'}, optional @@ -556,29 +567,91 @@ def combine_by_coords( they are concatenated based on the values in their dimension coordinates, not on their position in the list passed to `combine_by_coords`. + >>> import numpy as np + >>> import xarray as xr + + >>> x1 = xr.Dataset( + ... { + ... "temperature": (("y", "x"), 20 * np.random.rand(6).reshape(2, 3)), + ... "precipitation": (("y", "x"), np.random.rand(6).reshape(2, 3)), + ... }, + ... coords={"y": [0, 1], "x": [10, 20, 30]}, + ... ) + >>> x2 = xr.Dataset( + ... { + ... "temperature": (("y", "x"), 20 * np.random.rand(6).reshape(2, 3)), + ... "precipitation": (("y", "x"), np.random.rand(6).reshape(2, 3)), + ... }, + ... coords={"y": [2, 3], "x": [10, 20, 30]}, + ... ) + >>> x3 = xr.Dataset( + ... { + ... "temperature": (("y", "x"), 20 * np.random.rand(6).reshape(2, 3)), + ... 
"precipitation": (("y", "x"), np.random.rand(6).reshape(2, 3)), + ... }, + ... coords={"y": [2, 3], "x": [40, 50, 60]}, + ... ) + >>> x1 - Dimensions: (x: 3) - Coords: - * position (x) int64 0 1 2 + Dimensions: (x: 3, y: 2) + Coordinates: + * y (y) int64 0 1 + * x (x) int64 10 20 30 Data variables: - temperature (x) float64 11.04 23.57 20.77 ... + temperature (y, x) float64 1.654 10.63 7.015 2.543 13.93 9.436 + precipitation (y, x) float64 0.2136 0.9974 0.7603 0.4679 0.3115 0.945 >>> x2 - Dimensions: (x: 3) - Coords: - * position (x) int64 3 4 5 + Dimensions: (x: 3, y: 2) + Coordinates: + * y (y) int64 2 3 + * x (x) int64 10 20 30 + Data variables: + temperature (y, x) float64 9.341 0.1251 6.269 7.709 8.82 2.316 + precipitation (y, x) float64 0.1728 0.1178 0.03018 0.6509 0.06938 0.3792 + + >>> x3 + + Dimensions: (x: 3, y: 2) + Coordinates: + * y (y) int64 2 3 + * x (x) int64 40 50 60 Data variables: - temperature (x) float64 6.97 8.13 7.42 ... + temperature (y, x) float64 2.789 2.446 6.551 12.46 2.22 15.96 + precipitation (y, x) float64 0.4804 0.1902 0.2457 0.6125 0.4654 0.5953 - >>> combined = xr.combine_by_coords([x2, x1]) + >>> xr.combine_by_coords([x2, x1]) - Dimensions: (x: 6) - Coords: - * position (x) int64 0 1 2 3 4 5 + Dimensions: (x: 3, y: 4) + Coordinates: + * x (x) int64 10 20 30 + * y (y) int64 0 1 2 3 Data variables: - temperature (x) float64 11.04 23.57 20.77 ... + temperature (y, x) float64 1.654 10.63 7.015 2.543 ... 7.709 8.82 2.316 + precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 0.6509 0.06938 0.3792 + + >>> xr.combine_by_coords([x3, x1]) + + Dimensions: (x: 6, y: 4) + Coordinates: + * x (x) int64 10 20 30 40 50 60 + * y (y) int64 0 1 2 3 + Data variables: + temperature (y, x) float64 1.654 10.63 7.015 nan ... nan 12.46 2.22 15.96 + precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 
0.6125 0.4654 0.5953 + + >>> xr.combine_by_coords([x3, x1], join='override') + + Dimensions: (x: 3, y: 4) + Coordinates: + * x (x) int64 10 20 30 + * y (y) int64 0 1 2 3 + Data variables: + temperature (y, x) float64 1.654 10.63 7.015 2.543 ... 12.46 2.22 15.96 + precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953 + """ # Group by data vars diff --git a/xarray/core/common.py b/xarray/core/common.py index ab9e7616ce1..5b166890575 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -542,6 +542,72 @@ def pipe( ... .pipe((f, 'arg2'), arg1=a, arg3=c) ... ) + Examples + -------- + + >>> import numpy as np + >>> import xarray as xr + >>> x = xr.Dataset( + ... { + ... "temperature_c": (("lat", "lon"), 20 * np.random.rand(4).reshape(2, 2)), + ... "precipitation": (("lat", "lon"), np.random.rand(4).reshape(2, 2)), + ... }, + ... coords={"lat": [10, 20], "lon": [150, 160]}, + ... ) + >>> x + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lat (lat) int64 10 20 + * lon (lon) int64 150 160 + Data variables: + temperature_c (lat, lon) float64 14.53 11.85 19.27 16.37 + precipitation (lat, lon) float64 0.7315 0.7189 0.8481 0.4671 + + >>> def adder(data, arg): + ... return data + arg + ... + >>> def div(data, arg): + ... return data / arg + ... + >>> def sub_mult(data, sub_arg, mult_arg): + ... return (data * mult_arg) - sub_arg + ... + >>> x.pipe(adder, 2) + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lon (lon) int64 150 160 + * lat (lat) int64 10 20 + Data variables: + temperature_c (lat, lon) float64 16.53 13.85 21.27 18.37 + precipitation (lat, lon) float64 2.731 2.719 2.848 2.467 + + >>> x.pipe(adder, arg=2) + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lon (lon) int64 150 160 + * lat (lat) int64 10 20 + Data variables: + temperature_c (lat, lon) float64 16.53 13.85 21.27 18.37 + precipitation (lat, lon) float64 2.731 2.719 2.848 2.467 + + >>> ( + ... x + ... .pipe(adder, arg=2) + ... .pipe(div, arg=2) + ... 
.pipe(sub_mult, sub_arg=2, mult_arg=2) + ... ) + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lon (lon) int64 150 160 + * lat (lat) int64 10 20 + Data variables: + temperature_c (lat, lon) float64 14.53 11.85 19.27 16.37 + precipitation (lat, lon) float64 0.7315 0.7189 0.8481 0.4671 + See Also -------- pandas.DataFrame.pipe @@ -1172,6 +1238,61 @@ def full_like(other, fill_value, dtype: DTypeLike = None): filled with fill_value. Coords will be copied from other. If other is based on dask, the new one will be as well, and will be split in the same chunks. + + Examples + -------- + + >>> import numpy as np + >>> import xarray as xr + >>> x = xr.DataArray(np.arange(6).reshape(2, 3), + ... dims=['lat', 'lon'], + ... coords={'lat': [1, 2], 'lon': [0, 1, 2]}) + >>> x + + array([[0, 1, 2], + [3, 4, 5]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.full_like(x, 1) + + array([[1, 1, 1], + [1, 1, 1]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.full_like(x, 0.5) + + array([[0, 0, 0], + [0, 0, 0]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.full_like(x, 0.5, dtype=np.double) + + array([[0.5, 0.5, 0.5], + [0.5, 0.5, 0.5]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.full_like(x, np.nan, dtype=np.double) + + array([[nan, nan, nan], + [nan, nan, nan]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + See also + -------- + + zeros_like + ones_like + """ from .dataarray import DataArray from .dataset import Dataset @@ -1217,13 +1338,109 @@ def _full_like_variable(other, fill_value, dtype: DTypeLike = None): def zeros_like(other, dtype: DTypeLike = None): - """Shorthand for full_like(other, 0, dtype) + """Return a new object of zeros with the same shape and + type as a given dataarray or dataset. + + Parameters + ---------- + other : DataArray, Dataset, or Variable + The reference object. 
The output will have the same dimensions and coordinates as this object. + dtype : dtype, optional + dtype of the new array. If omitted, it defaults to other.dtype. + + Returns + ------- + out : same as object + New object of zeros with the same shape and type as other. + + Examples + -------- + + >>> import numpy as np + >>> import xarray as xr + >>> x = xr.DataArray(np.arange(6).reshape(2, 3), + ... dims=['lat', 'lon'], + ... coords={'lat': [1, 2], 'lon': [0, 1, 2]}) + >>> x + + array([[0, 1, 2], + [3, 4, 5]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.zeros_like(x) + + array([[0, 0, 0], + [0, 0, 0]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.zeros_like(x, dtype=float) + + array([[0., 0., 0.], + [0., 0., 0.]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + See also + -------- + + ones_like + full_like + """ return full_like(other, 0, dtype) def ones_like(other, dtype: DTypeLike = None): - """Shorthand for full_like(other, 1, dtype) + """Return a new object of ones with the same shape and + type as a given dataarray or dataset. + + Parameters + ---------- + other : DataArray, Dataset, or Variable + The reference object. The output will have the same dimensions and coordinates as this object. + dtype : dtype, optional + dtype of the new array. If omitted, it defaults to other.dtype. + + Returns + ------- + out : same as object + New object of ones with the same shape and type as other. + + Examples + -------- + + >>> import numpy as np + >>> import xarray as xr + >>> x = xr.DataArray(np.arange(6).reshape(2, 3), + ... dims=['lat', 'lon'], + ...
coords={'lat': [1, 2], 'lon': [0, 1, 2]}) + >>> x + + array([[0, 1, 2], + [3, 4, 5]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.ones_like(x) + + array([[1, 1, 1], + [1, 1, 1]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + See also + -------- + + zeros_like + full_like + """ return full_like(other, 1, dtype) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 424ab5be87a..0d08234c474 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1084,17 +1084,54 @@ def dot(*arrays, dims=None, **kwargs): Examples -------- - >>> da_a = xr.DataArray(np.arange(3 * 4).reshape(3, 4), dims=['a', 'b']) - >>> da_b = xr.DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5), - >>> dims=['a', 'b', 'c']) - >>> da_c = xr.DataArray(np.arange(5 * 6).reshape(5, 6), dims=['c', 'd']) - >>> - >>> xr.dot(da_a, da_b, dims=['a', 'b']).dims - ('c', ) - >>> xr.dot(da_a, da_b, dims=['a']).dims - ('b', 'c') - >>> xr.dot(da_a, da_b, da_c, dims=['b', 'c']).dims - ('a', 'd') + >>> import numpy as np + >>> import xarray as xr + >>> da_a = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=['a', 'b']) + >>> da_b = xr.DataArray(np.arange(3 * 2 * 2).reshape(3, 2, 2), + ...
dims=['a', 'b', 'c']) + >>> da_c = xr.DataArray(np.arange(2 * 3).reshape(2, 3), dims=['c', 'd']) + + >>> da_a + + array([[0, 1], + [2, 3], + [4, 5]]) + Dimensions without coordinates: a, b + + >>> da_b + + array([[[ 0, 1], + [ 2, 3]], + [[ 4, 5], + [ 6, 7]], + [[ 8, 9], + [10, 11]]]) + Dimensions without coordinates: a, b, c + + >>> da_c + + array([[0, 1, 2], + [3, 4, 5]]) + Dimensions without coordinates: c, d + + >>> xr.dot(da_a, da_b, dims=['a', 'b']) + + array([110, 125]) + Dimensions without coordinates: c + + >>> xr.dot(da_a, da_b, dims=['a']) + + array([[40, 46], + [70, 79]]) + Dimensions without coordinates: b, c + + >>> xr.dot(da_a, da_b, da_c, dims=['b', 'c']) + + array([[ 9, 14, 19], + [ 93, 150, 207], + [273, 446, 619]]) + Dimensions without coordinates: a, d + """ from .dataarray import DataArray from .variable import Variable @@ -1195,6 +1232,45 @@ def where(cond, x, y): Examples -------- + >>> import xarray as xr + >>> import numpy as np + >>> x = xr.DataArray(0.1 * np.arange(10), dims=['lat'], + ... coords={'lat': np.arange(10)}, name='sst') + >>> x + + array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]) + Coordinates: + * lat (lat) int64 0 1 2 3 4 5 6 7 8 9 + + >>> xr.where(x < 0.5, x, 100*x) + + array([ 0. , 0.1, 0.2, 0.3, 0.4, 50. , 60. , 70. , 80. , 90. ]) + Coordinates: + * lat (lat) int64 0 1 2 3 4 5 6 7 8 9 + + >>> y = xr.DataArray( + ... 0.1 * np.arange(9).reshape(3, 3), + ... dims=["lat", "lon"], + ... coords={"lat": np.arange(3), "lon": 10 + np.arange(3)}, + ... name="sst", + ... ) + >>> y + + array([[0. , 0.1, 0.2], + [0.3, 0.4, 0.5], + [0.6, 0.7, 0.8]]) + Coordinates: + * lat (lat) int64 0 1 2 + * lon (lon) int64 10 11 12 + + >>> xr.where(y.lat < 1, y, -1) + + array([[ 0. , 0.1, 0.2], + [-1. , -1. , -1. ], + [-1. , -1. , -1.
]]) + Coordinates: + * lat (lat) int64 0 1 2 + * lon (lon) int64 10 11 12 >>> cond = xr.DataArray([True, False], dims=['x']) >>> x = xr.DataArray([1, 2], dims=['y']) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ea087ce3ce1..5fa96216ba0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2292,6 +2292,134 @@ def reindex( Dataset.reindex_like align pandas.Index.get_indexer + + Examples + -------- + + Create a dataset with some fictional data. + + >>> import xarray as xr + >>> import pandas as pd + >>> x = xr.Dataset( + ... { + ... "temperature": ("station", 20 * np.random.rand(4)), + ... "pressure": ("station", 500 * np.random.rand(4)) + ... }, + ... coords={"station": ["boston", "nyc", "seattle", "denver"]}) + >>> x + + Dimensions: (station: 4) + Coordinates: + * station (station) <U7 'boston' 'nyc' 'seattle' 'denver' + Data variables: + temperature (station) float64 18.84 14.59 19.22 17.16 + pressure (station) float64 324.1 194.3 122.8 244.3 + >>> x.indexes + station: Index(['boston', 'nyc', 'seattle', 'denver'], dtype='object', name='station') + + Create a new index and reindex the dataset. By default values in the new index that + do not have corresponding records in the dataset are assigned `NaN`. + + >>> new_index = ['boston', 'austin', 'seattle', 'lincoln'] + >>> x.reindex({'station': new_index}) + + Dimensions: (station: 4) + Coordinates: + * station (station) object 'boston' 'austin' 'seattle' 'lincoln' + Data variables: + temperature (station) float64 18.84 nan 19.22 nan + pressure (station) float64 324.1 nan 122.8 nan + + We can fill in the missing values by passing a value to the keyword `fill_value`. + + >>> x.reindex({'station': new_index}, fill_value=0) + + Dimensions: (station: 4) + Coordinates: + * station (station) object 'boston' 'austin' 'seattle' 'lincoln' + Data variables: + temperature (station) float64 18.84 0.0 19.22 0.0 + pressure (station) float64 324.1 0.0 122.8 0.0 + + Because the index is not monotonically increasing or decreasing, we cannot use arguments + to the keyword `method` to fill the `NaN` values.
+ + >>> x.reindex({'station': new_index}, method='nearest') + Traceback (most recent call last): + ... + raise ValueError('index must be monotonic increasing or decreasing') + ValueError: index must be monotonic increasing or decreasing + + To further illustrate the filling functionality in reindex, we will create a + dataset with a monotonically increasing index (for example, a sequence of dates). + + >>> x2 = xr.Dataset( + ... { + ... "temperature": ("time", [15.57, 12.77, np.nan, 0.3081, 16.59, 15.12]), + ... "pressure": ("time", 500 * np.random.rand(6)) + ... }, + ... coords={"time": pd.date_range('01/01/2019', periods=6, freq='D')}) + >>> x2 + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2019-01-01 2019-01-02 ... 2019-01-06 + Data variables: + temperature (time) float64 15.57 12.77 nan 0.3081 16.59 15.12 + pressure (time) float64 103.4 122.7 452.0 444.0 399.2 486.0 + + Suppose we decide to expand the dataset to cover a wider date range. + + >>> time_index2 = pd.date_range('12/29/2018', periods=10, freq='D') + >>> x2.reindex({'time': time_index2}) + + Dimensions: (time: 10) + Coordinates: + * time (time) datetime64[ns] 2018-12-29 2018-12-30 ... 2019-01-07 + Data variables: + temperature (time) float64 nan nan nan 15.57 ... 0.3081 16.59 15.12 nan + pressure (time) float64 nan nan nan 103.4 ... 444.0 399.2 486.0 nan + + The index entries that did not have a value in the original dataset (for example, `2018-12-29`) + are by default filled with NaN. If desired, we can fill in the missing values using one of several options. + + For example, to back-propagate the next valid value to fill the `NaN` values, + pass `bfill` as an argument to the `method` keyword. + + >>> x3 = x2.reindex({'time': time_index2}, method='bfill') + >>> x3 + + Dimensions: (time: 10) + Coordinates: + * time (time) datetime64[ns] 2018-12-29 2018-12-30 ... 2019-01-07 + Data variables: + temperature (time) float64 15.57 15.57 15.57 15.57 ...
16.59 15.12 nan + pressure (time) float64 103.4 103.4 103.4 103.4 ... 399.2 486.0 nan + + Please note that the `NaN` value present in the original dataset (at index value `2019-01-03`) + will not be filled by any of the value propagation schemes. + + >>> x2.where(x2.temperature.isnull(), drop=True) + + Dimensions: (time: 1) + Coordinates: + * time (time) datetime64[ns] 2019-01-03 + Data variables: + temperature (time) float64 nan + pressure (time) float64 452.0 + >>> x3.where(x3.temperature.isnull(), drop=True) + + Dimensions: (time: 2) + Coordinates: + * time (time) datetime64[ns] 2019-01-03 2019-01-07 + Data variables: + temperature (time) float64 nan nan + pressure (time) float64 452.0 nan + + This is because filling while reindexing does not look at dataset values, but only compares + the original and desired indexes. If you do want to fill in the `NaN` values present in the + original dataset, use the :py:meth:`~Dataset.fillna()` method. + """ indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") @@ -3718,6 +3846,57 @@ def fillna(self, value: Any) -> "Dataset": Returns ------- Dataset + + Examples + -------- + + >>> import numpy as np + >>> import xarray as xr + >>> ds = xr.Dataset( + ... { + ... "A": ("x", [np.nan, 2, np.nan, 0]), + ... "B": ("x", [3, 4, np.nan, 1]), + ... "C": ("x", [np.nan, np.nan, np.nan, 5]), + ... "D": ("x", [np.nan, 3, np.nan, 4]) + ... }, + ... coords={"x": [0, 1, 2, 3]}) + >>> ds + + Dimensions: (x: 4) + Coordinates: + * x (x) int64 0 1 2 3 + Data variables: + A (x) float64 nan 2.0 nan 0.0 + B (x) float64 3.0 4.0 nan 1.0 + C (x) float64 nan nan nan 5.0 + D (x) float64 nan 3.0 nan 4.0 + + Replace all `NaN` values with 0s. 
+ + >>> ds.fillna(0) + + Dimensions: (x: 4) + Coordinates: + * x (x) int64 0 1 2 3 + Data variables: + A (x) float64 0.0 2.0 0.0 0.0 + B (x) float64 3.0 4.0 0.0 1.0 + C (x) float64 0.0 0.0 0.0 5.0 + D (x) float64 0.0 3.0 0.0 4.0 + + Replace all `NaN` elements in variables 'A', 'B', 'C', and 'D' with 0, 1, 2, and 3 respectively. + + >>> values = {'A': 0, 'B': 1, 'C': 2, 'D': 3} + >>> ds.fillna(value=values) + + Dimensions: (x: 4) + Coordinates: + * x (x) int64 0 1 2 3 + Data variables: + A (x) float64 0.0 2.0 0.0 0.0 + B (x) float64 3.0 4.0 1.0 1.0 + C (x) float64 2.0 2.0 2.0 5.0 + D (x) float64 3.0 3.0 3.0 4.0 """ if utils.is_dict_like(value): value_keys = getattr(value, "data_vars", value).keys() @@ -4043,6 +4222,54 @@ def assign( See Also -------- pandas.DataFrame.assign + + Examples + -------- + >>> import numpy as np + >>> import xarray as xr + >>> x = xr.Dataset( + ... { + ... "temperature_c": (("lat", "lon"), 20 * np.random.rand(4).reshape(2, 2)), + ... "precipitation": (("lat", "lon"), np.random.rand(4).reshape(2, 2)), + ... }, + ... coords={"lat": [10, 20], "lon": [150, 160]}, + ...
) + >>> x + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lat (lat) int64 10 20 + * lon (lon) int64 150 160 + Data variables: + temperature_c (lat, lon) float64 18.04 12.51 17.64 9.313 + precipitation (lat, lon) float64 0.4751 0.6827 0.3697 0.03524 + + Where the value is a callable, evaluated on dataset: + + >>> x.assign(temperature_f = lambda x: x.temperature_c * 9 / 5 + 32) + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lat (lat) int64 10 20 + * lon (lon) int64 150 160 + Data variables: + temperature_c (lat, lon) float64 18.04 12.51 17.64 9.313 + precipitation (lat, lon) float64 0.4751 0.6827 0.3697 0.03524 + temperature_f (lat, lon) float64 64.47 54.51 63.75 48.76 + + Alternatively, the same behavior can be achieved by directly referencing an existing dataarray: + + >>> x.assign(temperature_f=x["temperature_c"] * 9 / 5 + 32) + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lat (lat) int64 10 20 + * lon (lon) int64 150 160 + Data variables: + temperature_c (lat, lon) float64 18.04 12.51 17.64 9.313 + precipitation (lat, lon) float64 0.4751 0.6827 0.3697 0.03524 + temperature_f (lat, lon) float64 64.47 54.51 63.75 48.76 + """ variables = either_dict_or_kwargs(variables, variables_kwargs, "assign") data = self.copy() diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 6dba659f992..ca753c588d4 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -565,18 +565,150 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA): Examples -------- - >>> arrays = [xr.DataArray(n, name='var%d' % n) for n in range(5)] - >>> xr.merge(arrays) + >>> import xarray as xr + >>> x = xr.DataArray( + ... [[1.0, 2.0], [3.0, 5.0]], + ... dims=("lat", "lon"), + ... coords={"lat": [35.0, 40.0], "lon": [100.0, 120.0]}, + ... name="var1", + ... ) + >>> y = xr.DataArray( + ... [[5.0, 6.0], [7.0, 8.0]], + ... dims=("lat", "lon"), + ... coords={"lat": [35.0, 42.0], "lon": [100.0, 150.0]}, + ... name="var2", + ... 
) + >>> z = xr.DataArray( + ... [[0.0, 3.0], [4.0, 9.0]], + ... dims=("time", "lon"), + ... coords={"time": [30.0, 60.0], "lon": [100.0, 150.0]}, + ... name="var3", + ... ) + + >>> x + + array([[1., 2.], + [3., 5.]]) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + + >>> y + + array([[5., 6.], + [7., 8.]]) + Coordinates: + * lat (lat) float64 35.0 42.0 + * lon (lon) float64 100.0 150.0 + + >>> z + + array([[0., 3.], + [4., 9.]]) + Coordinates: + * time (time) float64 30.0 60.0 + * lon (lon) float64 100.0 150.0 + + >>> xr.merge([x, y, z]) + + Dimensions: (lat: 3, lon: 3, time: 2) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 150.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 2.0 nan 3.0 5.0 nan nan nan nan + var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 + var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 + + >>> xr.merge([x, y, z], compat='identical') + + Dimensions: (lat: 3, lon: 3, time: 2) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 150.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 2.0 nan 3.0 5.0 nan nan nan nan + var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 + var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 + + >>> xr.merge([x, y, z], compat='equals') + + Dimensions: (lat: 3, lon: 3, time: 2) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 150.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 2.0 nan 3.0 5.0 nan nan nan nan + var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 + var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 + + >>> xr.merge([x, y, z], compat='equals', fill_value=-999.) 
+ + Dimensions: (lat: 3, lon: 3, time: 2) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 150.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 2.0 -999.0 3.0 ... -999.0 -999.0 -999.0 + var2 (lat, lon) float64 5.0 -999.0 6.0 -999.0 ... -999.0 7.0 -999.0 8.0 + var3 (time, lon) float64 0.0 -999.0 3.0 4.0 -999.0 9.0 + + >>> xr.merge([x, y, z], join='override') + + Dimensions: (lat: 2, lon: 2, time: 2) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 2.0 3.0 5.0 + var2 (lat, lon) float64 5.0 6.0 7.0 8.0 + var3 (time, lon) float64 0.0 3.0 4.0 9.0 + + >>> xr.merge([x, y, z], join='inner') + + Dimensions: (lat: 1, lon: 1, time: 2) + Coordinates: + * lat (lat) float64 35.0 + * lon (lon) float64 100.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 + var2 (lat, lon) float64 5.0 + var3 (time, lon) float64 0.0 4.0 + + >>> xr.merge([x, y, z], compat='identical', join='inner') + + Dimensions: (lat: 1, lon: 1, time: 2) + Coordinates: + * lat (lat) float64 35.0 + * lon (lon) float64 100.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 + var2 (lat, lon) float64 5.0 + var3 (time, lon) float64 0.0 4.0 + + >>> xr.merge([x, y, z], compat='broadcast_equals', join='outer') - Dimensions: () + Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: - *empty* + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 150.0 + * time (time) float64 30.0 60.0 Data variables: - var0 int64 0 - var1 int64 1 - var2 int64 2 - var3 int64 3 - var4 int64 4 + var1 (lat, lon) float64 1.0 2.0 nan 3.0 5.0 nan nan nan nan + var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 + var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 + + >>> xr.merge([x, y, z], join='exact') + Traceback (most recent call last): + ... 
+ ValueError: indexes along dimension 'lat' are not equal Raises ------