
Commit

Merge remote-tracking branch 'upstream/master' into error_message_time_units

* upstream/master:
  combine keep_attrs and combine_attrs in apply_ufunc (pydata#5041)
  Explained what a deprecation cycle is (pydata#5289)
  Code cleanup (pydata#5234)
  FacetGrid docstrings (pydata#5293)
  Add whats new for dataset interpolation with non-numerics (pydata#5297)
  Allow dataset interpolation with different datatypes (pydata#5008)
  Flexible indexes: add Index base class and xindexes properties (pydata#5102)
dcherian committed May 13, 2021
2 parents eab4fa8 + 751f76a commit c33fa82
Showing 65 changed files with 1,490 additions and 901 deletions.
1 change: 1 addition & 0 deletions doc/api.rst
@@ -846,6 +846,7 @@ Faceting
plot.FacetGrid
plot.FacetGrid.add_colorbar
plot.FacetGrid.add_legend
plot.FacetGrid.add_quiverkey
plot.FacetGrid.map
plot.FacetGrid.map_dataarray
plot.FacetGrid.map_dataarray_line
30 changes: 27 additions & 3 deletions doc/contributing.rst
@@ -379,10 +379,34 @@ with ``git commit --no-verify``.
Backwards Compatibility
~~~~~~~~~~~~~~~~~~~~~~~

Please try to maintain backward compatibility. *xarray* has growing number of users with
Please try to maintain backwards compatibility. *xarray* has a growing number of users with
lots of existing code, so don't break it if at all possible. If you think breakage is
required, clearly state why as part of the pull request. Also, be careful when changing
method signatures and add deprecation warnings where needed.
required, clearly state why as part of the pull request.

Be especially careful when changing function and method signatures, because any change
may require a deprecation warning. For example, if your pull request means that the
argument ``old_arg`` to ``func`` is no longer valid, instead of simply raising an error if
a user passes ``old_arg``, we would instead catch it:

.. code-block:: python

    def func(new_arg, old_arg=None):
        if old_arg is not None:
            from warnings import warn

            warn(
                "`old_arg` has been deprecated, and in the future will raise an error. "
                "Please use `new_arg` from now on.",
                DeprecationWarning,
            )

        # Still do what the user intended here

This temporary check would then be removed in a subsequent version of xarray.
This process of first warning users before actually breaking their code is known as a
"deprecation cycle", and makes changes significantly easier to handle both for users
of xarray, and for developers of other libraries that depend on xarray.
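
As a rough sketch of what callers see during the cycle (reusing the hypothetical ``func``
above), the deprecated argument keeps working but emits the warning:

.. code-block:: python

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        func("value", old_arg="legacy")  # still runs, but warns

    assert any(issubclass(w.category, DeprecationWarning) for w in caught)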


.. _contributing.ci:

16 changes: 14 additions & 2 deletions doc/whats-new.rst
@@ -21,7 +21,12 @@ v0.18.1 (unreleased)

New Features
~~~~~~~~~~~~

- allow passing ``combine_attrs`` strategy names to the ``keep_attrs`` parameter of
:py:func:`apply_ufunc` (:pull:`5041`)
By `Justus Magin <https://github.com/keewis>`_.
- :py:meth:`Dataset.interp` now allows interpolation with non-numerical datatypes,
such as booleans, instead of dropping them. (:issue:`4761` :pull:`5008`).
By `Jimmy Westling <https://github.com/illviljan>`_.
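
A minimal sketch combining the two entries above (the data here is illustrative and
assumes xarray >= 0.18.1, with scipy available for interpolation):

.. code-block:: python

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"temp": ("x", [10.0, 20.0, 30.0]), "mask": ("x", [True, False, True])},
        coords={"x": [0, 1, 2]},
    )

    # Non-numeric variables such as ``mask`` are no longer dropped by interp();
    # they are carried through to the interpolated result.
    interped = ds.interp(x=[0.5, 1.5])

    # keep_attrs now also accepts combine_attrs strategy names such as "drop_conflicts".
    result = xr.apply_ufunc(np.abs, ds["temp"], keep_attrs="drop_conflicts")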

Breaking changes
~~~~~~~~~~~~~~~~
@@ -38,10 +43,18 @@ Bug fixes
Documentation
~~~~~~~~~~~~~

- Explanation of deprecation cycles and how to implement them added to contributors
guide. (:pull:`5289`)
By `Tom Nicholas <https://github.com/TomNicholas>`_.


Internal Changes
~~~~~~~~~~~~~~~~

- Explicit indexes refactor: add an ``xarray.Index`` base class and
``Dataset.xindexes`` / ``DataArray.xindexes`` properties. Also rename
``PandasIndexAdapter`` to ``PandasIndex``, which now inherits from
``xarray.Index`` (:pull:`5102`). By `Benoit Bovy <https://github.com/benbovy>`_.
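
For orientation, a short sketch of the new property (coordinate values are made up;
assumes xarray >= 0.18.1):

.. code-block:: python

    import xarray as xr

    ds = xr.Dataset(coords={"x": [10, 20, 30]})

    # xindexes maps coordinate names to xarray index objects (thin wrappers
    # around pandas indexes), while indexes still returns the bare pandas objects.
    print(ds.xindexes)
    print(ds.indexes)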

.. _whats-new.0.18.0:

@@ -268,7 +281,6 @@ Internal Changes
(:pull:`5188`), (:pull:`5191`).
By `Maximilian Roos <https://github.com/max-sixty>`_.


.. _whats-new.0.17.0:

v0.17.0 (24 Feb 2021)
45 changes: 19 additions & 26 deletions xarray/backends/api.py
@@ -102,12 +102,11 @@ def _get_default_engine_netcdf():

def _get_default_engine(path: str, allow_remote: bool = False):
if allow_remote and is_remote_uri(path):
engine = _get_default_engine_remote_uri()
return _get_default_engine_remote_uri()
elif path.endswith(".gz"):
engine = _get_default_engine_gz()
return _get_default_engine_gz()
else:
engine = _get_default_engine_netcdf()
return engine
return _get_default_engine_netcdf()


def _validate_dataset_names(dataset):
@@ -282,7 +281,7 @@ def _chunk_ds(

mtime = _get_mtime(filename_or_obj)
token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens)
name_prefix = "open_dataset-%s" % token
name_prefix = f"open_dataset-{token}"

variables = {}
for name, var in backend_ds.variables.items():
@@ -295,8 +294,7 @@
name_prefix=name_prefix,
token=token,
)
ds = backend_ds._replace(variables)
return ds
return backend_ds._replace(variables)


def _dataset_from_backend_dataset(
@@ -308,12 +306,10 @@ def _dataset_from_backend_dataset(
overwrite_encoded_chunks,
**extra_tokens,
):
if not (isinstance(chunks, (int, dict)) or chunks is None):
if chunks != "auto":
raise ValueError(
"chunks must be an int, dict, 'auto', or None. "
"Instead found %s. " % chunks
)
if not isinstance(chunks, (int, dict)) and chunks not in {None, "auto"}:
raise ValueError(
f"chunks must be an int, dict, 'auto', or None. Instead found {chunks}."
)

_protect_dataset_variables_inplace(backend_ds, cache)
if chunks is None:
@@ -331,9 +327,8 @@
ds.set_close(backend_ds._close)

# Ensure source filename always stored in dataset object (GH issue #2550)
if "source" not in ds.encoding:
if isinstance(filename_or_obj, str):
ds.encoding["source"] = filename_or_obj
if "source" not in ds.encoding and isinstance(filename_or_obj, str):
ds.encoding["source"] = filename_or_obj

return ds

@@ -515,7 +510,6 @@ def open_dataset(
**decoders,
**kwargs,
)

return ds


@@ -1015,8 +1009,8 @@ def to_netcdf(
elif engine != "scipy":
raise ValueError(
"invalid engine for creating bytes with "
"to_netcdf: %r. Only the default engine "
"or engine='scipy' is supported" % engine
f"to_netcdf: {engine!r}. Only the default engine "
"or engine='scipy' is supported"
)
if not compute:
raise NotImplementedError(
@@ -1037,7 +1031,7 @@
try:
store_open = WRITEABLE_STORES[engine]
except KeyError:
raise ValueError("unrecognized engine for to_netcdf: %r" % engine)
raise ValueError(f"unrecognized engine for to_netcdf: {engine!r}")

if format is not None:
format = format.upper()
@@ -1049,9 +1043,8 @@
autoclose = have_chunks and scheduler in ["distributed", "multiprocessing"]
if autoclose and engine == "scipy":
raise NotImplementedError(
"Writing netCDF files with the %s backend "
"is not currently supported with dask's %s "
"scheduler" % (engine, scheduler)
f"Writing netCDF files with the {engine} backend "
f"is not currently supported with dask's {scheduler} scheduler"
)

target = path_or_file if path_or_file is not None else BytesIO()
@@ -1061,7 +1054,7 @@
kwargs["invalid_netcdf"] = invalid_netcdf
else:
raise ValueError(
"unrecognized option 'invalid_netcdf' for engine %s" % engine
f"unrecognized option 'invalid_netcdf' for engine {engine}"
)
store = store_open(target, mode, format, group, **kwargs)

@@ -1203,7 +1196,7 @@ def save_mfdataset(
Data variables:
a (time) float64 0.0 0.02128 0.04255 0.06383 ... 0.9574 0.9787 1.0
>>> years, datasets = zip(*ds.groupby("time.year"))
>>> paths = ["%s.nc" % y for y in years]
>>> paths = [f"{y}.nc" for y in years]
>>> xr.save_mfdataset(datasets, paths)
"""
if mode == "w" and len(set(paths)) < len(paths):
@@ -1215,7 +1208,7 @@
if not isinstance(obj, Dataset):
raise TypeError(
"save_mfdataset only supports writing Dataset "
"objects, received type %s" % type(obj)
f"objects, received type {type(obj)}"
)

if groups is None:
3 changes: 1 addition & 2 deletions xarray/backends/cfgrib_.py
@@ -90,8 +90,7 @@ def get_dimensions(self):

def get_encoding(self):
dims = self.get_dimensions()
encoding = {"unlimited_dims": {k for k, v in dims.items() if v is None}}
return encoding
return {"unlimited_dims": {k for k, v in dims.items() if v is None}}


class CfgribfBackendEntrypoint(BackendEntrypoint):
7 changes: 3 additions & 4 deletions xarray/backends/common.py
@@ -69,9 +69,8 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500
base_delay = initial_delay * 2 ** n
next_delay = base_delay + np.random.randint(base_delay)
msg = (
"getitem failed, waiting %s ms before trying again "
"(%s tries remaining). Full traceback: %s"
% (next_delay, max_retries - n, traceback.format_exc())
f"getitem failed, waiting {next_delay} ms before trying again "
f"({max_retries - n} tries remaining). Full traceback: {traceback.format_exc()}"
)
logger.debug(msg)
time.sleep(1e-3 * next_delay)
@@ -336,7 +335,7 @@ def set_dimensions(self, variables, unlimited_dims=None):
if dim in existing_dims and length != existing_dims[dim]:
raise ValueError(
"Unable to update size for existing dimension"
"%r (%d != %d)" % (dim, length, existing_dims[dim])
f"{dim!r} ({length} != {existing_dims[dim]})"
)
elif dim not in existing_dims:
is_unlimited = dim in unlimited_dims
15 changes: 6 additions & 9 deletions xarray/backends/h5netcdf_.py
@@ -37,8 +37,7 @@
class H5NetCDFArrayWrapper(BaseNetCDF4Array):
def get_array(self, needs_lock=True):
ds = self.datastore._acquire(needs_lock)
variable = ds.variables[self.variable_name]
return variable
return ds.variables[self.variable_name]

def __getitem__(self, key):
return indexing.explicit_indexing_adapter(
@@ -102,7 +101,7 @@ def __init__(self, manager, group=None, mode=None, lock=HDF5_LOCK, autoclose=Fal
if group is None:
root, group = find_root_and_group(manager)
else:
if not type(manager) is h5netcdf.File:
if type(manager) is not h5netcdf.File:
raise ValueError(
"must supply a h5netcdf.File if the group "
"argument is provided"
@@ -233,11 +232,9 @@ def get_dimensions(self):
return self.ds.dimensions

def get_encoding(self):
encoding = {}
encoding["unlimited_dims"] = {
k for k, v in self.ds.dimensions.items() if v is None
return {
"unlimited_dims": {k for k, v in self.ds.dimensions.items() if v is None}
}
return encoding

def set_dimension(self, name, length, is_unlimited=False):
if is_unlimited:
@@ -266,9 +263,9 @@ def prepare_variable(
"h5netcdf does not yet support setting a fill value for "
"variable-length strings "
"(https://github.com/shoyer/h5netcdf/issues/37). "
"Either remove '_FillValue' from encoding on variable %r "
f"Either remove '_FillValue' from encoding on variable {name!r} "
"or set {'dtype': 'S1'} in encoding to use the fixed width "
"NC_CHAR type." % name
"NC_CHAR type."
)

if dtype is str:
2 changes: 1 addition & 1 deletion xarray/backends/locks.py
@@ -167,7 +167,7 @@ def locked(self):
return any(lock.locked for lock in self.locks)

def __repr__(self):
return "CombinedLock(%r)" % list(self.locks)
return f"CombinedLock({list(self.locks)!r})"


class DummyLock: