From f961606c48c50fd437a271e1b71c6816621e95dc Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Fri, 25 Sep 2020 19:05:48 +0200 Subject: [PATCH 01/46] add in api.open_dataset dispatching to stub apiv2 --- xarray/backends/api.py | 8 +- xarray/backends/apiv2.py | 307 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 314 insertions(+), 1 deletion(-) create mode 100644 xarray/backends/apiv2.py diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 9ea222954f4..3486981d0b2 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1,4 +1,4 @@ -import os.path +import os import warnings from glob import glob from io import BytesIO @@ -428,6 +428,12 @@ def open_dataset( -------- open_mfdataset """ + if os.environ.get("XARRAY_BACKEND_API", "v1") == "v1": + kwargs = locals().copy() + from . import apiv2 + if engine in apiv2.ENGINES: + return apiv2.open_dataset(**kwargs) + if autoclose is not None: warnings.warn( "The autoclose argument is no longer used by " diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py new file mode 100644 index 00000000000..13a70425739 --- /dev/null +++ b/xarray/backends/apiv2.py @@ -0,0 +1,307 @@ +import os +import warnings +from pathlib import Path +from typing import ( + TYPE_CHECKING, + +) + +from .. import backends, conventions + +from ..core.dataset import Dataset +from ..core.utils import close_on_error, is_grib_path, is_remote_uri +from .api import ( + _get_backend_cls, + _get_default_engine, + _get_engine_from_magic_number, + _normalize_path, + _protect_dataset_variables_inplace, +) +from .common import AbstractDataStore, ArrayWriter + +if TYPE_CHECKING: + try: + from dask.delayed import Delayed + except ImportError: + Delayed = None + + +DATAARRAY_NAME = "__xarray_dataarray_name__" +DATAARRAY_VARIABLE = "__xarray_dataarray_variable__" + +ENGINES = { + "h5netcdf": backends.H5NetCDFStore.open, +} + + +def open_dataset( + filename_or_obj, + group=None, + decode_cf=True, + mask_and_scale=None, + decode_times=True, + autoclose=None, + concat_characters=True, + decode_coords=True, + engine=None, + chunks=None, + lock=None, + cache=None, + drop_variables=None, + backend_kwargs=None, + use_cftime=None, + decode_timedelta=None, +): + """Open and decode a dataset from a file or file-like object. + + Parameters + ---------- + filename_or_obj : str, Path, file-like or DataStore + Strings and Path objects are interpreted as a path to a netCDF file + or an OpenDAP URL and opened with python-netCDF4, unless the filename + ends with .gz, in which case the file is gunzipped and opened with + scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like + objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF). + group : str, optional + Path to the netCDF4 group in the given file to open (only works for + netCDF4 files). + decode_cf : bool, optional + Whether to decode these variables, assuming they were saved according + to CF conventions. + mask_and_scale : bool, optional + If True, replace array values equal to `_FillValue` with NA and scale + values according to the formula `original_values * scale_factor + + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are + taken from variable attributes (if they exist). If the `_FillValue` or + `missing_value` attribute contains multiple values a warning will be + issued and all array values matching one of the multiple values will + be replaced by NA. mask_and_scale defaults to True except for the + pseudonetcdf backend. 
+ decode_times : bool, optional + If True, decode times encoded in the standard NetCDF datetime format + into datetime objects. Otherwise, leave them encoded as numbers. + autoclose : bool, optional + If True, automatically close files to avoid OS Error of too many files + being open. However, this option doesn't work with streams, e.g., + BytesIO. + concat_characters : bool, optional + If True, concatenate along the last dimension of character arrays to + form string arrays. Dimensions will only be concatenated over (and + removed) if they have no corresponding variable and if they are only + used as the last dimension of character arrays. + decode_coords : bool, optional + If True, decode the 'coordinates' attribute to identify coordinates in + the resulting dataset. + engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", \ + "pseudonetcdf", "zarr"}, optional + Engine to use when reading files. If not provided, the default engine + is chosen based on available dependencies, with a preference for + "netcdf4". + chunks : int or dict, optional + If chunks is provided, it is used to load the new dataset into dask + arrays. ``chunks={}`` loads the dataset with dask using a single + chunk for all arrays. When using ``engine="zarr"``, setting + ``chunks='auto'`` will create dask chunks based on the variable's zarr + chunks. + lock : False or lock-like, optional + Resource lock to use when reading data from disk. Only relevant when + using dask or another form of parallelism. By default, appropriate + locks are chosen to safely read and write files with the currently + active dask scheduler. + cache : bool, optional + If True, cache data loaded from the underlying datastore in memory as + NumPy arrays when accessed to avoid reading from the underlying data- + store multiple times. Defaults to True unless you specify the `chunks` + argument to use dask, in which case it defaults to False. Does not + change the behavior of coordinates corresponding to dimensions, which + always load their data from disk into a ``pandas.Index``. + drop_variables: str or iterable, optional + A variable or list of variables to exclude from being parsed from the + dataset. This may be useful to drop variables with problems or + inconsistent values. + backend_kwargs: dict, optional + A dictionary of keyword arguments to pass on to the backend. This + may be useful when backend options would improve performance or + allow user control of dataset processing. + use_cftime: bool, optional + Only relevant if encoded dates come from a standard calendar + (e.g. "gregorian", "proleptic_gregorian", "standard", or not + specified). If None (default), attempt to decode times to + ``np.datetime64[ns]`` objects; if this is not possible, decode times to + ``cftime.datetime`` objects. If True, always decode times to + ``cftime.datetime`` objects, regardless of whether or not they can be + represented using ``np.datetime64[ns]`` objects. If False, always + decode times to ``np.datetime64[ns]`` objects; if this is not possible + raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. + + Returns + ------- + dataset : Dataset + The newly created dataset. + + Notes + ----- + ``open_dataset`` opens the file with read-only access. 
When you modify + values of a Dataset, even one linked to files on disk, only the in-memory + copy you are manipulating in xarray is modified: the original file on disk + is never touched. + + See Also + -------- + open_mfdataset + """ + + if autoclose is not None: + warnings.warn( + "The autoclose argument is no longer used by " + "xarray.open_dataset() and is now ignored; it will be removed in " + "a future version of xarray. If necessary, you can control the " + "maximum number of simultaneous open files with " + "xarray.set_options(file_cache_maxsize=...).", + FutureWarning, + stacklevel=2, + ) + + if mask_and_scale is None: + mask_and_scale = not engine == "pseudonetcdf" + + if not decode_cf: + mask_and_scale = False + decode_times = False + concat_characters = False + decode_coords = False + decode_timedelta = False + + if cache is None: + cache = chunks is None + + if backend_kwargs is None: + backend_kwargs = {} + extra_kwargs = {} + + def maybe_decode_store(store, chunks, lock=False): + ds = conventions.decode_cf( + store, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + + _protect_dataset_variables_inplace(ds, cache) + + if chunks is not None and engine != "zarr": + from dask.base import tokenize + + # if passed an actual file path, augment the token with + # the file modification time + if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj): + mtime = os.path.getmtime(filename_or_obj) + else: + mtime = None + token = tokenize( + filename_or_obj, + mtime, + group, + decode_cf, + mask_and_scale, + decode_times, + concat_characters, + decode_coords, + engine, + chunks, + drop_variables, + use_cftime, + decode_timedelta, + ) + name_prefix = "open_dataset-%s" % token + ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) + + elif engine == "zarr": + # adapted from Dataset.Chunk() and taken from open_zarr + if not (isinstance(chunks, (int, dict)) or chunks is None): + if chunks != "auto": + raise ValueError( + "chunks must be an int, dict, 'auto', or None. " + "Instead found %s. 
" % chunks + ) + + if chunks == "auto": + try: + import dask.array # noqa + except ImportError: + chunks = None + + # auto chunking needs to be here and not in ZarrStore because + # the variable chunks does not survive decode_cf + # return trivial case + if chunks is None: + return ds + + if isinstance(chunks, int): + chunks = dict.fromkeys(ds.dims, chunks) + + variables = { + k: store.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) + for k, v in ds.variables.items() + } + ds2 = ds._replace(variables) + + else: + ds2 = ds + ds2._file_obj = ds._file_obj + return ds2 + + if isinstance(filename_or_obj, Path): + filename_or_obj = str(filename_or_obj) + + if isinstance(filename_or_obj, AbstractDataStore): + store = filename_or_obj + else: + if isinstance(filename_or_obj, str): + filename_or_obj = _normalize_path(filename_or_obj) + + if engine is None: + engine = _get_default_engine(filename_or_obj, allow_remote=True) + elif engine != "zarr": + if engine not in [None, "scipy", "h5netcdf"]: + raise ValueError( + "can only read bytes or file-like objects " + "with engine='scipy' or 'h5netcdf'" + ) + engine = _get_engine_from_magic_number(filename_or_obj) + + if engine in ["netcdf4", "h5netcdf"]: + extra_kwargs["group"] = group + extra_kwargs["lock"] = lock + elif engine in ["pynio", "pseudonetcdf", "cfgrib"]: + extra_kwargs["lock"] = lock + elif engine == "zarr": + backend_kwargs = backend_kwargs.copy() + overwrite_encoded_chunks = backend_kwargs.pop( + "overwrite_encoded_chunks", None + ) + extra_kwargs["mode"] = "r" + extra_kwargs["group"] = group + + opener = _get_backend_cls(engine) + store = opener(filename_or_obj, **extra_kwargs, **backend_kwargs) + + with close_on_error(store): + ds = maybe_decode_store(store, chunks) + + # Ensure source filename always stored in dataset object (GH issue #2550) + if "source" not in ds.encoding: + if isinstance(filename_or_obj, str): + ds.encoding["source"] = filename_or_obj + + return ds + From fb166faaad0c30aa5d504e7ff3485ba9ce27ef78 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Fri, 25 Sep 2020 19:17:23 +0200 Subject: [PATCH 02/46] remove in apiv2 check for input AbstractDataStore --- xarray/backends/apiv2.py | 56 +++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 13a70425739..65d3e528fe4 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -263,37 +263,34 @@ def maybe_decode_store(store, chunks, lock=False): if isinstance(filename_or_obj, Path): filename_or_obj = str(filename_or_obj) - if isinstance(filename_or_obj, AbstractDataStore): - store = filename_or_obj - else: - if isinstance(filename_or_obj, str): - filename_or_obj = _normalize_path(filename_or_obj) - - if engine is None: - engine = _get_default_engine(filename_or_obj, allow_remote=True) - elif engine != "zarr": - if engine not in [None, "scipy", "h5netcdf"]: - raise ValueError( - "can only read bytes or file-like objects " - "with engine='scipy' or 'h5netcdf'" - ) - engine = _get_engine_from_magic_number(filename_or_obj) - - if engine in ["netcdf4", "h5netcdf"]: - extra_kwargs["group"] = group - extra_kwargs["lock"] = lock - elif engine in ["pynio", "pseudonetcdf", "cfgrib"]: - extra_kwargs["lock"] = lock - elif engine == "zarr": - backend_kwargs = backend_kwargs.copy() - overwrite_encoded_chunks = backend_kwargs.pop( - "overwrite_encoded_chunks", None + if isinstance(filename_or_obj, str): + filename_or_obj = _normalize_path(filename_or_obj) + + if 
engine is None: + engine = _get_default_engine(filename_or_obj, allow_remote=True) + elif engine != "zarr": + if engine not in [None, "scipy", "h5netcdf"]: + raise ValueError( + "can only read bytes or file-like objects " + "with engine='scipy' or 'h5netcdf'" ) - extra_kwargs["mode"] = "r" - extra_kwargs["group"] = group + engine = _get_engine_from_magic_number(filename_or_obj) + + if engine in ["netcdf4", "h5netcdf"]: + extra_kwargs["group"] = group + extra_kwargs["lock"] = lock + elif engine in ["pynio", "pseudonetcdf", "cfgrib"]: + extra_kwargs["lock"] = lock + elif engine == "zarr": + backend_kwargs = backend_kwargs.copy() + overwrite_encoded_chunks = backend_kwargs.pop( + "overwrite_encoded_chunks", None + ) + extra_kwargs["mode"] = "r" + extra_kwargs["group"] = group - opener = _get_backend_cls(engine) - store = opener(filename_or_obj, **extra_kwargs, **backend_kwargs) + opener = _get_backend_cls(engine) + store = opener(filename_or_obj, **extra_kwargs, **backend_kwargs) with close_on_error(store): ds = maybe_decode_store(store, chunks) @@ -304,4 +301,3 @@ def maybe_decode_store(store, chunks, lock=False): ds.encoding["source"] = filename_or_obj return ds - From 0221eec647228ad5d296c614f4307d3707209be9 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Fri, 25 Sep 2020 20:15:18 +0200 Subject: [PATCH 03/46] bugfix typo --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 3486981d0b2..ce14c63de36 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -428,7 +428,7 @@ def open_dataset( -------- open_mfdataset """ - if os.environ.get("XARRAY_BACKEND_API", "v1") == "v1": + if os.environ.get("XARRAY_BACKEND_API", "v1") == "v2": kwargs = locals().copy() from . import apiv2 if engine in apiv2.ENGINES: From 36a02c706cf62808c5f49d2a9abfc8951cce9200 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Fri, 25 Sep 2020 20:17:26 +0200 Subject: [PATCH 04/46] add kwarg engines in _get_backend_cls needed by apiv2 --- xarray/backends/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index ce14c63de36..a177634fa5d 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -163,10 +163,10 @@ def _get_default_engine(path, allow_remote=False): return engine -def _get_backend_cls(engine): +def _get_backend_cls(engine, engines=ENGINES): """Select open_dataset method based on current engine""" try: - return ENGINES[engine] + return engines[engine] except KeyError: raise ValueError( "unrecognized engine for open_dataset: {}\n" From cfb8cb8b592e149079f32d72aa203ef743d95084 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Fri, 25 Sep 2020 20:29:51 +0200 Subject: [PATCH 05/46] add alpha support for h5netcdf --- xarray/backends/apiv2.py | 70 +++++++++++------------------------- xarray/backends/h5netcdf_.py | 43 +++++++++++++++++++++- 2 files changed, 62 insertions(+), 51 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 65d3e528fe4..92dbd2f307b 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -17,7 +17,7 @@ _normalize_path, _protect_dataset_variables_inplace, ) -from .common import AbstractDataStore, ArrayWriter +from . 
import h5netcdf_ if TYPE_CHECKING: try: @@ -30,7 +30,7 @@ DATAARRAY_VARIABLE = "__xarray_dataarray_variable__" ENGINES = { - "h5netcdf": backends.H5NetCDFStore.open, + "h5netcdf": h5netcdf_.open_dataset_h5necdf_, } @@ -184,21 +184,10 @@ def open_dataset( backend_kwargs = {} extra_kwargs = {} - def maybe_decode_store(store, chunks, lock=False): - ds = conventions.decode_cf( - store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, - ) - + def chunk_backend_ds(ds, chunks, lock=False): _protect_dataset_variables_inplace(ds, cache) - if chunks is not None and engine != "zarr": + if chunks is not None: from dask.base import tokenize # if passed an actual file path, augment the token with @@ -225,36 +214,6 @@ def maybe_decode_store(store, chunks, lock=False): name_prefix = "open_dataset-%s" % token ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) - elif engine == "zarr": - # adapted from Dataset.Chunk() and taken from open_zarr - if not (isinstance(chunks, (int, dict)) or chunks is None): - if chunks != "auto": - raise ValueError( - "chunks must be an int, dict, 'auto', or None. " - "Instead found %s. " % chunks - ) - - if chunks == "auto": - try: - import dask.array # noqa - except ImportError: - chunks = None - - # auto chunking needs to be here and not in ZarrStore because - # the variable chunks does not survive decode_cf - # return trivial case - if chunks is None: - return ds - - if isinstance(chunks, int): - chunks = dict.fromkeys(ds.dims, chunks) - - variables = { - k: store.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) - for k, v in ds.variables.items() - } - ds2 = ds._replace(variables) - else: ds2 = ds ds2._file_obj = ds._file_obj @@ -289,11 +248,22 @@ def maybe_decode_store(store, chunks, lock=False): extra_kwargs["mode"] = "r" extra_kwargs["group"] = group - opener = _get_backend_cls(engine) - store = opener(filename_or_obj, **extra_kwargs, **backend_kwargs) - - with close_on_error(store): - ds = maybe_decode_store(store, chunks) + opener = _get_backend_cls(engine, engines=ENGINES) + + backend_ds = opener( + filename_or_obj, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + **backend_kwargs, + **extra_kwargs + ) + + ds = chunk_backend_ds(backend_ds, chunks, lock=False) # Ensure source filename always stored in dataset object (GH issue #2550) if "source" not in ds.encoding: diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index f3e61eeee74..4735d9d87ea 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -3,8 +3,10 @@ import numpy as np +from .. 
import conventions from ..core import indexing -from ..core.utils import FrozenDict, is_remote_uri +from ..core.dataset import Dataset +from ..core.utils import close_on_error, FrozenDict, is_remote_uri from ..core.variable import Variable from .common import WritableCFDataStore, find_root_and_group from .file_manager import CachingFileManager, DummyFileManager @@ -302,3 +304,42 @@ def sync(self): def close(self, **kwargs): self._manager.close(**kwargs) + + +def open_dataset_h5necdf_( + filename_or_obj, + mask_and_scale=None, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + **kwargs +): + store = H5NetCDFStore.open(filename_or_obj, **kwargs) + + with close_on_error(store): + vars, attrs = store.load() + extra_coords = set() + file_obj = store + encoding = store.get_encoding() + + vars, attrs, coord_names = conventions.decode_cf_variables( + vars, + attrs, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + + ds = Dataset(vars, attrs=attrs) + ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) + ds._file_obj = file_obj + ds.encoding = encoding + + return ds \ No newline at end of file From 4256bc89c645648ef0ef7d55ae38e2dd2cef5f06 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Mon, 28 Sep 2020 08:55:12 +0200 Subject: [PATCH 06/46] style: clean not used code, modify some variable/function name --- xarray/backends/apiv2.py | 22 +++------------------- xarray/backends/h5netcdf_.py | 2 +- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 92dbd2f307b..7e79d1246c4 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -1,12 +1,7 @@ import os import warnings from pathlib import Path -from typing import ( - TYPE_CHECKING, -) - -from .. import backends, conventions from ..core.dataset import Dataset from ..core.utils import close_on_error, is_grib_path, is_remote_uri @@ -19,18 +14,8 @@ ) from . 
import h5netcdf_ -if TYPE_CHECKING: - try: - from dask.delayed import Delayed - except ImportError: - Delayed = None - - -DATAARRAY_NAME = "__xarray_dataarray_name__" -DATAARRAY_VARIABLE = "__xarray_dataarray_variable__" - ENGINES = { - "h5netcdf": h5netcdf_.open_dataset_h5necdf_, + "h5netcdf": h5netcdf_.open_backend_dataset_h5necdf, } @@ -155,7 +140,6 @@ def open_dataset( -------- open_mfdataset """ - if autoclose is not None: warnings.warn( "The autoclose argument is no longer used by " @@ -248,9 +232,9 @@ def chunk_backend_ds(ds, chunks, lock=False): extra_kwargs["mode"] = "r" extra_kwargs["group"] = group - opener = _get_backend_cls(engine, engines=ENGINES) + open_backend_dataset = _get_backend_cls(engine, engines=ENGINES) - backend_ds = opener( + backend_ds = open_backend_dataset( filename_or_obj, mask_and_scale=mask_and_scale, decode_times=decode_times, diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 4735d9d87ea..4af0947f16d 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -306,7 +306,7 @@ def close(self, **kwargs): self._manager.close(**kwargs) -def open_dataset_h5necdf_( +def open_backend_dataset_h5necdf( filename_or_obj, mask_and_scale=None, decode_times=None, From 1bc73919a85cb86b5d0d2e2c894e619edf838184 Mon Sep 17 00:00:00 2001 From: TheRed86 Date: Mon, 28 Sep 2020 11:51:59 +0200 Subject: [PATCH 07/46] Add ENGINES entry for cfgrib. --- xarray/backends/apiv2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 7e79d1246c4..6a259101db7 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -12,10 +12,11 @@ _normalize_path, _protect_dataset_variables_inplace, ) -from . import h5netcdf_ +from . import h5netcdf_, cfgrib_ ENGINES = { "h5netcdf": h5netcdf_.open_backend_dataset_h5necdf, + "cfgrib": cfgrib_.open_backend_dataset_cfgrib, } From 748fe5acc80f4ee9302f2f62dd2adf03d0f4c80a Mon Sep 17 00:00:00 2001 From: TheRed86 Date: Mon, 28 Sep 2020 11:53:32 +0200 Subject: [PATCH 08/46] Define function open_backend_dataset_cfgrib() to be used in apiv2.py. Add necessary imports for this function. --- xarray/backends/cfgrib_.py | 43 +++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index bd946df89b2..1be7c4c7504 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -1,7 +1,9 @@ import numpy as np +from .. 
import conventions from ..core import indexing -from ..core.utils import Frozen, FrozenDict +from ..core.dataset import Dataset +from ..core.utils import close_on_error, Frozen, FrozenDict from ..core.variable import Variable from .common import AbstractDataStore, BackendArray from .locks import SerializableLock, ensure_lock @@ -69,3 +71,42 @@ def get_encoding(self): dims = self.get_dimensions() encoding = {"unlimited_dims": {k for k, v in dims.items() if v is None}} return encoding + + +def open_backend_dataset_cfgrib( + filename_or_obj, + mask_and_scale=None, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + **kwargs +): + store = CfGribDataStore(filename_or_obj, **kwargs) + + with close_on_error(store): + vars, attrs = store.load() + extra_coords = set() + file_obj = store + encoding = store.get_encoding() + + vars, attrs, coord_names = conventions.decode_cf_variables( + vars, + attrs, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + + ds = Dataset(vars, attrs=attrs) + ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) + ds._file_obj = file_obj + ds.encoding = encoding + + return ds From fb368feca2354f84b1bc91738d32fed90433b62d Mon Sep 17 00:00:00 2001 From: TheRed86 Date: Mon, 28 Sep 2020 11:55:22 +0200 Subject: [PATCH 09/46] Apply black to check formatting. --- xarray/backends/apiv2.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 6a259101db7..0d1ae216af5 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -227,9 +227,7 @@ def chunk_backend_ds(ds, chunks, lock=False): extra_kwargs["lock"] = lock elif engine == "zarr": backend_kwargs = backend_kwargs.copy() - overwrite_encoded_chunks = backend_kwargs.pop( - "overwrite_encoded_chunks", None - ) + overwrite_encoded_chunks = backend_kwargs.pop("overwrite_encoded_chunks", None) extra_kwargs["mode"] = "r" extra_kwargs["group"] = group @@ -245,7 +243,7 @@ def chunk_backend_ds(ds, chunks, lock=False): use_cftime=use_cftime, decode_timedelta=decode_timedelta, **backend_kwargs, - **extra_kwargs + **extra_kwargs, ) ds = chunk_backend_ds(backend_ds, chunks, lock=False) From 80e111cbd7a3393f1faef5645dc34878856e4351 Mon Sep 17 00:00:00 2001 From: TheRed86 Date: Mon, 28 Sep 2020 11:56:59 +0200 Subject: [PATCH 10/46] Apply black to check formatting. 
--- xarray/backends/cfgrib_.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 1be7c4c7504..6b038142ea2 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -74,15 +74,15 @@ def get_encoding(self): def open_backend_dataset_cfgrib( - filename_or_obj, - mask_and_scale=None, - decode_times=None, - concat_characters=None, - decode_coords=None, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - **kwargs + filename_or_obj, + mask_and_scale=None, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + **kwargs, ): store = CfGribDataStore(filename_or_obj, **kwargs) From e15ca6b3965cf846da438abb128a6b5d1128a03e Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Mon, 28 Sep 2020 18:17:06 +0200 Subject: [PATCH 11/46] add dummy zarr apiv2 backend --- xarray/backends/apiv2.py | 36 ++++++++++++++++++++++++++++++--- xarray/backends/zarr.py | 43 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 7e79d1246c4..728dd04cc6c 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -12,10 +12,11 @@ _normalize_path, _protect_dataset_variables_inplace, ) -from . import h5netcdf_ +from . import h5netcdf_, zarr ENGINES = { "h5netcdf": h5netcdf_.open_backend_dataset_h5necdf, + "zaar": zarr.open_backend_dataset_zarr, } @@ -170,8 +171,7 @@ def open_dataset( def chunk_backend_ds(ds, chunks, lock=False): _protect_dataset_variables_inplace(ds, cache) - - if chunks is not None: + if chunks is not None and engine != "zarr": from dask.base import tokenize # if passed an actual file path, augment the token with @@ -198,6 +198,36 @@ def chunk_backend_ds(ds, chunks, lock=False): name_prefix = "open_dataset-%s" % token ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) + elif engine == "zarr": + # adapted from Dataset.Chunk() and taken from open_zarr + if not (isinstance(chunks, (int, dict)) or chunks is None): + if chunks != "auto": + raise ValueError( + "chunks must be an int, dict, 'auto', or None. " + "Instead found %s. " % chunks + ) + + if chunks == "auto": + try: + import dask.array # noqa + except ImportError: + chunks = None + + # auto chunking needs to be here and not in ZarrStore because + # the variable chunks does not survive decode_cf + # return trivial case + if chunks is None: + return ds + + if isinstance(chunks, int): + chunks = dict.fromkeys(ds.dims, chunks) + + variables = { + k: store.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) + for k, v in ds.variables.items() + } + ds2 = ds._replace(variables) + else: ds2 = ds ds2._file_obj = ds._file_obj diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 260d27fbabe..bf80fa87bf2 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -2,10 +2,12 @@ import numpy as np + from .. 
import coding, conventions from ..core import indexing +from ..core.dataset import Dataset from ..core.pycompat import integer_types -from ..core.utils import FrozenDict, HiddenKeyDict +from ..core.utils import close_on_error, FrozenDict, HiddenKeyDict from ..core.variable import Variable from .common import AbstractWritableDataStore, BackendArray, _encode_variable_name @@ -677,3 +679,42 @@ def open_zarr( ) return ds + + +def open_backend_dataset_zarr( + filename_or_obj, + mask_and_scale=None, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + **kwargs +): + store = ZarrStore.open_group.open(filename_or_obj, **kwargs) + + with close_on_error(store): + vars, attrs = store.load() + extra_coords = set() + file_obj = store + encoding = store.get_encoding() + + vars, attrs, coord_names = conventions.decode_cf_variables( + vars, + attrs, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + + ds = Dataset(vars, attrs=attrs) + ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) + ds._file_obj = file_obj + ds.encoding = encoding + + return ds From 4b19399d17a10df55a0114f0d4dee41a4b3163b0 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Mon, 28 Sep 2020 18:48:35 +0200 Subject: [PATCH 12/46] align apiv2.open_dataset to api.open_dataset --- xarray/backends/api.py | 1 + xarray/backends/apiv2.py | 17 ++++------------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 58f1a6806d7..2c6949987c7 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -154,6 +154,7 @@ def _get_default_engine(path, allow_remote=False): engine = _get_default_engine_netcdf() return engine + def _autodetect_engine(filename_or_obj): if isinstance(filename_or_obj, str): engine = _get_default_engine(filename_or_obj, allow_remote=True) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 728dd04cc6c..92fc8e7fd67 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -11,6 +11,7 @@ _get_engine_from_magic_number, _normalize_path, _protect_dataset_variables_inplace, + _autodetect_engine, ) from . 
import h5netcdf_, zarr @@ -233,21 +234,11 @@ def chunk_backend_ds(ds, chunks, lock=False): ds2._file_obj = ds._file_obj return ds2 - if isinstance(filename_or_obj, Path): - filename_or_obj = str(filename_or_obj) - if isinstance(filename_or_obj, str): - filename_or_obj = _normalize_path(filename_or_obj) + filename_or_obj = _normalize_path(filename_or_obj) - if engine is None: - engine = _get_default_engine(filename_or_obj, allow_remote=True) - elif engine != "zarr": - if engine not in [None, "scipy", "h5netcdf"]: - raise ValueError( - "can only read bytes or file-like objects " - "with engine='scipy' or 'h5netcdf'" - ) - engine = _get_engine_from_magic_number(filename_or_obj) + if engine is None: + engine = _autodetect_engine(filename_or_obj) if engine in ["netcdf4", "h5netcdf"]: extra_kwargs["group"] = group From 572595f5e2aeb94b25cfd3e60c22667f59c72912 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Tue, 29 Sep 2020 09:16:00 +0200 Subject: [PATCH 13/46] remove unused extra_coords in open_backend_dataset_* --- xarray/backends/apiv2.py | 1 - xarray/backends/h5netcdf_.py | 3 +-- xarray/backends/zarr.py | 3 +-- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 92fc8e7fd67..f9d345ff486 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -234,7 +234,6 @@ def chunk_backend_ds(ds, chunks, lock=False): ds2._file_obj = ds._file_obj return ds2 - filename_or_obj = _normalize_path(filename_or_obj) if engine is None: diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index fc6480e41bb..8dcb0a28681 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -340,7 +340,6 @@ def open_backend_dataset_h5necdf( with close_on_error(store): vars, attrs = store.load() - extra_coords = set() file_obj = store encoding = store.get_encoding() @@ -357,7 +356,7 @@ def open_backend_dataset_h5necdf( ) ds = Dataset(vars, attrs=attrs) - ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) + ds = ds.set_coords(coord_names.intersection(vars)) ds._file_obj = file_obj ds.encoding = encoding diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index bf80fa87bf2..7e7e3fc0040 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -696,7 +696,6 @@ def open_backend_dataset_zarr( with close_on_error(store): vars, attrs = store.load() - extra_coords = set() file_obj = store encoding = store.get_encoding() @@ -713,7 +712,7 @@ def open_backend_dataset_zarr( ) ds = Dataset(vars, attrs=attrs) - ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) + ds = ds.set_coords(coord_names.intersection(vars)) ds._file_obj = file_obj ds.encoding = encoding From 74aba140a5dafd8fe366680fa368b9c49eaf818f Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Tue, 29 Sep 2020 09:38:10 +0200 Subject: [PATCH 14/46] remove extra_coords in open_backend_dataset_cfgrib --- xarray/backends/cfgrib_.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 6b038142ea2..6f44eee3827 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -88,7 +88,6 @@ def open_backend_dataset_cfgrib( with close_on_error(store): vars, attrs = store.load() - extra_coords = set() file_obj = store encoding = store.get_encoding() @@ -105,7 +104,7 @@ def open_backend_dataset_cfgrib( ) ds = Dataset(vars, attrs=attrs) - ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) + 
ds = ds.set_coords(coord_names.intersection(vars)) ds._file_obj = file_obj ds.encoding = encoding From d6280ec1e65659cf130fe30387d5f3e20dc3845a Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Tue, 29 Sep 2020 11:42:09 +0200 Subject: [PATCH 15/46] transform zarr maybe_chunk and get_chunks in classmethod - to be used in apiv2 without instantiate the object --- xarray/backends/zarr.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 7e7e3fc0040..76d6ac8a61b 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -363,7 +363,8 @@ def encode_variable(self, variable): def encode_attribute(self, a): return encode_zarr_attr_value(a) - def get_chunk(self, name, var, chunks): + @classmethod + def get_chunk(cls, name, var, chunks): chunk_spec = dict(zip(var.dims, var.encoding.get("chunks"))) # Coordinate labels aren't chunked @@ -392,8 +393,9 @@ def get_chunk(self, name, var, chunks): chunk_spec[dim] = chunks[dim] return chunk_spec - def maybe_chunk(self, name, var, chunks, overwrite_encoded_chunks): - chunk_spec = self.get_chunk(name, var, chunks) + @classmethod + def maybe_chunk(cls, name, var, chunks, overwrite_encoded_chunks): + chunk_spec = cls.get_chunk(name, var, chunks) if (var.ndim > 0) and (chunk_spec is not None): from dask.base import tokenize @@ -692,7 +694,7 @@ def open_backend_dataset_zarr( decode_timedelta=None, **kwargs ): - store = ZarrStore.open_group.open(filename_or_obj, **kwargs) + store = ZarrStore.open_group(filename_or_obj, **kwargs) with close_on_error(store): vars, attrs = store.load() From c0e0f344c1409152fb26558c4e94706b45e4ef41 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Tue, 29 Sep 2020 11:48:37 +0200 Subject: [PATCH 16/46] make alpha zarr apiv2 working --- xarray/backends/apiv2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index c159719b674..5e596d87059 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -17,7 +17,7 @@ ENGINES = { "h5netcdf": h5netcdf_.open_backend_dataset_h5necdf, - "zaar": zarr.open_backend_dataset_zarr, + "zarr": zarr.open_backend_dataset_zarr, "cfgrib": cfgrib_.open_backend_dataset_cfgrib, } @@ -225,7 +225,7 @@ def chunk_backend_ds(ds, chunks, lock=False): chunks = dict.fromkeys(ds.dims, chunks) variables = { - k: store.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) + k: zarr.ZarrStore.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) for k, v in ds.variables.items() } ds2 = ds._replace(variables) From 6431101b61f4f9ac22b549e3fa25cb6f9f160977 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Tue, 29 Sep 2020 17:19:39 +0200 Subject: [PATCH 17/46] refactor apiv2.open_dataset: - modify signature - move default setting inside backends --- xarray/backends/apiv2.py | 72 +++++------------------------------- xarray/backends/cfgrib_.py | 37 +++++++++++++++++- xarray/backends/h5netcdf_.py | 36 ++++++++++++++++-- xarray/backends/zarr.py | 48 +++++++++++++++++++----- xarray/core/variable.py | 7 +--- 5 files changed, 116 insertions(+), 84 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 5e596d87059..405e23399c7 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -1,14 +1,9 @@ import os -import warnings -from pathlib import Path - from ..core.dataset import Dataset from ..core.utils import close_on_error, is_grib_path, is_remote_uri from .api import ( _get_backend_cls, - 
_get_default_engine, - _get_engine_from_magic_number, _normalize_path, _protect_dataset_variables_inplace, _autodetect_engine, @@ -22,23 +17,18 @@ } +def filter_None(d): + return {k: v for k, v in d.items() if v is not None} + + def open_dataset( filename_or_obj, - group=None, - decode_cf=True, - mask_and_scale=None, - decode_times=True, - autoclose=None, - concat_characters=True, - decode_coords=True, + *, engine=None, chunks=None, - lock=None, cache=None, - drop_variables=None, backend_kwargs=None, - use_cftime=None, - decode_timedelta=None, + **kwargs ): """Open and decode a dataset from a file or file-like object. @@ -143,33 +133,12 @@ def open_dataset( -------- open_mfdataset """ - if autoclose is not None: - warnings.warn( - "The autoclose argument is no longer used by " - "xarray.open_dataset() and is now ignored; it will be removed in " - "a future version of xarray. If necessary, you can control the " - "maximum number of simultaneous open files with " - "xarray.set_options(file_cache_maxsize=...).", - FutureWarning, - stacklevel=2, - ) - - if mask_and_scale is None: - mask_and_scale = not engine == "pseudonetcdf" - - if not decode_cf: - mask_and_scale = False - decode_times = False - concat_characters = False - decode_coords = False - decode_timedelta = False if cache is None: cache = chunks is None if backend_kwargs is None: backend_kwargs = {} - extra_kwargs = {} def chunk_backend_ds(ds, chunks, lock=False): _protect_dataset_variables_inplace(ds, cache) @@ -185,17 +154,10 @@ def chunk_backend_ds(ds, chunks, lock=False): token = tokenize( filename_or_obj, mtime, - group, - decode_cf, - mask_and_scale, - decode_times, - concat_characters, - decode_coords, engine, chunks, - drop_variables, - use_cftime, - decode_timedelta, + **backend_kwargs, + **kwargs ) name_prefix = "open_dataset-%s" % token ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) @@ -240,32 +202,18 @@ def chunk_backend_ds(ds, chunks, lock=False): if engine is None: engine = _autodetect_engine(filename_or_obj) - if engine in ["netcdf4", "h5netcdf"]: - extra_kwargs["group"] = group - extra_kwargs["lock"] = lock - elif engine in ["pynio", "pseudonetcdf", "cfgrib"]: - extra_kwargs["lock"] = lock - elif engine == "zarr": + if engine == "zarr": backend_kwargs = backend_kwargs.copy() overwrite_encoded_chunks = backend_kwargs.pop( "overwrite_encoded_chunks", None ) - extra_kwargs["mode"] = "r" - extra_kwargs["group"] = group open_backend_dataset = _get_backend_cls(engine, engines=ENGINES) backend_ds = open_backend_dataset( filename_or_obj, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, **backend_kwargs, - **extra_kwargs + **filter_None(kwargs), ) ds = chunk_backend_ds(backend_ds, chunks, lock=False) diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 6f44eee3827..2764b0cab5a 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -1,3 +1,4 @@ +import warnings import numpy as np from .. 
import conventions @@ -75,16 +76,48 @@ def get_encoding(self): def open_backend_dataset_cfgrib( filename_or_obj, - mask_and_scale=None, + *, + decode_cf=True, + mask_and_scale=True, decode_times=None, concat_characters=None, decode_coords=None, drop_variables=None, use_cftime=None, decode_timedelta=None, + lock=None, + indexpath='{path}.{short_hash}.idx', + filter_by_keys={}, + read_keys=[], + encode_cf=('parameter', 'time', 'geography', 'vertical'), + squeeze=True, + time_dims=('time', 'step'), **kwargs, ): - store = CfGribDataStore(filename_or_obj, **kwargs) + if kwargs: + warnings.warn( + "The following keywords are not supported by cfgrib " + "backend and they will bw ignored:%r" % kwargs + ) + + if not decode_cf: + mask_and_scale = False + decode_times = False + concat_characters = False + decode_coords = False + decode_timedelta = False + + + store = CfGribDataStore( + filename_or_obj, + indexpath=indexpath, + filter_by_keys=filter_by_keys, + read_keys=read_keys, + encode_cf=encode_cf, + squeeze=squeeze, + time_dims=time_dims, + lock=lock, + ) with close_on_error(store): vars, attrs = store.load() diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 8dcb0a28681..980f8e25235 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -1,4 +1,5 @@ import functools +import warnings from distutils.version import LooseVersion import numpy as np @@ -327,16 +328,43 @@ def close(self, **kwargs): def open_backend_dataset_h5necdf( filename_or_obj, - mask_and_scale=None, + *, + decode_cf=True, + mask_and_scale=True, decode_times=None, concat_characters=None, decode_coords=None, drop_variables=None, use_cftime=None, decode_timedelta=None, - **kwargs + format=None, + group=None, + lock=None, + invalid_netcdf=None, + phony_dims=None, + **kwargs, ): - store = H5NetCDFStore.open(filename_or_obj, **kwargs) + if kwargs: + warnings.warn( + "The following keywords are not supported by h5netcdf " + "and they will bw ignored:%r" % kwargs + ) + + if not decode_cf: + mask_and_scale = False + decode_times = False + concat_characters = False + decode_coords = False + decode_timedelta = False + + store = H5NetCDFStore.open( + filename_or_obj, + format=format, + group=group, + lock=lock, + invalid_netcdf=invalid_netcdf, + phony_dims=phony_dims, + ) with close_on_error(store): vars, attrs = store.load() @@ -360,4 +388,4 @@ def open_backend_dataset_h5necdf( ds._file_obj = file_obj ds.encoding = encoding - return ds \ No newline at end of file + return ds diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 76d6ac8a61b..ed7972e5390 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -684,17 +684,45 @@ def open_zarr( def open_backend_dataset_zarr( - filename_or_obj, - mask_and_scale=None, - decode_times=None, - concat_characters=None, - decode_coords=None, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - **kwargs + filename_or_obj, + decode_cf=True, + mask_and_scale=True, + decode_times=None, + concat_characters=None, + decode_coords=None, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + group=None, + mode='r', + synchronizer=None, + consolidated=False, + consolidate_on_close=False, + chunk_store=None, + **kwargs, ): - store = ZarrStore.open_group(filename_or_obj, **kwargs) + if kwargs: + warnings.warn( + "The following keywords are not supported by zarr" + "and they will bw ignored:%r" % kwargs + ) + + if not decode_cf: + mask_and_scale = False + decode_times = False + concat_characters = False + 
decode_coords = False + decode_timedelta = False + + store = ZarrStore.open_group( + filename_or_obj, + group=group, + mode=mode, + synchronizer=synchronizer, + consolidated=consolidated, + consolidate_on_close=consolidate_on_close, + chunk_store=chunk_store, + ) with close_on_error(store): vars, attrs = store.load() diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c55e61cb816..d880dbc6737 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -51,12 +51,7 @@ ) NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( - ( - indexing.ExplicitlyIndexed, - pd.Index, - ) - + dask_array_type - + cupy_array_type + (indexing.ExplicitlyIndexed, pd.Index,) + dask_array_type + cupy_array_type ) # https://github.com/python/mypy/issues/224 BASIC_INDEXING_TYPES = integer_types + (slice,) # type: ignore From 50d1ebe4b71d1c646a4517711975ac1b138ecbc4 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 30 Sep 2020 12:06:17 +0200 Subject: [PATCH 18/46] move dataset_from_backend_dataset out of apiv2.open_dataset --- xarray/backends/apiv2.py | 146 +++++++++++++++++++++------------------ 1 file changed, 77 insertions(+), 69 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 405e23399c7..55aa6b615da 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -21,6 +21,77 @@ def filter_None(d): return {k: v for k, v in d.items() if v is not None} +def dataset_from_backend_dataset ( + ds, + filename_or_obj, + engine, + chunks, + cache, + overwrite_encoded_chunks, + backend_kwargs, + **kwargs, +): + if not (isinstance(chunks, (int, dict)) or chunks is None): + if chunks != "auto": + raise ValueError( + "chunks must be an int, dict, 'auto', or None. " + "Instead found %s. " % chunks + ) + + _protect_dataset_variables_inplace(ds, cache) + if chunks is not None and engine != "zarr": + from dask.base import tokenize + + # if passed an actual file path, augment the token with + # the file modification time + if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj): + mtime = os.path.getmtime(filename_or_obj) + else: + mtime = None + token = tokenize( + filename_or_obj, + mtime, + engine, + chunks, + **backend_kwargs, + **kwargs + ) + name_prefix = "open_dataset-%s" % token + ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) + + elif engine == "zarr": + + if chunks == "auto": + try: + import dask.array # noqa + except ImportError: + chunks = None + + if chunks is None: + return ds + + if isinstance(chunks, int): + chunks = dict.fromkeys(ds.dims, chunks) + + variables = { + k: zarr.ZarrStore.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) + for k, v in ds.variables.items() + } + ds2 = ds._replace(variables) + + else: + ds2 = ds + ds2._file_obj = ds._file_obj + + # Ensure source filename always stored in dataset object (GH issue #2550) + if "source" not in ds.encoding: + if isinstance(filename_or_obj, str): + ds.encoding["source"] = filename_or_obj + + return ds2 + + + def open_dataset( filename_or_obj, *, @@ -140,73 +211,16 @@ def open_dataset( if backend_kwargs is None: backend_kwargs = {} - def chunk_backend_ds(ds, chunks, lock=False): - _protect_dataset_variables_inplace(ds, cache) - if chunks is not None and engine != "zarr": - from dask.base import tokenize - - # if passed an actual file path, augment the token with - # the file modification time - if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj): - mtime = os.path.getmtime(filename_or_obj) - else: - mtime = None - token = tokenize( - 
filename_or_obj, - mtime, - engine, - chunks, - **backend_kwargs, - **kwargs - ) - name_prefix = "open_dataset-%s" % token - ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) - - elif engine == "zarr": - # adapted from Dataset.Chunk() and taken from open_zarr - if not (isinstance(chunks, (int, dict)) or chunks is None): - if chunks != "auto": - raise ValueError( - "chunks must be an int, dict, 'auto', or None. " - "Instead found %s. " % chunks - ) - - if chunks == "auto": - try: - import dask.array # noqa - except ImportError: - chunks = None - - # auto chunking needs to be here and not in ZarrStore because - # the variable chunks does not survive decode_cf - # return trivial case - if chunks is None: - return ds - - if isinstance(chunks, int): - chunks = dict.fromkeys(ds.dims, chunks) - - variables = { - k: zarr.ZarrStore.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) - for k, v in ds.variables.items() - } - ds2 = ds._replace(variables) - - else: - ds2 = ds - ds2._file_obj = ds._file_obj - return ds2 - filename_or_obj = _normalize_path(filename_or_obj) if engine is None: engine = _autodetect_engine(filename_or_obj) - if engine == "zarr": - backend_kwargs = backend_kwargs.copy() - overwrite_encoded_chunks = backend_kwargs.pop( - "overwrite_encoded_chunks", None - ) + + backend_kwargs = backend_kwargs.copy() + overwrite_encoded_chunks = backend_kwargs.pop( + "overwrite_encoded_chunks", None + ) open_backend_dataset = _get_backend_cls(engine, engines=ENGINES) @@ -215,12 +229,6 @@ def chunk_backend_ds(ds, chunks, lock=False): **backend_kwargs, **filter_None(kwargs), ) - - ds = chunk_backend_ds(backend_ds, chunks, lock=False) - - # Ensure source filename always stored in dataset object (GH issue #2550) - if "source" not in ds.encoding: - if isinstance(filename_or_obj, str): - ds.encoding["source"] = filename_or_obj + ds = dataset_from_backend_dataset(backend_ds, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, backend_kwargs, **kwargs) return ds From 383d32303e8c478ea6dbc64e5ef80cf9aadaa48c Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 30 Sep 2020 13:18:34 +0200 Subject: [PATCH 19/46] remove blank lines --- xarray/backends/apiv2.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 55aa6b615da..e0c3010224f 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -216,14 +216,12 @@ def open_dataset( if engine is None: engine = _autodetect_engine(filename_or_obj) - backend_kwargs = backend_kwargs.copy() overwrite_encoded_chunks = backend_kwargs.pop( "overwrite_encoded_chunks", None ) open_backend_dataset = _get_backend_cls(engine, engines=ENGINES) - backend_ds = open_backend_dataset( filename_or_obj, **backend_kwargs, From 457a09cf02bea6571d761d975ed05c13df9557f8 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 30 Sep 2020 13:19:53 +0200 Subject: [PATCH 20/46] remove blank lines --- xarray/backends/apiv2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index e0c3010224f..b7587fc7c52 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -91,7 +91,6 @@ def dataset_from_backend_dataset ( return ds2 - def open_dataset( filename_or_obj, *, From 2803fe360bfebe307de21e54f70fcbf37c324a3d Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 30 Sep 2020 21:33:11 +0200 Subject: [PATCH 21/46] style --- xarray/backends/apiv2.py | 19 ++++++++++++------- xarray/backends/cfgrib_.py | 1 - 
xarray/backends/zarr.py | 1 - xarray/core/variable.py | 7 ++++++- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index b7587fc7c52..849459a338b 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -17,11 +17,7 @@ } -def filter_None(d): - return {k: v for k, v in d.items() if v is not None} - - -def dataset_from_backend_dataset ( +def dataset_from_backend_dataset( ds, filename_or_obj, engine, @@ -224,8 +220,17 @@ def open_dataset( backend_ds = open_backend_dataset( filename_or_obj, **backend_kwargs, - **filter_None(kwargs), + **{k: v for k, v in kwargs.items() if v is not None}, + ) + ds = dataset_from_backend_dataset( + backend_ds, + filename_or_obj, + engine, + chunks, + cache, + overwrite_encoded_chunks, + backend_kwargs, + **kwargs ) - ds = dataset_from_backend_dataset(backend_ds, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, backend_kwargs, **kwargs) return ds diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 2764b0cab5a..357290a0d9c 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -107,7 +107,6 @@ def open_backend_dataset_cfgrib( decode_coords = False decode_timedelta = False - store = CfGribDataStore( filename_or_obj, indexpath=indexpath, diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index ed7972e5390..ba8b3e86463 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -2,7 +2,6 @@ import numpy as np - from .. import coding, conventions from ..core import indexing from ..core.dataset import Dataset diff --git a/xarray/core/variable.py b/xarray/core/variable.py index d880dbc6737..c55e61cb816 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -51,7 +51,12 @@ ) NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( - (indexing.ExplicitlyIndexed, pd.Index,) + dask_array_type + cupy_array_type + ( + indexing.ExplicitlyIndexed, + pd.Index, + ) + + dask_array_type + + cupy_array_type ) # https://github.com/python/mypy/issues/224 BASIC_INDEXING_TYPES = integer_types + (slice,) # type: ignore From 08db0bdc5abfdc1fa0517dda8685829d0b7bc3af Mon Sep 17 00:00:00 2001 From: Alessandro Amici Date: Wed, 30 Sep 2020 21:42:19 +0200 Subject: [PATCH 22/46] Re-write error messages --- xarray/backends/cfgrib_.py | 4 ++-- xarray/backends/h5netcdf_.py | 4 ++-- xarray/backends/zarr.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 357290a0d9c..0b17b85c01d 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -96,8 +96,8 @@ def open_backend_dataset_cfgrib( ): if kwargs: warnings.warn( - "The following keywords are not supported by cfgrib " - "backend and they will bw ignored:%r" % kwargs + "The following keywords are not supported by the engine " + "and will be ignored: %r" % kwargs ) if not decode_cf: diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 980f8e25235..d157c5e96f1 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -346,8 +346,8 @@ def open_backend_dataset_h5necdf( ): if kwargs: warnings.warn( - "The following keywords are not supported by h5netcdf " - "and they will bw ignored:%r" % kwargs + "The following keywords are not supported by the engine " + "and will be ignored: %r" % kwargs ) if not decode_cf: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index ba8b3e86463..b65429854ee 100644 --- a/xarray/backends/zarr.py +++ 
b/xarray/backends/zarr.py @@ -702,8 +702,8 @@ def open_backend_dataset_zarr( ): if kwargs: warnings.warn( - "The following keywords are not supported by zarr" - "and they will bw ignored:%r" % kwargs + "The following keywords are not supported by the engine " + "and will be ignored: %r" % kwargs ) if not decode_cf: From 1f11845f6c617326606ab24a59e8699045be0b37 Mon Sep 17 00:00:00 2001 From: Alessandro Amici Date: Wed, 30 Sep 2020 21:48:17 +0200 Subject: [PATCH 23/46] Fix code style --- xarray/backends/api.py | 1 + xarray/backends/apiv2.py | 31 ++++++++++++------------------- xarray/backends/cfgrib_.py | 6 +++--- xarray/backends/zarr.py | 2 +- 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 2c6949987c7..e5031705837 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -434,6 +434,7 @@ def open_dataset( if os.environ.get("XARRAY_BACKEND_API", "v1") == "v2": kwargs = locals().copy() from . import apiv2 + if engine in apiv2.ENGINES: return apiv2.open_dataset(**kwargs) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 849459a338b..7086f8d499e 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -18,14 +18,14 @@ def dataset_from_backend_dataset( - ds, - filename_or_obj, - engine, - chunks, - cache, - overwrite_encoded_chunks, - backend_kwargs, - **kwargs, + ds, + filename_or_obj, + engine, + chunks, + cache, + overwrite_encoded_chunks, + backend_kwargs, + **kwargs, ): if not (isinstance(chunks, (int, dict)) or chunks is None): if chunks != "auto": @@ -45,12 +45,7 @@ def dataset_from_backend_dataset( else: mtime = None token = tokenize( - filename_or_obj, - mtime, - engine, - chunks, - **backend_kwargs, - **kwargs + filename_or_obj, mtime, engine, chunks, **backend_kwargs, **kwargs ) name_prefix = "open_dataset-%s" % token ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) @@ -94,7 +89,7 @@ def open_dataset( chunks=None, cache=None, backend_kwargs=None, - **kwargs + **kwargs, ): """Open and decode a dataset from a file or file-like object. 
@@ -212,9 +207,7 @@ def open_dataset( engine = _autodetect_engine(filename_or_obj) backend_kwargs = backend_kwargs.copy() - overwrite_encoded_chunks = backend_kwargs.pop( - "overwrite_encoded_chunks", None - ) + overwrite_encoded_chunks = backend_kwargs.pop("overwrite_encoded_chunks", None) open_backend_dataset = _get_backend_cls(engine, engines=ENGINES) backend_ds = open_backend_dataset( @@ -230,7 +223,7 @@ def open_dataset( cache, overwrite_encoded_chunks, backend_kwargs, - **kwargs + **kwargs, ) return ds diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 0b17b85c01d..5e201a90cb9 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -86,12 +86,12 @@ def open_backend_dataset_cfgrib( use_cftime=None, decode_timedelta=None, lock=None, - indexpath='{path}.{short_hash}.idx', + indexpath="{path}.{short_hash}.idx", filter_by_keys={}, read_keys=[], - encode_cf=('parameter', 'time', 'geography', 'vertical'), + encode_cf=("parameter", "time", "geography", "vertical"), squeeze=True, - time_dims=('time', 'step'), + time_dims=("time", "step"), **kwargs, ): if kwargs: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index b65429854ee..708b88288da 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -693,7 +693,7 @@ def open_backend_dataset_zarr( use_cftime=None, decode_timedelta=None, group=None, - mode='r', + mode="r", synchronizer=None, consolidated=False, consolidate_on_close=False, From 93303b14b40e8d869504620bfee12e4318614206 Mon Sep 17 00:00:00 2001 From: Alessandro Amici Date: Wed, 30 Sep 2020 21:49:52 +0200 Subject: [PATCH 24/46] Fix code style --- xarray/backends/apiv2.py | 4 ++-- xarray/backends/cfgrib_.py | 3 ++- xarray/backends/h5netcdf_.py | 2 +- xarray/backends/zarr.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 7086f8d499e..820cc7f105c 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -2,13 +2,13 @@ from ..core.dataset import Dataset from ..core.utils import close_on_error, is_grib_path, is_remote_uri +from . import cfgrib_, h5netcdf_, zarr from .api import ( + _autodetect_engine, _get_backend_cls, _normalize_path, _protect_dataset_variables_inplace, - _autodetect_engine, ) -from . import h5netcdf_, zarr, cfgrib_ ENGINES = { "h5netcdf": h5netcdf_.open_backend_dataset_h5necdf, diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 5e201a90cb9..9fd2d92f2c8 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -1,10 +1,11 @@ import warnings + import numpy as np from .. import conventions from ..core import indexing from ..core.dataset import Dataset -from ..core.utils import close_on_error, Frozen, FrozenDict +from ..core.utils import Frozen, FrozenDict, close_on_error from ..core.variable import Variable from .common import AbstractDataStore, BackendArray from .locks import SerializableLock, ensure_lock diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index d157c5e96f1..5e83754575d 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -7,7 +7,7 @@ from .. 
import conventions from ..core import indexing from ..core.dataset import Dataset -from ..core.utils import close_on_error, FrozenDict, is_remote_uri +from ..core.utils import FrozenDict, close_on_error, is_remote_uri from ..core.variable import Variable from .common import WritableCFDataStore, find_root_and_group from .file_manager import CachingFileManager, DummyFileManager diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 708b88288da..0cef8aba8fc 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -6,7 +6,7 @@ from ..core import indexing from ..core.dataset import Dataset from ..core.pycompat import integer_types -from ..core.utils import close_on_error, FrozenDict, HiddenKeyDict +from ..core.utils import FrozenDict, HiddenKeyDict, close_on_error from ..core.variable import Variable from .common import AbstractWritableDataStore, BackendArray, _encode_variable_name From bc2fe008d171b742becd2d750914b958de482f51 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 30 Sep 2020 21:54:33 +0200 Subject: [PATCH 25/46] remove unused import --- xarray/backends/apiv2.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 820cc7f105c..f30eb240625 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -1,7 +1,6 @@ import os -from ..core.dataset import Dataset -from ..core.utils import close_on_error, is_grib_path, is_remote_uri +from ..core.utils import is_remote_uri from . import cfgrib_, h5netcdf_, zarr from .api import ( _autodetect_engine, From 102b00adb290d1b4fb5714977ebbbad26332f563 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 1 Oct 2020 08:52:54 +0200 Subject: [PATCH 26/46] zarr chunking refactor draft not working --- xarray/backends/apiv2.py | 77 +++++++++++++++++++----------------- xarray/backends/cfgrib_.py | 2 +- xarray/backends/h5netcdf_.py | 2 +- xarray/backends/zarr.py | 5 ++- 4 files changed, 45 insertions(+), 41 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index f30eb240625..f7318029045 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -15,6 +15,23 @@ "cfgrib": cfgrib_.open_backend_dataset_cfgrib, } +def get_mtime(filename_or_obj): + # if passed an actual file path, augment the token with + # the file modification time + if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj): + mtime = os.path.getmtime(filename_or_obj) + else: + mtime = None + return mtime + + +def add_source(ds, filename_or_obj): + if "source" not in ds.encoding: + if isinstance(filename_or_obj, str): + ds.encoding["source"] = filename_or_obj + return ds + + def dataset_from_backend_dataset( ds, @@ -26,59 +43,45 @@ def dataset_from_backend_dataset( backend_kwargs, **kwargs, ): - if not (isinstance(chunks, (int, dict)) or chunks is None): - if chunks != "auto": - raise ValueError( - "chunks must be an int, dict, 'auto', or None. " - "Instead found %s. " % chunks - ) - + if not (isinstance(chunks, (int, dict)) or (chunks is None) or (chunks == "auto")): + raise ValueError( + "chunks must be an int, dict, 'auto', or None. " + "Instead found %s. 
" % chunks + ) _protect_dataset_variables_inplace(ds, cache) - if chunks is not None and engine != "zarr": - from dask.base import tokenize - # if passed an actual file path, augment the token with - # the file modification time - if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj): - mtime = os.path.getmtime(filename_or_obj) - else: - mtime = None + if chunks is None: + return add_source(ds, filename_or_obj) + + file_obj = ds._file_obj + if engine != "zarr": + from dask.base import tokenize + mtime = get_mtime(filename_or_obj) token = tokenize( - filename_or_obj, mtime, engine, chunks, **backend_kwargs, **kwargs + filename_or_obj, + mtime, + engine, + chunks, + **backend_kwargs, + **kwargs ) name_prefix = "open_dataset-%s" % token - ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) - - elif engine == "zarr": - + ds = ds.chunk(chunks, name_prefix=name_prefix, token=token) + else: if chunks == "auto": try: import dask.array # noqa except ImportError: chunks = None - - if chunks is None: - return ds - if isinstance(chunks, int): chunks = dict.fromkeys(ds.dims, chunks) - variables = { k: zarr.ZarrStore.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) for k, v in ds.variables.items() } - ds2 = ds._replace(variables) - - else: - ds2 = ds - ds2._file_obj = ds._file_obj - - # Ensure source filename always stored in dataset object (GH issue #2550) - if "source" not in ds.encoding: - if isinstance(filename_or_obj, str): - ds.encoding["source"] = filename_or_obj - - return ds2 + ds = ds._replace(variables) + ds._file_obj = file_obj + return add_source(ds, filename_or_obj) def open_dataset( diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 9fd2d92f2c8..7c00b880d76 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -121,7 +121,7 @@ def open_backend_dataset_cfgrib( with close_on_error(store): vars, attrs = store.load() - file_obj = store + # file_obj = store encoding = store.get_encoding() vars, attrs, coord_names = conventions.decode_cf_variables( diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 5e83754575d..3b49f137138 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -368,7 +368,7 @@ def open_backend_dataset_h5necdf( with close_on_error(store): vars, attrs = store.load() - file_obj = store + # file_obj = store encoding = store.get_encoding() vars, attrs, coord_names = conventions.decode_cf_variables( diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 0cef8aba8fc..0f56310fde2 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -369,7 +369,6 @@ def get_chunk(cls, name, var, chunks): # Coordinate labels aren't chunked if var.ndim == 1 and var.dims[0] == name: return chunk_spec - if chunks == "auto": return chunk_spec @@ -378,7 +377,9 @@ def get_chunk(cls, name, var, chunks): spec = chunks[dim] if isinstance(spec, int): spec = (spec,) + if isinstance(spec, (tuple, list)) and chunk_spec[dim]: + if len(spec) > 1: if any(s % chunk_spec[dim] for s in spec): warnings.warn( "Specified Dask chunks %r would " @@ -725,7 +726,7 @@ def open_backend_dataset_zarr( with close_on_error(store): vars, attrs = store.load() - file_obj = store + #file_obj = store encoding = store.get_encoding() vars, attrs, coord_names = conventions.decode_cf_variables( From f47605af5e6c6546693377a671ece337eef7a594 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Fri, 2 Oct 2020 11:11:47 +0200 Subject: [PATCH 27/46] refactor dataset_from_backend_dataset 
--- xarray/backends/apiv2.py | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index f7318029045..8cf617ccf6a 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -25,16 +25,15 @@ def get_mtime(filename_or_obj): return mtime -def add_source(ds, filename_or_obj): +def set_source(ds, filename_or_obj): if "source" not in ds.encoding: if isinstance(filename_or_obj, str): ds.encoding["source"] = filename_or_obj return ds - def dataset_from_backend_dataset( - ds, + backend_ds, filename_or_obj, engine, chunks, @@ -48,25 +47,19 @@ def dataset_from_backend_dataset( "chunks must be an int, dict, 'auto', or None. " "Instead found %s. " % chunks ) - _protect_dataset_variables_inplace(ds, cache) + _protect_dataset_variables_inplace(backend_ds, cache) if chunks is None: - return add_source(ds, filename_or_obj) + return set_source(backend_ds, filename_or_obj) - file_obj = ds._file_obj if engine != "zarr": from dask.base import tokenize mtime = get_mtime(filename_or_obj) token = tokenize( - filename_or_obj, - mtime, - engine, - chunks, - **backend_kwargs, - **kwargs + filename_or_obj, mtime, engine, chunks, **backend_kwargs, **kwargs ) name_prefix = "open_dataset-%s" % token - ds = ds.chunk(chunks, name_prefix=name_prefix, token=token) + ds = backend_ds.chunk(chunks, name_prefix=name_prefix, token=token) else: if chunks == "auto": try: @@ -74,14 +67,14 @@ def dataset_from_backend_dataset( except ImportError: chunks = None if isinstance(chunks, int): - chunks = dict.fromkeys(ds.dims, chunks) + chunks = dict.fromkeys(backend_ds.dims, chunks) variables = { k: zarr.ZarrStore.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) - for k, v in ds.variables.items() + for k, v in backend_ds.variables.items() } - ds = ds._replace(variables) - ds._file_obj = file_obj - return add_source(ds, filename_or_obj) + ds = backend_ds._replace(variables) + ds._file_obj = backend_ds._file_obj + return set_source(ds, filename_or_obj) def open_dataset( From b632b054ed6376ca5a4315553d6c6ae45b237ee1 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Fri, 2 Oct 2020 11:12:52 +0200 Subject: [PATCH 28/46] fix wrong commit --- xarray/backends/cfgrib_.py | 2 +- xarray/backends/h5netcdf_.py | 2 +- xarray/backends/zarr.py | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 7c00b880d76..9fd2d92f2c8 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -121,7 +121,7 @@ def open_backend_dataset_cfgrib( with close_on_error(store): vars, attrs = store.load() - # file_obj = store + file_obj = store encoding = store.get_encoding() vars, attrs, coord_names = conventions.decode_cf_variables( diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 3b49f137138..5e83754575d 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -368,7 +368,7 @@ def open_backend_dataset_h5necdf( with close_on_error(store): vars, attrs = store.load() - # file_obj = store + file_obj = store encoding = store.get_encoding() vars, attrs, coord_names = conventions.decode_cf_variables( diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 0f56310fde2..bd122f8d89d 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -377,9 +377,7 @@ def get_chunk(cls, name, var, chunks): spec = chunks[dim] if isinstance(spec, int): spec = (spec,) - if isinstance(spec, (tuple, list)) and 
chunk_spec[dim]: - if len(spec) > 1: if any(s % chunk_spec[dim] for s in spec): warnings.warn( "Specified Dask chunks %r would " @@ -726,7 +724,7 @@ def open_backend_dataset_zarr( with close_on_error(store): vars, attrs = store.load() - #file_obj = store + file_obj = store encoding = store.get_encoding() vars, attrs, coord_names = conventions.decode_cf_variables( From b437f025dd3ec53eca84b5711be2a5033fb08199 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Fri, 2 Oct 2020 14:19:15 +0200 Subject: [PATCH 29/46] add get_chunk in apiv2 --- xarray/backends/apiv2.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 8cf617ccf6a..2c6b4a5b7b6 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -1,4 +1,5 @@ import os +import warnings from ..core.utils import is_remote_uri from . import cfgrib_, h5netcdf_, zarr @@ -15,6 +16,36 @@ "cfgrib": cfgrib_.open_backend_dataset_cfgrib, } + +def check_chunks_compatibility(chunks, chunk_spec): + for dim in chunk_spec: + if dim in chunks: + spec = chunks[dim] + if isinstance(spec, int): + spec = (spec,) + if any(s % chunk_spec[dim] for s in spec): + warnings.warn( + "Specified Dask chunks %r would " + "separate on disks chunk shape %r for " + "dimension %r. This could " + "degrades performance. Consider " + "rechunking after loading instead." + % (chunks[dim], chunk_spec[dim], dim), + stacklevel=2, + ) + + +def get_chunk(name, var, chunks): + chunk_spec = dict(zip(var.dims, var.encoding.get("chunks", {}))) + if chunks == 'auto': + return dict(zip(var.dims, var.encoding.get("chunks", {}))) + # Coordinate labels aren't chunked + if var.ndim == 1 and var.dims[0] == name: + return chunk_spec + check_chunks_compatibility(chunks, chunk_spec) + return chunk_spec + + def get_mtime(filename_or_obj): # if passed an actual file path, augment the token with # the file modification time From d69414666c9e262233daf7efb7dde243941e67b4 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 8 Oct 2020 07:03:03 +0200 Subject: [PATCH 30/46] replace warning with ValueError for not supported kwargs in backends --- xarray/backends/cfgrib_.py | 2 +- xarray/backends/h5netcdf_.py | 2 +- xarray/backends/zarr.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 9fd2d92f2c8..e2e994bbc53 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -96,7 +96,7 @@ def open_backend_dataset_cfgrib( **kwargs, ): if kwargs: - warnings.warn( + raise ValueError( "The following keywords are not supported by the engine " "and will be ignored: %r" % kwargs ) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 5e83754575d..4fbfc583298 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -345,7 +345,7 @@ def open_backend_dataset_h5necdf( **kwargs, ): if kwargs: - warnings.warn( + raise ValueError( "The following keywords are not supported by the engine " "and will be ignored: %r" % kwargs ) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 0cef8aba8fc..70987490548 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -701,7 +701,7 @@ def open_backend_dataset_zarr( **kwargs, ): if kwargs: - warnings.warn( + raise ValueError( "The following keywords are not supported by the engine " "and will be ignored: %r" % kwargs ) From 56f4d3f0ad8ade78d850f31da5342fbbe41c25a3 Mon Sep 17 00:00:00 2001 From: Aureliana 
Barghini Date: Thu, 8 Oct 2020 07:25:09 +0200 Subject: [PATCH 31/46] change zarr.ZarrStore.get_chunk into a static method --- xarray/backends/zarr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 70987490548..1c0f10dfd16 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -362,8 +362,8 @@ def encode_variable(self, variable): def encode_attribute(self, a): return encode_zarr_attr_value(a) - @classmethod - def get_chunk(cls, name, var, chunks): + @staticmethod + def get_chunk(name, var, chunks): chunk_spec = dict(zip(var.dims, var.encoding.get("chunks"))) # Coordinate labels aren't chunked From df23b1825a2762e854eb4b8fa6ebcadb33a54876 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 8 Oct 2020 08:09:39 +0200 Subject: [PATCH 32/46] group `backend_kwargs` and `kwargs` into the `extra_tokens` argument of `apiv2.dataset_from_backend_dataset` --- xarray/backends/apiv2.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index f30eb240625..4c58995947e 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -23,8 +23,7 @@ def dataset_from_backend_dataset( chunks, cache, overwrite_encoded_chunks, - backend_kwargs, - **kwargs, + extra_tokens, ): if not (isinstance(chunks, (int, dict)) or chunks is None): if chunks != "auto": raise ValueError( "chunks must be an int, dict, 'auto', or None. " "Instead found %s. " % chunks ) @@ -44,7 +43,7 @@ def dataset_from_backend_dataset( else: mtime = None token = tokenize( - filename_or_obj, mtime, engine, chunks, **backend_kwargs, **kwargs + filename_or_obj, mtime, engine, chunks, **extra_tokens ) name_prefix = "open_dataset-%s" % token ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) @@ -221,8 +220,7 @@ def open_dataset( chunks, cache, overwrite_encoded_chunks, - backend_kwargs, - **kwargs, + {**backend_kwargs, **kwargs}, ) return ds From a04e6acb61fb5cecc582fdf5105e3425790bd4ec Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 8 Oct 2020 08:14:04 +0200 Subject: [PATCH 33/46] remove kwargs from the open_backend_dataset_${engine} signatures and the related error message --- xarray/backends/cfgrib_.py | 8 -------- xarray/backends/h5netcdf_.py | 7 ------- xarray/backends/zarr.py | 6 ------ 3 files changed, 21 deletions(-) diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index e2e994bbc53..cfff1b146e3 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -1,5 +1,3 @@ -import warnings - import numpy as np from ..
import conventions @@ -93,13 +91,7 @@ def open_backend_dataset_cfgrib( encode_cf=("parameter", "time", "geography", "vertical"), squeeze=True, time_dims=("time", "step"), - **kwargs, ): - if kwargs: - raise ValueError( - "The following keywords are not supported by the engine " - "and will be ignored: %r" % kwargs - ) if not decode_cf: mask_and_scale = False diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 4fbfc583298..c4d23741a77 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -1,5 +1,4 @@ import functools -import warnings from distutils.version import LooseVersion import numpy as np @@ -342,13 +341,7 @@ def open_backend_dataset_h5necdf( lock=None, invalid_netcdf=None, phony_dims=None, - **kwargs, ): - if kwargs: - raise ValueError( - "The following keywords are not supported by the engine " - "and will be ignored: %r" % kwargs - ) if not decode_cf: mask_and_scale = False diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 1c0f10dfd16..0f60a436d80 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -698,13 +698,7 @@ def open_backend_dataset_zarr( consolidated=False, consolidate_on_close=False, chunk_store=None, - **kwargs, ): - if kwargs: - raise ValueError( - "The following keywords are not supported by the engine " - "and will be ignored: %r" % kwargs - ) if not decode_cf: mask_and_scale = False From de29a4cc49904323eab99637def7b50a4beb203b Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 8 Oct 2020 10:46:56 +0200 Subject: [PATCH 34/46] black --- xarray/backends/apiv2.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 4c58995947e..27b0b3f7ee2 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -17,13 +17,7 @@ def dataset_from_backend_dataset( - ds, - filename_or_obj, - engine, - chunks, - cache, - overwrite_encoded_chunks, - extra_tokens, + ds, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, extra_tokens, ): if not (isinstance(chunks, (int, dict)) or chunks is None): if chunks != "auto": @@ -42,9 +36,7 @@ def dataset_from_backend_dataset( mtime = os.path.getmtime(filename_or_obj) else: mtime = None - token = tokenize( - filename_or_obj, mtime, engine, chunks, **extra_tokens - ) + token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens) name_prefix = "open_dataset-%s" % token ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) From cf77dc3ee07964ccd89e1733aa7ab28775c718d6 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 8 Oct 2020 12:16:48 +0200 Subject: [PATCH 35/46] remove not used apiv2.set_source --- xarray/backends/apiv2.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 40b33997462..660543a1bb6 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -35,15 +35,13 @@ def check_chunks_compatibility(chunks, chunk_spec): ) -def get_chunk(name, var, chunks): +def get_chunk(var, chunks): chunk_spec = dict(zip(var.dims, var.encoding.get("chunks", {}))) if chunks == 'auto': - return dict(zip(var.dims, var.encoding.get("chunks", {}))) - # Coordinate labels aren't chunked - if var.ndim == 1 and var.dims[0] == name: - return chunk_spec - check_chunks_compatibility(chunks, chunk_spec) - return chunk_spec + chunks = dict(zip(var.dims, var.encoding.get("chunks", {}))) + if chunks is not None: + 
check_chunks_compatibility(chunks, chunk_spec) + return chunks def get_mtime(filename_or_obj): @@ -56,13 +54,6 @@ def get_mtime(filename_or_obj): return mtime -def set_source(ds, filename_or_obj): - if "source" not in ds.encoding: - if isinstance(filename_or_obj, str): - ds.encoding["source"] = filename_or_obj - return ds - - def dataset_from_backend_dataset( backend_ds, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, extra_tokens, ): @@ -72,12 +63,16 @@ def dataset_from_backend_dataset( "chunks must be an int, dict, 'auto', or None. " "Instead found %s. " % chunks ) + if chunks == "auto": + try: + import dask.array # noqa + except ImportError: + chunks = None _protect_dataset_variables_inplace(backend_ds, cache) if chunks is None: - return set_source(backend_ds, filename_or_obj) - - if engine != "zarr": + ds = backend_ds + elif engine != "zarr": from dask.base import tokenize mtime = get_mtime(filename_or_obj) token = tokenize( From c1b763a9a34b78a4b886e451978cf783bfa494fb Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Fri, 9 Oct 2020 11:36:10 +0200 Subject: [PATCH 36/46] remove `auto` as chunk value --- xarray/backends/apiv2.py | 64 ++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 660543a1bb6..42bfb7f6051 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -17,30 +17,30 @@ } -def check_chunks_compatibility(chunks, chunk_spec): - for dim in chunk_spec: - if dim in chunks: - spec = chunks[dim] - if isinstance(spec, int): - spec = (spec,) - if any(s % chunk_spec[dim] for s in spec): - warnings.warn( - "Specified Dask chunks %r would " - "separate on disks chunk shape %r for " - "dimension %r. This could " - "degrades performance. Consider " - "rechunking after loading instead." - % (chunks[dim], chunk_spec[dim], dim), - stacklevel=2, - ) +def check_chunks_compatibility(dim, chunks, chunk_spec): + spec = chunks[dim] + if isinstance(spec, int): + spec = (spec,) + if any(s % chunk_spec[dim] for s in spec): + warnings.warn( + "Specified Dask chunks %r would " + "separate on disks chunk shape %r for " + "dimension %r. This could " + "degrades performance. Consider " + "rechunking after loading instead." + % (chunks[dim], chunk_spec[dim], dim), + stacklevel=2, + ) def get_chunk(var, chunks): chunk_spec = dict(zip(var.dims, var.encoding.get("chunks", {}))) - if chunks == 'auto': - chunks = dict(zip(var.dims, var.encoding.get("chunks", {}))) if chunks is not None: - check_chunks_compatibility(chunks, chunk_spec) + for dim in chunk_spec: + if dim in chunks: + check_chunks_compatibility(chunks, chunk_spec) + else: + chunks[dim] = chunk_spec[dim] return chunks @@ -58,16 +58,10 @@ def dataset_from_backend_dataset( backend_ds, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, extra_tokens, ): if not (isinstance(chunks, (int, dict)) or chunks is None): - if chunks != "auto": - raise ValueError( - "chunks must be an int, dict, 'auto', or None. " - "Instead found %s. " % chunks - ) - if chunks == "auto": - try: - import dask.array # noqa - except ImportError: - chunks = None + raise ValueError( + "chunks must be an int, dict, 'auto', or None. " + "Instead found %s. 
" % chunks + ) _protect_dataset_variables_inplace(backend_ds, cache) if chunks is None: @@ -81,11 +75,7 @@ def dataset_from_backend_dataset( name_prefix = "open_dataset-%s" % token ds = backend_ds.chunk(chunks, name_prefix=name_prefix, token=token) else: - if chunks == "auto": - try: - import dask.array # noqa - except ImportError: - chunks = None + if isinstance(chunks, int): chunks = dict.fromkeys(backend_ds.dims, chunks) variables = { @@ -94,7 +84,11 @@ def dataset_from_backend_dataset( } ds = backend_ds._replace(variables) ds._file_obj = backend_ds._file_obj - return set_source(ds, filename_or_obj) + + if "source" not in ds.encoding: + if isinstance(filename_or_obj, str): + ds.encoding["source"] = filename_or_obj + return ds def open_dataset( From 5eb0dafbd05394887ce389bedaf177db8f7388ce Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 29 Oct 2020 06:52:30 +0100 Subject: [PATCH 37/46] - align with api.py --- xarray/backends/apiv2.py | 56 ++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 42bfb7f6051..2d2d61cc714 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -2,6 +2,7 @@ import warnings from ..core.utils import is_remote_uri +from ..core.dataset import _maybe_chunk from . import cfgrib_, h5netcdf_, zarr from .api import ( _autodetect_engine, @@ -33,15 +34,20 @@ def check_chunks_compatibility(dim, chunks, chunk_spec): ) -def get_chunk(var, chunks): - chunk_spec = dict(zip(var.dims, var.encoding.get("chunks", {}))) +def get_chunk(name, var, chunks): + preferred_chunks = dict(zip(var.dims, var.encoding.get("chunks", {}))) + if var.ndim == 1 and var.dims[0] == name: + return preferred_chunks + + output_chunks = {} if chunks is not None: - for dim in chunk_spec: + for dim in preferred_chunks: if dim in chunks: - check_chunks_compatibility(chunks, chunk_spec) + check_chunks_compatibility(dim, chunks, preferred_chunks) + output_chunks[dim] = chunks[dim] else: - chunks[dim] = chunk_spec[dim] - return chunks + output_chunks[dim] = preferred_chunks[dim] + return output_chunks def get_mtime(filename_or_obj): @@ -55,13 +61,14 @@ def get_mtime(filename_or_obj): def dataset_from_backend_dataset( - backend_ds, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, extra_tokens, + backend_ds, + filename_or_obj, + engine, + chunks, + cache, + overwrite_encoded_chunks, + extra_tokens, ): - if not (isinstance(chunks, (int, dict)) or chunks is None): - raise ValueError( - "chunks must be an int, dict, 'auto', or None. " - "Instead found %s. 
" % chunks - ) _protect_dataset_variables_inplace(backend_ds, cache) if chunks is None: @@ -74,20 +81,28 @@ def dataset_from_backend_dataset( ) name_prefix = "open_dataset-%s" % token ds = backend_ds.chunk(chunks, name_prefix=name_prefix, token=token) - else: + else: if isinstance(chunks, int): chunks = dict.fromkeys(backend_ds.dims, chunks) + variables = { - k: zarr.ZarrStore.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) - for k, v in backend_ds.variables.items() + name: _maybe_chunk( + name, + var, + get_chunk(name, var, chunks), + overwrite_encoded_chunks=overwrite_encoded_chunks, + ) + for name, var in backend_ds.variables.items() } ds = backend_ds._replace(variables) + ds._file_obj = backend_ds._file_obj if "source" not in ds.encoding: if isinstance(filename_or_obj, str): - ds.encoding["source"] = filename_or_obj + backend_ds.encoding["source"] = filename_or_obj + return ds @@ -204,6 +219,15 @@ def open_dataset( open_mfdataset """ + if chunks == "auto": + chunks = {} + + if not (isinstance(chunks, (int, dict)) or (chunks is None)): + raise ValueError( + "chunks must be an int, dict, 'auto', or None. " + "Instead found %s. " % chunks + ) + if cache is None: cache = chunks is None From 4fc1d8da6b8130048f7b923cbcbabb826e0831f5 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 29 Oct 2020 11:37:30 +0100 Subject: [PATCH 38/46] unify backends chunking --- xarray/backends/apiv2.py | 7 +++---- xarray/backends/zarr.py | 45 ---------------------------------------- 2 files changed, 3 insertions(+), 49 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 2d2d61cc714..3319746a7bd 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -73,16 +73,13 @@ def dataset_from_backend_dataset( _protect_dataset_variables_inplace(backend_ds, cache) if chunks is None: ds = backend_ds - elif engine != "zarr": + else: from dask.base import tokenize mtime = get_mtime(filename_or_obj) token = tokenize( filename_or_obj, mtime, engine, chunks, **extra_tokens ) name_prefix = "open_dataset-%s" % token - ds = backend_ds.chunk(chunks, name_prefix=name_prefix, token=token) - - else: if isinstance(chunks, int): chunks = dict.fromkeys(backend_ds.dims, chunks) @@ -92,6 +89,8 @@ def dataset_from_backend_dataset( var, get_chunk(name, var, chunks), overwrite_encoded_chunks=overwrite_encoded_chunks, + name_prefix=name_prefix, + token=token, ) for name, var in backend_ds.variables.items() } diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 33dd7fc5c90..54cf40b4c17 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -362,51 +362,6 @@ def encode_variable(self, variable): def encode_attribute(self, a): return encode_zarr_attr_value(a) - @staticmethod - def get_chunk(name, var, chunks): - chunk_spec = dict(zip(var.dims, var.encoding.get("chunks"))) - - # Coordinate labels aren't chunked - if var.ndim == 1 and var.dims[0] == name: - return chunk_spec - if chunks == "auto": - return chunk_spec - - for dim in var.dims: - if dim in chunks: - spec = chunks[dim] - if isinstance(spec, int): - spec = (spec,) - if isinstance(spec, (tuple, list)) and chunk_spec[dim]: - if any(s % chunk_spec[dim] for s in spec): - warnings.warn( - "Specified Dask chunks %r would " - "separate Zarr chunk shape %r for " - "dimension %r. This significantly " - "degrades performance. Consider " - "rechunking after loading instead." 
- % (chunks[dim], chunk_spec[dim], dim), - stacklevel=2, - ) - chunk_spec[dim] = chunks[dim] - return chunk_spec - - @classmethod - def maybe_chunk(cls, name, var, chunks, overwrite_encoded_chunks): - chunk_spec = cls.get_chunk(name, var, chunks) - - if (var.ndim > 0) and (chunk_spec is not None): - from dask.base import tokenize - - # does this cause any data to be read? - token2 = tokenize(name, var._data, chunks) - name2 = f"xarray-{name}-{token2}" - var = var.chunk(chunk_spec, name=name2, lock=None) - if overwrite_encoded_chunks and var.chunks is not None: - var.encoding["chunks"] = tuple(x[0] for x in var.chunks) - return var - else: - return var def store( self, From 1cf69682346ea79eb4d528654dfdf393c990de16 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 29 Oct 2020 11:53:41 +0100 Subject: [PATCH 39/46] move get_chunk funtion in dataset --- xarray/backends/api.py | 4 ++-- xarray/backends/apiv2.py | 41 ++-------------------------------------- xarray/core/dataset.py | 35 ++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 41 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 8dd431c5f62..90d83284632 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -25,7 +25,7 @@ combine_by_coords, ) from ..core.dataarray import DataArray -from ..core.dataset import Dataset, _maybe_chunk +from ..core.dataset import Dataset, _maybe_chunk, _get_chunk from ..core.utils import close_on_error, is_grib_path, is_remote_uri from .common import AbstractDataStore, ArrayWriter from .locks import _get_scheduler @@ -535,7 +535,7 @@ def maybe_decode_store(store, chunks): k: _maybe_chunk( k, v, - store.get_chunk(k, v, chunks), + _get_chunk(k, v, chunks), overwrite_encoded_chunks=overwrite_encoded_chunks, ) for k, v in ds.variables.items() diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 3319746a7bd..8dd7b6556f3 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -2,7 +2,7 @@ import warnings from ..core.utils import is_remote_uri -from ..core.dataset import _maybe_chunk +from ..core.dataset import _maybe_chunk, _get_chunk from . import cfgrib_, h5netcdf_, zarr from .api import ( _autodetect_engine, @@ -18,37 +18,6 @@ } -def check_chunks_compatibility(dim, chunks, chunk_spec): - spec = chunks[dim] - if isinstance(spec, int): - spec = (spec,) - if any(s % chunk_spec[dim] for s in spec): - warnings.warn( - "Specified Dask chunks %r would " - "separate on disks chunk shape %r for " - "dimension %r. This could " - "degrades performance. Consider " - "rechunking after loading instead." 
- % (chunks[dim], chunk_spec[dim], dim), - stacklevel=2, - ) - - -def get_chunk(name, var, chunks): - preferred_chunks = dict(zip(var.dims, var.encoding.get("chunks", {}))) - if var.ndim == 1 and var.dims[0] == name: - return preferred_chunks - - output_chunks = {} - if chunks is not None: - for dim in preferred_chunks: - if dim in chunks: - check_chunks_compatibility(dim, chunks, preferred_chunks) - output_chunks[dim] = chunks[dim] - else: - output_chunks[dim] = preferred_chunks[dim] - return output_chunks - def get_mtime(filename_or_obj): # if passed an actual file path, augment the token with @@ -87,7 +56,7 @@ def dataset_from_backend_dataset( name: _maybe_chunk( name, var, - get_chunk(name, var, chunks), + _get_chunk(name, var, chunks), overwrite_encoded_chunks=overwrite_encoded_chunks, name_prefix=name_prefix, token=token, @@ -221,12 +190,6 @@ def open_dataset( if chunks == "auto": chunks = {} - if not (isinstance(chunks, (int, dict)) or (chunks is None)): - raise ValueError( - "chunks must be an int, dict, 'auto', or None. " - "Instead found %s. " % chunks - ) - if cache is None: cache = chunks is None diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1ceb5623abd..8617b02b2da 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -359,6 +359,41 @@ def _assert_empty(args: tuple, msg: str = "%s") -> None: raise ValueError(msg % args) +def _check_chunks_compatibility(dim, chunks, chunk_spec): + spec = chunks[dim] + if isinstance(spec, int): + spec = (spec,) + if any(s % chunk_spec[dim] for s in spec): + warnings.warn( + "Specified Dask chunks %r would " + "separate on disks chunk shape %r for " + "dimension %r. This could " + "degrades performance. Consider " + "rechunking after loading instead." + % (chunks[dim], chunk_spec[dim], dim), + stacklevel=2, + ) + + +def _get_chunk(name, var, chunks): + if chunks == "auto": + chunks = {} + + preferred_chunks = dict(zip(var.dims, var.encoding.get("chunks", {}))) + if var.ndim == 1 and var.dims[0] == name: + return preferred_chunks + + output_chunks = {} + if chunks is not None: + for dim in preferred_chunks: + if dim in chunks: + _check_chunks_compatibility(dim, chunks, preferred_chunks) + output_chunks[dim] = chunks[dim] + else: + output_chunks[dim] = preferred_chunks[dim] + return output_chunks + + def _maybe_chunk( name, var, From a780efc093c0cc2537b8bb8201bd06e22f7bb9d7 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 29 Oct 2020 11:53:41 +0100 Subject: [PATCH 40/46] move get_chunk funtion in dataset --- xarray/backends/api.py | 4 ++-- xarray/backends/apiv2.py | 47 ++++------------------------------------ xarray/backends/zarr.py | 1 - xarray/core/dataset.py | 34 +++++++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 46 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 8dd431c5f62..90d83284632 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -25,7 +25,7 @@ combine_by_coords, ) from ..core.dataarray import DataArray -from ..core.dataset import Dataset, _maybe_chunk +from ..core.dataset import Dataset, _maybe_chunk, _get_chunk from ..core.utils import close_on_error, is_grib_path, is_remote_uri from .common import AbstractDataStore, ArrayWriter from .locks import _get_scheduler @@ -535,7 +535,7 @@ def maybe_decode_store(store, chunks): k: _maybe_chunk( k, v, - store.get_chunk(k, v, chunks), + _get_chunk(k, v, chunks), overwrite_encoded_chunks=overwrite_encoded_chunks, ) for k, v in ds.variables.items() diff --git a/xarray/backends/apiv2.py 
b/xarray/backends/apiv2.py index 3319746a7bd..fce782c8dc5 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -1,8 +1,8 @@ import os import warnings +from ..core.dataset import _get_chunk, _maybe_chunk from ..core.utils import is_remote_uri -from ..core.dataset import _maybe_chunk from . import cfgrib_, h5netcdf_, zarr from .api import ( _autodetect_engine, @@ -18,38 +18,6 @@ } -def check_chunks_compatibility(dim, chunks, chunk_spec): - spec = chunks[dim] - if isinstance(spec, int): - spec = (spec,) - if any(s % chunk_spec[dim] for s in spec): - warnings.warn( - "Specified Dask chunks %r would " - "separate on disks chunk shape %r for " - "dimension %r. This could " - "degrades performance. Consider " - "rechunking after loading instead." - % (chunks[dim], chunk_spec[dim], dim), - stacklevel=2, - ) - - -def get_chunk(name, var, chunks): - preferred_chunks = dict(zip(var.dims, var.encoding.get("chunks", {}))) - if var.ndim == 1 and var.dims[0] == name: - return preferred_chunks - - output_chunks = {} - if chunks is not None: - for dim in preferred_chunks: - if dim in chunks: - check_chunks_compatibility(dim, chunks, preferred_chunks) - output_chunks[dim] = chunks[dim] - else: - output_chunks[dim] = preferred_chunks[dim] - return output_chunks - - def get_mtime(filename_or_obj): # if passed an actual file path, augment the token with # the file modification time @@ -75,10 +43,9 @@ def dataset_from_backend_dataset( ds = backend_ds else: from dask.base import tokenize + mtime = get_mtime(filename_or_obj) - token = tokenize( - filename_or_obj, mtime, engine, chunks, **extra_tokens - ) + token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens) name_prefix = "open_dataset-%s" % token if isinstance(chunks, int): chunks = dict.fromkeys(backend_ds.dims, chunks) @@ -87,7 +54,7 @@ def dataset_from_backend_dataset( name: _maybe_chunk( name, var, - get_chunk(name, var, chunks), + _get_chunk(name, var, chunks), overwrite_encoded_chunks=overwrite_encoded_chunks, name_prefix=name_prefix, token=token, @@ -221,12 +188,6 @@ def open_dataset( if chunks == "auto": chunks = {} - if not (isinstance(chunks, (int, dict)) or (chunks is None)): - raise ValueError( - "chunks must be an int, dict, 'auto', or None. " - "Instead found %s. " % chunks - ) - if cache is None: cache = chunks is None diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 54cf40b4c17..2145685b2b2 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -362,7 +362,6 @@ def encode_variable(self, variable): def encode_attribute(self, a): return encode_zarr_attr_value(a) - def store( self, variables, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1ceb5623abd..562a99b9028 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -359,6 +359,40 @@ def _assert_empty(args: tuple, msg: str = "%s") -> None: raise ValueError(msg % args) +def _check_chunks_compatibility(dim, chunks, chunk_spec): + spec = chunks[dim] + if isinstance(spec, int): + spec = (spec,) + if any(s % chunk_spec[dim] for s in spec): + warnings.warn( + "Specified Dask chunks %r would " + "separate on disks chunk shape %r for " + "dimension %r. This could " + "degrades performance. Consider " + "rechunking after loading instead." 
% (chunks[dim], chunk_spec[dim], dim), + stacklevel=2, + ) + + +def _get_chunk(name, var, chunks): + if chunks == "auto": + chunks = {} + + preferred_chunks = dict(zip(var.dims, var.encoding.get("chunks", {}))) + if var.ndim == 1 and var.dims[0] == name: + return preferred_chunks + + output_chunks = {} + if chunks is not None: + for dim in preferred_chunks: + if dim in chunks: + _check_chunks_compatibility(dim, chunks, preferred_chunks) + output_chunks[dim] = chunks[dim] + else: + output_chunks[dim] = preferred_chunks[dim] + return output_chunks + + def _maybe_chunk( name, var, From 93cadeef1e24a5a2d1658b1a81d35b11c500da8f Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 29 Oct 2020 12:03:03 +0100 Subject: [PATCH 41/46] black --- xarray/core/dataset.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ec8aaaf898a..d72df555597 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -403,8 +403,7 @@ def _check_chunks_compatibility(dim, chunks, chunk_spec): "separate on disks chunk shape %r for " "dimension %r. This could " "degrades performance. Consider " - "rechunking after loading instead." - % (chunks[dim], chunk_spec[dim], dim), + "rechunking after loading instead." % (chunks[dim], chunk_spec[dim], dim), stacklevel=2, ) From 6e9a56214484548bdbfac673aef765ef5c59e78a Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 29 Oct 2020 14:04:38 +0100 Subject: [PATCH 42/46] remove unused import re-add check on chunks type --- xarray/backends/apiv2.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index fce782c8dc5..63be14caef7 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -1,5 +1,4 @@ import os -import warnings from ..core.dataset import _get_chunk, _maybe_chunk from ..core.utils import is_remote_uri @@ -187,6 +186,11 @@ def open_dataset( if chunks == "auto": chunks = {} + if not (isinstance(chunks, (int, dict)) or chunks is None): + raise ValueError( + "chunks must be an int, dict, 'auto', or None. " + "Instead found %s. " % chunks + ) if cache is None: cache = chunks is None From 69c2790124e97c8f2c79c2ceb97fe49e820583dd Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 29 Oct 2020 14:04:38 +0100 Subject: [PATCH 43/46] remove unused import re-add check on chunks type --- xarray/backends/apiv2.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index fce782c8dc5..496b7b3e484 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -1,5 +1,4 @@ import os -import warnings from ..core.dataset import _get_chunk, _maybe_chunk from ..core.utils import is_remote_uri @@ -187,6 +186,11 @@ def open_dataset( if chunks == "auto": chunks = {} + if not (isinstance(chunks, (int, dict)) or chunks is None): + raise ValueError( + "chunks must be an int, dict, 'auto', or None. " + "Instead found %s. " % chunks + ) if cache is None: cache = chunks is None From a1cfa297166c9131fd9268687876b6d96a280287 Mon Sep 17 00:00:00 2001 From: TheRed86 Date: Mon, 2 Nov 2020 12:49:09 +0100 Subject: [PATCH 44/46] Pass isort test. 
--- xarray/backends/apiv2.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 496b7b3e484..9ee802336a4 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -3,12 +3,8 @@ from ..core.dataset import _get_chunk, _maybe_chunk from ..core.utils import is_remote_uri from . import cfgrib_, h5netcdf_, zarr -from .api import ( - _autodetect_engine, - _get_backend_cls, - _normalize_path, - _protect_dataset_variables_inplace, -) +from .api import (_autodetect_engine, _get_backend_cls, _normalize_path, + _protect_dataset_variables_inplace) ENGINES = { "h5netcdf": h5netcdf_.open_backend_dataset_h5necdf, From c590e05a20778ae6c940fa8666e4364929268b08 Mon Sep 17 00:00:00 2001 From: TheRed86 Date: Mon, 2 Nov 2020 12:52:59 +0100 Subject: [PATCH 45/46] Pass black -t py36 test. --- xarray/backends/apiv2.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 9ee802336a4..496b7b3e484 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -3,8 +3,12 @@ from ..core.dataset import _get_chunk, _maybe_chunk from ..core.utils import is_remote_uri from . import cfgrib_, h5netcdf_, zarr -from .api import (_autodetect_engine, _get_backend_cls, _normalize_path, - _protect_dataset_variables_inplace) +from .api import ( + _autodetect_engine, + _get_backend_cls, + _normalize_path, + _protect_dataset_variables_inplace, +) ENGINES = { "h5netcdf": h5netcdf_.open_backend_dataset_h5necdf, From c6d341c7ad0190588184d4126f2f8236fc162da8 Mon Sep 17 00:00:00 2001 From: TheRed86 Date: Mon, 2 Nov 2020 13:05:12 +0100 Subject: [PATCH 46/46] Remove duplicated functions. --- xarray/core/dataset.py | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d72df555597..562a99b9028 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -393,40 +393,6 @@ def _get_chunk(name, var, chunks): return output_chunks -def _check_chunks_compatibility(dim, chunks, chunk_spec): - spec = chunks[dim] - if isinstance(spec, int): - spec = (spec,) - if any(s % chunk_spec[dim] for s in spec): - warnings.warn( - "Specified Dask chunks %r would " - "separate on disks chunk shape %r for " - "dimension %r. This could " - "degrades performance. Consider " - "rechunking after loading instead." % (chunks[dim], chunk_spec[dim], dim), - stacklevel=2, - ) - - -def _get_chunk(name, var, chunks): - if chunks == "auto": - chunks = {} - - preferred_chunks = dict(zip(var.dims, var.encoding.get("chunks", {}))) - if var.ndim == 1 and var.dims[0] == name: - return preferred_chunks - - output_chunks = {} - if chunks is not None: - for dim in preferred_chunks: - if dim in chunks: - _check_chunks_compatibility(dim, chunks, preferred_chunks) - output_chunks[dim] = chunks[dim] - else: - output_chunks[dim] = preferred_chunks[dim] - return output_chunks - - def _maybe_chunk( name, var,
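The patch series above leaves the v2 code path opt-in. As a rough end-to-end illustration only (not part of any patch), the snippet below assumes an xarray checkout with these patches applied and a hypothetical local file "example.nc"; it exercises the XARRAY_BACKEND_API dispatch added to api.open_dataset and the chunking behaviour of apiv2.dataset_from_backend_dataset.

import os

os.environ["XARRAY_BACKEND_API"] = "v2"  # opt in to the apiv2 dispatch in api.open_dataset

import xarray as xr

# Engines registered in apiv2.ENGINES ("h5netcdf", "zarr", "cfgrib") are routed to
# apiv2.open_dataset; any other engine falls back to the existing v1 code path.
ds_lazy = xr.open_dataset("example.nc", engine="h5netcdf", chunks={"time": 100})

# With the default chunks=None, dask is bypassed: the backend dataset is returned
# after cache protection, with ds.encoding["source"] set to the file name.
ds_eager = xr.open_dataset("example.nc", engine="h5netcdf")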