forked from pydata/xarray

Merge remote-tracking branch 'upstream/master' into interp-na-maxgap
* upstream/master:
  minor lint tweaks (pydata#3429)
  Hack around pydata#3440 (pydata#3442)
  Update Terminology page to account for multidimensional coordinates (pydata#3410)
  Use cftime master for upstream-dev build (pydata#3439)
  MAGA (Make Azure Green Again) (pydata#3436)
  Test that Dataset and DataArray resampling are identical (pydata#3412)
  Avoid multiplication DeprecationWarning in rasterio backend (pydata#3428)
  Sync with latest version of cftime (v1.0.4) (pydata#3430)
  Add cftime git tip to upstream-dev + temporarily pin cftime (pydata#3431)
dcherian committed Oct 24, 2019
2 parents 179eff1 + 652dd3c commit c12e1da
Showing 26 changed files with 148 additions and 113 deletions.
7 changes: 4 additions & 3 deletions ci/azure/install.yml
@@ -15,16 +15,17 @@ steps:
--no-deps \
--pre \
--upgrade \
numpy \
matplotlib \
pandas \
pandas=0.26.0.dev0+628.g03c1a3db2 \ # FIXME https://github.com/pydata/xarray/issues/3440
scipy
# numpy \ # FIXME https://github.com/pydata/xarray/issues/3409
pip install \
--no-deps \
--upgrade \
git+https://github.com/dask/dask \
git+https://github.com/dask/distributed \
git+https://github.com/zarr-developers/zarr
git+https://github.com/zarr-developers/zarr \
git+https://github.com/Unidata/cftime
condition: eq(variables['UPSTREAM_DEV'], 'true')
displayName: Install upstream dev dependencies

72 changes: 43 additions & 29 deletions ci/min_deps_check.py
@@ -6,7 +6,7 @@
import sys
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from typing import Dict, Iterator, Tuple
from typing import Dict, Iterator, Optional, Tuple

import yaml

@@ -34,10 +34,14 @@ def error(msg: str) -> None:
print("ERROR:", msg)


def parse_requirements(fname) -> Iterator[Tuple[str, int, int]]:
def warning(msg: str) -> None:
print("WARNING:", msg)


def parse_requirements(fname) -> Iterator[Tuple[str, int, int, Optional[int]]]:
"""Load requirements/py36-min-all-deps.yml
Yield (package name, major version, minor version)
Yield (package name, major version, minor version, [patch version])
"""
global has_errors

@@ -52,15 +56,18 @@ def parse_requirements(fname) -> Iterator[Tuple[str, int, int]]:
if pkg.endswith("<") or pkg.endswith(">") or eq != "=":
error("package should be pinned with exact version: " + row)
continue

try:
major, minor = version.split(".")
except ValueError:
error("expected major.minor (without patch): " + row)
continue
try:
yield pkg, int(major), int(minor)
version_tup = tuple(int(x) for x in version.split("."))
except ValueError:
error("failed to parse version: " + row)
raise ValueError("non-numerical version: " + row)

if len(version_tup) == 2:
yield (pkg, *version_tup, None) # type: ignore
elif len(version_tup) == 3:
yield (pkg, *version_tup) # type: ignore
else:
raise ValueError("expected major.minor or major.minor.patch: " + row)


def query_conda(pkg: str) -> Dict[Tuple[int, int], datetime]:
@@ -80,9 +87,9 @@ def query_conda(pkg: str) -> Dict[Tuple[int, int], datetime]:
label = label.strip()
if label == "file name":
value = value.strip()[len(pkg) :]
major, minor = value.split("-")[1].split(".")[:2]
major = int(major)
minor = int(minor)
smajor, sminor = value.split("-")[1].split(".")[:2]
major = int(smajor)
minor = int(sminor)
if label == "timestamp":
assert major is not None
assert minor is not None
@@ -109,17 +116,15 @@ def query_conda(pkg: str) -> Dict[Tuple[int, int], datetime]:


def process_pkg(
pkg: str, req_major: int, req_minor: int
) -> Tuple[str, int, int, str, int, int, str, str]:
pkg: str, req_major: int, req_minor: int, req_patch: Optional[int]
) -> Tuple[str, str, str, str, str, str]:
"""Compare package version from requirements file to available versions in conda.
Return row to build pandas dataframe:
- package name
- major version in requirements file
- minor version in requirements file
- major.minor.[patch] version in requirements file
- publication date of version in requirements file (YYYY-MM-DD)
- major version suggested by policy
- minor version suggested by policy
- major.minor version suggested by policy
- publication date of version suggested by policy (YYYY-MM-DD)
- status ("<", "=", "> (!)")
"""
@@ -130,7 +135,7 @@ def process_pkg(
req_published = versions[req_major, req_minor]
except KeyError:
error("not found in conda: " + pkg)
return pkg, req_major, req_minor, "-", 0, 0, "-", "(!)"
return pkg, fmt_version(req_major, req_minor, req_patch), "-", "-", "-", "(!)"

policy_months = POLICY_MONTHS.get(pkg, POLICY_MONTHS_DEFAULT)
policy_published = datetime.now() - timedelta(days=policy_months * 30)
@@ -153,30 +158,39 @@ def process_pkg(
else:
status = "="

if req_patch is not None:
warning("patch version should not appear in requirements file: " + pkg)
status += " (w)"

return (
pkg,
req_major,
req_minor,
fmt_version(req_major, req_minor, req_patch),
req_published.strftime("%Y-%m-%d"),
policy_major,
policy_minor,
fmt_version(policy_major, policy_minor),
policy_published_actual.strftime("%Y-%m-%d"),
status,
)


def fmt_version(major: int, minor: int, patch: int = None) -> str:
if patch is None:
return f"{major}.{minor}"
else:
return f"{major}.{minor}.{patch}"


def main() -> None:
fname = sys.argv[1]
with ThreadPoolExecutor(8) as ex:
futures = [
ex.submit(process_pkg, pkg, major, minor)
for pkg, major, minor in parse_requirements(fname)
ex.submit(process_pkg, pkg, major, minor, patch)
for pkg, major, minor, patch in parse_requirements(fname)
]
rows = [f.result() for f in futures]

print("Package Required Policy Status")
print("------------- ----------------- ----------------- ------")
fmt = "{:13} {:>1d}.{:<2d} ({:10}) {:>1d}.{:<2d} ({:10}) {}"
print("Package Required Policy Status")
print("------------- -------------------- -------------------- ------")
fmt = "{:13} {:7} ({:10}) {:7} ({:10}) {}"
for row in rows:
print(fmt.format(*row))

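For context, here is a standalone sketch of the version handling that ``parse_requirements``/``fmt_version`` now implement. The helper name ``parse_version_spec`` and the sample version strings are illustrative only and are not part of the script:

```python
from typing import Optional, Tuple


def parse_version_spec(version: str) -> Tuple[int, int, Optional[int]]:
    """Sketch of the new parsing above: accept 'major.minor' or
    'major.minor.patch' and reject anything else."""
    parts = tuple(int(x) for x in version.split("."))
    if len(parts) == 2:
        return parts[0], parts[1], None
    if len(parts) == 3:
        return parts[0], parts[1], parts[2]
    raise ValueError("expected major.minor or major.minor.patch: " + version)


print(parse_version_spec("0.25"))   # (0, 25, None)
print(parse_version_spec("1.0.3"))  # (1, 0, 3) -> would hit the new patch-version warning path
```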
2 changes: 1 addition & 1 deletion ci/requirements/py36-min-all-deps.yml
@@ -13,7 +13,7 @@ dependencies:
- cartopy=0.17
- cdms2=3.1
- cfgrib=0.9
- cftime=1.0
- cftime=1.0.3 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken
- coveralls
- dask=1.2
- distributed=1.27
4 changes: 2 additions & 2 deletions ci/requirements/py36.yml
@@ -9,7 +9,7 @@ dependencies:
- cartopy
- cdms2
- cfgrib
- cftime
- cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken
- coveralls
- dask
- distributed
@@ -25,7 +25,7 @@ dependencies:
- nc-time-axis
- netcdf4
- numba
- numpy
- numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
- pandas
- pint
- pip
6 changes: 3 additions & 3 deletions ci/requirements/py37-windows.yml
@@ -8,8 +8,8 @@ dependencies:
- bottleneck
- cartopy
# - cdms2 # Not available on Windows
# - cfgrib>=0.9.2 # Causes Python interpreter crash on Windows
- cftime
# - cfgrib # Causes Python interpreter crash on Windows
- cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken
- coveralls
- dask
- distributed
@@ -25,7 +25,7 @@ dependencies:
- nc-time-axis
- netcdf4
- numba
- numpy
- numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
- pandas
- pint
- pip
4 changes: 2 additions & 2 deletions ci/requirements/py37.yml
@@ -9,7 +9,7 @@ dependencies:
- cartopy
- cdms2
- cfgrib
- cftime
- cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken
- coveralls
- dask
- distributed
@@ -25,7 +25,7 @@ dependencies:
- nc-time-axis
- netcdf4
- numba
- numpy
- numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
- pandas
- pint
- pip
4 changes: 2 additions & 2 deletions doc/contributing.rst
@@ -286,12 +286,12 @@ How to build the *xarray* documentation
Requirements
~~~~~~~~~~~~
Make sure to follow the instructions on :ref:`creating a development environment above <contributing.dev_env>`, but
to build the docs you need to use the environment file ``doc/environment.yml``.
to build the docs you need to use the environment file ``ci/requirements/doc.yml``.

.. code-block:: none
# Create and activate the docs environment
conda env create -f doc/environment.yml
conda env create -f ci/requirements/doc.yml
conda activate xarray-docs
# or with older versions of Anaconda:
6 changes: 3 additions & 3 deletions doc/terminology.rst
@@ -27,15 +27,15 @@ Terminology

----

**Coordinate:** An array that labels a dimension of another ``DataArray``. Loosely, the coordinate array's values can be thought of as tick labels along a dimension. There are two types of coordinate arrays: *dimension coordinates* and *non-dimension coordinates* (see below). A coordinate named ``x`` can be retrieved from ``arr.coords[x]``. A ``DataArray`` can have more coordinates than dimensions because a single dimension can be assigned multiple coordinate arrays. However, only one coordinate array can be a assigned as a particular dimension's dimension coordinate array. As a consequence, ``len(arr.dims) <= len(arr.coords)`` in general.
**Coordinate:** An array that labels a dimension or set of dimensions of another ``DataArray``. In the usual one-dimensional case, the coordinate array's values can loosely be thought of as tick labels along a dimension. There are two types of coordinate arrays: *dimension coordinates* and *non-dimension coordinates* (see below). A coordinate named ``x`` can be retrieved from ``arr.coords[x]``. A ``DataArray`` can have more coordinates than dimensions because a single dimension can be labeled by multiple coordinate arrays. However, only one coordinate array can be a assigned as a particular dimension's dimension coordinate array. As a consequence, ``len(arr.dims) <= len(arr.coords)`` in general.
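As a quick, hedged illustration of the entry above (names and values are invented for this sketch): a single dimension can carry more than one coordinate array, so ``len(arr.dims) <= len(arr.coords)``.

```python
import numpy as np
import xarray as xr

# One dimension ("x") labeled by two coordinate arrays, so
# len(arr.dims) == 1 while len(arr.coords) == 2.
arr = xr.DataArray(
    np.arange(3),
    dims="x",
    coords={"x": [10, 20, 30], "label": ("x", ["a", "b", "c"])},
)
print(arr.coords["x"])                 # retrieve a coordinate by name
print(len(arr.dims), len(arr.coords))  # 1 2
```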

----

**Dimension coordinate:** A coordinate array assigned to ``arr`` with both a name and dimension name in ``arr.dims``. Dimension coordinates are used for label-based indexing and alignment, like the index found on a :py:class:`pandas.DataFrame` or :py:class:`pandas.Series`. In fact, dimension coordinates use :py:class:`pandas.Index` objects under the hood for efficient computation. Dimension coordinates are marked by ``*`` when printing a ``DataArray`` or ``Dataset``.
**Dimension coordinate:** A one-dimensional coordinate array assigned to ``arr`` with both a name and dimension name in ``arr.dims``. Dimension coordinates are used for label-based indexing and alignment, like the index found on a :py:class:`pandas.DataFrame` or :py:class:`pandas.Series`. In fact, dimension coordinates use :py:class:`pandas.Index` objects under the hood for efficient computation. Dimension coordinates are marked by ``*`` when printing a ``DataArray`` or ``Dataset``.
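A small illustrative sketch of label-based selection through a dimension coordinate (the values here are arbitrary):

```python
import xarray as xr

# "year" is a dimension coordinate (marked with * in the repr), backed by a
# pandas.Index, so label-based selection works.
da = xr.DataArray([1.1, 2.2, 3.3], dims="year", coords={"year": [2000, 2001, 2002]})
print(da.sel(year=2001).item())  # 2.2
print(da)                        # the "year" coordinate is printed with a leading *
```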

----

**Non-dimension coordinate:** A coordinate array assigned to ``arr`` with a name in ``arr.dims`` but a dimension name *not* in ``arr.dims``. These coordinate arrays are useful for auxiliary labeling. However, non-dimension coordinates are not indexed, and any operation on non-dimension coordinates that leverages indexing will fail. Printing ``arr.coords`` will print all of ``arr``'s coordinate names, with the assigned dimensions in parentheses. For example, ``coord_name (dim_name) 1 2 3 ...``.
**Non-dimension coordinate:** A coordinate array assigned to ``arr`` with a name in ``arr.coords`` but *not* in ``arr.dims``. These coordinates arrays can be one-dimensional or multidimensional, and they are useful for auxiliary labeling. As an example, multidimensional coordinates are often used in geoscience datasets when :doc:`the data's physical coordinates (such as latitude and longitude) differ from their logical coordinates <examples/multidimensional-coords>`. However, non-dimension coordinates are not indexed, and any operation on non-dimension coordinates that leverages indexing will fail. Printing ``arr.coords`` will print all of ``arr``'s coordinate names, with the corresponding dimension(s) in parentheses. For example, ``coord_name (dim_name) 1 2 3 ...``.
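A hedged example of the multidimensional non-dimension coordinates described above, loosely following the geoscience use case in the linked multidimensional-coords page (all names and values are invented for illustration):

```python
import numpy as np
import xarray as xr

# Invented 2D latitude/longitude arrays labeling the logical ("y", "x") grid.
lat = np.linspace(40.0, 42.0, 12).reshape(3, 4)
lon = np.linspace(-110.0, -108.0, 12).reshape(3, 4)
arr = xr.DataArray(
    np.zeros((3, 4)),
    dims=("y", "x"),
    coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)},
)
# lat and lon appear as non-dimension coordinates: "lat  (y, x) float64 ..."
print(arr.coords)
```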

----

11 changes: 11 additions & 0 deletions doc/whats-new.rst
@@ -18,6 +18,11 @@ What's New
v0.14.1 (unreleased)
--------------------

Breaking changes
~~~~~~~~~~~~~~~~

- Minimum cftime version is now 1.0.3. By `Deepak Cherian <https://github.com/dcherian>`_.

New Features
~~~~~~~~~~~~
- Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and
@@ -39,13 +44,19 @@ Bug fixes
- Fix regression introduced in v0.14.0 that would cause a crash if dask is installed
but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle <https://github.com/rdoyle45>`_

- Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4.
By `Anderson Banihirwe <https://github.com/andersy005>`_.


Documentation
~~~~~~~~~~~~~

- Fix the documentation of :py:meth:`DataArray.resample` and
:py:meth:`Dataset.resample` and explicitly state that a
datetime-like dimension is required. (:pull:`3400`)
By `Justus Magin <https://github.com/keewis>`_.
- Update the terminology page to address multidimensional coordinates. (:pull:`3410`)
By `Jon Thielen <https://github.com/jthielen>`_.

Internal Changes
~~~~~~~~~~~~~~~~
2 changes: 1 addition & 1 deletion xarray/backends/h5netcdf_.py
@@ -245,7 +245,7 @@ def prepare_variable(
dtype=dtype,
dimensions=variable.dims,
fillvalue=fillvalue,
**kwargs
**kwargs,
)
else:
nc4_var = self.ds[name]
4 changes: 2 additions & 2 deletions xarray/backends/rasterio_.py
@@ -257,8 +257,8 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc
if parse:
nx, ny = riods.width, riods.height
# xarray coordinates are pixel centered
x, _ = (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) * riods.transform
_, y = (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) * riods.transform
x, _ = riods.transform * (np.arange(nx) + 0.5, np.zeros(nx) + 0.5)
_, y = riods.transform * (np.zeros(ny) + 0.5, np.arange(ny) + 0.5)
coords["y"] = y
coords["x"] = x
else:
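The change above switches to left-multiplying the pixel-center indices by the affine transform, avoiding the deprecated right-multiplication path that triggered the warning. A hedged sketch of the same pattern using the affine package directly (the geotransform values are made up):

```python
import numpy as np
from affine import Affine

# Made-up geotransform: 10 m pixels, origin at (100000, 200000), north-up.
transform = Affine(10.0, 0.0, 100000.0, 0.0, -10.0, 200000.0)

nx, ny = 4, 3
# Left-multiplying the (col, row) pixel-center indices by the transform maps
# them to spatial coordinates, mirroring the updated backend code.
x, _ = transform * (np.arange(nx) + 0.5, np.zeros(nx) + 0.5)
_, y = transform * (np.zeros(ny) + 0.5, np.arange(ny) + 0.5)
print(x)  # [100005. 100015. 100025. 100035.]
print(y)  # [199995. 199985. 199975.]
```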
2 changes: 1 addition & 1 deletion xarray/backends/zarr.py
@@ -467,7 +467,7 @@ def open_zarr(
drop_variables=None,
consolidated=False,
overwrite_encoded_chunks=False,
**kwargs
**kwargs,
):
"""Load and decode a dataset from a Zarr store.
16 changes: 11 additions & 5 deletions xarray/coding/cftime_offsets.py
@@ -50,6 +50,7 @@
from ..core.pdcompat import count_not_none
from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso
from .times import format_cftime_datetime
from distutils.version import LooseVersion


def get_date_type(calendar):
@@ -222,6 +223,8 @@ def _adjust_n_years(other, n, month, reference_day):
def _shift_month(date, months, day_option="start"):
"""Shift the date to a month start or end a given number of months away.
"""
import cftime

delta_year = (date.month + months) // 12
month = (date.month + months) % 12

Expand All @@ -237,11 +240,14 @@ def _shift_month(date, months, day_option="start"):
day = _days_in_month(reference)
else:
raise ValueError(day_option)
# dayofwk=-1 is required to update the dayofwk and dayofyr attributes of
# the returned date object in versions of cftime between 1.0.2 and
# 1.0.3.4. It can be removed for versions of cftime greater than
# 1.0.3.4.
return date.replace(year=year, month=month, day=day, dayofwk=-1)
if LooseVersion(cftime.__version__) < LooseVersion("1.0.4"):
# dayofwk=-1 is required to update the dayofwk and dayofyr attributes of
# the returned date object in versions of cftime between 1.0.2 and
# 1.0.3.4. It can be removed for versions of cftime greater than
# 1.0.3.4.
return date.replace(year=year, month=month, day=day, dayofwk=-1)
else:
return date.replace(year=year, month=month, day=day)


def roll_qtrday(other, n, month, day_option, modby=3):
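A hedged sketch of the version-gating pattern introduced in ``_shift_month`` above, extracted into a hypothetical helper (the helper name is not part of xarray; the sample date is arbitrary):

```python
from distutils.version import LooseVersion

import cftime


def _replace_date(date, **kwargs):
    # Hypothetical helper mirroring the gate above: only pass dayofwk=-1 on
    # cftime releases older than 1.0.4, where replace() does not refresh the
    # dayofwk/dayofyr attributes on its own.
    if LooseVersion(cftime.__version__) < LooseVersion("1.0.4"):
        kwargs["dayofwk"] = -1
    return date.replace(**kwargs)


# Example with an arbitrary no-leap calendar date.
d = cftime.DatetimeNoLeap(2000, 1, 15)
print(_replace_date(d, month=2, day=1))
```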
14 changes: 8 additions & 6 deletions xarray/coding/cftimeindex.py
@@ -96,6 +96,8 @@ def parse_iso8601(datetime_string):


def _parse_iso8601_with_reso(date_type, timestr):
import cftime

default = date_type(1, 1, 1)
result = parse_iso8601(timestr)
replace = {}
Expand All @@ -107,12 +109,12 @@ def _parse_iso8601_with_reso(date_type, timestr):
# TODO: Consider adding support for sub-second resolution?
replace[attr] = int(value)
resolution = attr

# dayofwk=-1 is required to update the dayofwk and dayofyr attributes of
# the returned date object in versions of cftime between 1.0.2 and
# 1.0.3.4. It can be removed for versions of cftime greater than
# 1.0.3.4.
replace["dayofwk"] = -1
if LooseVersion(cftime.__version__) < LooseVersion("1.0.4"):
# dayofwk=-1 is required to update the dayofwk and dayofyr attributes of
# the returned date object in versions of cftime between 1.0.2 and
# 1.0.3.4. It can be removed for versions of cftime greater than
# 1.0.3.4.
replace["dayofwk"] = -1
return default.replace(**replace), resolution


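For context, a hedged example of the ISO-8601 partial-string indexing that ``_parse_iso8601_with_reso`` backs (the dates, calendar, and frequency are chosen arbitrarily; requires cftime to be installed):

```python
import numpy as np
import xarray as xr

# Arbitrary monthly series on a no-leap calendar.
times = xr.cftime_range("2000-01-01", periods=6, freq="MS", calendar="noleap")
da = xr.DataArray(np.arange(6), dims="time", coords={"time": times})

# A partial ISO-8601 string is parsed (by the routine patched above) down to
# monthly resolution, selecting everything within March 2000.
print(da.sel(time="2000-03"))
```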