forked from pydata/xarray

Merge remote-tracking branch 'upstream/master' into interp-na-maxgap
* upstream/master:
  minor lint tweaks (pydata#3429)
  Hack around pydata#3440 (pydata#3442)
  Update Terminology page to account for multidimensional coordinates (pydata#3410)
  Use cftime master for upstream-dev build (pydata#3439)
  MAGA (Make Azure Green Again) (pydata#3436)
  Test that Dataset and DataArray resampling are identical (pydata#3412)
  Avoid multiplication DeprecationWarning in rasterio backend (pydata#3428)
  Sync with latest version of cftime (v1.0.4) (pydata#3430)
  Add cftime git tip to upstream-dev + temporarily pin cftime (pydata#3431)
dcherian committed Oct 24, 2019
2 parents 179eff1 + 652dd3c commit c12e1da
Showing 26 changed files with 148 additions and 113 deletions.
7 changes: 4 additions & 3 deletions ci/azure/install.yml
@@ -15,16 +15,17 @@ steps:
--no-deps \
--pre \
--upgrade \
numpy \
matplotlib \
pandas \
pandas=0.26.0.dev0+628.g03c1a3db2 \ # FIXME https://github.com/pydata/xarray/issues/3440
scipy
# numpy \ # FIXME https://github.com/pydata/xarray/issues/3409
pip install \
--no-deps \
--upgrade \
git+https://github.com/dask/dask \
git+https://github.com/dask/distributed \
git+https://github.com/zarr-developers/zarr
git+https://github.com/zarr-developers/zarr \
git+https://github.com/Unidata/cftime
condition: eq(variables['UPSTREAM_DEV'], 'true')
displayName: Install upstream dev dependencies

72 changes: 43 additions & 29 deletions ci/min_deps_check.py
@@ -6,7 +6,7 @@
import sys
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from typing import Dict, Iterator, Tuple
from typing import Dict, Iterator, Optional, Tuple

import yaml

@@ -34,10 +34,14 @@ def error(msg: str) -> None:
print("ERROR:", msg)


def parse_requirements(fname) -> Iterator[Tuple[str, int, int]]:
def warning(msg: str) -> None:
print("WARNING:", msg)


def parse_requirements(fname) -> Iterator[Tuple[str, int, int, Optional[int]]]:
"""Load requirements/py36-min-all-deps.yml
Yield (package name, major version, minor version)
Yield (package name, major version, minor version, [patch version])
"""
global has_errors

@@ -52,15 +56,18 @@ def parse_requirements(fname) -> Iterator[Tuple[str, int, int]]:
if pkg.endswith("<") or pkg.endswith(">") or eq != "=":
error("package should be pinned with exact version: " + row)
continue

try:
major, minor = version.split(".")
except ValueError:
error("expected major.minor (without patch): " + row)
continue
try:
yield pkg, int(major), int(minor)
version_tup = tuple(int(x) for x in version.split("."))
except ValueError:
error("failed to parse version: " + row)
raise ValueError("non-numerical version: " + row)

if len(version_tup) == 2:
yield (pkg, *version_tup, None) # type: ignore
elif len(version_tup) == 3:
yield (pkg, *version_tup) # type: ignore
else:
raise ValueError("expected major.minor or major.minor.patch: " + row)


def query_conda(pkg: str) -> Dict[Tuple[int, int], datetime]:
@@ -80,9 +87,9 @@ def query_conda(pkg: str) -> Dict[Tuple[int, int], datetime]:
label = label.strip()
if label == "file name":
value = value.strip()[len(pkg) :]
major, minor = value.split("-")[1].split(".")[:2]
major = int(major)
minor = int(minor)
smajor, sminor = value.split("-")[1].split(".")[:2]
major = int(smajor)
minor = int(sminor)
if label == "timestamp":
assert major is not None
assert minor is not None
@@ -109,17 +116,15 @@ def query_conda(pkg: str) -> Dict[Tuple[int, int], datetime]:


def process_pkg(
pkg: str, req_major: int, req_minor: int
) -> Tuple[str, int, int, str, int, int, str, str]:
pkg: str, req_major: int, req_minor: int, req_patch: Optional[int]
) -> Tuple[str, str, str, str, str, str]:
"""Compare package version from requirements file to available versions in conda.
Return row to build pandas dataframe:
- package name
- major version in requirements file
- minor version in requirements file
- major.minor.[patch] version in requirements file
- publication date of version in requirements file (YYYY-MM-DD)
- major version suggested by policy
- minor version suggested by policy
- major.minor version suggested by policy
- publication date of version suggested by policy (YYYY-MM-DD)
- status ("<", "=", "> (!)")
"""
@@ -130,7 +135,7 @@ def process_pkg(
req_published = versions[req_major, req_minor]
except KeyError:
error("not found in conda: " + pkg)
return pkg, req_major, req_minor, "-", 0, 0, "-", "(!)"
return pkg, fmt_version(req_major, req_minor, req_patch), "-", "-", "-", "(!)"

policy_months = POLICY_MONTHS.get(pkg, POLICY_MONTHS_DEFAULT)
policy_published = datetime.now() - timedelta(days=policy_months * 30)
@@ -153,30 +158,39 @@ def process_pkg(
else:
status = "="

if req_patch is not None:
warning("patch version should not appear in requirements file: " + pkg)
status += " (w)"

return (
pkg,
req_major,
req_minor,
fmt_version(req_major, req_minor, req_patch),
req_published.strftime("%Y-%m-%d"),
policy_major,
policy_minor,
fmt_version(policy_major, policy_minor),
policy_published_actual.strftime("%Y-%m-%d"),
status,
)


def fmt_version(major: int, minor: int, patch: int = None) -> str:
if patch is None:
return f"{major}.{minor}"
else:
return f"{major}.{minor}.{patch}"


def main() -> None:
fname = sys.argv[1]
with ThreadPoolExecutor(8) as ex:
futures = [
ex.submit(process_pkg, pkg, major, minor)
for pkg, major, minor in parse_requirements(fname)
ex.submit(process_pkg, pkg, major, minor, patch)
for pkg, major, minor, patch in parse_requirements(fname)
]
rows = [f.result() for f in futures]

print("Package Required Policy Status")
print("------------- ----------------- ----------------- ------")
fmt = "{:13} {:>1d}.{:<2d} ({:10}) {:>1d}.{:<2d} ({:10}) {}"
print("Package Required Policy Status")
print("------------- -------------------- -------------------- ------")
fmt = "{:13} {:7} ({:10}) {:7} ({:10}) {}"
for row in rows:
print(fmt.format(*row))

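For context, here is a standalone sketch of the version handling that ``parse_requirements``/``fmt_version`` now implement. The helper name ``parse_version_spec`` and the sample version strings are illustrative only and are not part of the script:

```python
from typing import Optional, Tuple


def parse_version_spec(version: str) -> Tuple[int, int, Optional[int]]:
    """Sketch of the new parsing above: accept 'major.minor' or
    'major.minor.patch' and reject anything else."""
    parts = tuple(int(x) for x in version.split("."))
    if len(parts) == 2:
        return parts[0], parts[1], None
    if len(parts) == 3:
        return parts[0], parts[1], parts[2]
    raise ValueError("expected major.minor or major.minor.patch: " + version)


print(parse_version_spec("0.25"))   # (0, 25, None)
print(parse_version_spec("1.0.3"))  # (1, 0, 3) -> would hit the new patch-version warning path
```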
2 changes: 1 addition & 1 deletion ci/requirements/py36-min-all-deps.yml
@@ -13,7 +13,7 @@ dependencies:
- cartopy=0.17
- cdms2=3.1
- cfgrib=0.9
- cftime=1.0
- cftime=1.0.3 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken
- coveralls
- dask=1.2
- distributed=1.27
4 changes: 2 additions & 2 deletions ci/requirements/py36.yml
@@ -9,7 +9,7 @@ dependencies:
- cartopy
- cdms2
- cfgrib
- cftime
- cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken
- coveralls
- dask
- distributed
@@ -25,7 +25,7 @@ dependencies:
- nc-time-axis
- netcdf4
- numba
- numpy
- numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
- pandas
- pint
- pip
6 changes: 3 additions & 3 deletions ci/requirements/py37-windows.yml
@@ -8,8 +8,8 @@ dependencies:
- bottleneck
- cartopy
# - cdms2 # Not available on Windows
# - cfgrib>=0.9.2 # Causes Python interpreter crash on Windows
- cftime
# - cfgrib # Causes Python interpreter crash on Windows
- cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken
- coveralls
- dask
- distributed
@@ -25,7 +25,7 @@ dependencies:
- nc-time-axis
- netcdf4
- numba
- numpy
- numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
- pandas
- pint
- pip
4 changes: 2 additions & 2 deletions ci/requirements/py37.yml
@@ -9,7 +9,7 @@ dependencies:
- cartopy
- cdms2
- cfgrib
- cftime
- cftime<1.0.4 # FIXME need 1.0.5 (not released yet); 1.0.4 is broken
- coveralls
- dask
- distributed
@@ -25,7 +25,7 @@ dependencies:
- nc-time-axis
- netcdf4
- numba
- numpy
- numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
- pandas
- pint
- pip
4 changes: 2 additions & 2 deletions doc/contributing.rst
@@ -286,12 +286,12 @@ How to build the *xarray* documentation
Requirements
~~~~~~~~~~~~
Make sure to follow the instructions on :ref:`creating a development environment above <contributing.dev_env>`, but
to build the docs you need to use the environment file ``doc/environment.yml``.
to build the docs you need to use the environment file ``ci/requirements/doc.yml``.

.. code-block:: none
# Create and activate the docs environment
conda env create -f doc/environment.yml
conda env create -f ci/requirements/doc.yml
conda activate xarray-docs
# or with older versions of Anaconda:
6 changes: 3 additions & 3 deletions doc/terminology.rst
@@ -27,15 +27,15 @@ Terminology

----

**Coordinate:** An array that labels a dimension of another ``DataArray``. Loosely, the coordinate array's values can be thought of as tick labels along a dimension. There are two types of coordinate arrays: *dimension coordinates* and *non-dimension coordinates* (see below). A coordinate named ``x`` can be retrieved from ``arr.coords[x]``. A ``DataArray`` can have more coordinates than dimensions because a single dimension can be assigned multiple coordinate arrays. However, only one coordinate array can be a assigned as a particular dimension's dimension coordinate array. As a consequence, ``len(arr.dims) <= len(arr.coords)`` in general.
**Coordinate:** An array that labels a dimension or set of dimensions of another ``DataArray``. In the usual one-dimensional case, the coordinate array's values can loosely be thought of as tick labels along a dimension. There are two types of coordinate arrays: *dimension coordinates* and *non-dimension coordinates* (see below). A coordinate named ``x`` can be retrieved from ``arr.coords[x]``. A ``DataArray`` can have more coordinates than dimensions because a single dimension can be labeled by multiple coordinate arrays. However, only one coordinate array can be a assigned as a particular dimension's dimension coordinate array. As a consequence, ``len(arr.dims) <= len(arr.coords)`` in general.
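As a quick, hedged illustration of the entry above (names and values are invented for this sketch): a single dimension can carry more than one coordinate array, so ``len(arr.dims) <= len(arr.coords)``.

```python
import numpy as np
import xarray as xr

# One dimension ("x") labeled by two coordinate arrays, so
# len(arr.dims) == 1 while len(arr.coords) == 2.
arr = xr.DataArray(
    np.arange(3),
    dims="x",
    coords={"x": [10, 20, 30], "label": ("x", ["a", "b", "c"])},
)
print(arr.coords["x"])                 # retrieve a coordinate by name
print(len(arr.dims), len(arr.coords))  # 1 2
```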

----

**Dimension coordinate:** A coordinate array assigned to ``arr`` with both a name and dimension name in ``arr.dims``. Dimension coordinates are used for label-based indexing and alignment, like the index found on a :py:class:`pandas.DataFrame` or :py:class:`pandas.Series`. In fact, dimension coordinates use :py:class:`pandas.Index` objects under the hood for efficient computation. Dimension coordinates are marked by ``*`` when printing a ``DataArray`` or ``Dataset``.
**Dimension coordinate:** A one-dimensional coordinate array assigned to ``arr`` with both a name and dimension name in ``arr.dims``. Dimension coordinates are used for label-based indexing and alignment, like the index found on a :py:class:`pandas.DataFrame` or :py:class:`pandas.Series`. In fact, dimension coordinates use :py:class:`pandas.Index` objects under the hood for efficient computation. Dimension coordinates are marked by ``*`` when printing a ``DataArray`` or ``Dataset``.
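A small illustrative sketch of label-based selection through a dimension coordinate (the values here are arbitrary):

```python
import xarray as xr

# "year" is a dimension coordinate (marked with * in the repr), backed by a
# pandas.Index, so label-based selection works.
da = xr.DataArray([1.1, 2.2, 3.3], dims="year", coords={"year": [2000, 2001, 2002]})
print(da.sel(year=2001).item())  # 2.2
print(da)                        # the "year" coordinate is printed with a leading *
```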

----

**Non-dimension coordinate:** A coordinate array assigned to ``arr`` with a name in ``arr.dims`` but a dimension name *not* in ``arr.dims``. These coordinate arrays are useful for auxiliary labeling. However, non-dimension coordinates are not indexed, and any operation on non-dimension coordinates that leverages indexing will fail. Printing ``arr.coords`` will print all of ``arr``'s coordinate names, with the assigned dimensions in parentheses. For example, ``coord_name (dim_name) 1 2 3 ...``.
**Non-dimension coordinate:** A coordinate array assigned to ``arr`` with a name in ``arr.coords`` but *not* in ``arr.dims``. These coordinates arrays can be one-dimensional or multidimensional, and they are useful for auxiliary labeling. As an example, multidimensional coordinates are often used in geoscience datasets when :doc:`the data's physical coordinates (such as latitude and longitude) differ from their logical coordinates <examples/multidimensional-coords>`. However, non-dimension coordinates are not indexed, and any operation on non-dimension coordinates that leverages indexing will fail. Printing ``arr.coords`` will print all of ``arr``'s coordinate names, with the corresponding dimension(s) in parentheses. For example, ``coord_name (dim_name) 1 2 3 ...``.
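A hedged example of the multidimensional non-dimension coordinates described above, loosely following the geoscience use case in the linked multidimensional-coords page (all names and values are invented for illustration):

```python
import numpy as np
import xarray as xr

# Invented 2D latitude/longitude arrays labeling the logical ("y", "x") grid.
lat = np.linspace(40.0, 42.0, 12).reshape(3, 4)
lon = np.linspace(-110.0, -108.0, 12).reshape(3, 4)
arr = xr.DataArray(
    np.zeros((3, 4)),
    dims=("y", "x"),
    coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)},
)
# lat and lon appear as non-dimension coordinates: "lat  (y, x) float64 ..."
print(arr.coords)
```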

----

11 changes: 11 additions & 0 deletions doc/whats-new.rst
@@ -18,6 +18,11 @@ What's New
v0.14.1 (unreleased)
--------------------

Breaking changes
~~~~~~~~~~~~~~~~

- Minimum cftime version is now 1.0.3. By `Deepak Cherian <https://github.com/dcherian>`_.

New Features
~~~~~~~~~~~~
- Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and
@@ -39,13 +44,19 @@ Bug fixes
- Fix regression introduced in v0.14.0 that would cause a crash if dask is installed
but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle <https://github.com/rdoyle45>`_

- Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4.
By `Anderson Banihirwe <https://github.com/andersy005>`_.


Documentation
~~~~~~~~~~~~~

- Fix the documentation of :py:meth:`DataArray.resample` and
:py:meth:`Dataset.resample` and explicitly state that a
datetime-like dimension is required. (:pull:`3400`)
By `Justus Magin <https://github.com/keewis>`_.
- Update the terminology page to address multidimensional coordinates. (:pull:`3410`)
By `Jon Thielen <https://github.com/jthielen>`_.

Internal Changes
~~~~~~~~~~~~~~~~
2 changes: 1 addition & 1 deletion xarray/backends/h5netcdf_.py
@@ -245,7 +245,7 @@ def prepare_variable(
dtype=dtype,
dimensions=variable.dims,
fillvalue=fillvalue,
**kwargs
**kwargs,
)
else:
nc4_var = self.ds[name]
4 changes: 2 additions & 2 deletions xarray/backends/rasterio_.py
@@ -257,8 +257,8 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc
if parse:
nx, ny = riods.width, riods.height
# xarray coordinates are pixel centered
x, _ = (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) * riods.transform
_, y = (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) * riods.transform
x, _ = riods.transform * (np.arange(nx) + 0.5, np.zeros(nx) + 0.5)
_, y = riods.transform * (np.zeros(ny) + 0.5, np.arange(ny) + 0.5)
coords["y"] = y
coords["x"] = x
else:
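The change above switches to left-multiplying the pixel-center indices by the affine transform, avoiding the deprecated right-multiplication path that triggered the warning. A hedged sketch of the same pattern using the affine package directly (the geotransform values are made up):

```python
import numpy as np
from affine import Affine

# Made-up geotransform: 10 m pixels, origin at (100000, 200000), north-up.
transform = Affine(10.0, 0.0, 100000.0, 0.0, -10.0, 200000.0)

nx, ny = 4, 3
# Left-multiplying the (col, row) pixel-center indices by the transform maps
# them to spatial coordinates, mirroring the updated backend code.
x, _ = transform * (np.arange(nx) + 0.5, np.zeros(nx) + 0.5)
_, y = transform * (np.zeros(ny) + 0.5, np.arange(ny) + 0.5)
print(x)  # [100005. 100015. 100025. 100035.]
print(y)  # [199995. 199985. 199975.]
```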
2 changes: 1 addition & 1 deletion xarray/backends/zarr.py
@@ -467,7 +467,7 @@ def open_zarr(
drop_variables=None,
consolidated=False,
overwrite_encoded_chunks=False,
**kwargs
**kwargs,
):
"""Load and decode a dataset from a Zarr store.
16 changes: 11 additions & 5 deletions xarray/coding/cftime_offsets.py
@@ -50,6 +50,7 @@
from ..core.pdcompat import count_not_none
from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso
from .times import format_cftime_datetime
from distutils.version import LooseVersion


def get_date_type(calendar):
@@ -222,6 +223,8 @@ def _adjust_n_years(other, n, month, reference_day):
def _shift_month(date, months, day_option="start"):
"""Shift the date to a month start or end a given number of months away.
"""
import cftime

delta_year = (date.month + months) // 12
month = (date.month + months) % 12

Expand All @@ -237,11 +240,14 @@ def _shift_month(date, months, day_option="start"):
day = _days_in_month(reference)
else:
raise ValueError(day_option)
# dayofwk=-1 is required to update the dayofwk and dayofyr attributes of
# the returned date object in versions of cftime between 1.0.2 and
# 1.0.3.4. It can be removed for versions of cftime greater than
# 1.0.3.4.
return date.replace(year=year, month=month, day=day, dayofwk=-1)
if LooseVersion(cftime.__version__) < LooseVersion("1.0.4"):
# dayofwk=-1 is required to update the dayofwk and dayofyr attributes of
# the returned date object in versions of cftime between 1.0.2 and
# 1.0.3.4. It can be removed for versions of cftime greater than
# 1.0.3.4.
return date.replace(year=year, month=month, day=day, dayofwk=-1)
else:
return date.replace(year=year, month=month, day=day)


def roll_qtrday(other, n, month, day_option, modby=3):
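A hedged sketch of the version-gating pattern introduced in ``_shift_month`` above, extracted into a hypothetical helper (the helper name is not part of xarray; the sample date is arbitrary):

```python
from distutils.version import LooseVersion

import cftime


def _replace_date(date, **kwargs):
    # Hypothetical helper mirroring the gate above: only pass dayofwk=-1 on
    # cftime releases older than 1.0.4, where replace() does not refresh the
    # dayofwk/dayofyr attributes on its own.
    if LooseVersion(cftime.__version__) < LooseVersion("1.0.4"):
        kwargs["dayofwk"] = -1
    return date.replace(**kwargs)


# Example with an arbitrary no-leap calendar date.
d = cftime.DatetimeNoLeap(2000, 1, 15)
print(_replace_date(d, month=2, day=1))
```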
14 changes: 8 additions & 6 deletions xarray/coding/cftimeindex.py
@@ -96,6 +96,8 @@ def parse_iso8601(datetime_string):


def _parse_iso8601_with_reso(date_type, timestr):
import cftime

default = date_type(1, 1, 1)
result = parse_iso8601(timestr)
replace = {}
Expand All @@ -107,12 +109,12 @@ def _parse_iso8601_with_reso(date_type, timestr):
# TODO: Consider adding support for sub-second resolution?
replace[attr] = int(value)
resolution = attr

# dayofwk=-1 is required to update the dayofwk and dayofyr attributes of
# the returned date object in versions of cftime between 1.0.2 and
# 1.0.3.4. It can be removed for versions of cftime greater than
# 1.0.3.4.
replace["dayofwk"] = -1
if LooseVersion(cftime.__version__) < LooseVersion("1.0.4"):
# dayofwk=-1 is required to update the dayofwk and dayofyr attributes of
# the returned date object in versions of cftime between 1.0.2 and
# 1.0.3.4. It can be removed for versions of cftime greater than
# 1.0.3.4.
replace["dayofwk"] = -1
return default.replace(**replace), resolution


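For context, a hedged example of the ISO-8601 partial-string indexing that ``_parse_iso8601_with_reso`` backs (the dates, calendar, and frequency are chosen arbitrarily; requires cftime to be installed):

```python
import numpy as np
import xarray as xr

# Arbitrary monthly series on a no-leap calendar.
times = xr.cftime_range("2000-01-01", periods=6, freq="MS", calendar="noleap")
da = xr.DataArray(np.arange(6), dims="time", coords={"time": times})

# A partial ISO-8601 string is parsed (by the routine patched above) down to
# monthly resolution, selecting everything within March 2000.
print(da.sel(time="2000-03"))
```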