diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c9c0b720c35..15c971907f6 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,6 +2,6 @@ - [ ] Closes #xxxx - [ ] Tests added - - [ ] Passes `isort -rc . && black . && mypy . && flake8` + - [ ] Passes `isort . && black . && mypy . && flake8` - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` - [ ] New functions/methods are listed in `api.rst` diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 447f0007fc2..9fd92a50c16 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,10 +2,9 @@ repos: # isort should run before black as black sometimes tweaks the isort output - repo: https://github.com/timothycrosley/isort - rev: 4.3.21-2 + rev: 5.1.0 hooks: - id: isort - files: .+\.py$ # https://github.com/python/black#version-control-integration - repo: https://github.com/python/black rev: stable @@ -16,7 +15,7 @@ repos: hooks: - id: blackdoc - repo: https://gitlab.com/pycqa/flake8 - rev: 3.7.9 + rev: 3.8.3 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index c890d61d966..ec0cca59545 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -1,79 +1,93 @@ -# How to issue an xarray release in 17 easy steps +# How to issue an xarray release in 20 easy steps Time required: about an hour. +These instructions assume that `upstream` refers to the main repository: +``` +$ git remote -v +{...} +upstream https://github.com/pydata/xarray (fetch) +upstream https://github.com/pydata/xarray (push) +``` + 1. Ensure your master branch is synced to upstream: - ``` - git pull upstream master - ``` + ```sh + git pull upstream master + ``` 2. Get a list of contributors with: - ``` + ```sh git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format=%aN | sort -u | perl -pe 's/\n/$1, /' ``` - or by substituting the _previous_ release in: - ``` - git log v0.X.Y-1.. --format=%aN | sort -u | perl -pe 's/\n/$1, /' + or by substituting the _previous_ release in {0.X.Y-1}: + ```sh + git log v{0.X.Y-1}.. --format=%aN | sort -u | perl -pe 's/\n/$1, /' ``` Add these into `whats-new.rst` somewhere :) - 3. Look over whats-new.rst and the docs. Make sure "What's New" is complete - (check the date!) and consider adding a brief summary note describing the - release at the top. + 3. Write a release summary: ~50 words describing the high level features. This + will be used in the release emails, tweets, GitHub release notes, etc. + 4. Look over whats-new.rst and the docs. Make sure "What's New" is complete + (check the date!) and add the release summary at the top. Things to watch out for: - Important new features should be highlighted towards the top. - Function/method references should include links to the API docs. - Sometimes notes get added in the wrong section of whats-new, typically due to a bad merge. Check for these before a release by using git diff, - e.g., `git diff v0.X.Y whats-new.rst` where 0.X.Y is the previous + e.g., `git diff v{0.X.Y-1} whats-new.rst` where {0.X.Y-1} is the previous release. - 4. If you have any doubts, run the full test suite one final time! - ``` + 5. If possible, open a PR with the release summary and whatsnew changes. + 6. After merging, again ensure your master branch is synced to upstream: + ```sh + git pull upstream master + ``` + 7. If you have any doubts, run the full test suite one final time! 
+ ```sh pytest ``` - 5. Check that the ReadTheDocs build is passing. - 6. On the master branch, commit the release in git: + 8. Check that the ReadTheDocs build is passing. + 9. On the master branch, commit the release in git: + ```s + git commit -am 'Release v{0.X.Y}' ``` - git commit -am 'Release v0.X.Y' +10. Tag the release: + ```sh + git tag -a v{0.X.Y} -m 'v{0.X.Y}' ``` - 7. Tag the release: - ``` - git tag -a v0.X.Y -m 'v0.X.Y' - ``` - 8. Build source and binary wheels for pypi: - ``` - git clean -xdf # this deletes all uncommited changes! +11. Build source and binary wheels for PyPI: + ```sh + git clean -xdf # this deletes all uncommitted changes! python setup.py bdist_wheel sdist ``` - 9. Use twine to check the package build: - ``` - twine check dist/xarray-0.X.Y* +12. Use twine to check the package build: + ```sh + twine check dist/xarray-{0.X.Y}* ``` -10. Use twine to register and upload the release on pypi. Be careful, you can't +13. Use twine to register and upload the release on PyPI. Be careful, you can't take this back! - ``` - twine upload dist/xarray-0.X.Y* + ```sh + twine upload dist/xarray-{0.X.Y}* ``` You will need to be listed as a package owner at https://pypi.python.org/pypi/xarray for this to work. -11. Push your changes to master: - ``` +14. Push your changes to master: + ```sh git push upstream master git push upstream --tags ``` -12. Update the stable branch (used by ReadTheDocs) and switch back to master: - ``` +15. Update the stable branch (used by ReadTheDocs) and switch back to master: + ```sh git checkout stable git rebase master - git push upstream stable + git push --force upstream stable git checkout master ``` It's OK to force push to 'stable' if necessary. (We also update the stable - branch with `git cherrypick` for documentation only fixes that apply the + branch with `git cherry-pick` for documentation only fixes that apply the current released version.) -13. Add a section for the next release (v.X.Y+1) to doc/whats-new.rst: +16. Add a section for the next release {0.X.Y+1} to doc/whats-new.rst: ``` - .. _whats-new.0.X.Y+1: + .. _whats-new.{0.X.Y+1}: - v0.X.Y+1 (unreleased) + v{0.X.Y+1} (unreleased) --------------------- Breaking changes @@ -95,20 +109,20 @@ Time required: about an hour. Internal Changes ~~~~~~~~~~~~~~~~ ``` -14. Commit your changes and push to master again: - ``` +17. Commit your changes and push to master again: + ```sh git commit -am 'New whatsnew section' git push upstream master ``` You're done pushing to master! -15. Issue the release on GitHub. Click on "Draft a new release" at - https://github.com/pydata/xarray/releases. Type in the version number, but - don't bother to describe it -- we maintain that on the docs instead. -16. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ +18. Issue the release on GitHub. Click on "Draft a new release" at + https://github.com/pydata/xarray/releases. Type in the version number + and paste the release summary in the notes. +19. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ and switch your new release tag (at the bottom) from "Inactive" to "Active". It should now build automatically. -17. Issue the release announcement! For bug fix releases, I usually only email - xarray@googlegroups.com. For major/feature releases, I will email a broader +20. Issue the release announcement to mailing lists & Twitter. For bug fix releases, I + usually only email xarray@googlegroups.com. 
For major/feature releases, I will email a broader list (no more than once every 3-6 months): - pydata@googlegroups.com - xarray@googlegroups.com diff --git a/azure-pipelines.yml b/azure-pipelines.yml index e04c8f74f68..8061c9895ca 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -95,7 +95,7 @@ jobs: - template: ci/azure/install.yml - bash: | source activate xarray-tests - isort -rc --check . + isort --check . displayName: isort formatting checks - job: MinimumVersionsPolicy diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py index 527093cf5bc..93d12754365 100755 --- a/ci/min_deps_check.py +++ b/ci/min_deps_check.py @@ -23,9 +23,21 @@ "pytest-env", } -POLICY_MONTHS = {"python": 42, "numpy": 24, "pandas": 12, "scipy": 12} -POLICY_MONTHS_DEFAULT = 6 - +POLICY_MONTHS = {"python": 42, "numpy": 24, "setuptools": 42} +POLICY_MONTHS_DEFAULT = 12 +POLICY_OVERRIDE = { + # dask < 2.9 has trouble with nan-reductions + # TODO remove this special case and the matching note in installing.rst + # after January 2021. + "dask": (2, 9), + "distributed": (2, 9), + # setuptools-scm doesn't work with setuptools < 36.7 (Nov 2017). + # The conda metadata is malformed for setuptools < 38.4 (Jan 2018) + # (it's missing a timestamp which prevents this tool from working). + # TODO remove this special case and the matching note in installing.rst + # after July 2021. + "setuptools": (38, 4), +} has_errors = False @@ -151,6 +163,11 @@ def process_pkg( policy_minor = minor policy_published_actual = published + try: + policy_major, policy_minor = POLICY_OVERRIDE[pkg] + except KeyError: + pass + if (req_major, req_minor) < (policy_major, policy_minor): status = "<" elif (req_major, req_minor) > (policy_major, policy_minor): diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 6caebc46cdf..be1b073cf1e 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -13,14 +13,18 @@ dependencies: - ipython - iris>=2.3 - jupyter_client + - matplotlib-base=3.3.0 - nbsphinx - netcdf4>=1.5 - numba - numpy>=1.17 - - pandas>=1.0 + # FIXME https://github.com/pydata/xarray/issues/4287 + # - pandas>=1.0 + - pandas=1.0 - rasterio>=1.1 - seaborn - setuptools - - sphinx>=2.3 + - sphinx=3.1 - sphinx_rtd_theme>=0.4 + - sphinx-autosummary-accessors - zarr>=2.4 diff --git a/ci/requirements/py36-bare-minimum.yml b/ci/requirements/py36-bare-minimum.yml index 00fef672855..aaba5366f67 100644 --- a/ci/requirements/py36-bare-minimum.yml +++ b/ci/requirements/py36-bare-minimum.yml @@ -10,4 +10,4 @@ dependencies: - pytest-env - numpy=1.15 - pandas=0.25 - - setuptools=41.2 + - setuptools=38.4 diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index b14582ca9c2..2a977449033 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -23,7 +23,7 @@ dependencies: - hdf5=1.10 - hypothesis - iris=2.2 - - isort=4.3.21 + - isort - lxml=4.4 # Optional dep of pydap - matplotlib=3.1 - mypy=0.761 # Must match .pre-commit-config.yaml @@ -43,7 +43,7 @@ dependencies: - rasterio=1.0 - scipy=1.3 - seaborn=0.9 - - setuptools=41.2 + - setuptools=38.4 # - sparse # See py36-min-nep18.yml - toolz=0.10 - zarr=2.3 diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml index a9f12abfeae..17aae6932ac 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py36-min-nep18.yml @@ -15,6 +15,6 @@ dependencies: - pytest - pytest-cov - pytest-env - - scipy=1.2 - - setuptools=41.2 + - scipy=1.3 + - setuptools=38.4 - 
sparse=0.8 diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml index 9ff2c6c49ca..a500173f277 100644 --- a/ci/requirements/py36.yml +++ b/ci/requirements/py36.yml @@ -19,7 +19,7 @@ dependencies: - hdf5 - hypothesis - iris - - isort=4.3.21 + - isort - lxml # Optional dep of pydap - matplotlib - mypy=0.761 # Must match .pre-commit-config.yaml diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml index 19285a35eca..e9e5c7a900a 100644 --- a/ci/requirements/py37-windows.yml +++ b/ci/requirements/py37-windows.yml @@ -19,7 +19,7 @@ dependencies: - hdf5 - hypothesis - iris - - isort=4.3.21 + - isort - lxml # Optional dep of pydap - matplotlib - mypy=0.761 # Must match .pre-commit-config.yaml diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml index 3fcb4efd009..dba3926596e 100644 --- a/ci/requirements/py37.yml +++ b/ci/requirements/py37.yml @@ -19,7 +19,7 @@ dependencies: - hdf5 - hypothesis - iris - - isort=4.3.21 + - isort - lxml # Optional dep of pydap - matplotlib - mypy=0.761 # Must match .pre-commit-config.yaml diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml index 4e6f0dd5387..a375d9e1e5a 100644 --- a/ci/requirements/py38-all-but-dask.yml +++ b/ci/requirements/py38-all-but-dask.yml @@ -16,7 +16,7 @@ dependencies: - h5py - hdf5 - hypothesis - - isort=4.3.21 + - isort - lxml # Optional dep of pydap - matplotlib - mypy=0.761 # Must match .pre-commit-config.yaml diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml index 4598fcd2790..7dff3a1bd97 100644 --- a/ci/requirements/py38.yml +++ b/ci/requirements/py38.yml @@ -19,7 +19,7 @@ dependencies: - hdf5 - hypothesis - iris - - isort=4.3.21 + - isort - lxml # Optional dep of pydap - matplotlib - mypy=0.780 # Must match .pre-commit-config.yaml diff --git a/conftest.py b/conftest.py index 712af1d3759..ddce5e0d593 100644 --- a/conftest.py +++ b/conftest.py @@ -27,6 +27,7 @@ def pytest_runtest_setup(item): def add_standard_imports(doctest_namespace): import numpy as np import pandas as pd + import xarray as xr doctest_namespace["np"] = np diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index efef4259b74..6aca90860d2 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -665,13 +665,10 @@ backends.NetCDF4DataStore.encode backends.NetCDF4DataStore.encode_attribute backends.NetCDF4DataStore.encode_variable - backends.NetCDF4DataStore.get backends.NetCDF4DataStore.get_attrs backends.NetCDF4DataStore.get_dimensions backends.NetCDF4DataStore.get_encoding backends.NetCDF4DataStore.get_variables - backends.NetCDF4DataStore.items - backends.NetCDF4DataStore.keys backends.NetCDF4DataStore.load backends.NetCDF4DataStore.open backends.NetCDF4DataStore.open_store_variable @@ -685,27 +682,20 @@ backends.NetCDF4DataStore.store backends.NetCDF4DataStore.store_dataset backends.NetCDF4DataStore.sync - backends.NetCDF4DataStore.values - backends.NetCDF4DataStore.attrs backends.NetCDF4DataStore.autoclose - backends.NetCDF4DataStore.dimensions backends.NetCDF4DataStore.ds backends.NetCDF4DataStore.format backends.NetCDF4DataStore.is_remote backends.NetCDF4DataStore.lock - backends.NetCDF4DataStore.variables backends.H5NetCDFStore.close backends.H5NetCDFStore.encode backends.H5NetCDFStore.encode_attribute backends.H5NetCDFStore.encode_variable - backends.H5NetCDFStore.get backends.H5NetCDFStore.get_attrs backends.H5NetCDFStore.get_dimensions backends.H5NetCDFStore.get_encoding backends.H5NetCDFStore.get_variables - backends.H5NetCDFStore.items - 
backends.H5NetCDFStore.keys backends.H5NetCDFStore.load backends.H5NetCDFStore.open_store_variable backends.H5NetCDFStore.prepare_variable @@ -718,39 +708,25 @@ backends.H5NetCDFStore.store backends.H5NetCDFStore.store_dataset backends.H5NetCDFStore.sync - backends.H5NetCDFStore.values - backends.H5NetCDFStore.attrs - backends.H5NetCDFStore.dimensions backends.H5NetCDFStore.ds - backends.H5NetCDFStore.variables backends.PydapDataStore.close - backends.PydapDataStore.get backends.PydapDataStore.get_attrs backends.PydapDataStore.get_dimensions backends.PydapDataStore.get_encoding backends.PydapDataStore.get_variables - backends.PydapDataStore.items - backends.PydapDataStore.keys backends.PydapDataStore.load backends.PydapDataStore.open backends.PydapDataStore.open_store_variable - backends.PydapDataStore.values - backends.PydapDataStore.attrs - backends.PydapDataStore.dimensions - backends.PydapDataStore.variables backends.ScipyDataStore.close backends.ScipyDataStore.encode backends.ScipyDataStore.encode_attribute backends.ScipyDataStore.encode_variable - backends.ScipyDataStore.get backends.ScipyDataStore.get_attrs backends.ScipyDataStore.get_dimensions backends.ScipyDataStore.get_encoding backends.ScipyDataStore.get_variables - backends.ScipyDataStore.items - backends.ScipyDataStore.keys backends.ScipyDataStore.load backends.ScipyDataStore.open_store_variable backends.ScipyDataStore.prepare_variable @@ -763,11 +739,7 @@ backends.ScipyDataStore.store backends.ScipyDataStore.store_dataset backends.ScipyDataStore.sync - backends.ScipyDataStore.values - backends.ScipyDataStore.attrs - backends.ScipyDataStore.dimensions backends.ScipyDataStore.ds - backends.ScipyDataStore.variables backends.FileManager.acquire backends.FileManager.acquire_context diff --git a/doc/api.rst b/doc/api.rst index 72a6dd4d97a..5e8a2be0ed4 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -572,7 +572,9 @@ Universal functions With recent versions of numpy, dask and xarray, NumPy ufuncs are now supported directly on all xarray and dask objects. This obviates the need for the ``xarray.ufuncs`` module, which should not be used for new code - unless compatibility with versions of NumPy prior to v1.13 is required. + unless compatibility with versions of NumPy prior to v1.13 is + required. They will be removed once support for NumPy prior to + v1.17 is dropped. These functions are copied from NumPy, but extended to work on NumPy arrays, dask arrays and all xarray objects. You can find them in the ``xarray.ufuncs`` diff --git a/doc/computation.rst b/doc/computation.rst index 3660aed93ed..dcfe270a942 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -188,9 +188,16 @@ a value when aggregating: r = arr.rolling(y=3, center=True, min_periods=2) r.mean() +From version 0.17, xarray supports multidimensional rolling, + +.. ipython:: python + + r = arr.rolling(x=2, y=3, min_periods=2) + r.mean() + .. tip:: - Note that rolling window aggregations are faster and use less memory when bottleneck_ is installed. This only applies to numpy-backed xarray objects. + Note that rolling window aggregations are faster and use less memory when bottleneck_ is installed. This only applies to numpy-backed xarray objects with 1d-rolling. .. _bottleneck: https://github.com/pydata/bottleneck/ @@ -227,9 +234,9 @@ windowed rolling, convolution, short-time FFT etc. .. 
ipython:: python # rolling with 2-point stride - rolling_da = r.construct("window_dim", stride=2) + rolling_da = r.construct(x="x_win", y="y_win", stride=2) rolling_da - rolling_da.mean("window_dim", skipna=False) + rolling_da.mean(["x_win", "y_win"], skipna=False) Because the ``DataArray`` given by ``r.construct('window_dim')`` is a view of the original array, it is memory efficient. @@ -238,7 +245,7 @@ You can also use ``construct`` to compute a weighted rolling sum: .. ipython:: python weight = xr.DataArray([0.25, 0.5, 0.25], dims=["window"]) - arr.rolling(y=3).construct("window").dot(weight) + arr.rolling(y=3).construct(y="window").dot(weight) .. note:: numpy's Nan-aggregation functions such as ``nansum`` copy the original array. diff --git a/doc/conf.py b/doc/conf.py index d3d126cb33f..2f97c884ff5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -20,11 +20,7 @@ import sys from contextlib import suppress -# --------- autosummary templates ------------------ -# TODO: eventually replace this with a sphinx.ext.auto_accessor module -import sphinx -from sphinx.ext.autodoc import AttributeDocumenter, Documenter, MethodDocumenter -from sphinx.util import rpartition +import sphinx_autosummary_accessors # make sure the source version is preferred (#3567) root = pathlib.Path(__file__).absolute().parent.parent @@ -53,14 +49,14 @@ matplotlib.use("Agg") try: - import rasterio + import rasterio # noqa: F401 except ImportError: allowed_failures.update( ["gallery/plot_rasterio_rgb.py", "gallery/plot_rasterio.py"] ) try: - import cartopy + import cartopy # noqa: F401 except ImportError: allowed_failures.update( [ @@ -88,6 +84,7 @@ "IPython.sphinxext.ipython_directive", "IPython.sphinxext.ipython_console_highlighting", "nbsphinx", + "sphinx_autosummary_accessors", ] extlinks = { @@ -116,7 +113,7 @@ numpydoc_show_class_members = False # Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] +templates_path = ["_templates", sphinx_autosummary_accessors.templates_path] # The suffix of source filenames. source_suffix = ".rst" @@ -275,14 +272,14 @@ # -- Options for LaTeX output --------------------------------------------- -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). - # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. - # 'preamble': '', -} +# latex_elements = { +# # The paper size ('letterpaper' or 'a4paper'). +# # 'papersize': 'letterpaper', +# # The font size ('10pt', '11pt' or '12pt'). +# # 'pointsize': '10pt', +# # Additional stuff for the LaTeX preamble. +# # 'preamble': '', +# } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, @@ -364,113 +361,3 @@ "dask": ("https://docs.dask.org/en/latest", None), "cftime": ("https://unidata.github.io/cftime", None), } - - -# --------- autosummary templates ------------------ -# TODO: eventually replace this with a sphinx.ext.auto_accessor module -class AccessorDocumenter(MethodDocumenter): - """ - Specialized Documenter subclass for accessors. 
- """ - - objtype = "accessor" - directivetype = "method" - - # lower than MethodDocumenter so this is not chosen for normal methods - priority = 0.6 - - def format_signature(self): - # this method gives an error/warning for the accessors, therefore - # overriding it (accessor has no arguments) - return "" - - -class AccessorLevelDocumenter(Documenter): - """ - Specialized Documenter subclass for objects on accessor level (methods, - attributes). - """ - - # This is the simple straightforward version - # modname is None, base the last elements (eg 'hour') - # and path the part before (eg 'Series.dt') - # def resolve_name(self, modname, parents, path, base): - # modname = 'pandas' - # mod_cls = path.rstrip('.') - # mod_cls = mod_cls.split('.') - # - # return modname, mod_cls + [base] - - def resolve_name(self, modname, parents, path, base): - if modname is None: - if path: - mod_cls = path.rstrip(".") - else: - mod_cls = None - # if documenting a class-level object without path, - # there must be a current class, either from a parent - # auto directive ... - mod_cls = self.env.temp_data.get("autodoc:class") - # ... or from a class directive - if mod_cls is None: - mod_cls = self.env.temp_data.get("py:class") - # ... if still None, there's no way to know - if mod_cls is None: - return None, [] - # HACK: this is added in comparison to ClassLevelDocumenter - # mod_cls still exists of class.accessor, so an extra - # rpartition is needed - modname, accessor = rpartition(mod_cls, ".") - modname, cls = rpartition(modname, ".") - parents = [cls, accessor] - # if the module name is still missing, get it like above - if not modname: - modname = self.env.temp_data.get("autodoc:module") - if not modname: - if sphinx.__version__ > "1.3": - modname = self.env.ref_context.get("py:module") - else: - modname = self.env.temp_data.get("py:module") - # ... else, it stays None, which means invalid - return modname, parents + [base] - - -class AccessorAttributeDocumenter(AccessorLevelDocumenter, AttributeDocumenter): - - objtype = "accessorattribute" - directivetype = "attribute" - - # lower than AttributeDocumenter so this is not chosen for normal attributes - priority = 0.6 - - -class AccessorMethodDocumenter(AccessorLevelDocumenter, MethodDocumenter): - - objtype = "accessormethod" - directivetype = "method" - - # lower than MethodDocumenter so this is not chosen for normal methods - priority = 0.6 - - -class AccessorCallableDocumenter(AccessorLevelDocumenter, MethodDocumenter): - """ - This documenter lets us removes .__call__ from the method signature for - callable accessors like Series.plot - """ - - objtype = "accessorcallable" - directivetype = "method" - - # lower than MethodDocumenter; otherwise the doc build prints warnings - priority = 0.5 - - def format_name(self): - return MethodDocumenter.format_name(self).rstrip(".__call__") - - -def setup(app): - app.add_autodocumenter(AccessorDocumenter) - app.add_autodocumenter(AccessorAttributeDocumenter) - app.add_autodocumenter(AccessorMethodDocumenter) - app.add_autodocumenter(AccessorCallableDocumenter) diff --git a/doc/contributing.rst b/doc/contributing.rst index 9e6a3c250e9..975f4e67ba2 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -368,7 +368,7 @@ xarray uses several tools to ensure a consistent code format throughout the proj and then run from the root of the Xarray repository:: - isort -rc . + isort . black -t py36 . blackdoc -t py36 . 
flake8 diff --git a/doc/examples.rst b/doc/examples.rst index 1d48d29bcc5..102138b6e4e 100644 --- a/doc/examples.rst +++ b/doc/examples.rst @@ -2,7 +2,7 @@ Examples ======== .. toctree:: - :maxdepth: 2 + :maxdepth: 1 examples/weather-data examples/monthly-means @@ -15,7 +15,7 @@ Examples Using apply_ufunc ------------------ .. toctree:: - :maxdepth: 2 + :maxdepth: 1 examples/apply_ufunc_vectorize_1d diff --git a/doc/index.rst b/doc/index.rst index 972eb0a732e..e3cbb331285 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -107,6 +107,7 @@ Documentation See also -------- +- `Xarray's Tutorial`_ presented at the 2020 SciPy Conference (`video recording`_). - Stephan Hoyer and Joe Hamman's `Journal of Open Research Software paper`_ describing the xarray project. - The `UW eScience Institute's Geohackweek`_ tutorial on xarray for geospatial data scientists. - Stephan Hoyer's `SciPy2015 talk`_ introducing xarray to a general audience. @@ -114,6 +115,8 @@ See also xarray to users familiar with netCDF. - `Nicolas Fauchereau's tutorial`_ on xarray for netCDF users. +.. _Xarray's Tutorial: https://xarray-contrib.github.io/xarray-tutorial/ +.. _video recording: https://youtu.be/mecN-Ph_-78 .. _Journal of Open Research Software paper: http://doi.org/10.5334/jors.148 .. _UW eScience Institute's Geohackweek : https://geohackweek.github.io/nDarrays/ .. _SciPy2015 talk: https://www.youtube.com/watch?v=X0pAhJgySxk diff --git a/doc/installing.rst b/doc/installing.rst index a25bf65e342..62e026e20a4 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -7,7 +7,7 @@ Required dependencies --------------------- - Python (3.6 or later) -- setuptools +- setuptools (38.4 or later) - `numpy `__ (1.15 or later) - `pandas `__ (0.25 or later) @@ -93,16 +93,16 @@ dependencies: - **Python:** 42 months (`NEP-29 `_) +- **setuptools:** 42 months (but no older than 38.4) - **numpy:** 24 months (`NEP-29 `_) -- **pandas:** 12 months -- **scipy:** 12 months +- **dask and dask.distributed:** 12 months (but no older than 2.9) - **sparse, pint** and other libraries that rely on `NEP-18 `_ for integration: very latest available versions only, until the technology will have matured. This extends to dask when used in conjunction with any of these libraries. numpy >=1.17. -- **all other libraries:** 6 months +- **all other libraries:** 12 months The above should be interpreted as *the minor version (X.Y) initially published no more than N months ago*. Patch versions (x.y.Z) are not pinned, and only the latest available diff --git a/doc/internals.rst b/doc/internals.rst index 46c117e312b..aa9e1dedc68 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -42,6 +42,38 @@ xarray objects via the (readonly) :py:attr:`Dataset.variables ` and :py:attr:`DataArray.variable ` attributes. +Duck arrays +----------- + +.. warning:: + + This is a experimental feature. + +xarray can wrap custom `duck array`_ objects as long as they define numpy's +``shape``, ``dtype`` and ``ndim`` properties and the ``__array__``, +``__array_ufunc__`` and ``__array_function__`` methods. + +In certain situations (e.g. when printing the collapsed preview of +variables of a ``Dataset``), xarray will display the repr of a `duck array`_ +in a single line, truncating it to a certain number of characters. If that +would drop too much information, the `duck array`_ may define a +``_repr_inline_`` method that takes ``max_width`` (number of characters) as an +argument: + +.. code:: python + + class MyDuckArray: + ... 
+ + def _repr_inline_(self, max_width): + """ format to a single line with at most max_width characters """ + ... + + ... + +.. _duck array: https://numpy.org/neps/nep-0022-ndarray-duck-typing-overview.html + + Extending xarray ---------------- @@ -139,6 +171,11 @@ To help users keep things straight, please `let us know for an open source library. In the future, we will maintain a list of accessors and the libraries that implement them on this page. +To make documenting accessors with ``sphinx`` and ``sphinx.ext.autosummary`` +easier, you can use `sphinx-ext-autosummary`_. + +.. _sphinx-ext-autosummary: https://sphinx-autosummary-accessors.readthedocs.io/ + .. _zarr_encoding: Zarr Encoding Specification diff --git a/doc/plotting.rst b/doc/plotting.rst index 02ddba1e00c..3903ea5cde9 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -99,6 +99,7 @@ One Dimension The simplest way to make a plot is to call the :py:func:`DataArray.plot()` method. .. ipython:: python + :okwarning: air1d = air.isel(lat=10, lon=10) @@ -125,6 +126,7 @@ can be used: .. _matplotlib.pyplot.plot: http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot .. ipython:: python + :okwarning: @savefig plotting_1d_additional_args.png width=4in air1d[:200].plot.line("b-^") @@ -137,6 +139,7 @@ can be used: Keyword arguments work the same way, and are more explicit. .. ipython:: python + :okwarning: @savefig plotting_example_sin3.png width=4in air1d[:200].plot.line(color="purple", marker="o") @@ -151,6 +154,7 @@ In this example ``axes`` is an array consisting of the left and right axes created by ``plt.subplots``. .. ipython:: python + :okwarning: fig, axes = plt.subplots(ncols=2) @@ -178,6 +182,7 @@ support the ``aspect`` and ``size`` arguments which control the size of the resulting image via the formula ``figsize = (aspect * size, size)``: .. ipython:: python + :okwarning: air1d.plot(aspect=2, size=3) @savefig plotting_example_size_and_aspect.png @@ -219,6 +224,7 @@ without coordinates along the x-axis. To illustrate this, let's calculate a 'dec from the time and assign it as a non-dimension coordinate: .. ipython:: python + :okwarning: decimal_day = (air1d.time - air1d.time[0]) / pd.Timedelta("1d") air1d_multi = air1d.assign_coords(decimal_day=("time", decimal_day)) @@ -227,6 +233,7 @@ from the time and assign it as a non-dimension coordinate: To use ``'decimal_day'`` as x coordinate it must be explicitly specified: .. ipython:: python + :okwarning: air1d_multi.plot(x="decimal_day") @@ -234,6 +241,7 @@ Creating a new MultiIndex named ``'date'`` from ``'time'`` and ``'decimal_day'`` it is also possible to use a MultiIndex level as x-axis: .. ipython:: python + :okwarning: air1d_multi = air1d_multi.set_index(date=("time", "decimal_day")) air1d_multi.plot(x="decimal_day") @@ -241,6 +249,7 @@ it is also possible to use a MultiIndex level as x-axis: Finally, if a dataset does not have any coordinates it enumerates all data points: .. ipython:: python + :okwarning: air1d_multi = air1d_multi.drop("date") air1d_multi.plot() @@ -256,6 +265,7 @@ with appropriate arguments. Consider the 3D variable ``air`` defined above. We c plots to check the variation of air temperature at three different latitudes along a longitude line: .. ipython:: python + :okwarning: @savefig plotting_example_multiple_lines_x_kwarg.png air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time") @@ -277,6 +287,7 @@ If required, the automatic legend can be turned off using ``add_legend=False``. 
It is also possible to make line plots such that the data are on the x-axis and a dimension is on the y-axis. This can be done by specifying the appropriate ``y`` keyword argument. .. ipython:: python + :okwarning: @savefig plotting_example_xy_kwarg.png air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon") @@ -299,6 +310,7 @@ The argument ``where`` defines where the steps should be placed, options are when plotting data grouped with :py:meth:`Dataset.groupby_bins`. .. ipython:: python + :okwarning: air_grp = air.mean(["time", "lon"]).groupby_bins("lat", [0, 23.5, 66.5, 90]) air_mean = air_grp.mean() @@ -321,6 +333,7 @@ Other axes kwargs The keyword arguments ``xincrease`` and ``yincrease`` let you control the axes direction. .. ipython:: python + :okwarning: @savefig plotting_example_xincrease_yincrease_kwarg.png air.isel(time=10, lon=[10, 11]).plot.line( @@ -340,6 +353,7 @@ Two Dimensions The default method :py:meth:`DataArray.plot` calls :py:func:`xarray.plot.pcolormesh` by default when the data is two-dimensional. .. ipython:: python + :okwarning: air2d = air.isel(time=500) @@ -350,6 +364,7 @@ All 2d plots in xarray allow the use of the keyword arguments ``yincrease`` and ``xincrease``. .. ipython:: python + :okwarning: @savefig 2d_simple_yincrease.png width=4in air2d.plot(yincrease=False) @@ -369,6 +384,7 @@ and ``xincrease``. xarray plots data with :ref:`missing_values`. .. ipython:: python + :okwarning: bad_air2d = air2d.copy() @@ -386,6 +402,7 @@ It's not necessary for the coordinates to be evenly spaced. Both produce plots with nonuniform coordinates. .. ipython:: python + :okwarning: b = air2d.copy() # Apply a nonlinear transformation to one of the coords @@ -402,6 +419,7 @@ Since this is a thin wrapper around matplotlib, all the functionality of matplotlib is available. .. ipython:: python + :okwarning: air2d.plot(cmap=plt.cm.Blues) plt.title("These colors prove North America\nhas fallen in the ocean") @@ -421,6 +439,7 @@ matplotlib is available. ``d_ylog.plot()`` updates the xlabel. .. ipython:: python + :okwarning: plt.xlabel("Never gonna see this.") air2d.plot() @@ -436,6 +455,7 @@ xarray borrows logic from Seaborn to infer what kind of color map to use. For example, consider the original data in Kelvins rather than Celsius: .. ipython:: python + :okwarning: @savefig plotting_kelvin.png width=4in airtemps.air.isel(time=0).plot() @@ -454,6 +474,7 @@ Here we add two bad data points. This affects the color scale, washing out the plot. .. ipython:: python + :okwarning: air_outliers = airtemps.air.isel(time=0).copy() air_outliers[0, 0] = 100 @@ -469,6 +490,7 @@ This will use the 2nd and 98th percentiles of the data to compute the color limits. .. ipython:: python + :okwarning: @savefig plotting_robust2.png width=4in air_outliers.plot(robust=True) @@ -487,6 +509,7 @@ rather than the default continuous colormaps that matplotlib uses. The colormaps. For example, to make a plot with 8 discrete color intervals: .. ipython:: python + :okwarning: @savefig plotting_discrete_levels.png width=4in air2d.plot(levels=8) @@ -495,6 +518,7 @@ It is also possible to use a list of levels to specify the boundaries of the discrete colormap: .. ipython:: python + :okwarning: @savefig plotting_listed_levels.png width=4in air2d.plot(levels=[0, 12, 18, 30]) @@ -502,6 +526,7 @@ discrete colormap: You can also specify a list of discrete colors through the ``colors`` argument: .. 
ipython:: python + :okwarning: flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"] @savefig plotting_custom_colors_levels.png width=4in @@ -559,6 +584,7 @@ arguments to the xarray plotting methods/functions. This returns a :py:class:`xarray.plot.FacetGrid` object. .. ipython:: python + :okwarning: @savefig plot_facet_dataarray.png g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3) @@ -566,6 +592,7 @@ arguments to the xarray plotting methods/functions. This returns a Faceting also works for line plots. .. ipython:: python + :okwarning: @savefig plot_facet_dataarray_line.png g_simple_line = t.isel(lat=slice(0, None, 4)).plot( @@ -582,6 +609,7 @@ a fixed amount. Now we can see how the temperature maps would compare if one were much hotter. .. ipython:: python + :okwarning: t2 = t.isel(time=slice(0, 2)) t4d = xr.concat([t2, t2 + 40], pd.Index(["normal", "hot"], name="fourth_dim")) @@ -603,6 +631,7 @@ Faceted plotting supports other arguments common to xarray 2d plots. plt.close("all") .. ipython:: python + :okwarning: hasoutliers = t.isel(time=slice(0, 5)).copy() hasoutliers[0, 0, 0] = -100 @@ -649,6 +678,7 @@ Here is an example of using the lower level API and then modifying the axes afte they have been plotted. .. ipython:: python + :okwarning: g = t.plot.imshow("lon", "lat", col="time", col_wrap=3, robust=True) @@ -688,6 +718,7 @@ Consider this dataset Suppose we want to scatter ``A`` against ``B`` .. ipython:: python + :okwarning: @savefig ds_simple_scatter.png ds.plot.scatter(x="A", y="B") @@ -695,6 +726,7 @@ Suppose we want to scatter ``A`` against ``B`` The ``hue`` kwarg lets you vary the color by variable value .. ipython:: python + :okwarning: @savefig ds_hue_scatter.png ds.plot.scatter(x="A", y="B", hue="w") @@ -705,6 +737,7 @@ You can force a legend instead of a colorbar by setting ``hue_style='discrete'`` Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display of a legend or colorbar (as appropriate). .. ipython:: python + :okwarning: ds = ds.assign(w=[1, 2, 3, 5]) @savefig ds_discrete_legend_hue_scatter.png @@ -713,6 +746,7 @@ Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display The ``markersize`` kwarg lets you vary the point's size by variable value. You can additionally pass ``size_norm`` to control how the variable's values are mapped to point sizes. .. ipython:: python + :okwarning: @savefig ds_hue_size_scatter.png ds.plot.scatter(x="A", y="B", hue="z", hue_style="discrete", markersize="z") @@ -720,6 +754,7 @@ The ``markersize`` kwarg lets you vary the point's size by variable value. You c Faceting is also possible .. ipython:: python + :okwarning: @savefig ds_facet_scatter.png ds.plot.scatter(x="A", y="B", col="x", row="z", hue="w", hue_style="discrete") @@ -738,6 +773,7 @@ To follow this section you'll need to have Cartopy installed and working. This script will plot the air temperature on a map. .. ipython:: python + :okwarning: import cartopy.crs as ccrs @@ -745,7 +781,8 @@ This script will plot the air temperature on a map. p = air.isel(time=0).plot( subplot_kws=dict(projection=ccrs.Orthographic(-80, 35), facecolor="gray"), - transform=ccrs.PlateCarree()) + transform=ccrs.PlateCarree(), + ) p.axes.set_global() @savefig plotting_maps_cartopy.png width=100% @@ -788,6 +825,7 @@ There are three ways to use the xarray plotting functionality: These are provided for user convenience; they all call the same code. .. 
ipython:: python + :okwarning: import xarray.plot as xplt @@ -837,6 +875,7 @@ think carefully about what the limits, labels, and orientation for each of the axes should be. .. ipython:: python + :okwarning: @savefig plotting_example_2d_simple.png width=4in a.plot() @@ -857,6 +896,7 @@ xarray, but you'll have to tell the plot function to use these coordinates instead of the default ones: .. ipython:: python + :okwarning: lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4)) lon += lat / 10 @@ -876,6 +916,7 @@ on a polar projection (:issue:`781`). This is why the default is to not follow this convention when plotting on a map: .. ipython:: python + :okwarning: import cartopy.crs as ccrs @@ -890,6 +931,7 @@ You can however decide to infer the cell boundaries and use the ``infer_intervals`` keyword: .. ipython:: python + :okwarning: ax = plt.subplot(projection=ccrs.PlateCarree()) da.plot.pcolormesh("lon", "lat", ax=ax, infer_intervals=True) @@ -908,6 +950,7 @@ You can however decide to infer the cell boundaries and use the One can also make line plots with multidimensional coordinates. In this case, ``hue`` must be a dimension name, not a coordinate name. .. ipython:: python + :okwarning: f, ax = plt.subplots(2, 1) da.plot.line(x="lon", hue="y", ax=ax[0]) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 8e8e3f63098..cc780921b34 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -3,9 +3,11 @@ Xarray related projects ----------------------- -Here below is a list of existing open source projects that build +Below is a list of existing open source projects that build functionality upon xarray. See also section :ref:`internals` for more -details on how to build xarray extensions. +details on how to build xarray extensions. We also maintain the +`xarray-contrib `_ GitHub organization +as a place to curate projects that build upon xarray. Geosciences ~~~~~~~~~~~ @@ -36,6 +38,7 @@ Geosciences harmonic wind analysis in Python. - `wrf-python `_: A collection of diagnostic and interpolation routines for use with output of the Weather Research and Forecasting (WRF-ARW) Model. - `xarray-simlab `_: xarray extension for computer model simulations. +- `xarray-spatial `_: Numba-accelerated raster-based spatial processing tools (NDVI, curvature, zonal-statistics, proximity, hillshading, viewshed, etc.) - `xarray-topo `_: xarray extension for topographic analysis and modelling. - `xbpch `_: xarray interface for bpch files. - `xclim `_: A library for calculating climate science indices with unit handling built from xarray and dask. @@ -73,7 +76,7 @@ Extend xarray capabilities Visualization ~~~~~~~~~~~~~ -- `Datashader `_, `geoviews `_, `holoviews `_, : visualization packages for large data. +- `datashader `_, `geoviews `_, `holoviews `_, : visualization packages for large data. - `hvplot `_ : A high-level plotting API for the PyData ecosystem built on HoloViews. - `psyplot `_: Interactive data visualization with python. - `xarray-leaflet `_: An xarray extension for tiled map plotting based on ipyleaflet. 
diff --git a/doc/roadmap.rst b/doc/roadmap.rst index 401dac779ad..2f7063434b8 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -224,6 +224,7 @@ Current core developers - Tom Nicholas - Guido Imperiale - Justus Magin +- Mathias Hauser NumFOCUS ~~~~~~~~ diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d086d4f411d..3521e8215dd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,18 +25,57 @@ Breaking changes New Features ~~~~~~~~~~~~ +- :py:meth:`~xarray.DataArray.rolling` and :py:meth:`~xarray.Dataset.rolling` + now accept more than 1 dimension.(:pull:`4219`) + By `Keisuke Fujii `_. +- Build :py:meth:`CFTimeIndex.__repr__` explicitly as :py:class:`pandas.Index`. Add ``calendar`` as a new + property for :py:class:`CFTimeIndex` and show ``calendar`` and ``length`` in + :py:meth:`CFTimeIndex.__repr__` (:issue:`2416`, :pull:`4092`) + `Aaron Spring `_. +- Relaxed the :ref:`mindeps_policy` to support: + + - all versions of setuptools released in the last 42 months (but no older than 38.4) + - all versions of dask and dask.distributed released in the last 12 months (but no + older than 2.9) + - all versions of other packages released in the last 12 months + + All are up from 6 months (:issue:`4295`) + `Guido Imperiale `_. +- Use a wrapped array's ``_repr_inline_`` method to construct the collapsed ``repr`` + of :py:class:`DataArray` and :py:class:`Dataset` objects and + document the new method in :doc:`internals`. (:pull:`4248`). + By `Justus Magin `_. +- :py:meth:`~xarray.DataArray.to_dataframe` and :py:meth:`~xarray.Dataset.to_dataframe` + now accept a ``dim_order`` parameter allowing to specify the resulting dataframe's + dimensions order (:issue:`4331`, :pull:`4333`). + By `Thomas Zilio `_. Bug fixes ~~~~~~~~~ +- Fixed a bug in backend caused by basic installation of Dask (:issue:`4164`, :pull:`4318`) + `Sam Morley `_. +- Fixed inconsistencies between docstring and functionality for :py:meth:`DataArray.str.get` + and :py:meth:`DataArray.str.wrap` (:issue:`4334`). By `Mathias Hauser `_. +- Fixed overflow issue causing incorrect results in computing means of :py:class:`cftime.datetime` + arrays (:issue:`4341`). By `Spencer Clark `_. Documentation ~~~~~~~~~~~~~ +- update the docstring of :py:meth:`DataArray.copy` to remove incorrect mention of 'dataset' (:issue:`3606`) + By `Sander van Rijn `_. +- removed skipna argument from :py:meth:`DataArray.count`, :py:meth:`DataArray.any`, :py:meth:`DataArray.all`. (:issue:`755`) + By `Sander van Rijn `_ Internal Changes ~~~~~~~~~~~~~~~~ +- Fix ``pip install .`` when no ``.git`` directory exists; namely when the xarray source + directory has been rsync'ed by PyCharm Professional for a remote deployment over SSH. + By `Guido Imperiale `_ +- Only load resource files when running inside a Jupyter Notebook + (:issue:`4294`) By `Guido Imperiale `_ .. _whats-new.0.16.0: @@ -113,8 +152,8 @@ New Features :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) By `John Omotani `_ -- 'missing_dims' argument to :py:meth:`Dataset.isel`, - `:py:meth:`DataArray.isel` and :py:meth:`Variable.isel` to allow replacing +- `missing_dims` argument to :py:meth:`Dataset.isel`, + :py:meth:`DataArray.isel` and :py:meth:`Variable.isel` to allow replacing the exception when a dimension passed to ``isel`` is not present with a warning, or just ignore the dimension. (:issue:`3866`, :pull:`3923`) By `John Omotani `_ @@ -128,7 +167,7 @@ New Features By `Stephan Hoyer `_. 
- Allow plotting of boolean arrays. (:pull:`3766`) By `Marek Jacob `_ -- Enable using MultiIndex levels as cordinates in 1D and 2D plots (:issue:`3927`). +- Enable using MultiIndex levels as coordinates in 1D and 2D plots (:issue:`3927`). By `Mathias Hauser `_. - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to the ``days_in_month`` accessor for a :py:class:`pandas.DatetimeIndex`, which @@ -154,9 +193,10 @@ New Features Enhancements ~~~~~~~~~~~~ - Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp` - For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially - rather than interpolating in multidimensional space. (:issue:`2223`) + We performs independant interpolation sequentially rather than interpolating in + one large multidimensional space. (:issue:`2223`) By `Keisuke Fujii `_. +- :py:meth:`DataArray.interp` now support interpolations over chunked dimensions (:pull:`4155`). By `Alexandre Poux `_. - Major performance improvement for :py:meth:`Dataset.from_dataframe` when the dataframe has a MultiIndex (:pull:`4184`). By `Stephan Hoyer `_. @@ -173,7 +213,6 @@ Enhancements (:pull:`3905`) By `Maximilian Roos `_ - Bug fixes ~~~~~~~~~ - Fix errors combining attrs in :py:func:`open_mfdataset` (:issue:`4009`, :pull:`4173`) diff --git a/readthedocs.yml b/readthedocs.yml index 88abb57ae43..072a4b5110c 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -7,6 +7,6 @@ conda: environment: ci/requirements/doc.yml sphinx: - fail_on_warning: false + fail_on_warning: true formats: [] diff --git a/requirements.txt b/requirements.txt index f73887ff5cc..3cbeb368c09 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,4 @@ numpy >= 1.15 pandas >= 0.25 -setuptools >= 41.2 +setuptools >= 38.4 diff --git a/setup.cfg b/setup.cfg index 42dc53bb882..93d55cbca75 100644 --- a/setup.cfg +++ b/setup.cfg @@ -69,16 +69,16 @@ classifiers = Topic :: Scientific/Engineering [options] -packages = xarray +packages = find: zip_safe = False # https://mypy.readthedocs.io/en/latest/installed_packages.html include_package_data = True python_requires = >=3.6 install_requires = numpy >= 1.15 pandas >= 0.25 - setuptools >= 41.2 # For pkg_resources + setuptools >= 38.4 # For pkg_resources setup_requires = - setuptools >= 41.2 + setuptools >= 38.4 setuptools_scm [options.package_data] @@ -138,6 +138,8 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-cftime.*] ignore_missing_imports = True +[mypy-cupy.*] +ignore_missing_imports = True [mypy-dask.*] ignore_missing_imports = True [mypy-distributed.*] @@ -195,4 +197,4 @@ ignore_errors = True test = pytest [pytest-watch] -nobeep = True \ No newline at end of file +nobeep = True diff --git a/setup.py b/setup.py index 76755a445f7..e7cd9bc18e2 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,11 @@ #!/usr/bin/env python from setuptools import setup -setup(use_scm_version=True) +try: + setup(use_scm_version=True) +except LookupError as e: + # .git has been removed, and this is not a package created by sdist + # This is the case e.g. 
of a remote deployment with PyCharm Professional + if not str(e).startswith("setuptools-scm was unable to detect version"): + raise + setup(version="999") diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 63c4c956f86..da619905ce6 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -1,8 +1,6 @@ import logging import time import traceback -import warnings -from collections.abc import Mapping import numpy as np @@ -74,18 +72,9 @@ def __array__(self, dtype=None): return np.asarray(self[key], dtype=dtype) -class AbstractDataStore(Mapping): +class AbstractDataStore: __slots__ = () - def __iter__(self): - return iter(self.variables) - - def __getitem__(self, key): - return self.variables[key] - - def __len__(self): - return len(self.variables) - def get_dimensions(self): # pragma: no cover raise NotImplementedError() @@ -125,38 +114,6 @@ def load(self): attributes = FrozenDict(self.get_attrs()) return variables, attributes - @property - def variables(self): # pragma: no cover - warnings.warn( - "The ``variables`` property has been deprecated and " - "will be removed in xarray v0.11.", - FutureWarning, - stacklevel=2, - ) - variables, _ = self.load() - return variables - - @property - def attrs(self): # pragma: no cover - warnings.warn( - "The ``attrs`` property has been deprecated and " - "will be removed in xarray v0.11.", - FutureWarning, - stacklevel=2, - ) - _, attrs = self.load() - return attrs - - @property - def dimensions(self): # pragma: no cover - warnings.warn( - "The ``dimensions`` property has been deprecated and " - "will be removed in xarray v0.11.", - FutureWarning, - stacklevel=2, - ) - return self.get_dimensions() - def close(self): pass diff --git a/xarray/backends/locks.py b/xarray/backends/locks.py index 435690f2079..bb876a432c8 100644 --- a/xarray/backends/locks.py +++ b/xarray/backends/locks.py @@ -72,12 +72,15 @@ def _get_scheduler(get=None, collection=None) -> Optional[str]: dask.base.get_scheduler """ try: - import dask # noqa: F401 + # Fix for bug caused by dask installation that doesn't involve the toolz library + # Issue: 4164 + import dask + from dask.base import get_scheduler # noqa: F401 + + actual_get = get_scheduler(get, collection) except ImportError: return None - actual_get = dask.base.get_scheduler(get, collection) - try: from dask.distributed import Client diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 9863285d6de..b7d91a840fe 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -57,9 +57,10 @@ def __setitem__(self, key, value): def _open_scipy_netcdf(filename, mode, mmap, version): - import scipy.io import gzip + import scipy.io + # if the string ends with .gz, then gunzip and open as netcdf file if isinstance(filename, str) and filename.endswith(".gz"): try: diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 2a7eaa99edb..cd57af5c7eb 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -50,8 +50,14 @@ from xarray.core.utils import is_scalar from ..core.common import _contains_cftime_datetimes +from ..core.options import OPTIONS from .times import _STANDARD_CALENDARS, cftime_to_nptime, infer_calendar_name +# constants for cftimeindex.repr +CFTIME_REPR_LENGTH = 19 +ITEMS_IN_REPR_MAX_ELSE_ELLIPSIS = 100 +REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END = 10 + def named(name, pattern): return "(?P<" + name + ">" + pattern + ")" @@ -215,6 +221,48 @@ def assert_all_valid_date_type(data): ) +def format_row(times, indent=0, 
separator=", ", row_end=",\n"): + """Format a single row from format_times.""" + return indent * " " + separator.join(map(str, times)) + row_end + + +def format_times( + index, + max_width, + offset, + separator=", ", + first_row_offset=0, + intermediate_row_end=",\n", + last_row_end="", +): + """Format values of cftimeindex as pd.Index.""" + n_per_row = max(max_width // (CFTIME_REPR_LENGTH + len(separator)), 1) + n_rows = int(np.ceil(len(index) / n_per_row)) + + representation = "" + for row in range(n_rows): + indent = first_row_offset if row == 0 else offset + row_end = last_row_end if row == n_rows - 1 else intermediate_row_end + times_for_row = index[row * n_per_row : (row + 1) * n_per_row] + representation = representation + format_row( + times_for_row, indent=indent, separator=separator, row_end=row_end + ) + + return representation + + +def format_attrs(index, separator=", "): + """Format attributes of CFTimeIndex for __repr__.""" + attrs = { + "dtype": f"'{index.dtype}'", + "length": f"{len(index)}", + "calendar": f"'{index.calendar}'", + } + attrs_str = [f"{k}={v}" for k, v in attrs.items()] + attrs_str = f"{separator}".join(attrs_str) + return attrs_str + + class CFTimeIndex(pd.Index): """Custom Index for working with CF calendars and dates @@ -259,6 +307,46 @@ def __new__(cls, data, name=None): result._cache = {} return result + def __repr__(self): + """ + Return a string representation for this object. + """ + klass_name = type(self).__name__ + display_width = OPTIONS["display_width"] + offset = len(klass_name) + 2 + + if len(self) <= ITEMS_IN_REPR_MAX_ELSE_ELLIPSIS: + datastr = format_times( + self.values, display_width, offset=offset, first_row_offset=0 + ) + else: + front_str = format_times( + self.values[:REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END], + display_width, + offset=offset, + first_row_offset=0, + last_row_end=",", + ) + end_str = format_times( + self.values[-REPR_ELLIPSIS_SHOW_ITEMS_FRONT_END:], + display_width, + offset=offset, + first_row_offset=offset, + ) + datastr = "\n".join([front_str, f"{' '*offset}...", end_str]) + + attrs_str = format_attrs(self) + # oneliner only if smaller than display_width + full_repr_str = f"{klass_name}([{datastr}], {attrs_str})" + if len(full_repr_str) <= display_width: + return full_repr_str + else: + # if attrs_str too long, one per line + if len(attrs_str) >= display_width - offset: + attrs_str = attrs_str.replace(",", f",\n{' '*(offset-2)}") + full_repr_str = f"{klass_name}([{datastr}],\n{' '*(offset-1)}{attrs_str})" + return full_repr_str + def _partial_date_slice(self, resolution, parsed): """Adapted from pandas.tseries.index.DatetimeIndex._partial_date_slice @@ -582,6 +670,13 @@ def asi8(self): dtype=np.int64, ) + @property + def calendar(self): + """The calendar used by the datetimes in the index.""" + from .times import infer_calendar_name + + return infer_calendar_name(self) + def _round_via_method(self, freq, method): """Round dates using a specified method.""" from .cftime_offsets import CFTIME_TICKS, to_offset diff --git a/xarray/conventions.py b/xarray/conventions.py index fc0572944f3..700dcbc0fc4 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -567,8 +567,8 @@ def decode_cf( ------- decoded : Dataset """ - from .core.dataset import Dataset from .backends.common import AbstractDataStore + from .core.dataset import Dataset if isinstance(obj, Dataset): vars = obj._variables diff --git a/xarray/convert.py b/xarray/convert.py index 0c86b090f34..395581bace7 100644 --- a/xarray/convert.py +++ b/xarray/convert.py 
@@ -254,6 +254,7 @@ def from_iris(cube): """ Convert a Iris cube into an DataArray """ import iris.exceptions + from xarray.core.pycompat import dask_array_type name = _name(cube) diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 5502ba72855..1f0c95af71e 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -90,7 +90,7 @@ def _apply(self, f, dtype=None): def len(self): """ - Compute the length of each element in the array. + Compute the length of each string in the array. Returns ------- @@ -104,9 +104,9 @@ def __getitem__(self, key): else: return self.get(key) - def get(self, i): + def get(self, i, default=""): """ - Extract element from indexable in each element in the array. + Extract character number `i` from each string in the array. Parameters ---------- @@ -120,12 +120,18 @@ def get(self, i): ------- items : array of objects """ - obj = slice(-1, None) if i == -1 else slice(i, i + 1) - return self._apply(lambda x: x[obj]) + s = slice(-1, None) if i == -1 else slice(i, i + 1) + + def f(x): + item = x[s] + + return item if item else default + + return self._apply(f) def slice(self, start=None, stop=None, step=None): """ - Slice substrings from each element in the array. + Slice substrings from each string in the array. Parameters ---------- @@ -359,7 +365,7 @@ def count(self, pat, flags=0): def startswith(self, pat): """ - Test if the start of each string element matches a pattern. + Test if the start of each string in the array matches a pattern. Parameters ---------- @@ -378,7 +384,7 @@ def startswith(self, pat): def endswith(self, pat): """ - Test if the end of each string element matches a pattern. + Test if the end of each string in the array matches a pattern. Parameters ---------- @@ -432,8 +438,7 @@ def pad(self, width, side="left", fillchar=" "): def center(self, width, fillchar=" "): """ - Filling left and right side of strings in the array with an - additional character. + Pad left and right side of each string in the array. Parameters ---------- @@ -451,8 +456,7 @@ def center(self, width, fillchar=" "): def ljust(self, width, fillchar=" "): """ - Filling right side of strings in the array with an additional - character. + Pad right side of each string in the array. Parameters ---------- @@ -470,7 +474,7 @@ def ljust(self, width, fillchar=" "): def rjust(self, width, fillchar=" "): """ - Filling left side of strings in the array with an additional character. + Pad left side of each string in the array. Parameters ---------- @@ -488,7 +492,7 @@ def rjust(self, width, fillchar=" "): def zfill(self, width): """ - Pad strings in the array by prepending '0' characters. + Pad each string in the array by prepending '0' characters. Strings in the array are padded with '0' characters on the left of the string to reach a total string length `width`. Strings @@ -508,7 +512,7 @@ def zfill(self, width): def contains(self, pat, case=True, flags=0, regex=True): """ - Test if pattern or regex is contained within a string of the array. + Test if pattern or regex is contained within each string of the array. Return boolean array based on whether a given pattern or regex is contained within a string of the array. @@ -554,7 +558,7 @@ def contains(self, pat, case=True, flags=0, regex=True): def match(self, pat, case=True, flags=0): """ - Determine if each string matches a regular expression. + Determine if each string in the array matches a regular expression. 
Parameters ---------- @@ -613,7 +617,7 @@ def strip(self, to_strip=None, side="both"): def lstrip(self, to_strip=None): """ - Remove leading and trailing characters. + Remove leading characters. Strip whitespaces (including newlines) or a set of specified characters from each string in the array from the left side. @@ -633,7 +637,7 @@ def lstrip(self, to_strip=None): def rstrip(self, to_strip=None): """ - Remove leading and trailing characters. + Remove trailing characters. Strip whitespaces (including newlines) or a set of specified characters from each string in the array from the right side. @@ -653,8 +657,7 @@ def rstrip(self, to_strip=None): def wrap(self, width, **kwargs): """ - Wrap long strings in the array to be formatted in paragraphs with - length less than a given width. + Wrap long strings in the array in paragraphs with length less than `width`. This method has the same keyword parameters and defaults as :class:`textwrap.TextWrapper`. @@ -663,38 +666,20 @@ def wrap(self, width, **kwargs): ---------- width : int Maximum line-width - expand_tabs : bool, optional - If true, tab characters will be expanded to spaces (default: True) - replace_whitespace : bool, optional - If true, each whitespace character (as defined by - string.whitespace) remaining after tab expansion will be replaced - by a single space (default: True) - drop_whitespace : bool, optional - If true, whitespace that, after wrapping, happens to end up at the - beginning or end of a line is dropped (default: True) - break_long_words : bool, optional - If true, then words longer than width will be broken in order to - ensure that no lines are longer than width. If it is false, long - words will not be broken, and some lines may be longer than width. - (default: True) - break_on_hyphens : bool, optional - If true, wrapping will occur preferably on whitespace and right - after hyphens in compound words, as it is customary in English. If - false, only whitespaces will be considered as potentially good - places for line breaks, but you need to set break_long_words to - false if you want truly insecable words. (default: True) + **kwargs + keyword arguments passed into :class:`textwrap.TextWrapper`. Returns ------- wrapped : same type as values """ - tw = textwrap.TextWrapper(width=width) + tw = textwrap.TextWrapper(width=width, **kwargs) f = lambda x: "\n".join(tw.wrap(x)) return self._apply(f) def translate(self, table): """ - Map all characters in the string through the given mapping table. + Map characters of each string through the given mapping table. Parameters ---------- diff --git a/xarray/core/common.py b/xarray/core/common.py index 67dc0fda461..bc5035b682e 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -786,7 +786,7 @@ def rolling( self, dim: Mapping[Hashable, int] = None, min_periods: int = None, - center: bool = False, + center: Union[bool, Mapping[Hashable, bool]] = False, keep_attrs: bool = None, **window_kwargs: int, ): @@ -802,7 +802,7 @@ def rolling( Minimum number of observations in window required to have a value (otherwise result is NA). The default, None, is equivalent to setting min_periods equal to the size of the window. - center : boolean, default False + center : boolean, or a mapping, default False Set the labels at the center of the window. keep_attrs : bool, optional If True, the object's attributes (`attrs`) will be copied from @@ -1088,9 +1088,9 @@ def resample( """ # TODO support non-string indexer after removing the old API. 
+ from ..coding.cftimeindex import CFTimeIndex from .dataarray import DataArray from .resample import RESAMPLE_DIM - from ..coding.cftimeindex import CFTimeIndex if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) @@ -1283,8 +1283,8 @@ def isin(self, test_elements): numpy.isin """ from .computation import apply_ufunc - from .dataset import Dataset from .dataarray import DataArray + from .dataset import Dataset from .variable import Variable if isinstance(test_elements, Dataset): diff --git a/xarray/core/computation.py b/xarray/core/computation.py index d8a0c53e817..1f2a8a8e746 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -622,9 +622,8 @@ def func(*arrays): if data.ndim != len(dims): raise ValueError( "applied function returned data with unexpected " - "number of dimensions: {} vs {}, for dimensions {}".format( - data.ndim, len(dims), dims - ) + f"number of dimensions. Received {data.ndim} dimension(s) but " + f"expected {len(dims)} dimensions with names: {dims!r}" ) var = Variable(dims, data, fastpath=True) @@ -976,17 +975,18 @@ def earth_mover_distance(first_samples, .. [2] http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html .. [3] http://xarray.pydata.org/en/stable/computation.html#wrapping-custom-computation """ - from .groupby import GroupBy from .dataarray import DataArray + from .groupby import GroupBy from .variable import Variable if input_core_dims is None: input_core_dims = ((),) * (len(args)) elif len(input_core_dims) != len(args): raise ValueError( - "input_core_dims must be None or a tuple with the length same to " - "the number of arguments. Given input_core_dims: {}, " - "number of args: {}.".format(input_core_dims, len(args)) + f"input_core_dims must be None or a tuple with the length same to " + f"the number of arguments. " + f"Given {len(input_core_dims)} input_core_dims: {input_core_dims}, " + f" but number of args is {len(args)}." ) if kwargs is None: @@ -994,11 +994,17 @@ def earth_mover_distance(first_samples, signature = _UFuncSignature(input_core_dims, output_core_dims) - if exclude_dims and not exclude_dims <= signature.all_core_dims: - raise ValueError( - "each dimension in `exclude_dims` must also be a " - "core dimension in the function signature" - ) + if exclude_dims: + if not isinstance(exclude_dims, set): + raise TypeError( + f"Expected exclude_dims to be a 'set'. Received '{type(exclude_dims).__name__}' instead." + ) + if not exclude_dims <= signature.all_core_dims: + raise ValueError( + f"each dimension in `exclude_dims` must also be a " + f"core dimension in the function signature. 
" + f"Please make {(exclude_dims - signature.all_core_dims)} a core dimension" + ) if kwargs: func = functools.partial(func, **kwargs) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 7741cbb826b..b42c91c232d 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -116,8 +116,8 @@ def concat( # TODO: add ignore_index arguments copied from pandas.concat # TODO: support concatenating scalar coordinates even if the concatenated # dimension already exists - from .dataset import Dataset from .dataarray import DataArray + from .dataset import Dataset try: first_obj, objs = utils.peek_at(objs) diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 87f646352eb..74474f4321e 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -32,69 +32,80 @@ def rolling_window(a, axis, window, center, fill_value): """ import dask.array as da + if not hasattr(axis, "__len__"): + axis = [axis] + window = [window] + center = [center] + orig_shape = a.shape - if axis < 0: - axis = a.ndim + axis depth = {d: 0 for d in range(a.ndim)} - depth[axis] = int(window / 2) - # For evenly sized window, we need to crop the first point of each block. - offset = 1 if window % 2 == 0 else 0 - - if depth[axis] > min(a.chunks[axis]): - raise ValueError( - "For window size %d, every chunk should be larger than %d, " - "but the smallest chunk size is %d. Rechunk your array\n" - "with a larger chunk size or a chunk size that\n" - "more evenly divides the shape of your array." - % (window, depth[axis], min(a.chunks[axis])) - ) - - # Although da.overlap pads values to boundaries of the array, - # the size of the generated array is smaller than what we want - # if center == False. - if center: - start = int(window / 2) # 10 -> 5, 9 -> 4 - end = window - 1 - start - else: - start, end = window - 1, 0 - pad_size = max(start, end) + offset - depth[axis] - drop_size = 0 - # pad_size becomes more than 0 when the overlapped array is smaller than - # needed. In this case, we need to enlarge the original array by padding - # before overlapping. - if pad_size > 0: - if pad_size < depth[axis]: - # overlapping requires each chunk larger than depth. If pad_size is - # smaller than the depth, we enlarge this and truncate it later. - drop_size = depth[axis] - pad_size - pad_size = depth[axis] - shape = list(a.shape) - shape[axis] = pad_size - chunks = list(a.chunks) - chunks[axis] = (pad_size,) - fill_array = da.full(shape, fill_value, dtype=a.dtype, chunks=chunks) - a = da.concatenate([fill_array, a], axis=axis) - + offset = [0] * a.ndim + drop_size = [0] * a.ndim + pad_size = [0] * a.ndim + for ax, win, cent in zip(axis, window, center): + if ax < 0: + ax = a.ndim + ax + depth[ax] = int(win / 2) + # For evenly sized window, we need to crop the first point of each block. + offset[ax] = 1 if win % 2 == 0 else 0 + + if depth[ax] > min(a.chunks[ax]): + raise ValueError( + "For window size %d, every chunk should be larger than %d, " + "but the smallest chunk size is %d. Rechunk your array\n" + "with a larger chunk size or a chunk size that\n" + "more evenly divides the shape of your array." + % (win, depth[ax], min(a.chunks[ax])) + ) + + # Although da.overlap pads values to boundaries of the array, + # the size of the generated array is smaller than what we want + # if center == False. 
+ if cent: + start = int(win / 2) # 10 -> 5, 9 -> 4 + end = win - 1 - start + else: + start, end = win - 1, 0 + pad_size[ax] = max(start, end) + offset[ax] - depth[ax] + drop_size[ax] = 0 + # pad_size becomes more than 0 when the overlapped array is smaller than + # needed. In this case, we need to enlarge the original array by padding + # before overlapping. + if pad_size[ax] > 0: + if pad_size[ax] < depth[ax]: + # overlapping requires each chunk larger than depth. If pad_size is + # smaller than the depth, we enlarge this and truncate it later. + drop_size[ax] = depth[ax] - pad_size[ax] + pad_size[ax] = depth[ax] + + # TODO maybe following two lines can be summarized. + a = da.pad( + a, [(p, 0) for p in pad_size], mode="constant", constant_values=fill_value + ) boundary = {d: fill_value for d in range(a.ndim)} # create overlap arrays ag = da.overlap.overlap(a, depth=depth, boundary=boundary) - # apply rolling func - def func(x, window, axis=-1): + def func(x, window, axis): x = np.asarray(x) - rolling = nputils._rolling_window(x, window, axis) - return rolling[(slice(None),) * axis + (slice(offset, None),)] - - chunks = list(a.chunks) - chunks.append(window) + index = [slice(None)] * x.ndim + for ax, win in zip(axis, window): + x = nputils._rolling_window(x, win, ax) + index[ax] = slice(offset[ax], None) + return x[tuple(index)] + + chunks = list(a.chunks) + window + new_axis = [a.ndim + i for i in range(len(axis))] out = ag.map_blocks( - func, dtype=a.dtype, new_axis=a.ndim, chunks=chunks, window=window, axis=axis + func, dtype=a.dtype, new_axis=new_axis, chunks=chunks, window=window, axis=axis ) # crop boundary. - index = (slice(None),) * axis + (slice(drop_size, drop_size + orig_shape[axis]),) - return out[index] + index = [slice(None)] * a.ndim + for ax in axis: + index[ax] = slice(drop_size[ax], drop_size[ax] + orig_shape[ax]) + return out[tuple(index)] def least_squares(lhs, rhs, rcond=None, skipna=False): diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index dbc4877fa1d..4ad29baee04 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -862,8 +862,8 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": """Returns a copy of this array. If `deep=True`, a deep copy is made of the data array. - Otherwise, a shallow copy is made, so each variable in the new - array's dataset is also a variable in this array's dataset. + Otherwise, a shallow copy is made, and the returned data array's + values are a new view of this data array's values. Use `data` to create a new object with the same structure as original but entirely new data. @@ -1027,7 +1027,7 @@ def isel( missing_dims : {"raise", "warn", "ignore"}, default "raise" What to do if dimensions that should be selected from are not present in the DataArray: - - "exception": raise an exception + - "raise": raise an exception - "warning": raise a warning, and ignore the missing dimensions - "ignore": ignore the missing dimensions **indexers_kwargs : {dim: indexer, ...}, optional @@ -2012,7 +2012,7 @@ def T(self) -> "DataArray": def drop_vars( self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" ) -> "DataArray": - """Drop variables from this DataArray. + """Returns an array with dropped variables. Parameters ---------- @@ -2026,7 +2026,7 @@ def drop_vars( Returns ------- dropped : Dataset - + New Dataset copied from `self` with variables removed. 
""" ds = self._to_temp_dataset().drop_vars(names, errors=errors) return self._from_temp_dataset(ds) @@ -2365,13 +2365,36 @@ def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: indexes = [self.get_index(dim) for dim in self.dims] return constructor(self.values, *indexes) - def to_dataframe(self, name: Hashable = None) -> pd.DataFrame: + def to_dataframe( + self, name: Hashable = None, dim_order: List[Hashable] = None + ) -> pd.DataFrame: """Convert this array and its coordinates into a tidy pandas.DataFrame. The DataFrame is indexed by the Cartesian product of index coordinates (in the form of a :py:class:`pandas.MultiIndex`). Other coordinates are included as columns in the DataFrame. + + Parameters + ---------- + name + Name to give to this array (required if unnamed). + dim_order + Hierarchical dimension order for the resulting dataframe. + Array content is transposed to this order and then written out as flat + vectors in contiguous order, so the last dimension in this list + will be contiguous in the resulting DataFrame. This has a major + influence on which operations are efficient on the resulting + dataframe. + + If provided, must include all dimensions of this DataArray. By default, + dimensions are sorted according to the DataArray dimensions order. + + Returns + ------- + result + DataArray as a pandas DataFrame. + """ if name is None: name = self.name @@ -2381,7 +2404,6 @@ def to_dataframe(self, name: Hashable = None) -> pd.DataFrame: "DataFrame: use the ``name`` parameter" ) - dims = dict(zip(self.dims, self.shape)) # By using a unique name, we can convert a DataArray into a DataFrame # even if it shares a name with one of its coordinates. # I would normally use unique_name = object() but that results in a @@ -2389,7 +2411,13 @@ def to_dataframe(self, name: Hashable = None) -> pd.DataFrame: # been able to debug (possibly a pandas bug?). unique_name = "__unique_name_identifier_z98xfz98xugfg73ho__" ds = self._to_dataset_whole(name=unique_name) - df = ds._to_dataframe(dims) + + if dim_order is None: + ordered_dims = dict(zip(self.dims, self.shape)) + else: + ordered_dims = ds._normalize_dim_order(dim_order=dim_order) + + df = ds._to_dataframe(ordered_dims) df.columns = [name if c == unique_name else c for c in df.columns] return df @@ -3312,24 +3340,21 @@ def map_blocks( This function cannot add a new chunked dimension. - obj: DataArray, Dataset - Passed to the function as its first argument, one block at a time. args: Sequence Passed to func after unpacking and subsetting any xarray objects by blocks. - xarray objects in args must be aligned with obj, otherwise an error is raised. + xarray objects in args must be aligned with this object, otherwise an error is raised. kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be subset to blocks. Passing dask collections in kwargs is not allowed. template: (optional) DataArray, Dataset xarray object representing the final result after compute is called. If not provided, - the function will be first run on mocked-up data, that looks like ``obj`` but + the function will be first run on mocked-up data, that looks like this object but has sizes 0, to determine properties of the returned object such as dtype, variable names, attributes, new dimensions and new indexes (if any). ``template`` must be provided if the function changes the size of existing dimensions. When provided, ``attrs`` on variables in `template` are copied over to the result. 
Any ``attrs`` set by ``func`` will be ignored. - Returns ------- A single DataArray or Dataset with dask backend, reassembled from the outputs of the @@ -3341,7 +3366,7 @@ def map_blocks( subset to each block. In the more common case where ``func`` can work on numpy arrays, it is recommended to use ``apply_ufunc``. - If none of the variables in ``obj`` is backed by dask arrays, calling this function is + If none of the variables in this object is backed by dask arrays, calling this function is equivalent to calling ``func(obj, *args, **kwargs)``. See Also @@ -3361,9 +3386,12 @@ def map_blocks( ... clim = gb.mean(dim="time") ... return gb - clim >>> time = xr.cftime_range("1990-01", "1992-01", freq="M") + >>> month = xr.DataArray(time.month, coords={"time": time}, dims=["time"]) >>> np.random.seed(123) >>> array = xr.DataArray( - ... np.random.rand(len(time)), dims="time", coords=[time] + ... np.random.rand(len(time)), + ... dims=["time"], + ... coords={"time": time, "month": month}, ... ).chunk() >>> array.map_blocks(calculate_anomaly, template=array).compute() @@ -3374,21 +3402,19 @@ def map_blocks( 0.07673453, 0.22865714, 0.19063865, -0.0590131 ]) Coordinates: * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + month (time) int64 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments to the function being applied in ``xr.map_blocks()``: >>> array.map_blocks( ... calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=array, - ... ) + ... ) # doctest: +ELLIPSIS - array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , - -0.11974531, 0.43791243, 0.14197797, -0.06191987, -0.15073425, - -0.19967375, 0.18619794, -0.05100474, -0.42989909, -0.09153273, - 0.24841842, -0.30708526, -0.31412523, 0.04197439, 0.0422506 , - 0.14482397, 0.35985481, 0.23487834, 0.12144652]) + dask.array Coordinates: - * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + month (time) int64 dask.array """ from .parallel import map_blocks @@ -3878,9 +3904,10 @@ def argmin( >>> array.isel(array.argmin(...)) array(-1) - >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]], - ... [[1, 3, 2], [2, -5, 1], [2, 3, 1]]], - ... dims=("x", "y", "z")) + >>> array = xr.DataArray( + ... [[[3, 2, 1], [3, 1, 2], [2, 1, 3]], [[1, 3, 2], [2, -5, 1], [2, 3, 1]]], + ... dims=("x", "y", "z"), + ... ) >>> array.min(dim="x") array([[ 1, 2, 1], @@ -3944,7 +3971,7 @@ def argmax( this is deprecated, in future will return a dict with indices for all dimensions; to return a dict with all dimensions now, pass '...'. axis : int, optional - Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments + Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments can be supplied. keep_attrs : bool, optional If True, the attributes (`attrs`) will be copied from the original @@ -3980,9 +4007,10 @@ def argmax( array(3) - >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]], - ... [[1, 3, 2], [2, 5, 1], [2, 3, 1]]], - ... dims=("x", "y", "z")) + >>> array = xr.DataArray( + ... [[[3, 2, 1], [3, 1, 2], [2, 1, 3]], [[1, 3, 2], [2, 5, 1], [2, 3, 1]]], + ... dims=("x", "y", "z"), + ... 
) >>> array.max(dim="x") array([[3, 3, 2], diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0d5e793aa94..7708b515427 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1923,7 +1923,7 @@ def isel( missing_dims : {"raise", "warn", "ignore"}, default "raise" What to do if dimensions that should be selected from are not present in the Dataset: - - "exception": raise an exception + - "raise": raise an exception - "warning": raise a warning, and ignore the missing dimensions - "ignore": ignore the missing dimensions **indexers_kwargs : {dim: indexer, ...}, optional @@ -4155,7 +4155,7 @@ def interpolate_na( numpy.interp scipy.interpolate """ - from .missing import interp_na, _apply_over_vars_with_dim + from .missing import _apply_over_vars_with_dim, interp_na new = _apply_over_vars_with_dim( interp_na, @@ -4189,7 +4189,7 @@ def ffill(self, dim: Hashable, limit: int = None) -> "Dataset": ------- Dataset """ - from .missing import ffill, _apply_over_vars_with_dim + from .missing import _apply_over_vars_with_dim, ffill new = _apply_over_vars_with_dim(ffill, self, dim=dim, limit=limit) return new @@ -4214,7 +4214,7 @@ def bfill(self, dim: Hashable, limit: int = None) -> "Dataset": ------- Dataset """ - from .missing import bfill, _apply_over_vars_with_dim + from .missing import _apply_over_vars_with_dim, bfill new = _apply_over_vars_with_dim(bfill, self, dim=dim, limit=limit) return new @@ -4535,23 +4535,75 @@ def to_array(self, dim="variable", name=None): data, coords, dims, attrs=self.attrs, name=name, indexes=indexes ) - def _to_dataframe(self, ordered_dims): + def _normalize_dim_order( + self, dim_order: List[Hashable] = None + ) -> Dict[Hashable, int]: + """ + Check the validity of the provided dimensions if any and return the mapping + between dimension name and their size. + + Parameters + ---------- + dim_order + Dimension order to validate (default to the alphabetical order if None). + + Returns + ------- + result + Validated dimensions mapping. + + """ + if dim_order is None: + dim_order = list(self.dims) + elif set(dim_order) != set(self.dims): + raise ValueError( + "dim_order {} does not match the set of dimensions of this " + "Dataset: {}".format(dim_order, list(self.dims)) + ) + + ordered_dims = {k: self.dims[k] for k in dim_order} + + return ordered_dims + + def _to_dataframe(self, ordered_dims: Mapping[Hashable, int]): columns = [k for k in self.variables if k not in self.dims] data = [ self._variables[k].set_dims(ordered_dims).values.reshape(-1) for k in columns ] - index = self.coords.to_index(ordered_dims) + index = self.coords.to_index([*ordered_dims]) return pd.DataFrame(dict(zip(columns, data)), index=index) - def to_dataframe(self): + def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame: """Convert this dataset into a pandas.DataFrame. Non-index variables in this dataset form the columns of the - DataFrame. The DataFrame is be indexed by the Cartesian product of + DataFrame. The DataFrame is indexed by the Cartesian product of this dataset's indices. + + Parameters + ---------- + dim_order + Hierarchical dimension order for the resulting dataframe. All + arrays are transposed to this order and then written out as flat + vectors in contiguous order, so the last dimension in this list + will be contiguous in the resulting DataFrame. This has a major + influence on which operations are efficient on the resulting + dataframe. + + If provided, must include all dimensions of this dataset. 
By + default, dimensions are sorted alphabetically. + + Returns + ------- + result + Dataset as a pandas DataFrame. + """ - return self._to_dataframe(self.dims) + + ordered_dims = self._normalize_dim_order(dim_order=dim_order) + + return self._to_dataframe(ordered_dims=ordered_dims) def _set_sparse_data_from_dataframe( self, idx: pd.Index, arrays: List[Tuple[Hashable, np.ndarray]], dims: tuple @@ -4705,11 +4757,11 @@ def to_dask_dataframe(self, dim_order=None, set_index=False): influence on which operations are efficient on the resulting dask dataframe. - If provided, must include all dimensions on this dataset. By + If provided, must include all dimensions of this dataset. By default, dimensions are sorted alphabetically. set_index : bool, optional If set_index=True, the dask DataFrame is indexed by this dataset's - coordinate. Since dask DataFrames to not support multi-indexes, + coordinate. Since dask DataFrames do not support multi-indexes, set_index only works if the dataset only contains one dimension. Returns @@ -4720,15 +4772,7 @@ def to_dask_dataframe(self, dim_order=None, set_index=False): import dask.array as da import dask.dataframe as dd - if dim_order is None: - dim_order = list(self.dims) - elif set(dim_order) != set(self.dims): - raise ValueError( - "dim_order {} does not match the set of dimensions on this " - "Dataset: {}".format(dim_order, list(self.dims)) - ) - - ordered_dims = {k: self.dims[k] for k in dim_order} + ordered_dims = self._normalize_dim_order(dim_order=dim_order) columns = list(ordered_dims) columns.extend(k for k in self.coords if k not in self.dims) @@ -4755,6 +4799,8 @@ def to_dask_dataframe(self, dim_order=None, set_index=False): df = dd.concat(series_list, axis=1) if set_index: + dim_order = [*ordered_dims] + if len(dim_order) == 1: (dim,) = dim_order df = df.set_index(dim) @@ -5781,8 +5827,6 @@ def map_blocks( This function cannot add a new chunked dimension. - obj: DataArray, Dataset - Passed to the function as its first argument, one block at a time. args: Sequence Passed to func after unpacking and subsetting any xarray objects by blocks. xarray objects in args must be aligned with obj, otherwise an error is raised. @@ -5791,7 +5835,7 @@ def map_blocks( subset to blocks. Passing dask collections in kwargs is not allowed. template: (optional) DataArray, Dataset xarray object representing the final result after compute is called. If not provided, - the function will be first run on mocked-up data, that looks like ``obj`` but + the function will be first run on mocked-up data, that looks like this object but has sizes 0, to determine properties of the returned object such as dtype, variable names, attributes, new dimensions and new indexes (if any). ``template`` must be provided if the function changes the size of existing dimensions. @@ -5810,7 +5854,7 @@ def map_blocks( subset to each block. In the more common case where ``func`` can work on numpy arrays, it is recommended to use ``apply_ufunc``. - If none of the variables in ``obj`` is backed by dask arrays, calling this function is + If none of the variables in this object is backed by dask arrays, calling this function is equivalent to calling ``func(obj, *args, **kwargs)``. See Also @@ -5830,20 +5874,22 @@ def map_blocks( ... clim = gb.mean(dim="time") ... return gb - clim >>> time = xr.cftime_range("1990-01", "1992-01", freq="M") + >>> month = xr.DataArray(time.month, coords={"time": time}, dims=["time"]) >>> np.random.seed(123) >>> array = xr.DataArray( - ... 
np.random.rand(len(time)), dims="time", coords=[time] + ... np.random.rand(len(time)), + ... dims=["time"], + ... coords={"time": time, "month": month}, ... ).chunk() >>> ds = xr.Dataset({"a": array}) >>> ds.map_blocks(calculate_anomaly, template=ds).compute() - - array([ 0.12894847, 0.11323072, -0.0855964 , -0.09334032, 0.26848862, - 0.12382735, 0.22460641, 0.07650108, -0.07673453, -0.22865714, - -0.19063865, 0.0590131 , -0.12894847, -0.11323072, 0.0855964 , - 0.09334032, -0.26848862, -0.12382735, -0.22460641, -0.07650108, - 0.07673453, 0.22865714, 0.19063865, -0.0590131 ]) + + Dimensions: (time: 24) Coordinates: * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + month (time) int64 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 + Data variables: + a (time) float64 0.1289 0.1132 -0.0856 ... 0.2287 0.1906 -0.05901 Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments to the function being applied in ``xr.map_blocks()``: @@ -5851,14 +5897,13 @@ def map_blocks( >>> ds.map_blocks( ... calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=ds, ... ) - - array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , - -0.11974531, 0.43791243, 0.14197797, -0.06191987, -0.15073425, - -0.19967375, 0.18619794, -0.05100474, -0.42989909, -0.09153273, - 0.24841842, -0.30708526, -0.31412523, 0.04197439, 0.0422506 , - 0.14482397, 0.35985481, 0.23487834, 0.12144652]) + + Dimensions: (time: 24) Coordinates: - * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + month (time) int64 dask.array + Data variables: + a (time) float64 dask.array """ from .parallel import map_blocks @@ -5980,7 +6025,7 @@ def polyfit( skipna_da = np.any(da.isnull()) dims_to_stack = [dimname for dimname in da.dims if dimname != dim] - stacked_coords = {} + stacked_coords: Dict[Hashable, DataArray] = {} if dims_to_stack: stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked") rhs = da.transpose(dim, *dims_to_stack).stack( diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index df579d23544..377e7377b6a 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -4,9 +4,9 @@ accept or return xarray objects. """ import contextlib +import datetime import inspect import warnings -from distutils.version import LooseVersion from functools import partial import numpy as np @@ -14,21 +14,13 @@ from . 
import dask_array_compat, dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast -from .pycompat import dask_array_type +from .pycompat import cupy_array_type, dask_array_type try: import dask.array as dask_array except ImportError: dask_array = None # type: ignore -# TODO: remove after we stop supporting dask < 2.9.1 -try: - import dask - - dask_version = dask.__version__ -except ImportError: - dask_version = None - def _dask_or_eager_func( name, @@ -158,17 +150,23 @@ def trapz(y, x, axis): ) -def asarray(data): +def asarray(data, xp=np): return ( data if (isinstance(data, dask_array_type) or hasattr(data, "__array_function__")) - else np.asarray(data) + else xp.asarray(data) ) def as_shared_dtype(scalars_or_arrays): """Cast a arrays to a shared dtype using xarray's type promotion rules.""" - arrays = [asarray(x) for x in scalars_or_arrays] + + if any([isinstance(x, cupy_array_type) for x in scalars_or_arrays]): + import cupy as cp + + arrays = [asarray(x, xp=cp) for x in scalars_or_arrays] + else: + arrays = [asarray(x) for x in scalars_or_arrays] # Pass arrays directly instead of dtypes to result_type so scalars # get handled properly. # Note that result_type() safely gets the dtype from dask arrays without @@ -211,16 +209,6 @@ def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): lazy_equiv = lazy_array_equiv(arr1, arr2) if lazy_equiv is None: - # TODO: remove after we require dask >= 2.9.1 - sufficient_dask_version = ( - dask_version is not None and LooseVersion(dask_version) >= "2.9.1" - ) - if not sufficient_dask_version and any( - isinstance(arr, dask_array_type) for arr in [arr1, arr2] - ): - arr1 = np.array(arr1) - arr2 = np.array(arr2) - return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all()) else: return lazy_equiv @@ -483,8 +471,7 @@ def timedelta_to_numeric(value, datetime_unit="ns", dtype=float): def _to_pytimedelta(array, unit="us"): - index = pd.TimedeltaIndex(array.ravel(), unit=unit) - return index.to_pytimedelta().reshape(array.shape) + return array.astype(f"timedelta64[{unit}]").astype(datetime.timedelta) def np_timedelta64_to_float(array, datetime_unit): diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 28eaae5f05b..9aa20f2b87e 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -261,6 +261,8 @@ def inline_variable_array_repr(var, max_width): return inline_dask_repr(var.data) elif isinstance(var._data, sparse_array_type): return inline_sparse_repr(var.data) + elif hasattr(var._data, "_repr_inline_"): + return var._data._repr_inline_(max_width) elif hasattr(var._data, "__array_function__"): return maybe_truncate(repr(var._data).replace("\n", " "), max_width) else: diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 400ef61502e..5521b33e2e4 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -1,18 +1,23 @@ import uuid from collections import OrderedDict -from functools import partial +from functools import lru_cache, partial from html import escape import pkg_resources from .formatting import inline_variable_array_repr, short_data_repr -CSS_FILE_PATH = "/".join(("static", "css", "style.css")) -CSS_STYLE = pkg_resources.resource_string("xarray", CSS_FILE_PATH).decode("utf8") +STATIC_FILES = ("static/html/icons-svg-inline.html", "static/css/style.css") -ICONS_SVG_PATH = "/".join(("static", "html", "icons-svg-inline.html")) -ICONS_SVG = pkg_resources.resource_string("xarray", ICONS_SVG_PATH).decode("utf8") 
+@lru_cache(None) +def _load_static_files(): + """Lazily load the resource files into memory the first time they are needed + """ + return [ + pkg_resources.resource_string("xarray", fname).decode("utf8") + for fname in STATIC_FILES + ] def short_data_repr_html(array): @@ -233,9 +238,10 @@ def _obj_repr(obj, header_components, sections): header = f"
<div class='xr-header'>{''.join(h for h in header_components)}</div>"
     sections = "".join(f"<li class='xr-section-item'>{s}</li>" for s in sections)
 
+    icons_svg, css_style = _load_static_files()
     return (
         "<div>"
-        f"{ICONS_SVG}<style>{CSS_STYLE}</style>"
+        f"{icons_svg}<style>{css_style}</style>"
         f"<pre class='xr-text-repr-fallback'>{escape(repr(obj))}</pre>"