diff --git a/.github/ISSUE_TEMPLATE/bugreport.yml b/.github/ISSUE_TEMPLATE/bugreport.yml index 043584f3ea6..ba5bc8abaea 100644 --- a/.github/ISSUE_TEMPLATE/bugreport.yml +++ b/.github/ISSUE_TEMPLATE/bugreport.yml @@ -54,6 +54,12 @@ body: attributes: label: Environment description: | - Paste the output of `xr.show_versions()` here + Paste the output of `xr.show_versions()` between the `
<details>` tags, leaving an empty line following the opening tag. + value: | + <details>
+ + + + </details>
validations: required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 0ad7e5f3e13..994c594685d 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -6,3 +6,9 @@ contact_links: Ask questions and discuss with other community members here. If you have a question like "How do I concatenate a list of datasets?" then please include a self-contained reproducible example if possible. + - name: Raster analysis usage question + url: https://github.com/corteva/rioxarray/discussions + about: | + If you are using the rioxarray extension (engine='rasterio'), or have questions about + raster analysis such as geospatial formats, coordinate reprojection, etc., + please use the rioxarray discussion forum. diff --git a/.github/stale.yml b/.github/stale.yml index f4057844d01..e29b7ddcc5e 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -1,7 +1,7 @@ # Configuration for probot-stale - https://github.com/probot/stale # Number of days of inactivity before an Issue or Pull Request becomes stale -daysUntilStale: 700 # start with a large number and reduce shortly +daysUntilStale: 600 # start with a large number and reduce shortly # Number of days of inactivity before an Issue or Pull Request with the stale label is closed. # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. @@ -14,10 +14,10 @@ exemptLabels: - "[Status] Maybe Later" # Set to true to ignore issues in a project (defaults to false) -exemptProjects: false +exemptProjects: true # Set to true to ignore issues in a milestone (defaults to false) -exemptMilestones: false +exemptMilestones: true # Set to true to ignore issues with an assignee (defaults to false) exemptAssignees: true @@ -31,6 +31,9 @@ markComment: | If this issue remains relevant, please comment here or remove the `stale` label; otherwise it will be marked as closed automatically +closeComment: | + The stalebot didn't hear anything for a while, so it closed this. Please reopen if this is still an issue. + # Comment to post when removing the stale label. # unmarkComment: > # Your comment here. @@ -40,8 +43,7 @@ markComment: | # Your comment here. # Limit the number of actions per hour, from 1-30. 
Default is 30 -limitPerRun: 1 # start with a small number - +limitPerRun: 2 # start with a small number # Limit to only `issues` or `pulls` # only: issues diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 6d482445f96..034ffee40ad 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -67,7 +67,7 @@ jobs: cp benchmarks/README_CI.md benchmarks.log .asv/results/ working-directory: ${{ env.ASV_DIR }} - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 if: always() with: name: asv-benchmark-results-${{ runner.os }} diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index ef1666359fe..f2542ab52d5 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -109,7 +109,7 @@ jobs: $PYTEST_EXTRA_FLAGS - name: Upload code coverage to Codecov - uses: codecov/codecov-action@v2.1.0 + uses: codecov/codecov-action@v3.0.0 with: file: ./coverage.xml flags: unittests,${{ matrix.env }} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 205265b8c54..a5c1a2de5ad 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -98,13 +98,13 @@ jobs: - name: Upload test results if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: Test results for ${{ runner.os }}-${{ matrix.python-version }} path: pytest.xml - name: Upload code coverage to Codecov - uses: codecov/codecov-action@v2.1.0 + uses: codecov/codecov-action@v3.0.0 with: file: ./coverage.xml flags: unittests @@ -118,7 +118,7 @@ jobs: if: github.repository == 'pydata/xarray' steps: - name: Upload - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index c88cf556a50..9cad271ce6f 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -41,7 +41,7 @@ jobs: else echo "✅ Looks good" fi - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: releases path: dist @@ -54,7 +54,7 @@ jobs: name: Install Python with: python-version: 3.8 - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: name: releases path: dist @@ -85,7 +85,7 @@ jobs: if: github.event_name == 'release' runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: name: releases path: dist diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 6091306ed8b..81d1c7db4b8 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -92,7 +92,7 @@ jobs: && steps.status.outcome == 'failure' && github.event_name == 'schedule' && github.repository == 'pydata/xarray' - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: output-${{ matrix.python-version }}-log path: output-${{ matrix.python-version }}-log @@ -114,7 +114,7 @@ jobs: - uses: actions/setup-python@v3 with: python-version: "3.x" - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: path: /tmp/workspace/logs - name: Move all log files into a single directory diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 47f61054b7b..be87d823c98 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ # https://pre-commit.com/ repos: - repo: 
https://github.com/pre-commit/pre-commit-hooks - rev: v4.1.0 + rev: v4.2.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -19,7 +19,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v2.31.1 + rev: v2.32.0 hooks: - id: pyupgrade args: diff --git a/README.md b/README.md new file mode 100644 index 00000000000..57a68d42192 --- /dev/null +++ b/README.md @@ -0,0 +1,130 @@ +# xarray: N-D labeled arrays and datasets + +[![image](https://github.com/pydata/xarray/workflows/CI/badge.svg?branch=main)](https://github.com/pydata/xarray/actions?query=workflow%3ACI) +[![image](https://codecov.io/gh/pydata/xarray/branch/main/graph/badge.svg)](https://codecov.io/gh/pydata/xarray) +[![image](https://readthedocs.org/projects/xray/badge/?version=latest)](https://docs.xarray.dev/) +[![image](https://img.shields.io/badge/benchmarked%20by-asv-green.svg?style=flat)](https://pandas.pydata.org/speed/xarray/) +[![image](https://img.shields.io/pypi/v/xarray.svg)](https://pypi.python.org/pypi/xarray/) +[![image](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black) +[![image](https://zenodo.org/badge/DOI/10.5281/zenodo.598201.svg)](https://doi.org/10.5281/zenodo.598201) +[![image](https://img.shields.io/twitter/follow/xarray_dev?style=social)](https://twitter.com/xarray_dev) + +**xarray** (formerly **xray**) is an open source project and Python +package that makes working with labelled multi-dimensional arrays +simple, efficient, and fun! + +Xarray introduces labels in the form of dimensions, coordinates and +attributes on top of raw [NumPy](https://www.numpy.org)-like arrays, +which allows for a more intuitive, more concise, and less error-prone +developer experience. The package includes a large and growing library +of domain-agnostic functions for advanced analytics and visualization +with these data structures. + +Xarray was inspired by and borrows heavily from +[pandas](https://pandas.pydata.org), the popular data analysis package +focused on labelled tabular data. It is particularly tailored to working +with [netCDF](https://www.unidata.ucar.edu/software/netcdf) files, which +were the source of xarray\'s data model, and integrates tightly with +[dask](https://dask.org) for parallel computing. + +## Why xarray? + +Multi-dimensional (a.k.a. N-dimensional, ND) arrays (sometimes called +"tensors") are an essential part of computational science. They are +encountered in a wide range of fields, including physics, astronomy, +geoscience, bioinformatics, engineering, finance, and deep learning. In +Python, [NumPy](https://www.numpy.org) provides the fundamental data +structure and API for working with raw ND arrays. However, real-world +datasets are usually more than just raw numbers; they have labels which +encode information about how the array values map to locations in space, +time, etc. + +Xarray doesn\'t just keep track of labels on arrays \-- it uses them to +provide a powerful and concise interface. For example: + +- Apply operations over dimensions by name: `x.sum('time')`. +- Select values by label instead of integer location: + `x.loc['2014-01-01']` or `x.sel(time='2014-01-01')`. +- Mathematical operations (e.g., `x - y`) vectorize across multiple + dimensions (array broadcasting) based on dimension names, not shape. +- Flexible split-apply-combine operations with groupby: + `x.groupby('time.dayofyear').mean()`. 
+- Database like alignment based on coordinate labels that smoothly + handles missing values: `x, y = xr.align(x, y, join='outer')`. +- Keep track of arbitrary metadata in the form of a Python dictionary: + `x.attrs`. + +## Documentation + +Learn more about xarray in its official documentation at +. + +Try out an [interactive Jupyter +notebook](https://mybinder.org/v2/gh/pydata/xarray/main?urlpath=lab/tree/doc/examples/weather-data.ipynb). + +## Contributing + +You can find information about contributing to xarray at our +[Contributing +page](https://docs.xarray.dev/en/latest/contributing.html#). + +## Get in touch + +- Ask usage questions ("How do I?") on + [StackOverflow](https://stackoverflow.com/questions/tagged/python-xarray). +- Report bugs, suggest features or view the source code [on + GitHub](https://github.com/pydata/xarray). +- For less well defined questions or ideas, or to announce other + projects of interest to xarray users, use the [mailing + list](https://groups.google.com/forum/#!forum/xarray). + +## NumFOCUS + +[![image](https://numfocus.org/wp-content/uploads/2017/07/NumFocus_LRG.png)](https://numfocus.org/) + +Xarray is a fiscally sponsored project of +[NumFOCUS](https://numfocus.org), a nonprofit dedicated to supporting +the open source scientific computing community. If you like Xarray and +want to support our mission, please consider making a +[donation](https://numfocus.salsalabs.org/donate-to-xarray/) to support +our efforts. + +## History + +Xarray is an evolution of an internal tool developed at [The Climate +Corporation](http://climate.com/). It was originally written by Climate +Corp researchers Stephan Hoyer, Alex Kleeman and Eugene Brevdo and was +released as open source in May 2014. The project was renamed from +"xray" in January 2016. Xarray became a fiscally sponsored project of +[NumFOCUS](https://numfocus.org) in August 2018. + +## License + +Copyright 2014-2019, xarray Developers + +Licensed under the Apache License, Version 2.0 (the "License"); you +may not use this file except in compliance with the License. You may +obtain a copy of the License at + + + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Xarray bundles portions of pandas, NumPy and Seaborn, all of which are +available under a "3-clause BSD" license: + +- pandas: setup.py, xarray/util/print_versions.py +- NumPy: xarray/core/npcompat.py +- Seaborn: _determine_cmap_params in xarray/core/plot/utils.py + +Xarray also bundles portions of CPython, which is available under the +"Python Software Foundation License" in xarray/core/pycompat.py. + +Xarray uses icons from the icomoon package (free version), which is +available under the "CC BY 4.0" license. + +The full text of these licenses are included in the licenses directory. diff --git a/README.rst b/README.rst deleted file mode 100644 index e07febdf747..00000000000 --- a/README.rst +++ /dev/null @@ -1,148 +0,0 @@ -xarray: N-D labeled arrays and datasets -======================================= - -.. image:: https://github.com/pydata/xarray/workflows/CI/badge.svg?branch=main - :target: https://github.com/pydata/xarray/actions?query=workflow%3ACI -.. image:: https://codecov.io/gh/pydata/xarray/branch/main/graph/badge.svg - :target: https://codecov.io/gh/pydata/xarray -.. 
image:: https://readthedocs.org/projects/xray/badge/?version=latest - :target: https://docs.xarray.dev/ -.. image:: https://img.shields.io/badge/benchmarked%20by-asv-green.svg?style=flat - :target: https://pandas.pydata.org/speed/xarray/ -.. image:: https://img.shields.io/pypi/v/xarray.svg - :target: https://pypi.python.org/pypi/xarray/ -.. image:: https://img.shields.io/badge/code%20style-black-000000.svg - :target: https://github.com/python/black -.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.598201.svg - :target: https://doi.org/10.5281/zenodo.598201 -.. image:: https://img.shields.io/twitter/follow/xarray_dev?style=social - :target: https://twitter.com/xarray_dev - - -**xarray** (formerly **xray**) is an open source project and Python package -that makes working with labelled multi-dimensional arrays simple, -efficient, and fun! - -Xarray introduces labels in the form of dimensions, coordinates and -attributes on top of raw NumPy_-like arrays, which allows for a more -intuitive, more concise, and less error-prone developer experience. -The package includes a large and growing library of domain-agnostic functions -for advanced analytics and visualization with these data structures. - -Xarray was inspired by and borrows heavily from pandas_, the popular data -analysis package focused on labelled tabular data. -It is particularly tailored to working with netCDF_ files, which were the -source of xarray's data model, and integrates tightly with dask_ for parallel -computing. - -.. _NumPy: https://www.numpy.org -.. _pandas: https://pandas.pydata.org -.. _dask: https://dask.org -.. _netCDF: https://www.unidata.ucar.edu/software/netcdf - -Why xarray? ------------ - -Multi-dimensional (a.k.a. N-dimensional, ND) arrays (sometimes called -"tensors") are an essential part of computational science. -They are encountered in a wide range of fields, including physics, astronomy, -geoscience, bioinformatics, engineering, finance, and deep learning. -In Python, NumPy_ provides the fundamental data structure and API for -working with raw ND arrays. -However, real-world datasets are usually more than just raw numbers; -they have labels which encode information about how the array values map -to locations in space, time, etc. - -Xarray doesn't just keep track of labels on arrays -- it uses them to provide a -powerful and concise interface. For example: - -- Apply operations over dimensions by name: ``x.sum('time')``. -- Select values by label instead of integer location: - ``x.loc['2014-01-01']`` or ``x.sel(time='2014-01-01')``. -- Mathematical operations (e.g., ``x - y``) vectorize across multiple - dimensions (array broadcasting) based on dimension names, not shape. -- Flexible split-apply-combine operations with groupby: - ``x.groupby('time.dayofyear').mean()``. -- Database like alignment based on coordinate labels that smoothly - handles missing values: ``x, y = xr.align(x, y, join='outer')``. -- Keep track of arbitrary metadata in the form of a Python dictionary: - ``x.attrs``. - -Documentation -------------- - -Learn more about xarray in its official documentation at https://docs.xarray.dev/ - -Contributing ------------- - -You can find information about contributing to xarray at our `Contributing page `_. - -Get in touch ------------- - -- Ask usage questions ("How do I?") on `StackOverflow`_. -- Report bugs, suggest features or view the source code `on GitHub`_. 
-- For less well defined questions or ideas, or to announce other projects of - interest to xarray users, use the `mailing list`_. - -.. _StackOverFlow: https://stackoverflow.com/questions/tagged/python-xarray -.. _mailing list: https://groups.google.com/forum/#!forum/xarray -.. _on GitHub: https://github.com/pydata/xarray - -NumFOCUS --------- - -.. image:: https://numfocus.org/wp-content/uploads/2017/07/NumFocus_LRG.png - :scale: 25 % - :target: https://numfocus.org/ - -Xarray is a fiscally sponsored project of NumFOCUS_, a nonprofit dedicated -to supporting the open source scientific computing community. If you like -Xarray and want to support our mission, please consider making a donation_ -to support our efforts. - -.. _donation: https://numfocus.salsalabs.org/donate-to-xarray/ - -History -------- - -Xarray is an evolution of an internal tool developed at `The Climate -Corporation`__. It was originally written by Climate Corp researchers Stephan -Hoyer, Alex Kleeman and Eugene Brevdo and was released as open source in -May 2014. The project was renamed from "xray" in January 2016. Xarray became a -fiscally sponsored project of NumFOCUS_ in August 2018. - -__ http://climate.com/ -.. _NumFOCUS: https://numfocus.org - -License -------- - -Copyright 2014-2019, xarray Developers - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Xarray bundles portions of pandas, NumPy and Seaborn, all of which are available -under a "3-clause BSD" license: -- pandas: setup.py, xarray/util/print_versions.py -- NumPy: xarray/core/npcompat.py -- Seaborn: _determine_cmap_params in xarray/core/plot/utils.py - -Xarray also bundles portions of CPython, which is available under the "Python -Software Foundation License" in xarray/core/pycompat.py. - -Xarray uses icons from the icomoon package (free version), which is -available under the "CC BY 4.0" license. - -The full text of these licenses are included in the licenses directory. 
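Taken together, the labelled-operations bullets above translate into code along these lines — a minimal sketch; the array values, coordinate labels, and attribute names are illustrative only:

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Attach named dimensions and coordinate labels to a raw NumPy array.
    x = xr.DataArray(
        np.arange(12).reshape(4, 3),
        dims=("time", "space"),
        coords={
            "time": pd.date_range("2014-01-01", periods=4),
            "space": ["a", "b", "c"],
        },
    )

    x.sum("time")                       # reduce over a dimension by name
    x.sel(time="2014-01-01")            # select by label, not integer position
    x.groupby("time.dayofyear").mean()  # split-apply-combine on a datetime component
    x.attrs["units"] = "kelvin"         # arbitrary metadata rides along in a dict

    # Align on coordinate labels; positions missing from either side become NaN.
    y = x.isel(time=slice(2))
    a, b = xr.align(x, y, join="outer")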
diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 8ed9e47be01..30bc9f858f2 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -319,66 +319,6 @@ IndexVariable.sizes IndexVariable.values - ufuncs.angle - ufuncs.arccos - ufuncs.arccosh - ufuncs.arcsin - ufuncs.arcsinh - ufuncs.arctan - ufuncs.arctan2 - ufuncs.arctanh - ufuncs.ceil - ufuncs.conj - ufuncs.copysign - ufuncs.cos - ufuncs.cosh - ufuncs.deg2rad - ufuncs.degrees - ufuncs.exp - ufuncs.expm1 - ufuncs.fabs - ufuncs.fix - ufuncs.floor - ufuncs.fmax - ufuncs.fmin - ufuncs.fmod - ufuncs.fmod - ufuncs.frexp - ufuncs.hypot - ufuncs.imag - ufuncs.iscomplex - ufuncs.isfinite - ufuncs.isinf - ufuncs.isnan - ufuncs.isreal - ufuncs.ldexp - ufuncs.log - ufuncs.log10 - ufuncs.log1p - ufuncs.log2 - ufuncs.logaddexp - ufuncs.logaddexp2 - ufuncs.logical_and - ufuncs.logical_not - ufuncs.logical_or - ufuncs.logical_xor - ufuncs.maximum - ufuncs.minimum - ufuncs.nextafter - ufuncs.rad2deg - ufuncs.radians - ufuncs.real - ufuncs.rint - ufuncs.sign - ufuncs.signbit - ufuncs.sin - ufuncs.sinh - ufuncs.sqrt - ufuncs.square - ufuncs.tan - ufuncs.tanh - ufuncs.trunc - plot.plot plot.line plot.step diff --git a/doc/api.rst b/doc/api.rst index 7fdd775e168..644b86cdebb 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -610,84 +610,6 @@ Plotting DataArray.plot.step DataArray.plot.surface -.. _api.ufuncs: - -Universal functions -=================== - -.. warning:: - - With recent versions of NumPy, Dask and xarray, NumPy ufuncs are now - supported directly on all xarray and Dask objects. This obviates the need - for the ``xarray.ufuncs`` module, which should not be used for new code - unless compatibility with versions of NumPy prior to v1.13 is - required. They will be removed once support for NumPy prior to - v1.17 is dropped. - -These functions are copied from NumPy, but extended to work on NumPy arrays, -dask arrays and all xarray objects. 
You can find them in the ``xarray.ufuncs`` -module: - -:py:attr:`~ufuncs.angle` -:py:attr:`~ufuncs.arccos` -:py:attr:`~ufuncs.arccosh` -:py:attr:`~ufuncs.arcsin` -:py:attr:`~ufuncs.arcsinh` -:py:attr:`~ufuncs.arctan` -:py:attr:`~ufuncs.arctan2` -:py:attr:`~ufuncs.arctanh` -:py:attr:`~ufuncs.ceil` -:py:attr:`~ufuncs.conj` -:py:attr:`~ufuncs.copysign` -:py:attr:`~ufuncs.cos` -:py:attr:`~ufuncs.cosh` -:py:attr:`~ufuncs.deg2rad` -:py:attr:`~ufuncs.degrees` -:py:attr:`~ufuncs.exp` -:py:attr:`~ufuncs.expm1` -:py:attr:`~ufuncs.fabs` -:py:attr:`~ufuncs.fix` -:py:attr:`~ufuncs.floor` -:py:attr:`~ufuncs.fmax` -:py:attr:`~ufuncs.fmin` -:py:attr:`~ufuncs.fmod` -:py:attr:`~ufuncs.fmod` -:py:attr:`~ufuncs.frexp` -:py:attr:`~ufuncs.hypot` -:py:attr:`~ufuncs.imag` -:py:attr:`~ufuncs.iscomplex` -:py:attr:`~ufuncs.isfinite` -:py:attr:`~ufuncs.isinf` -:py:attr:`~ufuncs.isnan` -:py:attr:`~ufuncs.isreal` -:py:attr:`~ufuncs.ldexp` -:py:attr:`~ufuncs.log` -:py:attr:`~ufuncs.log10` -:py:attr:`~ufuncs.log1p` -:py:attr:`~ufuncs.log2` -:py:attr:`~ufuncs.logaddexp` -:py:attr:`~ufuncs.logaddexp2` -:py:attr:`~ufuncs.logical_and` -:py:attr:`~ufuncs.logical_not` -:py:attr:`~ufuncs.logical_or` -:py:attr:`~ufuncs.logical_xor` -:py:attr:`~ufuncs.maximum` -:py:attr:`~ufuncs.minimum` -:py:attr:`~ufuncs.nextafter` -:py:attr:`~ufuncs.rad2deg` -:py:attr:`~ufuncs.radians` -:py:attr:`~ufuncs.real` -:py:attr:`~ufuncs.rint` -:py:attr:`~ufuncs.sign` -:py:attr:`~ufuncs.signbit` -:py:attr:`~ufuncs.sin` -:py:attr:`~ufuncs.sinh` -:py:attr:`~ufuncs.sqrt` -:py:attr:`~ufuncs.square` -:py:attr:`~ufuncs.tan` -:py:attr:`~ufuncs.tanh` -:py:attr:`~ufuncs.trunc` - IO / Conversion =============== diff --git a/doc/ecosystem.rst b/doc/ecosystem.rst index 2b49b1529e1..61b60ab9e83 100644 --- a/doc/ecosystem.rst +++ b/doc/ecosystem.rst @@ -74,6 +74,7 @@ Extend xarray capabilities - `nxarray `_: NeXus input/output capability for xarray. - `xarray-compare `_: xarray extension for data comparison. - `xarray-dataclasses `_: xarray extension for typed DataArray and Dataset creation. +- `xarray_einstats `_: Statistics, linear algebra and einops for xarray - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations). - `xpublish `_: Publish Xarray Datasets via a Zarr compatible REST API. - `xrft `_: Fourier transforms for xarray data. diff --git a/doc/internals/how-to-add-new-backend.rst b/doc/internals/how-to-add-new-backend.rst index 506a8eb21be..bb497a1c062 100644 --- a/doc/internals/how-to-add-new-backend.rst +++ b/doc/internals/how-to-add-new-backend.rst @@ -439,27 +439,25 @@ currently available in :py:mod:`~xarray.backends` module. .. _RST preferred_chunks: -Backend preferred chunks -^^^^^^^^^^^^^^^^^^^^^^^^ - -The backend is not directly involved in `Dask `__ -chunking, since it is internally managed by Xarray. However, the backend can -define the preferred chunk size inside the variable’s encoding -``var.encoding["preferred_chunks"]``. The ``preferred_chunks`` may be useful -to improve performances with lazy loading. ``preferred_chunks`` shall be a -dictionary specifying chunk size per dimension like -``{“dim1”: 1000, “dim2”: 2000}`` or -``{“dim1”: [1000, 100], “dim2”: [2000, 2000, 2000]]}``. - -The ``preferred_chunks`` is used by Xarray to define the chunk size in some -special cases: - -- if ``chunks`` along a dimension is ``None`` or not defined -- if ``chunks`` is ``"auto"``. - -In the first case Xarray uses the chunks size specified in -``preferred_chunks``. 
-In the second case Xarray accommodates ideal chunk sizes, preserving if -possible the "preferred_chunks". The ideal chunk size is computed using -:py:func:`dask.array.core.normalize_chunks`, setting -``previous_chunks = preferred_chunks``. +Preferred chunk sizes +^^^^^^^^^^^^^^^^^^^^^ + +To potentially improve performance with lazy loading, the backend may define for each +variable the chunk sizes that it prefers---that is, sizes that align with how the +variable is stored. (Note that the backend is not directly involved in `Dask +`__ chunking, because Xarray internally manages chunking.) To define +the preferred chunk sizes, store a mapping within the variable's encoding under the key +``"preferred_chunks"`` (that is, ``var.encoding["preferred_chunks"]``). The mapping's +keys shall be the names of dimensions with preferred chunk sizes, and each value shall +be the corresponding dimension's preferred chunk sizes expressed as either an integer +(such as ``{"dim1": 1000, "dim2": 2000}``) or a tuple of integers (such as ``{"dim1": +(1000, 100), "dim2": (2000, 2000, 2000)}``). + +Xarray uses the preferred chunk sizes in some special cases of the ``chunks`` argument +of the :py:func:`~xarray.open_dataset` and :py:func:`~xarray.open_mfdataset` functions. +If ``chunks`` is a ``dict``, then for any dimensions missing from the keys or whose +value is ``None``, Xarray sets the chunk sizes to the preferred sizes. If ``chunks`` +equals ``"auto"``, then Xarray seeks ideal chunk sizes informed by the preferred chunk +sizes. Specifically, it determines the chunk sizes using +:py:func:`dask.array.core.normalize_chunks` with the ``previous_chunks`` argument set +according to the preferred chunk sizes. diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index f8bffa6e82f..7f468b8b0db 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -32,9 +32,11 @@ the variable dimension names and then removed from the attributes dictionary returned to the user. Because of these choices, Xarray cannot read arbitrary array data, but only -Zarr data with valid ``_ARRAY_DIMENSIONS`` attributes on each array. +Zarr data with valid ``_ARRAY_DIMENSIONS`` or +`NCZarr `_ attributes +on each array (NCZarr dimension names are defined in the ``.zarray`` file). -After decoding the ``_ARRAY_DIMENSIONS`` attribute and assigning the variable +After decoding the ``_ARRAY_DIMENSIONS`` or NCZarr attribute and assigning the variable dimensions, Xarray proceeds to [optionally] decode each variable using its standard CF decoding machinery used for NetCDF data (see :py:func:`decode_cf`). diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index ddde0bf5888..81fa29bdf5f 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -518,8 +518,11 @@ the ability to store and analyze datasets far too large fit onto disk Xarray can't open just any zarr dataset, because xarray requires special metadata (attributes) describing the dataset dimensions and coordinates. -At this time, xarray can only open zarr datasets that have been written by -xarray. For implementation details, see :ref:`zarr_encoding`. +At this time, xarray can only open zarr datasets with these special attributes, +such as zarr datasets written by xarray, +`netCDF `_, +or `GDAL `_. +For implementation details, see :ref:`zarr_encoding`. To write a dataset with zarr, we use the :py:meth:`Dataset.to_zarr` method. 
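A minimal sketch of that round trip, assuming a local store path of ``example.zarr`` (the path and variable name are hypothetical):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"foo": (("x", "y"), np.random.rand(4, 5))})
    ds.to_zarr("example.zarr", mode="w")  # mode="w" overwrites an existing store

    # Variables come back dask-backed (lazily loaded) by default.
    reopened = xr.open_zarr("example.zarr")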
@@ -548,6 +551,11 @@ store is already present at that path, an error will be raised, preventing it from being overwritten. To override this behavior and overwrite an existing store, add ``mode='w'`` when invoking :py:meth:`~Dataset.to_zarr`. +.. note:: + + xarray does not write NCZarr attributes. Therefore, NCZarr data must be + opened in read-only mode. + To store variable length strings, convert them to object arrays first with ``dtype=object``. diff --git a/doc/user-guide/plotting.rst b/doc/user-guide/plotting.rst index f514b4ecbef..78182ed265f 100644 --- a/doc/user-guide/plotting.rst +++ b/doc/user-guide/plotting.rst @@ -251,7 +251,7 @@ Finally, if a dataset does not have any coordinates it enumerates all data point .. ipython:: python :okwarning: - air1d_multi = air1d_multi.drop(["date", "time", "decimal_day"]) + air1d_multi = air1d_multi.drop_vars(["date", "time", "decimal_day"]) air1d_multi.plot() The same applies to 2D plots below. diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 55de76bb9e7..4882402073c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,8 @@ v2022.03.1 (unreleased) New Features ~~~~~~~~~~~~ +- The `zarr` backend is now able to read NCZarr. + By `Mattia Almansi `_. - Add a weighted ``quantile`` method to :py:class:`~core.weighted.DatasetWeighted` and :py:class:`~core.weighted.DataArrayWeighted` (:pull:`6059`). By `Christian Jauvin `_ and `David Huard `_. @@ -36,6 +38,9 @@ New Features elements which trigger summarization rather than full repr in (numpy) array detailed views of the html repr (:pull:`6400`). By `Benoît Bovy `_. +- Allow passing chunks in ``**kwargs`` form to :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and + :py:meth:`Variable.chunk`. (:pull:`6471`) + By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -46,6 +51,9 @@ Breaking changes - Many arguments like ``keep_attrs``, ``axis``, and ``skipna`` are now keyword only for all reduction operations like ``.mean``. By `Deepak Cherian `_, `Jimmy Westling `_. +- Xarray's ufuncs have been removed, now that they can be replaced by numpy's ufuncs in all + supported versions of numpy. + By `Maximilian Roos `_. Deprecations ~~~~~~~~~~~~ @@ -62,16 +70,37 @@ Bug fixes coordinates. See the corresponding pull-request on GitHub for more details. (:pull:`5692`). By `Benoît Bovy `_. - Fixed "unhashable type" error trying to read NetCDF file with variable having its 'units' - attribute not ``str`` (e.g. ``numpy.ndarray``) (:issue:`6368`). - By `Oleh Khoma `_. + attribute not ``str`` (e.g. ``numpy.ndarray``) (:issue:`6368`). By `Oleh Khoma `_. +- Omit warning about specified dask chunks separating chunks on disk when the + underlying array is empty (e.g., because of an empty dimension) (:issue:`6401`). + By `Joseph K Aicher `_. - Fixed the poor html repr performance on large multi-indexes (:pull:`6400`). By `Benoît Bovy `_. - Allow fancy indexing of duck dask arrays along multiple dimensions. (:pull:`6414`) By `Justus Magin `_. +- In the API for backends, support dimensions that express their preferred chunk sizes + as a tuple of integers. (:issue:`6333`, :pull:`6334`) + By `Stan West `_. +- Fix bug in :py:func:`where` when passing non-xarray objects with ``keep_attrs=True``. (:issue:`6444`, :pull:`6461`) + By `Sam Levang `_. +- Allow passing both ``other`` and ``drop=True`` arguments to ``xr.DataArray.where`` + and ``xr.Dataset.where`` (:pull:`6466`, :pull:`6467`). + By `Michael Delgado `_. 
+- Ensure dtype encoding attributes are not added or modified on variables that + contain datetime-like values prior to being passed to + :py:func:`xarray.conventions.decode_cf_variable` (:issue:`6453`, + :pull:`6489`). By `Spencer Clark `_. +- Dark themes are now properly detected in Furo-themed Sphinx documents (:issue:`6500`, :pull:`6501`). + By `Kevin Paul `_. Documentation ~~~~~~~~~~~~~ +- Revise the documentation for developers on specifying a backend's preferred chunk + sizes. In particular, correct the syntax and replace lists with tuples in the + examples. (:issue:`6333`, :pull:`6334`) + By `Stan West `_. + Performance ~~~~~~~~~~~ diff --git a/xarray/__init__.py b/xarray/__init__.py index aa9739d3d35..46dcf0e9b32 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -1,4 +1,4 @@ -from . import testing, tutorial, ufuncs +from . import testing, tutorial from .backends.api import ( load_dataarray, load_dataset, @@ -53,7 +53,6 @@ # `mypy --strict` running in projects that import xarray. __all__ = ( # Sub-packages - "ufuncs", "testing", "tutorial", # Top-level functions diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 548b98048ba..9967b0a08c0 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1273,7 +1273,7 @@ def _validate_region(ds, region): f"{list(region.keys())}, but that is not " f"the case for some variables here. To drop these variables " f"from this dataset before exporting to zarr, write: " - f".drop({non_matching_vars!r})" + f".drop_vars({non_matching_vars!r})" ) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 4c1ce1ef09d..df3ee364546 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -86,7 +86,8 @@ def _open_scipy_netcdf(filename, mode, mmap, version): ) except TypeError as e: # TODO: gzipped loading only works with NetCDF3 files. - if "is not a valid NetCDF 3 file" in e.message: + errmsg = e.args[0] + if "is not a valid NetCDF 3 file" in errmsg: raise ValueError("gzipped file loading only supports NetCDF 3 files.") else: raise diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index aca0b8064f5..104f8aca58f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1,3 +1,4 @@ +import json import os import warnings @@ -178,19 +179,37 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): raise AssertionError("We should never get here. Function logic must be wrong.") -def _get_zarr_dims_and_attrs(zarr_obj, dimension_key): +def _get_zarr_dims_and_attrs(zarr_obj, dimension_key, try_nczarr): # Zarr arrays do not have dimensions. To get around this problem, we add # an attribute that specifies the dimension. We have to hide this attribute # when we send the attributes to the user. # zarr_obj can be either a zarr group or zarr array try: + # Xarray-Zarr dimensions = zarr_obj.attrs[dimension_key] - except KeyError: - raise KeyError( - f"Zarr object is missing the attribute `{dimension_key}`, which is " - "required for xarray to determine variable dimensions." - ) - attributes = HiddenKeyDict(zarr_obj.attrs, [dimension_key]) + except KeyError as e: + if not try_nczarr: + raise KeyError( + f"Zarr object is missing the attribute `{dimension_key}`, which is " + "required for xarray to determine variable dimensions." 
+ ) from e + + # NCZarr defines dimensions through metadata in .zarray + zarray_path = os.path.join(zarr_obj.path, ".zarray") + zarray = json.loads(zarr_obj.store[zarray_path]) + try: + # NCZarr uses Fully Qualified Names + dimensions = [ + os.path.basename(dim) for dim in zarray["_NCZARR_ARRAY"]["dimrefs"] + ] + except KeyError as e: + raise KeyError( + f"Zarr object is missing the attribute `{dimension_key}` and the NCZarr metadata, " + "which are required for xarray to determine variable dimensions." + ) from e + + nc_attrs = [attr for attr in zarr_obj.attrs if attr.startswith("_NC")] + attributes = HiddenKeyDict(zarr_obj.attrs, [dimension_key] + nc_attrs) return dimensions, attributes @@ -409,7 +428,10 @@ def ds(self): def open_store_variable(self, name, zarr_array): data = indexing.LazilyIndexedArray(ZarrArrayWrapper(name, self)) - dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, DIMENSION_KEY) + try_nczarr = self._mode == "r" + dimensions, attributes = _get_zarr_dims_and_attrs( + zarr_array, DIMENSION_KEY, try_nczarr + ) attributes = dict(attributes) encoding = { "chunks": zarr_array.chunks, @@ -430,26 +452,24 @@ def get_variables(self): ) def get_attrs(self): - return dict(self.zarr_group.attrs.asdict()) + return { + k: v + for k, v in self.zarr_group.attrs.asdict().items() + if not k.startswith("_NC") + } def get_dimensions(self): + try_nczarr = self._mode == "r" dimensions = {} for k, v in self.zarr_group.arrays(): - try: - for d, s in zip(v.attrs[DIMENSION_KEY], v.shape): - if d in dimensions and dimensions[d] != s: - raise ValueError( - f"found conflicting lengths for dimension {d} " - f"({s} != {dimensions[d]})" - ) - dimensions[d] = s - - except KeyError: - raise KeyError( - f"Zarr object is missing the attribute `{DIMENSION_KEY}`, " - "which is required for xarray to determine " - "variable dimensions." - ) + dim_names, _ = _get_zarr_dims_and_attrs(v, DIMENSION_KEY, try_nczarr) + for d, s in zip(dim_names, v.shape): + if d in dimensions and dimensions[d] != s: + raise ValueError( + f"found conflicting lengths for dimension {d} " + f"({s} != {dimensions[d]})" + ) + dimensions[d] = s return dimensions def set_dimensions(self, variables, unlimited_dims=None): @@ -645,7 +665,7 @@ def open_zarr( The `store` object should be a valid store for a Zarr group. `store` variables must contain dimension metadata encoded in the - `_ARRAY_DIMENSIONS` attribute. + `_ARRAY_DIMENSIONS` attribute or must have NCZarr format. Parameters ---------- diff --git a/xarray/conventions.py b/xarray/conventions.py index ae915069947..102ef003186 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -7,7 +7,7 @@ from .coding import strings, times, variables from .coding.variables import SerializationWarning, pop_to from .core import duck_array_ops, indexing -from .core.common import contains_cftime_datetimes +from .core.common import _contains_datetime_like_objects, contains_cftime_datetimes from .core.pycompat import is_duck_dask_array from .core.variable import IndexVariable, Variable, as_variable @@ -340,6 +340,11 @@ def decode_cf_variable( A variable holding the decoded equivalent of var. 
""" var = as_variable(var) + + # Ensure datetime-like Variables are passed through unmodified (GH 6453) + if _contains_datetime_like_objects(var): + return var + original_dtype = var.dtype if decode_timedelta is None: @@ -770,7 +775,7 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): # this will copy coordinates from encoding to attrs if "coordinates" in attrs # after the next line, "coordinates" is never in encoding # we get support for attrs["coordinates"] for free. - coords_str = pop_to(encoding, attrs, "coordinates") + coords_str = pop_to(encoding, attrs, "coordinates") or attrs.get("coordinates") if not coords_str and variable_coordinates[name]: coordinates_text = " ".join( str(coord_name) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index d201e3a613f..e29d2b2a67f 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -927,8 +927,8 @@ def _get_broadcast_dims_map_common_coords(args, exclude): for dim in arg.dims: if dim not in common_coords and dim not in exclude: dims_map[dim] = arg.sizes[dim] - if dim in arg.coords: - common_coords[dim] = arg.coords[dim].variable + if dim in arg._indexes: + common_coords.update(arg.xindexes.get_all_coords(dim)) return dims_map, common_coords diff --git a/xarray/core/common.py b/xarray/core/common.py index c33db4a62ea..3db9b1cfa0c 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1197,8 +1197,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): By default, these locations filled with NA. drop : bool, optional If True, coordinate labels that only correspond to False values of - the condition are dropped from the result. Mutually exclusive with - ``other``. + the condition are dropped from the result. Returns ------- @@ -1251,6 +1250,14 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): [15., nan, nan, nan]]) Dimensions without coordinates: x, y + >>> a.where(a.x + a.y < 4, -1, drop=True) + + array([[ 0, 1, 2, 3], + [ 5, 6, 7, -1], + [10, 11, -1, -1], + [15, -1, -1, -1]]) + Dimensions without coordinates: x, y + See Also -------- numpy.where : corresponding numpy function @@ -1264,9 +1271,6 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): cond = cond(self) if drop: - if other is not dtypes.NA: - raise ValueError("cannot set `other` if drop=True") - if not isinstance(cond, (Dataset, DataArray)): raise TypeError( f"cond argument is {cond!r} but must be a {Dataset!r} or {DataArray!r}" @@ -1858,7 +1862,10 @@ def _contains_cftime_datetimes(array) -> bool: def contains_cftime_datetimes(var) -> bool: """Check if an xarray.Variable contains cftime.datetime objects""" - return _contains_cftime_datetimes(var.data) + if var.dtype == np.dtype("O") and var.size > 0: + return _contains_cftime_datetimes(var.data) + else: + return False def _contains_datetime_like_objects(var) -> bool: diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 7676d8e558c..1834622d96e 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1825,11 +1825,10 @@ def where(cond, x, y, keep_attrs=None): """ if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - if keep_attrs is True: # keep the attributes of x, the second parameter, by default to # be consistent with the `where` method of `DataArray` and `Dataset` - keep_attrs = lambda attrs, context: attrs[1] + keep_attrs = lambda attrs, context: getattr(x, "attrs", {}) # alignment for three arguments is complicated, so don't support it yet return apply_ufunc( diff --git 
a/xarray/core/dataarray.py b/xarray/core/dataarray.py index df1e096b021..2cf78fa7c61 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1113,6 +1113,7 @@ def chunk( name_prefix: str = "xarray-", token: str = None, lock: bool = False, + **chunks_kwargs: Any, ) -> DataArray: """Coerce this array's data into a dask arrays with the given chunks. @@ -1136,13 +1137,28 @@ def chunk( lock : optional Passed on to :py:func:`dask.array.from_array`, if the array is not already as dask array. + **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided. Returns ------- chunked : xarray.DataArray """ - if isinstance(chunks, (tuple, list)): + if chunks is None: + warnings.warn( + "None value for 'chunks' is deprecated. " + "It will raise an error in the future. Use instead '{}'", + category=FutureWarning, + ) + chunks = {} + + if isinstance(chunks, (float, str, int)): + chunks = dict.fromkeys(self.dims, chunks) + elif isinstance(chunks, (tuple, list)): chunks = dict(zip(self.dims, chunks)) + else: + chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") ds = self._to_temp_dataset().chunk( chunks, name_prefix=name_prefix, token=token, lock=lock diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 855718cfe74..2c67cd665ca 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3,6 +3,7 @@ import copy import datetime import inspect +import itertools import sys import warnings from collections import defaultdict @@ -171,60 +172,63 @@ def _assert_empty(args: tuple, msg: str = "%s") -> None: raise ValueError(msg % args) -def _check_chunks_compatibility(var, chunks, preferred_chunks): - for dim in var.dims: - if dim not in chunks or (dim not in preferred_chunks): - continue - - preferred_chunks_dim = preferred_chunks.get(dim) - chunks_dim = chunks.get(dim) - - if isinstance(chunks_dim, int): - chunks_dim = (chunks_dim,) - else: - chunks_dim = chunks_dim[:-1] - - if any(s % preferred_chunks_dim for s in chunks_dim): - warnings.warn( - f"Specified Dask chunks {chunks[dim]} would separate " - f"on disks chunk shape {preferred_chunks[dim]} for dimension {dim}. " - "This could degrade performance. " - "Consider rechunking after loading instead.", - stacklevel=2, - ) - - def _get_chunk(var, chunks): - # chunks need to be explicitly computed to take correctly into account - # backend preferred chunking + """ + Return map from each dim to chunk sizes, accounting for backend's preferred chunks. + """ + import dask.array as da if isinstance(var, IndexVariable): return {} + dims = var.dims + shape = var.shape - if isinstance(chunks, int) or (chunks == "auto"): - chunks = dict.fromkeys(var.dims, chunks) - + # Determine the explicit requested chunks. 
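# Editorial worked example (hypothetical values): for a variable of shape (5,)
# with var.encoding == {"preferred_chunks": {"dim_0": 2}}, requesting
# chunks={"dim_0": None} falls back to the preferred sizes and yields chunk
# sizes (2, 2, 1); requesting chunks="auto" instead passes previous_chunks=(2,)
# to dask.array.core.normalize_chunks so dask picks sizes aligned with the store.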
preferred_chunks = var.encoding.get("preferred_chunks", {}) - preferred_chunks_list = [ - preferred_chunks.get(dim, shape) for dim, shape in zip(var.dims, var.shape) - ] - - chunks_list = [ - chunks.get(dim, None) or preferred_chunks.get(dim, None) for dim in var.dims - ] - - output_chunks_list = da.core.normalize_chunks( - chunks_list, - shape=var.shape, - dtype=var.dtype, - previous_chunks=preferred_chunks_list, + preferred_chunk_shape = tuple( + preferred_chunks.get(dim, size) for dim, size in zip(dims, shape) + ) + if isinstance(chunks, Number) or (chunks == "auto"): + chunks = dict.fromkeys(dims, chunks) + chunk_shape = tuple( + chunks.get(dim, None) or preferred_chunk_sizes + for dim, preferred_chunk_sizes in zip(dims, preferred_chunk_shape) + ) + chunk_shape = da.core.normalize_chunks( + chunk_shape, shape=shape, dtype=var.dtype, previous_chunks=preferred_chunk_shape ) - output_chunks = dict(zip(var.dims, output_chunks_list)) - _check_chunks_compatibility(var, output_chunks, preferred_chunks) + # Warn where requested chunks break preferred chunks, provided that the variable + # contains data. + if var.size: + for dim, size, chunk_sizes in zip(dims, shape, chunk_shape): + try: + preferred_chunk_sizes = preferred_chunks[dim] + except KeyError: + continue + # Determine the stop indices of the preferred chunks, but omit the last stop + # (equal to the dim size). In particular, assume that when a sequence + # expresses the preferred chunks, the sequence sums to the size. + preferred_stops = ( + range(preferred_chunk_sizes, size, preferred_chunk_sizes) + if isinstance(preferred_chunk_sizes, Number) + else itertools.accumulate(preferred_chunk_sizes[:-1]) + ) + # Gather any stop indices of the specified chunks that are not a stop index + # of a preferred chunk. Again, omit the last stop, assuming that it equals + # the dim size. + breaks = set(itertools.accumulate(chunk_sizes[:-1])).difference( + preferred_stops + ) + if breaks: + warnings.warn( + "The specified Dask chunks separate the stored chunks along " + f'dimension "{dim}" starting at index {min(breaks)}. This could ' + "degrade performance. Instead, consider rechunking after loading." + ) - return output_chunks + return dict(zip(dims, chunk_shape)) def _maybe_chunk( @@ -1990,6 +1994,7 @@ def chunk( name_prefix: str = "xarray-", token: str = None, lock: bool = False, + **chunks_kwargs: Any, ) -> Dataset: """Coerce all arrays in this dataset into dask arrays with the given chunks. @@ -2003,7 +2008,7 @@ def chunk( Parameters ---------- - chunks : int, "auto" or mapping of hashable to int, optional + chunks : int, tuple of int, "auto" or mapping of hashable to int, optional Chunk sizes along each dimension, e.g., ``5``, ``"auto"``, or ``{"x": 5, "y": 5}``. name_prefix : str, optional @@ -2013,6 +2018,9 @@ def chunk( lock : optional Passed on to :py:func:`dask.array.from_array`, if the array is not already as dask array. + **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided Returns ------- @@ -2024,7 +2032,7 @@ def chunk( Dataset.chunksizes xarray.unify_chunks """ - if chunks is None: + if chunks is None and chunks_kwargs is None: warnings.warn( "None value for 'chunks' is deprecated. " "It will raise an error in the future. 
Use instead '{}'", @@ -2034,6 +2042,8 @@ def chunk( if isinstance(chunks, (Number, str, int)): chunks = dict.fromkeys(self.dims, chunks) + else: + chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") bad_dims = chunks.keys() - self.dims.keys() if bad_dims: diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 9f5f25c5895..ba44f6d8466 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -266,7 +266,7 @@ def either_dict_or_kwargs( kw_kwargs: Mapping[str, T], func_name: str, ) -> Mapping[Hashable, T]: - if pos_kwargs is None: + if pos_kwargs is None or pos_kwargs == {}: # Need an explicit cast to appease mypy due to invariance; see # https://github.com/python/mypy/issues/6228 return cast(Mapping[Hashable, T], kw_kwargs) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a21cf8c2d97..05c70390b46 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -5,7 +5,7 @@ import numbers import warnings from datetime import timedelta -from typing import TYPE_CHECKING, Any, Hashable, Mapping, Sequence +from typing import TYPE_CHECKING, Any, Hashable, Literal, Mapping, Sequence import numpy as np import pandas as pd @@ -1012,7 +1012,19 @@ def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: _array_counter = itertools.count() - def chunk(self, chunks={}, name=None, lock=False): + def chunk( + self, + chunks: ( + int + | Literal["auto"] + | tuple[int, ...] + | tuple[tuple[int, ...], ...] + | Mapping[Any, None | int | tuple[int, ...]] + ) = {}, + name: str = None, + lock: bool = False, + **chunks_kwargs: Any, + ) -> Variable: """Coerce this array's data into a dask array with the given chunks. If this variable is a non-dask array, it will be converted to dask @@ -1034,6 +1046,9 @@ def chunk(self, chunks={}, name=None, lock=False): lock : optional Passed on to :py:func:`dask.array.from_array`, if the array is not already as dask array. + **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided. 
Returns ------- @@ -1049,6 +1064,11 @@ def chunk(self, chunks={}, name=None, lock=False): ) chunks = {} + if isinstance(chunks, (float, str, int, tuple, list)): + pass # dask.array.from_array can handle these directly + else: + chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") + if utils.is_dict_like(chunks): chunks = {self.get_axis_num(dim): chunk for dim, chunk in chunks.items()} diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css index b3b8a162e9a..9fa27c03359 100644 --- a/xarray/static/css/style.css +++ b/xarray/static/css/style.css @@ -14,6 +14,7 @@ } html[theme=dark], +body[data-theme=dark], body.vscode-dark { --xr-font-color0: rgba(255, 255, 255, 1); --xr-font-color2: rgba(255, 255, 255, 0.54); diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 825c6f7130f..81bfeb11a1e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4,6 +4,7 @@ import math import os.path import pickle +import platform import re import shutil import sys @@ -1756,7 +1757,7 @@ def test_auto_chunk(self): assert v.chunks == original[k].chunks @requires_dask - @pytest.mark.filterwarnings("ignore:Specified Dask chunks") + @pytest.mark.filterwarnings("ignore:The specified Dask chunks separate") def test_manual_chunk(self): original = create_test_data().chunk({"dim1": 3, "dim2": 4, "dim3": 3}) @@ -2210,6 +2211,13 @@ def test_save_emptydim(self, chunk): with self.roundtrip(ds) as ds_reload: assert_identical(ds, ds_reload) + @requires_dask + def test_no_warning_from_open_emptydim_with_chunks(self): + ds = Dataset({"x": (("a", "b"), np.empty((5, 0)))}).chunk({"a": 1}) + with assert_no_warnings(): + with self.roundtrip(ds, open_kwargs=dict(chunks={"a": 1})) as ds_reload: + assert_identical(ds, ds_reload) + @pytest.mark.parametrize("consolidated", [False, True]) @pytest.mark.parametrize("compute", [False, True]) @pytest.mark.parametrize("use_dask", [False, True]) @@ -5296,7 +5304,7 @@ def test_open_dataset_chunking_zarr(chunks, tmp_path): @pytest.mark.parametrize( "chunks", ["auto", -1, {}, {"x": "auto"}, {"x": -1}, {"x": "auto", "y": -1}] ) -@pytest.mark.filterwarnings("ignore:Specified Dask chunks") +@pytest.mark.filterwarnings("ignore:The specified Dask chunks separate") def test_chunking_consintency(chunks, tmp_path): encoded_chunks = {} dask_arr = da.from_array( @@ -5427,3 +5435,51 @@ def test_write_file_from_np_str(str_type, tmpdir) -> None: txr = tdf.to_xarray() txr.to_netcdf(tmpdir.join("test.nc")) + + +@requires_zarr +@requires_netCDF4 +class TestNCZarr: + @staticmethod + def _create_nczarr(filename): + netcdfc_version = Version(nc4.getlibversion().split()[0]) + if netcdfc_version < Version("4.8.1"): + pytest.skip("requires netcdf-c>=4.8.1") + if (platform.system() == "Windows") and (netcdfc_version == Version("4.8.1")): + # Bug in netcdf-c==4.8.1 (typo: Nan instead of NaN) + # https://github.com/Unidata/netcdf-c/issues/2265 + pytest.skip("netcdf-c==4.8.1 has issues on Windows") + + ds = create_test_data() + # Drop dim3: netcdf-c does not support dtype='4.8.1 will add _ARRAY_DIMENSIONS by default + mode = "nczarr" if netcdfc_version == Version("4.8.1") else "nczarr,noxarray" + ds.to_netcdf(f"file://{filename}#mode={mode}") + return ds + + def test_open_nczarr(self): + with create_tmp_file(suffix=".zarr") as tmp: + expected = self._create_nczarr(tmp) + actual = xr.open_zarr(tmp, consolidated=False) + assert_identical(expected, actual) + + def test_overwriting_nczarr(self): + with create_tmp_file(suffix=".zarr") as tmp: + ds = 
self._create_nczarr(tmp)
+            expected = ds[["var1"]]
+            expected.to_zarr(tmp, mode="w")
+            actual = xr.open_zarr(tmp, consolidated=False)
+            assert_identical(expected, actual)
+
+    @pytest.mark.parametrize("mode", ["a", "r+"])
+    @pytest.mark.filterwarnings("ignore:.*non-consolidated metadata.*")
+    def test_raise_writing_to_nczarr(self, mode):
+        with create_tmp_file(suffix=".zarr") as tmp:
+            ds = self._create_nczarr(tmp)
+            with pytest.raises(
+                KeyError, match="missing the attribute `_ARRAY_DIMENSIONS`,"
+            ):
+                ds.to_zarr(tmp, mode=mode)
diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py
index 352ec6c10f1..0ba446818e5 100644
--- a/xarray/tests/test_backends_api.py
+++ b/xarray/tests/test_backends_api.py
@@ -1,9 +1,18 @@
+from numbers import Number
+
 import numpy as np
+import pytest
 
 import xarray as xr
 from xarray.backends.api import _get_default_engine
 
-from . import assert_identical, requires_netCDF4, requires_scipy
+from . import (
+    assert_identical,
+    assert_no_warnings,
+    requires_dask,
+    requires_netCDF4,
+    requires_scipy,
+)
 
 
 @requires_netCDF4
@@ -35,3 +44,136 @@ def open_dataset(
 
     actual = xr.open_dataset("fake_filename", engine=CustomBackend)
     assert_identical(expected, actual)
+
+
+class PassThroughBackendEntrypoint(xr.backends.BackendEntrypoint):
+    """Access an object passed to the `open_dataset` method."""
+
+    def open_dataset(self, dataset, *, drop_variables=None):
+        """Return the first argument."""
+        return dataset
+
+
+def explicit_chunks(chunks, shape):
+    """Return explicit chunks, expanding any integer member to a tuple of integers."""
+    # Emulate `dask.array.core.normalize_chunks` but for simpler inputs.
+    return tuple(
+        (
+            (size // chunk) * (chunk,)
+            + ((size % chunk,) if size % chunk or size == 0 else ())
+        )
+        if isinstance(chunk, Number)
+        else chunk
+        for chunk, size in zip(chunks, shape)
+    )
+
+
+@requires_dask
+class TestPreferredChunks:
+    """Test behaviors related to the backend's preferred chunks."""
+
+    var_name = "data"
+
+    def create_dataset(self, shape, pref_chunks):
+        """Return a dataset with a variable with the given shape and preferred chunks."""
+        dims = tuple(f"dim_{idx}" for idx in range(len(shape)))
+        return xr.Dataset(
+            {
+                self.var_name: xr.Variable(
+                    dims,
+                    np.empty(shape, dtype=np.dtype("V1")),
+                    encoding={"preferred_chunks": dict(zip(dims, pref_chunks))},
+                )
+            }
+        )
+
+    def check_dataset(self, initial, final, expected_chunks):
+        assert_identical(initial, final)
+        assert final[self.var_name].chunks == expected_chunks
+
+    @pytest.mark.parametrize(
+        "shape,pref_chunks",
+        [
+            # Represent preferred chunking with int.
+            ((5,), (2,)),
+            # Represent preferred chunking with tuple.
+            ((5,), ((2, 2, 1),)),
+            # Represent preferred chunking with int in two dims.
+            ((5, 6), (4, 2)),
+            # Represent preferred chunking with tuple in second dim.
+            ((5, 6), (4, (2, 2, 2))),
+        ],
+    )
+    @pytest.mark.parametrize("request_with_empty_map", [False, True])
+    def test_honor_chunks(self, shape, pref_chunks, request_with_empty_map):
+        """Honor the backend's preferred chunks when opening a dataset."""
+        initial = self.create_dataset(shape, pref_chunks)
+        # To keep the backend's preferred chunks, the `chunks` argument must be an
+        # empty mapping or map dimensions to `None`.
+        chunks = (
+            {}
+            if request_with_empty_map
+            else dict.fromkeys(initial[self.var_name].dims, None)
+        )
+        final = xr.open_dataset(
+            initial, engine=PassThroughBackendEntrypoint, chunks=chunks
+        )
+        self.check_dataset(initial, final, explicit_chunks(pref_chunks, shape))
+
+    @pytest.mark.parametrize(
+        "shape,pref_chunks,req_chunks",
+        [
+            # Preferred chunking is int; requested chunking is int.
+            ((5,), (2,), (3,)),
+            # Preferred chunking is int; requested chunking is tuple.
+            ((5,), (2,), ((2, 1, 1, 1),)),
+            # Preferred chunking is tuple; requested chunking is int.
+            ((5,), ((2, 2, 1),), (3,)),
+            # Preferred chunking is tuple; requested chunking is tuple.
+            ((5,), ((2, 2, 1),), ((2, 1, 1, 1),)),
+            # Split chunks along a dimension other than the first.
+            ((1, 5), (1, 2), (1, 3)),
+        ],
+    )
+    def test_split_chunks(self, shape, pref_chunks, req_chunks):
+        """Warn when the requested chunks separate the backend's preferred chunks."""
+        initial = self.create_dataset(shape, pref_chunks)
+        with pytest.warns(UserWarning):
+            final = xr.open_dataset(
+                initial,
+                engine=PassThroughBackendEntrypoint,
+                chunks=dict(zip(initial[self.var_name].dims, req_chunks)),
+            )
+        self.check_dataset(initial, final, explicit_chunks(req_chunks, shape))
+
+    @pytest.mark.parametrize(
+        "shape,pref_chunks,req_chunks",
+        [
+            # Keep preferred chunks using int representation.
+            ((5,), (2,), (2,)),
+            # Keep preferred chunks using tuple representation.
+            ((5,), (2,), ((2, 2, 1),)),
+            # Join chunks, leaving a final short chunk.
+            ((5,), (2,), (4,)),
+            # Join all chunks with an int larger than the dimension size.
+            ((5,), (2,), (6,)),
+            # Join one chunk using tuple representation.
+            ((5,), (1,), ((1, 1, 2, 1),)),
+            # Join one chunk using int representation.
+            ((5,), ((1, 1, 2, 1),), (2,)),
+            # Join multiple chunks using tuple representation.
+            ((5,), ((1, 1, 2, 1),), ((2, 3),)),
+            # Join chunks in multiple dimensions.
+            ((5, 5), (2, (1, 1, 2, 1)), (4, (2, 3))),
+        ],
+    )
+    def test_join_chunks(self, shape, pref_chunks, req_chunks):
+        """Don't warn when the requested chunks join or keep the preferred chunks."""
+        initial = self.create_dataset(shape, pref_chunks)
+        with assert_no_warnings():
+            final = xr.open_dataset(
+                initial,
+                engine=PassThroughBackendEntrypoint,
+                chunks=dict(zip(initial[self.var_name].dims, req_chunks)),
+            )
+        self.check_dataset(initial, final, explicit_chunks(req_chunks, shape))
diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py
index 6a86738ab2f..7a397428ba3 100644
--- a/xarray/tests/test_computation.py
+++ b/xarray/tests/test_computation.py
@@ -1928,6 +1928,10 @@ def test_where_attrs() -> None:
     expected = xr.DataArray([1, 0], dims="x", attrs={"attr": "x"})
     assert_identical(expected, actual)
 
+    # ensure keep_attrs can handle scalar values
+    actual = xr.where(cond, 1, 0, keep_attrs=True)
+    assert actual.attrs == {}
+
 
 @pytest.mark.parametrize("use_dask", [True, False])
 @pytest.mark.parametrize("use_datetime", [True, False])
diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index 83e560e7208..b8b9d19e238 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -9,6 +9,7 @@
     Dataset,
     SerializationWarning,
     Variable,
+    cftime_range,
     coding,
     conventions,
     open_dataset,
@@ -128,6 +129,25 @@ def test_multidimensional_coordinates(self) -> None:
         # Should not have any global coordinates.
         assert "coordinates" not in attrs
 
+    def test_var_with_coord_attr(self) -> None:
+        # regression test for GH6310
+        # don't overwrite user-defined "coordinates" attributes
+        orig = Dataset(
+            {"values": ("time", np.zeros(2), {"coordinates": "time lon lat"})},
+            coords={
+                "time": ("time", np.zeros(2)),
+                "lat": ("time", np.zeros(2)),
+                "lon": ("time", np.zeros(2)),
+            },
+        )
+        # Encode the coordinates, as they would be in a netCDF output file.
+        enc, attrs = conventions.encode_dataset_coordinates(orig)
+        # Make sure we have the right coordinates for each variable.
+        values_coords = enc["values"].attrs.get("coordinates", "")
+        assert set(values_coords.split()) == {"time", "lat", "lon"}
+        # Should not have any global coordinates.
+        assert "coordinates" not in attrs
+
     def test_do_not_overwrite_user_coordinates(self) -> None:
         orig = Dataset(
             coords={"x": [0, 1, 2], "y": ("x", [5, 6, 7]), "z": ("x", [8, 9, 10])},
@@ -423,3 +443,25 @@ def test_decode_cf_variable_with_array_units(self) -> None:
     v = Variable(["t"], [1, 2, 3], {"units": np.array(["foobar"], dtype=object)})
     v_decoded = conventions.decode_cf_variable("test2", v)
     assert_identical(v, v_decoded)
+
+
+def test_decode_cf_variable_timedelta64():
+    variable = Variable(["time"], pd.timedelta_range("1D", periods=2))
+    decoded = conventions.decode_cf_variable("time", variable)
+    assert decoded.encoding == {}
+    assert_identical(decoded, variable)
+
+
+def test_decode_cf_variable_datetime64():
+    variable = Variable(["time"], pd.date_range("2000", periods=2))
+    decoded = conventions.decode_cf_variable("time", variable)
+    assert decoded.encoding == {}
+    assert_identical(decoded, variable)
+
+
+@requires_cftime
+def test_decode_cf_variable_cftime():
+    variable = Variable(["time"], cftime_range("2000", periods=2))
+    decoded = conventions.decode_cf_variable("time", variable)
+    assert decoded.encoding == {}
+    assert_identical(decoded, variable)
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index 872c0c6f1db..df69e8d9d6e 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -10,7 +10,6 @@
 from packaging.version import Version
 
 import xarray as xr
-import xarray.ufuncs as xu
 from xarray import DataArray, Dataset, Variable
 from xarray.core import duck_array_ops
 from xarray.core.pycompat import dask_version
@@ -265,18 +264,16 @@ def test_missing_methods(self):
         except NotImplementedError as err:
             assert "dask" in str(err)
 
-    @pytest.mark.filterwarnings("ignore::FutureWarning")
     def test_univariate_ufunc(self):
         u = self.eager_var
         v = self.lazy_var
-        self.assertLazyAndAllClose(np.sin(u), xu.sin(v))
+        self.assertLazyAndAllClose(np.sin(u), np.sin(v))
 
-    @pytest.mark.filterwarnings("ignore::FutureWarning")
     def test_bivariate_ufunc(self):
         u = self.eager_var
         v = self.lazy_var
-        self.assertLazyAndAllClose(np.maximum(u, 0), xu.maximum(v, 0))
-        self.assertLazyAndAllClose(np.maximum(u, 0), xu.maximum(0, v))
+        self.assertLazyAndAllClose(np.maximum(u, 0), np.maximum(v, 0))
+        self.assertLazyAndAllClose(np.maximum(u, 0), np.maximum(0, v))
 
     def test_compute(self):
         u = self.eager_var
@@ -605,11 +602,10 @@ def duplicate_and_merge(array):
         actual = duplicate_and_merge(self.lazy_array)
         self.assertLazyAndEqual(expected, actual)
 
-    @pytest.mark.filterwarnings("ignore::FutureWarning")
     def test_ufuncs(self):
         u = self.eager_array
         v = self.lazy_array
-        self.assertLazyAndAllClose(np.sin(u), xu.sin(v))
+        self.assertLazyAndAllClose(np.sin(u), np.sin(v))
 
     def test_where_dispatching(self):
         a = np.arange(10)
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 65efb3a732c..b8c9edd7258 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -804,6 +804,11 @@ def test_chunk(self):
         assert isinstance(blocked.data, da.Array)
         assert "testname_" in blocked.data.name
 
+        # test kwargs form of chunks
+        blocked = unblocked.chunk(dim_0=3, dim_1=3)
+        assert blocked.chunks == ((3,), (3, 1))
+        assert blocked.data.name != first_dask_name
+
     def test_isel(self):
         assert_identical(self.dv[0], self.dv.isel(x=0))
         assert_identical(self.dv, self.dv.isel(x=slice(None)))
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 5f368375fc0..c0f7f09ff61 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -921,6 +921,9 @@ def test_chunk(self):
         expected_chunks = {"dim1": (8,), "dim2": (9,), "dim3": (10,)}
         assert reblocked.chunks == expected_chunks
 
+        # test kwargs form of chunks
+        assert data.chunk(**expected_chunks).chunks == expected_chunks
+
         def get_dask_names(ds):
             return {k: v.data.name for k, v in ds.items()}
 
@@ -947,7 +950,7 @@ def get_dask_names(ds):
         new_dask_names = get_dask_names(reblocked)
         assert reblocked.chunks == expected_chunks
         assert_identical(reblocked, data)
-        # recuhnking with same chunk sizes should not change names
+        # rechunking with same chunk sizes should not change names
         for k, v in new_dask_names.items():
             assert v == orig_dask_names[k]
 
@@ -2328,6 +2331,18 @@ def test_broadcast_misaligned(self):
         assert_identical(expected_x2, x2)
         assert_identical(expected_y2, y2)
 
+    def test_broadcast_multi_index(self):
+        # GH6430
+        ds = Dataset(
+            {"foo": (("x", "y", "z"), np.ones((3, 4, 2)))},
+            {"x": ["a", "b", "c"], "y": [1, 2, 3, 4]},
+        )
+        stacked = ds.stack(space=["x", "y"])
+        broadcasted, _ = broadcast(stacked, stacked.space)
+
+        assert broadcasted.xindexes["x"] is broadcasted.xindexes["space"]
+        assert broadcasted.xindexes["y"] is broadcasted.xindexes["space"]
+
     def test_variable_indexing(self):
         data = create_test_data()
         v = data["var1"]
@@ -4545,8 +4560,11 @@ def test_where_other(self):
         actual = ds.where(lambda x: x > 1, -1)
         assert_equal(expected, actual)
 
-        with pytest.raises(ValueError, match=r"cannot set"):
-            ds.where(ds > 1, other=0, drop=True)
+        actual = ds.where(ds > 1, other=-1, drop=True)
+        expected_nodrop = ds.where(ds > 1, -1)
+        _, expected = xr.align(actual, expected_nodrop, join="left")
+        assert_equal(actual, expected)
+        assert actual.a.dtype == int
 
         with pytest.raises(ValueError, match=r"cannot align .* are not equal"):
             ds.where(ds > 1, ds.isel(x=slice(3)))
diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py
index bf4d39105c4..bac1f6407fc 100644
--- a/xarray/tests/test_sparse.py
+++ b/xarray/tests/test_sparse.py
@@ -7,7 +7,6 @@
 from packaging.version import Version
 
 import xarray as xr
-import xarray.ufuncs as xu
 from xarray import DataArray, Variable
 from xarray.core.pycompat import sparse_array_type, sparse_version
 
@@ -279,12 +278,12 @@ def test_unary_op(self):
 
     @pytest.mark.filterwarnings("ignore::FutureWarning")
     def test_univariate_ufunc(self):
-        assert_sparse_equal(np.sin(self.data), xu.sin(self.var).data)
+        assert_sparse_equal(np.sin(self.data), np.sin(self.var).data)
 
     @pytest.mark.filterwarnings("ignore::FutureWarning")
     def test_bivariate_ufunc(self):
-        assert_sparse_equal(np.maximum(self.data, 0), xu.maximum(self.var, 0).data)
-        assert_sparse_equal(np.maximum(self.data, 0), xu.maximum(0, self.var).data)
+        assert_sparse_equal(np.maximum(self.data, 0), np.maximum(self.var, 0).data)
+        assert_sparse_equal(np.maximum(self.data, 0), np.maximum(0, self.var).data)
 
     def test_repr(self):
         expected = dedent(
@@ -665,11 +664,6 @@ def test_stack(self):
         roundtripped = stacked.unstack()
         assert_identical(arr, roundtripped)
 
-    @pytest.mark.filterwarnings("ignore::FutureWarning")
-    def test_ufuncs(self):
-        x = self.sp_xr
-        assert_equal(np.sin(x), xu.sin(x))
-
     def test_dataarray_repr(self):
         a = xr.DataArray(
             sparse.COO.from_numpy(np.ones(4)),
diff --git a/xarray/tests/test_ufuncs.py b/xarray/tests/test_ufuncs.py
index 590ae9ae003..28e5c6cbcb1 100644
--- a/xarray/tests/test_ufuncs.py
+++ b/xarray/tests/test_ufuncs.py
@@ -1,10 +1,7 @@
-import pickle
-
 import numpy as np
 import pytest
 
 import xarray as xr
-import xarray.ufuncs as xu
 
 from . import assert_array_equal
 from . import assert_identical as assert_identical_
@@ -158,52 +155,3 @@ def test_gufuncs():
     fake_gufunc = mock.Mock(signature="(n)->()", autospec=np.sin)
     with pytest.raises(NotImplementedError, match=r"generalized ufuncs"):
         xarray_obj.__array_ufunc__(fake_gufunc, "__call__", xarray_obj)
-
-
-def test_xarray_ufuncs_deprecation():
-    with pytest.warns(FutureWarning, match="xarray.ufuncs"):
-        xu.cos(xr.DataArray([0, 1]))
-
-    with assert_no_warnings():
-        xu.angle(xr.DataArray([0, 1]))
-
-
-@pytest.mark.filterwarnings("ignore::RuntimeWarning")
-@pytest.mark.parametrize(
-    "name",
-    [
-        name
-        for name in dir(xu)
-        if (
-            not name.startswith("_")
-            and hasattr(np, name)
-            and name not in ["print_function", "absolute_import", "division"]
-        )
-    ],
-)
-def test_numpy_ufuncs(name, request):
-    x = xr.DataArray([1, 1])
-
-    np_func = getattr(np, name)
-    if hasattr(np_func, "nin") and np_func.nin == 2:
-        args = (x, x)
-    else:
-        args = (x,)
-
-    y = np_func(*args)
-
-    if name in ["angle", "iscomplex"]:
-        # these functions need to be handled with __array_function__ protocol
-        assert isinstance(y, np.ndarray)
-    elif name in ["frexp"]:
-        # np.frexp returns a tuple
-        assert not isinstance(y, xr.DataArray)
-    else:
-        assert isinstance(y, xr.DataArray)
-
-
-@pytest.mark.filterwarnings("ignore:xarray.ufuncs")
-def test_xarray_ufuncs_pickle():
-    a = 1.0
-    cos_pickled = pickle.loads(pickle.dumps(xu.cos))
-    assert_identical(cos_pickled(a), xu.cos(a))
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index b8e2f6f4582..0168f19b921 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -2154,6 +2154,40 @@ def test_coarsen_keep_attrs(self, operation="mean"):
 class TestVariableWithDask(VariableSubclassobjects):
     cls = staticmethod(lambda *args: Variable(*args).chunk())
 
+    def test_chunk(self):
+        unblocked = Variable(["dim_0", "dim_1"], np.ones((3, 4)))
+        assert unblocked.chunks is None
+
+        blocked = unblocked.chunk()
+        assert blocked.chunks == ((3,), (4,))
+        first_dask_name = blocked.data.name
+
+        blocked = unblocked.chunk(chunks=((2, 1), (2, 2)))
+        assert blocked.chunks == ((2, 1), (2, 2))
+        assert blocked.data.name != first_dask_name
+
+        blocked = unblocked.chunk(chunks=(3, 3))
+        assert blocked.chunks == ((3,), (3, 1))
+        assert blocked.data.name != first_dask_name
+
+        # name doesn't change when rechunking by same amount
+        # this fails if ReprObject doesn't have __dask_tokenize__ defined
+        assert unblocked.chunk(2).data.name == unblocked.chunk(2).data.name
+
+        assert blocked.load().chunks is None
+
+        # Check that kwargs are passed
+        import dask.array as da
+
+        blocked = unblocked.chunk(name="testname_")
+        assert isinstance(blocked.data, da.Array)
+        assert "testname_" in blocked.data.name
+
+        # test kwargs form of chunks
+        blocked = unblocked.chunk(dim_0=3, dim_1=3)
+        assert blocked.chunks == ((3,), (3, 1))
+        assert blocked.data.name != first_dask_name
+
     @pytest.mark.xfail
     def test_0d_object_array_with_list(self):
         super().test_0d_object_array_with_list()
diff --git a/xarray/ufuncs.py b/xarray/ufuncs.py
deleted file mode 100644
index 24907a158ef..00000000000
--- a/xarray/ufuncs.py
+++ /dev/null
@@ -1,197 +0,0 @@
-"""xarray specific universal functions
-
-Handles unary and binary operations for the following types, in ascending
-priority order:
-- scalars
-- numpy.ndarray
-- dask.array.Array
-- xarray.Variable
-- xarray.DataArray
-- xarray.Dataset
-- xarray.core.groupby.GroupBy
-
-Once NumPy 1.10 comes out with support for overriding ufuncs, this module will
-hopefully no longer be necessary.
-"""
-import textwrap
-import warnings as _warnings
-
-import numpy as _np
-
-from .core.dataarray import DataArray as _DataArray
-from .core.dataset import Dataset as _Dataset
-from .core.groupby import GroupBy as _GroupBy
-from .core.pycompat import dask_array_type as _dask_array_type
-from .core.variable import Variable as _Variable
-
-_xarray_types = (_Variable, _DataArray, _Dataset, _GroupBy)
-_dispatch_order = (_np.ndarray, _dask_array_type) + _xarray_types
-_UNDEFINED = object()
-
-
-def _dispatch_priority(obj):
-    for priority, cls in enumerate(_dispatch_order):
-        if isinstance(obj, cls):
-            return priority
-    return -1
-
-
-class _UFuncDispatcher:
-    """Wrapper for dispatching ufuncs."""
-
-    def __init__(self, name):
-        self._name = name
-
-    def __call__(self, *args, **kwargs):
-        if self._name not in ["angle", "iscomplex"]:
-            _warnings.warn(
-                "xarray.ufuncs is deprecated. Instead, use numpy ufuncs directly.",
-                FutureWarning,
-                stacklevel=2,
-            )
-
-        new_args = args
-        res = _UNDEFINED
-        if len(args) > 2 or len(args) == 0:
-            raise TypeError(f"cannot handle {len(args)} arguments for {self._name!r}")
-        elif len(args) == 1:
-            if isinstance(args[0], _xarray_types):
-                res = args[0]._unary_op(self)
-        else:  # len(args) = 2
-            p1, p2 = map(_dispatch_priority, args)
-            if p1 >= p2:
-                if isinstance(args[0], _xarray_types):
-                    res = args[0]._binary_op(args[1], self)
-            else:
-                if isinstance(args[1], _xarray_types):
-                    res = args[1]._binary_op(args[0], self, reflexive=True)
-                    new_args = tuple(reversed(args))
-
-        if res is _UNDEFINED:
-            f = getattr(_np, self._name)
-            res = f(*new_args, **kwargs)
-        if res is NotImplemented:
-            raise TypeError(
-                f"{self._name!r} not implemented for types ({type(args[0])!r}, {type(args[1])!r})"
-            )
-        return res
-
-
-def _skip_signature(doc, name):
-    if not isinstance(doc, str):
-        return doc
-
-    if doc.startswith(name):
-        signature_end = doc.find("\n\n")
-        doc = doc[signature_end + 2 :]
-
-    return doc
-
-
-def _remove_unused_reference_labels(doc):
-    if not isinstance(doc, str):
-        return doc
-
-    max_references = 5
-    for num in range(max_references):
-        label = f".. [{num}]"
-        reference = f"[{num}]_"
-        index = f"{num}. "
-
-        if label not in doc or reference in doc:
-            continue
-
-        doc = doc.replace(label, index)
-
-    return doc
-
-
-def _dedent(doc):
-    if not isinstance(doc, str):
-        return doc
-
-    return textwrap.dedent(doc)
-
-
-def _create_op(name):
-    func = _UFuncDispatcher(name)
-    func.__name__ = name
-    doc = getattr(_np, name).__doc__
-
-    doc = _remove_unused_reference_labels(_skip_signature(_dedent(doc), name))
-
-    func.__doc__ = (
-        f"xarray specific variant of numpy.{name}. Handles "
-        "xarray.Dataset, xarray.DataArray, xarray.Variable, "
-        "numpy.ndarray and dask.array.Array objects with "
-        "automatic dispatching.\n\n"
-        f"Documentation from numpy:\n\n{doc}"
-    )
-    return func
-
-
-__all__ = (  # noqa: F822
-    "angle",
-    "arccos",
-    "arccosh",
-    "arcsin",
-    "arcsinh",
-    "arctan",
-    "arctan2",
-    "arctanh",
-    "ceil",
-    "conj",
-    "copysign",
-    "cos",
-    "cosh",
-    "deg2rad",
-    "degrees",
-    "exp",
-    "expm1",
-    "fabs",
-    "fix",
-    "floor",
-    "fmax",
-    "fmin",
-    "fmod",
-    "fmod",
-    "frexp",
-    "hypot",
-    "imag",
-    "iscomplex",
-    "isfinite",
-    "isinf",
-    "isnan",
-    "isreal",
-    "ldexp",
-    "log",
-    "log10",
-    "log1p",
-    "log2",
-    "logaddexp",
-    "logaddexp2",
-    "logical_and",
-    "logical_not",
-    "logical_or",
-    "logical_xor",
-    "maximum",
-    "minimum",
-    "nextafter",
-    "rad2deg",
-    "radians",
-    "real",
-    "rint",
-    "sign",
-    "signbit",
-    "sin",
-    "sinh",
-    "sqrt",
-    "square",
-    "tan",
-    "tanh",
-    "trunc",
-)
-
-
-for name in __all__:
-    globals()[name] = _create_op(name)
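A worked example of the explicit_chunks helper added in test_backends_api.py above may help readers follow the chunk-expansion tests. This sketch reuses the helper's own definition and spells out two cases; the asserts are illustrative, not part of the patch:

from numbers import Number


def explicit_chunks(chunks, shape):
    # Same logic as the test helper above: an integer chunk size expands to
    # repeated full chunks plus a short remainder; tuple specs pass through.
    return tuple(
        (
            (size // chunk) * (chunk,)
            + ((size % chunk,) if size % chunk or size == 0 else ())
        )
        if isinstance(chunk, Number)
        else chunk
        for chunk, size in zip(chunks, shape)
    )


assert explicit_chunks((2,), (5,)) == ((2, 2, 1),)  # 5 = 2 + 2 + 1
assert explicit_chunks((4, (2, 2, 2)), (5, 6)) == ((4, 1), (2, 2, 2))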
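The TestPreferredChunks class above relies on xr.open_dataset accepting a BackendEntrypoint subclass directly as its engine argument, which lets an in-memory dataset stand in for a file. A minimal sketch of that pattern, assuming only the same API calls the patch's tests use (the EchoBackend name is hypothetical):

import numpy as np
import xarray as xr


class EchoBackend(xr.backends.BackendEntrypoint):
    """Hypothetical pass-through backend: return the object given to open_dataset."""

    def open_dataset(self, dataset, *, drop_variables=None):
        return dataset


ds = xr.Dataset({"data": ("x", np.arange(4))})
# Routing an in-memory dataset through open_dataset exercises the same
# chunking code paths a file-based backend would hit.
assert xr.open_dataset(ds, engine=EchoBackend).identical(ds)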
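Several hunks above (test_dataarray.py, test_dataset.py, test_variable.py) add coverage for the keyword form of .chunk(). A short usage sketch, assuming dask is installed:

import numpy as np
import xarray as xr

da = xr.DataArray(np.ones((3, 4)), dims=("dim_0", "dim_1"))
# The keyword form is equivalent to da.chunk({"dim_0": 3, "dim_1": 3}).
blocked = da.chunk(dim_0=3, dim_1=3)
assert blocked.chunks == ((3,), (3, 1))  # dim_1 of length 4 splits into 3 + 1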
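The test_where_other hunk above replaces the old ValueError with support for combining other and drop=True. A sketch of the newly allowed call, mirroring the test's setup (the exact fill/drop interplay is inferred from the test's assertions):

import numpy as np
import xarray as xr

ds = xr.Dataset({"a": ("x", np.arange(5))}, {"x": np.arange(5)})
# Before this patch the combination raised "cannot set"; now the masked
# entries are filled with `other`, so no NaN is introduced and the integer
# dtype survives (the new test asserts exactly this).
actual = ds.where(ds > 1, other=-1, drop=True)
assert actual.a.dtype == int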
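Finally, with xarray/ufuncs.py deleted, downstream code needs the one-line migration the updated tests demonstrate: call the NumPy ufunc directly and let __array_ufunc__ dispatch to xarray, in either argument order:

import numpy as np
import xarray as xr

da = xr.DataArray([0.0, 1.0], dims="x")
# was: import xarray.ufuncs as xu; xu.sin(da)
assert isinstance(np.sin(da), xr.DataArray)
# was: xu.maximum(da, 0) / xu.maximum(0, da); both orders dispatch to xarray
assert isinstance(np.maximum(da, 0.0), xr.DataArray)
assert isinstance(np.maximum(0.0, da), xr.DataArray)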