Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes handling multiple z axes when vertically regridding #525

Merged
merged 3 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 40 additions & 36 deletions .github/workflows/build_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,39 @@ env:
PATHS_IGNORE: '["**/README.rst", "**/docs/**", "**/ISSUE_TEMPLATE/**", "**/pull_request_template.md", "**/.vscode/**"]'

jobs:
pre-commit-hooks:
skip-duplicate-actions:
runs-on: ubuntu-latest
timeout-minutes: 3
outputs:
should_skip: ${{ steps.skip_check.outputs.should_skip }}
paths_result: ${{ steps.skip_check.outputs.paths_result }}
steps:
- id: skip_check
uses: fkirc/skip-duplicate-actions@master
uses: fkirc/skip-duplicate-actions@v5
with:
cancel_others: ${{ env.CANCEL_OTHERS }}
paths_ignore: ${{ env.PATHS_IGNORE }}
do_not_skip: '["push", "workflow_dispatch"]'

- if: ${{ steps.skip_check.outputs.should_skip == 'false' }}
name: Checkout Code Repository
pre-commit-hooks:
needs: skip-duplicate-actions
if: needs.skip-duplicate-actions.outputs.should_skip != 'true'
runs-on: ubuntu-latest
timeout-minutes: 3
steps:
- name: Checkout Code Repository
uses: actions/checkout@v3

- if: ${{ steps.skip_check.outputs.should_skip == 'false' }}
name: Set up Python 3.10
- name: Set up Python 3.10
uses: actions/setup-python@v3
with:
python-version: "3.10"

- if: ${{ steps.skip_check.outputs.should_skip == 'false' }}
# Run all pre-commit hooks on all the files.
# Getting only staged files can be tricky in case a new PR is opened
# since the action is run on a branch in detached head state
name: Install and Run Pre-commit
- name: Install and Run Pre-commit
uses: pre-commit/action@v3.0.0

build:
needs: skip-duplicate-actions
if: needs.skip-duplicate-actions.outputs.should_skip != 'true'
name: Build (Python ${{ matrix.python-version }})
runs-on: "ubuntu-latest"
timeout-minutes: 10
Expand All @@ -52,18 +57,9 @@ jobs:
matrix:
python-version: ["3.9", "3.10", "3.11"]
steps:
- id: skip_check
uses: fkirc/skip-duplicate-actions@master
with:
cancel_others: ${{ env.CANCEL_OTHERS }}
paths_ignore: ${{ env.PATHS_IGNORE }}
do_not_skip: '["push", "workflow_dispatch"]'

- if: ${{ steps.skip_check.outputs.should_skip == 'false' }}
uses: actions/checkout@v3
- uses: actions/checkout@v3

- if: ${{ steps.skip_check.outputs.should_skip == 'false' }}
name: Set up Conda Environment
- name: Set up Conda Environment
uses: conda-incubator/setup-miniconda@v2
with:
miniforge-variant: Mambaforge
Expand All @@ -77,14 +73,12 @@ jobs:

# Refresh the cache every 24 hours to avoid inconsistencies of package versions
# between the CI pipeline and local installations.
- if: ${{ steps.skip_check.outputs.should_skip == 'false' }}
id: get-date
- id: get-date
name: Get Date
run: echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT
shell: bash

- if: ${{ steps.skip_check.outputs.should_skip == 'false' }}
id: cache
- id: cache
name: Cache Conda env
uses: actions/cache@v3
with:
Expand All @@ -96,29 +90,39 @@ jobs:
# Increase this value to reset cache if conda-env/ci.yml has not changed in the workflow
CACHE_NUMBER: 0

- if: |
steps.skip_check.outputs.should_skip == 'false' &&
steps.cache.outputs.cache-hit == 'false'
- if: steps.cache.outputs.cache-hit != 'true'
name: Update environment
run: |
mamba env update -n xcdat_ci -f conda-env/ci.yml
# Make sure the Python version in the env matches the current matrix version.
mamba install -c conda-forge python=${{ matrix.python-version }}

- if: ${{ steps.skip_check.outputs.should_skip == 'false' }}
name: Install xcdat
- name: Install xcdat
# Source: https://github.com/conda/conda-build/issues/4251#issuecomment-1053460542
run: |
python -m pip install --no-build-isolation --no-deps -e .

- if: ${{ steps.skip_check.outputs.should_skip == 'false' }}
name: Run Tests
- name: Run Tests
run: |
pytest

- if: ${{ steps.skip_check.outputs.should_skip == 'false' }}
name: Upload Coverage Report
- name: Upload Coverage Report
uses: codecov/codecov-action@v3
with:
file: "tests_coverage_reports/coverage.xml"
fail_ci_if_error: true

# `build-result` is a workaround to skipped matrix jobs in `build` not being considered "successful",
# which can block PR merges if matrix jobs are required status checks.
# More info: https://github.com/fkirc/skip-duplicate-actions#how-to-use-skip-check-with-required-matrix-jobs
build-result:
name: Build Result
if: needs.skip-duplicate-actions.outputs.should_skip != 'true' && always()
runs-on: ubuntu-latest
needs:
- skip-duplicate-actions
- build
steps:
- name: Mark result as failed
if: needs.build.result != 'success'
run: exit 1
60 changes: 57 additions & 3 deletions tests/test_regrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,18 @@ def setup(self):

self.output_grid = grid.create_grid(lev=np.linspace(10000, 2000, 2))

def test_multiple_z_axes(self):
    """Raises when the dataset contains more than one Z axis."""
    # Add a second Z coordinate ("ilev") mirroring the existing "lev".
    ilev = self.ds.lev.copy().rename("ilev")
    self.ds = self.ds.assign_coords({"ilev": ilev})

    # Duplicate "so" and re-dimension the copy onto "ilev" so each Z
    # axis has a variable attached to it.
    self.ds = self.ds.assign(so_1=self.ds.so.copy().rename("so_1"))
    self.ds["so_1"] = self.ds.so_1.swap_dims({"lev": "ilev"})

    regridder = xgcm.XGCMRegridder(self.ds, self.output_grid, method="linear")

    # The error message should name both detected Z axes.
    with pytest.raises(RuntimeError, match=r".*ilev, lev.*"):
        regridder.vertical("so", self.ds)

def test_vertical_regrid_level_name_mismatch(self):
self.ds = self.ds.rename({"lev": "plev"})

Expand Down Expand Up @@ -207,7 +219,7 @@ def test_missing_input_z_coord(self):
)

with pytest.raises(
RuntimeError, match="Could not determine 'Z' coordinate in input dataset"
RuntimeError, match="Could not determine `Z` coordinate in dataset."
):
regridder.vertical("ts", ds)

Expand Down Expand Up @@ -235,7 +247,7 @@ def test_missing_input_z_bounds(self):
)

with pytest.raises(
RuntimeError, match="Could not determine 'Z' bounds in input dataset"
RuntimeError, match="Could not determine `Z` bounds in dataset."
):
regridder.vertical("so", ds)

Expand Down Expand Up @@ -1162,6 +1174,35 @@ class TestAccessor:
def setup(self):
    # Fresh fixtures per test: a vertical (lev) dataset plus a mocked
    # dataset wrapped by the accessor under test.
    self.vertical_ds = fixtures.generate_lev_dataset()
    self.data = mock.MagicMock()
    self.ac = accessor.RegridderAccessor(self.data)

def test_vertical(self):
    """Vertical regridding through the accessor onto a 2-level grid."""
    target_grid = grid.create_grid(lev=np.linspace(10000, 2000, 2))

    result = self.vertical_ds.regridder.vertical(
        "so", target_grid, tool="xgcm", method="linear"
    )

    # Time and horizontal dims are untouched; "lev" collapses to 2 levels.
    assert result.so.shape == (15, 2, 4, 4)

def test_vertical_multiple_z_axes(self):
    """Accessor vertical regridding succeeds despite a second Z axis."""
    target_grid = grid.create_grid(lev=np.linspace(10000, 2000, 2))

    # Add a second Z coordinate ("ilev") mirroring "lev".
    ilev = self.vertical_ds.lev.copy().rename("ilev")
    self.vertical_ds = self.vertical_ds.assign_coords({"ilev": ilev})

    # Attach a duplicate of "so" to the new "ilev" axis.
    self.vertical_ds = self.vertical_ds.assign(
        so_1=self.vertical_ds.so.copy().rename("so_1")
    )
    self.vertical_ds["so_1"] = self.vertical_ds.so_1.swap_dims({"lev": "ilev"})

    result = self.vertical_ds.regridder.vertical(
        "so", target_grid, tool="xgcm", method="linear"
    )

    # The extra Z axis is filtered out, so regridding still works.
    assert result.so.shape == (15, 2, 4, 4)

def test_grid(self):
ds_bounds = fixtures.generate_dataset(
Expand Down Expand Up @@ -1217,7 +1258,8 @@ def test_horizontal_tool_check(self):
):
self.ac.horizontal("ts", mock_data, tool="dummy") # type: ignore

def test_vertical_tool_check(self):
@mock.patch("xcdat.regridder.accessor._get_vertical_input_grid")
def test_vertical_tool_check(self, _get_vertical_input_grid):
mock_regridder = mock.MagicMock()
mock_regridder.return_value.vertical.return_value = "output data"

Expand Down Expand Up @@ -1292,6 +1334,18 @@ def test_preserve_bounds(self):
assert "lev_bnds" in output_ds
assert output_ds.lev_bnds.attrs["source"] == "input_ds"

def test_preserve_bounds_does_not_drop_axis_if_axis_does_not_exist(self):
    """Dropping a "Y" axis that is absent from the dataset is a no-op."""
    output_grid = fixtures.generate_lev_dataset().drop_dims("lat")
    input_ds = output_grid.copy(deep=True)

    result = base._preserve_bounds(input_ds, output_grid, xr.Dataset(), ["Y"])

    # "lat" was never present, so nothing should have changed.
    assert "lat" not in result.dims

def test_regridder_implementation(self):
class NewRegridder(base.BaseRegridder):
def __init__(self, src_grid, dst_grid, **options):
Expand Down
23 changes: 22 additions & 1 deletion xcdat/regridder/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,28 @@ def vertical(
f"Tool {e!s} does not exist, valid choices "
f"{list(VERTICAL_REGRID_TOOLS)}"
)
regridder = regrid_tool(self._ds, output_grid, **options)
input_grid = _get_vertical_input_grid(self._ds, data_var)
regridder = regrid_tool(input_grid, output_grid, **options)
output_ds = regridder.vertical(data_var, self._ds)

return output_ds


def _get_vertical_input_grid(ds: xr.Dataset, data_var: str) -> xr.Dataset:
    """Build the input grid for vertically regridding ``data_var``.

    If the dataset contains multiple "Z" axes, every Z coordinate other
    than the one used by ``data_var`` is dropped first, so the regridder
    sees a single, unambiguous Z axis.

    Parameters
    ----------
    ds : xr.Dataset
        The input dataset.
    data_var : str
        Name of the data variable being regridded.

    Returns
    -------
    xr.Dataset
        The grid derived from ``ds``, containing a single "Z" axis.
    """
    coords = get_dim_coords(ds, "Z")

    # `get_dim_coords` returns a Dataset when more than one Z axis exists.
    if isinstance(coords, xr.Dataset):
        coord_z = {get_dim_coords(ds[data_var], "Z").name}

        all_coords = set(ds.cf[["Z"]].coords.keys())

        # Need to take the intersection after as `ds.cf[["Z"]]` will hand
        # back data variables, which must not be passed to `drop_dims`.
        to_drop = all_coords.difference(coord_z).intersection(set(ds.coords.keys()))

        shallow = ds.drop_dims(to_drop)

        input_grid = shallow.regridder.grid
    else:
        input_grid = ds.regridder.grid

    return input_grid
46 changes: 41 additions & 5 deletions xcdat/regridder/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import xcdat.bounds # noqa: F401
from xcdat._logger import _setup_custom_logger
from xcdat.axis import CFAxisKey
from xcdat.axis import CFAxisKey, get_dim_keys

logger = _setup_custom_logger(__name__)

Expand All @@ -19,7 +19,7 @@ def _preserve_bounds(
input_ds: xr.Dataset,
output_grid: xr.Dataset,
output_ds: xr.Dataset,
ignore_dims: List[CFAxisKey],
drop_axis: List[CFAxisKey],
) -> xr.Dataset:
"""Preserves existing bounds from datasets.

Expand All @@ -33,15 +33,17 @@ def _preserve_bounds(
Output grid Dataset used for regridding.
output_ds : xr.Dataset
Dataset bounds will be copied to.
ignore_dims : List[CFAxisKey]
Dimensions to drop from `input_ds`.
drop_axis : List[CFAxisKey]
Axis or axes to drop from `input_ds`, which drops the related coords
and bounds. For example, dropping the "Y" axis in `input_ds` ensures
that the "Y" axis in `output_grid` is referenced for bounds.

Returns
-------
xr.Dataset
Target Dataset with preserved bounds.
"""
input_ds = input_ds.drop_dims([input_ds.cf[x].name for x in ignore_dims])
input_ds = _drop_axis(input_ds, drop_axis)

for ds in (output_grid, input_ds):
for axis in ("X", "Y", "Z", "T"):
Expand All @@ -56,6 +58,40 @@ def _preserve_bounds(
return output_ds


def _drop_axis(ds: xr.Dataset, axis: List[CFAxisKey]) -> xr.Dataset:
    """Drops an axis or axes in a dataset.

    Axes that cannot be found in the dataset are silently skipped.

    Parameters
    ----------
    ds : xr.Dataset
        The dataset.
    axis : List[CFAxisKey]
        The axis or axes to drop.

    Returns
    -------
    xr.Dataset
        The dataset with axis or axes dropped.
    """
    dims: List[str] = []

    for ax in axis:
        try:
            dim = get_dim_keys(ds, ax)
        except KeyError:
            # The axis does not exist in `ds`; nothing to drop for it.
            continue

        # `get_dim_keys` may return a single dim name or a list of them.
        if isinstance(dim, str):
            dims.append(dim)
        elif isinstance(dim, list):
            dims.extend(dim)

    if len(dims) > 0:
        ds = ds.drop_dims(dims)

    return ds


class BaseRegridder(abc.ABC):
"""BaseRegridder."""

Expand Down
Loading