-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add NWM zonal averaging workflow (#1547)
- Loading branch information
1 parent
98ed1d2
commit 624979c
Showing
4 changed files
with
115 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,47 @@ | ||
import pytest | ||
import xarray as xr | ||
from coiled.credentials.google import CoiledShippedCredentials | ||
|
||
|
||
def test_era5_rechunking(
    gcs_url,
    scale,
    client_factory,
    cluster_kwargs=None,
    scale_kwargs=None,
):
    """Rechunk a subset of the ERA5 dataset and write the result to GCS.

    Parameters
    ----------
    gcs_url : str
        Destination ``gs://`` URL for the rechunked Zarr store (fixture).
    scale : str
        One of ``"small"``, ``"medium"``, ``"large"``; selects the time
        range and variables, and the worker count via ``scale_kwargs``.
    client_factory : callable
        Context-manager factory (fixture) that provisions a Coiled cluster
        and yields a Dask client.
    cluster_kwargs, scale_kwargs : dict, optional
        Cluster configuration overrides. Defaults are built per call to
        avoid the shared-mutable-default-argument pitfall.
    """
    # Build defaults inside the function body: mutable default arguments
    # are shared across calls and are a classic Python pitfall.
    if cluster_kwargs is None:
        cluster_kwargs = {
            "workspace": "dask-engineering-gcp",
            "region": "us-central1",
            "wait_for_workers": True,
        }
    if scale_kwargs is None:
        scale_kwargs = {
            "small": {"n_workers": 10},
            "medium": {"n_workers": 100},
            "large": {"n_workers": 100},
        }

    with client_factory(
        **scale_kwargs[scale], **cluster_kwargs
    ) as client:  # noqa: F841
        # Load dataset
        ds = xr.open_zarr(
            "gs://weatherbench2/datasets/era5/1959-2023_01_10-full_37-1h-0p25deg-chunk-1.zarr",
        ).drop_encoding()

        if scale == "small":
            # 101.83 GiB (small)
            time_range = slice("2020-01-01", "2023-01-01")
            variables = ["sea_surface_temperature"]
        elif scale == "medium":
            # 2.12 TiB (medium)
            time_range = slice(None)
            variables = ["sea_surface_temperature"]
        else:
            # 4.24 TiB (large)
            # This currently doesn't complete successfully.
            time_range = slice(None)
            variables = ["sea_surface_temperature", "snow_depth"]
        subset = ds[variables].sel(time=time_range)

        # Rechunk
        result = subset.chunk({"time": -1, "longitude": "auto", "latitude": "auto"})

        # Write result to cloud storage
        result.to_zarr(gcs_url, storage_options={"token": CoiledShippedCredentials()})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
""" | ||
This example was adapted from https://github.com/dcherian/dask-demo/blob/main/nwm-aws.ipynb | ||
""" | ||
|
||
import flox.xarray | ||
import numpy as np | ||
import rioxarray | ||
import xarray as xr | ||
|
||
|
||
def test_nwm(
    s3,
    scale,
    client_factory,
    cluster_kwargs=None,
    scale_kwargs=None,
):
    """Compute per-county means of NWM water-table depth (``zwattablrt``).

    Opens the NOAA National Water Model retrospective Zarr store on S3,
    aligns it with a 250 m county-ID raster, and reduces the selected time
    range to a mean per county with ``flox``.

    Parameters
    ----------
    s3 : fsspec filesystem
        S3 filesystem fixture used to open the county raster.
    scale : str
        ``"small"`` (one year, ~6 TiB) or anything else (full 1979-2020
        record, ~252 TiB).
    client_factory : callable
        Context-manager factory (fixture) that provisions a Coiled cluster
        and yields a Dask client.
    cluster_kwargs, scale_kwargs : dict, optional
        Cluster configuration overrides. Defaults are built per call to
        avoid the shared-mutable-default-argument pitfall.
    """
    # Build defaults inside the function body: mutable default arguments
    # are shared across calls and are a classic Python pitfall.
    if cluster_kwargs is None:
        cluster_kwargs = {
            "workspace": "dask-engineering",
            "region": "us-east-1",
            "wait_for_workers": True,
        }
    if scale_kwargs is None:
        scale_kwargs = {
            "small": {"n_workers": 10},
            "large": {"n_workers": 200, "scheduler_memory": "32 GiB"},
        }

    with client_factory(
        **scale_kwargs[scale], **cluster_kwargs
    ) as client:  # noqa: F841
        ds = xr.open_zarr(
            "s3://noaa-nwm-retrospective-2-1-zarr-pds/rtout.zarr", consolidated=True
        )

        if scale == "small":
            # 6.03 TiB
            time_range = slice("2020-01-01", "2020-12-31")
        else:
            # 252.30 TiB
            time_range = slice("1979-02-01", "2020-12-31")
        subset = ds.zwattablrt.sel(time=time_range)

        counties = rioxarray.open_rasterio(
            s3.open("s3://nwm-250m-us-counties/Counties_on_250m_grid.tif"),
            chunks="auto",
        ).squeeze()

        # Remove any small floating point error in coordinate locations
        _, counties_aligned = xr.align(subset, counties, join="override")
        # Persist the (comparatively small) mask so the groupby reduction
        # doesn't re-read the raster for every chunk of the main dataset.
        counties_aligned = counties_aligned.persist()

        county_id = np.unique(counties_aligned.data).compute()
        # County ID 0 is the fill value for cells outside any county.
        county_id = county_id[county_id != 0]
        county_mean = flox.xarray.xarray_reduce(
            subset,
            counties_aligned.rename("county"),
            func="mean",
            expected_groups=(county_id,),
        )

        county_mean.compute()