Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Esmval fixes #40

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ recursive-include etc *
include LICENSE
include requirements.txt
include requirements_dev.txt
recursive-include etc *
14 changes: 14 additions & 0 deletions daops/data_utils/array_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from roocs_utils.xarray_utils import xarray_utils as xu


def mask_data(ds, **operands):
    """
    Mask the elements of the main variable that are equal to a given value.

    :param ds: Xarray DataSet
    :param operands: keyword arguments; ``value`` is the number to mask
        (a string is converted to a float first)
    :return: Xarray DataSet in which the matching elements of the main
        variable have been replaced by the missing value (NaN)
    """
    value = operands.get("value")
    # convert from string to number
    if isinstance(value, str):
        value = float(value)

    var_id = xu.get_main_variable(ds)
    main_var = ds[var_id]

    # Mask only the main variable. Calling ``ds.where`` with this condition
    # applies it to every data variable, which broadcasts unrelated variables
    # (e.g. bounds) against the main variable's dimensions and masks them too.
    return ds.assign({var_id: main_var.where(main_var != value)})
31 changes: 31 additions & 0 deletions daops/data_utils/attr_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from roocs_utils.xarray_utils import xarray_utils as xu


def fix_attr_main_var(ds, **operands):
    """
    Set attributes on the main variable of a dataset.

    :param ds: Xarray DataSet
    :param operands: keyword arguments; ``attrs`` maps attribute names to the
        values to set. A missing or empty ``attrs`` leaves the dataset untouched.
    :return: Xarray DataSet (the object that was passed in)
    """
    var_id = xu.get_main_variable(ds)

    # Treat an absent "attrs" operand as "nothing to change" instead of
    # failing on ``None.items()``.
    attrs = operands.get("attrs") or {}
    if attrs:
        ds[var_id].attrs.update(attrs)

    return ds


def fix_attr(ds, **operands):
    """
    Set attributes on a named variable of a dataset.

    :param ds: Xarray DataSet
    :param operands: keyword arguments; ``var_id`` names the variable to
        change and ``attrs`` maps attribute names to the values to set.
        A missing or empty ``attrs`` leaves the dataset untouched.
    :return: Xarray DataSet (the object that was passed in)
    """
    var_id = operands.get("var_id")

    # Treat an absent "attrs" operand as "nothing to change" instead of
    # failing on ``None.items()``.
    attrs = operands.get("attrs") or {}
    if attrs:
        ds[var_id].attrs.update(attrs)

    return ds
11 changes: 11 additions & 0 deletions daops/data_utils/coord_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,14 @@ def add_scalar_coord(ds, **operands):
ds[coord].attrs[k] = v

return ds


def reverse_coords(ds, **operands):
    """
    Reverse the order of the values of the requested coordinates.

    :param ds: Xarray DataSet
    :param operands: keyword arguments; ``coords`` is an iterable of the
        names of the coordinates to reverse
    :return: Xarray DataSet with the values of the listed coordinates reversed
    """
    for name in operands.get("coords"):
        coord_var = ds[name]
        # Hold on to the attributes: they are put back on the coordinate
        # after it has been re-assigned from a bare array of values.
        saved_attrs = coord_var.attrs
        flipped = coord_var.values[::-1]

        ds = ds.assign_coords({f"{name}": flipped})
        ds[name].attrs = saved_attrs

    return ds
15 changes: 15 additions & 0 deletions daops/data_utils/var_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from roocs_utils.xarray_utils import xarray_utils as xu


def reverse_2d_vars(ds, **operands):
    """
    Reverse the values of the requested variables along both of their axes.

    :param ds: Xarray DataSet
    :param operands: keyword arguments; ``var_ids`` is an iterable of the
        names of the variables to reverse
    :return: Xarray DataSet with the listed variables' values reversed
    """
    for name in operands.get("var_ids"):
        variable = ds[name]
        # Hold on to the attributes: they are put back on the variable after
        # it has been re-assigned from a bare (dims, values) pair.
        saved_attrs = variable.attrs
        dim_names = variable.dims
        flipped = variable.values[::-1, ::-1]

        ds = ds.assign({f"{name}": (dim_names, flipped)})
        ds[name].attrs = saved_attrs

    return ds
6 changes: 3 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import os
import shutil

from git import Repo
import numpy as np
import pandas as pd
import pytest
import xarray as xr
from git import Repo

from tests._common import MINI_ESGF_CACHE_DIR, write_roocs_cfg
from tests._common import MINI_ESGF_CACHE_DIR
from tests._common import write_roocs_cfg

write_roocs_cfg()

Expand Down
23 changes: 23 additions & 0 deletions tests/test_data_utils/test_array_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import numpy as np
import xarray as xr

from daops.data_utils.array_utils import mask_data
from tests._common import MINI_ESGF_MASTER_DIR


def test_mask_data(load_esgf_test_data):
    """Check that mask_data turns the matching ``tas`` element into NaN."""
    data_glob = (
        f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC"
        "/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc"
    )
    ds = xr.open_mfdataset(data_glob, combine="by_coords", use_cftime=True)

    # the first element holds the value that is about to be masked
    first_element = ds.tas.values[0][0][0]
    assert first_element == np.float32(246.3239)
    assert np.isclose(first_element, 246.3239)

    ds_mask_data = mask_data(ds, value="246.3239")

    masked_element = ds_mask_data.tas.values[0][0][0]
    np.testing.assert_equal(masked_element, np.nan)
51 changes: 51 additions & 0 deletions tests/test_data_utils/test_attr_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import xarray as xr

from daops.data_utils.attr_utils import fix_attr
from daops.data_utils.attr_utils import fix_attr_main_var
from daops.ops.subset import subset
from tests._common import MINI_ESGF_MASTER_DIR


def test_fix_attr_main_var(load_esgf_test_data):
    """Check that fix_attr_main_var overwrites attributes of ``tas``."""
    data_glob = (
        f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC"
        "/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc"
    )
    ds = xr.open_mfdataset(data_glob, combine="by_coords", use_cftime=True)

    # attributes as read from the files
    assert ds.tas.attrs["standard_name"] == "air_temperature"
    assert ds.tas.attrs["long_name"] == "Near-Surface Air Temperature"

    replacement_attrs = {
        "long_name": "False long name",
        "standard_name": "fake_standard_name",
    }
    ds_change_metadata = fix_attr_main_var(ds, attrs=replacement_attrs)

    changed = ds_change_metadata.tas.attrs
    assert changed["standard_name"] == "fake_standard_name"
    assert changed["long_name"] == "False long name"


def test_fix_attr_var(load_esgf_test_data):
    """Check that fix_attr overwrites attributes of the variable named by ``var_id``."""
    data_glob = (
        f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC"
        "/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc"
    )
    ds = xr.open_mfdataset(data_glob, combine="by_coords", use_cftime=True)

    # attributes as read from the files
    for attr_name in ("standard_name", "long_name"):
        assert ds.lat.attrs[attr_name] == "latitude"

    replacement_attrs = {
        "long_name": "False long name",
        "standard_name": "fake_standard_name",
    }
    ds_change_metadata = fix_attr(ds, var_id="lat", attrs=replacement_attrs)

    changed = ds_change_metadata.lat.attrs
    assert changed["standard_name"] == "fake_standard_name"
    assert changed["long_name"] == "False long name"
42 changes: 39 additions & 3 deletions tests/test_data_utils/test_coord_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,28 @@
import numpy as np
import xarray as xr

from daops.data_utils.coord_utils import add_scalar_coord

from daops.data_utils.coord_utils import reverse_coords
from daops.data_utils.coord_utils import squeeze_dims
from tests._common import MINI_ESGF_MASTER_DIR


def test_squeeze_dims(load_esgf_test_data):
    """Check that squeeze_dims removes the ``lev`` dimension from the dataset."""
    data_glob = (
        f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/INM/"
        "inmcm4/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/*.nc"
    )
    ds = xr.open_mfdataset(data_glob, combine="by_coords", use_cftime=True)

    assert "lev" in ds.dims

    ds_squeeze = squeeze_dims(ds, dims=["lev"])

    assert "lev" not in ds_squeeze.dims


def test_add_scalar_coord(load_esgf_test_data):

ds_no_height = xr.open_mfdataset(
Expand All @@ -19,9 +37,8 @@ def test_add_scalar_coord(load_esgf_test_data):
)
operands = {
"dtype": "float64",
"value": 2.0,
"value": "2.0",
"id": "height",
"length": 1,
"attrs": {
"axis": "Z",
"long_name": "height",
Expand All @@ -33,3 +50,22 @@ def test_add_scalar_coord(load_esgf_test_data):
ds_no_height = add_scalar_coord(ds_no_height, **operands)
assert ds_no_height.height == ds_with_height.height
assert ds_no_height.height.attrs == ds_with_height.height.attrs


def test_reverse_coords(load_esgf_test_data):
    """Check that reverse_coords swaps the first and last ``lon`` values."""
    data_glob = (
        f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC"
        "/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc"
    )
    ds = xr.open_mfdataset(data_glob, combine="by_coords", use_cftime=True)

    lon_before = ds.lon.values
    assert np.isclose(lon_before[0], 0)
    assert np.isclose(lon_before[-1], 337.5)

    ds_reverse_lon = reverse_coords(ds, coords=["lon"])

    lon_after = ds_reverse_lon.lon.values
    assert np.isclose(lon_after[0], 337.5)
    assert np.isclose(lon_after[-1], 0)
31 changes: 31 additions & 0 deletions tests/test_data_utils/test_var_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np
import xarray as xr

from daops.data_utils.var_utils import reverse_2d_vars
from tests._common import MINI_ESGF_MASTER_DIR


def test_reverse_2d_vars(load_esgf_test_data):
    """Check that reverse_2d_vars swaps the corner values of ``a_bnds`` and ``b_bnds``."""
    data_glob = (
        f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip6/data/CMIP6/CMIP/NCAR/CESM2"
        "/amip/r1i1p1f1/Amon/cl/gn/v20190319/*.nc"
    )
    ds = xr.open_mfdataset(data_glob, combine="by_coords", use_cftime=True)

    # (first corner, last corner) of each variable as read from the files
    corners = {"a_bnds": (0, 0.00225524), "b_bnds": (1, 0)}

    for var_id, (first, last) in corners.items():
        original_values = ds[var_id].values
        assert np.isclose(original_values[0][0], first)
        assert np.isclose(original_values[-1][-1], last)

    ds_reverse = reverse_2d_vars(ds, var_ids=list(corners))

    for var_id, (first, last) in corners.items():
        reversed_var = ds_reverse[var_id]
        # the corners have traded places and the attributes are unchanged
        assert np.isclose(reversed_var.values[0][0], last)
        assert np.isclose(reversed_var.values[-1][-1], first)
        assert reversed_var.attrs == ds[var_id].attrs
1 change: 0 additions & 1 deletion tests/test_func_chainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from daops import utils
from daops.utils.fixer import FuncChainer

from tests._common import MINI_ESGF_MASTER_DIR

CMIP5_IDS = [
Expand Down
24 changes: 23 additions & 1 deletion tests/test_operations/test_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,29 @@ def test_subset_zostoga_with_apply_fixes_false(tmpdir, load_esgf_test_data):


@pytest.mark.online
def test_subset_t(tmpdir, load_esgf_test_data):
def test_subset_with_several_fixes(tmpdir, load_esgf_test_data):
    """Subset a CESM2 ``cl`` dataset and compare the output's ``lev`` and ``b_bnds`` with the source files."""
    source_glob = (
        f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip6/data/CMIP6/CMIP/NCAR/CESM2"
        "/amip/r3i1p1f1/Amon/cl/gn/v20190319/*.nc"
    )
    ds = xr.open_mfdataset(source_glob, use_cftime=True, combine="by_coords")

    result = subset(
        "CMIP6.CMIP.NCAR.CESM2.amip.r3i1p1f1.Amon.cl.gn.v20190319",
        output_dir=tmpdir,
        file_namer="simple",
    )
    _check_output_nc(result)

    output_path = result.file_uris[0]
    fixed_ds = xr.open_dataset(output_path, use_cftime=True)

    # attributes expected on the output's vertical coordinate
    lev = fixed_ds.lev
    assert lev.standard_name == "atmosphere_hybrid_sigma_pressure_coordinate"
    assert lev.formula_terms == "p0: p0 a: a b: b ps: ps"

    # the output is reversed relative to the source files
    assert lev.values[0] == ds.lev.values[-1]
    assert fixed_ds.b_bnds[0][0] == ds.b_bnds[-1][-1]


@pytest.mark.online
def test_subset_t(tmpdir):
result = subset(
CMIP5_IDS[1],
time=("2085-01-16", "2120-12-16"),
Expand Down