diff --git a/MANIFEST.in b/MANIFEST.in index 5f3f2fa..643a47d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,3 +7,4 @@ recursive-include etc * include LICENSE include requirements.txt include requirements_dev.txt +recursive-include etc * diff --git a/daops/data_utils/array_utils.py b/daops/data_utils/array_utils.py index e69de29..d277ee7 100644 --- a/daops/data_utils/array_utils.py +++ b/daops/data_utils/array_utils.py @@ -0,0 +1,14 @@ +from roocs_utils.xarray_utils import xarray_utils as xu + + +def mask_data(ds, **operands): + value = operands.get("value") + # convert from string to number + if isinstance(value, str): + value = float(value) + + var_id = xu.get_main_variable(ds) + + ds = ds.where(ds[var_id] != value) + + return ds diff --git a/daops/data_utils/attr_utils.py b/daops/data_utils/attr_utils.py index e69de29..1d813fc 100644 --- a/daops/data_utils/attr_utils.py +++ b/daops/data_utils/attr_utils.py @@ -0,0 +1,31 @@ +from roocs_utils.xarray_utils import xarray_utils as xu + + +def fix_attr_main_var(ds, **operands): + """ + :param ds: Xarray DataSet + :param operands: sequence of arguments + :return: Xarray DataArray + """ + var_id = xu.get_main_variable(ds) + + attrs = operands.get("attrs") + for k, v in operands.get("attrs").items(): + ds[var_id].attrs[k] = v + + return ds + + +def fix_attr(ds, **operands): + """ + :param ds: Xarray DataSet + :param operands: sequence of arguments + :return: Xarray DataArray + """ + var_id = operands.get("var_id") + + attrs = operands.get("attrs") + for k, v in operands.get("attrs").items(): + ds[var_id].attrs[k] = v + + return ds diff --git a/daops/data_utils/coord_utils.py b/daops/data_utils/coord_utils.py index bfb3d0f..d1ae1ea 100644 --- a/daops/data_utils/coord_utils.py +++ b/daops/data_utils/coord_utils.py @@ -30,3 +30,14 @@ def add_scalar_coord(ds, **operands): ds[coord].attrs[k] = v return ds + + +def reverse_coords(ds, **operands): + coords = operands.get("coords") + + for coord in coords: + attrs = ds[coord].attrs + ds = ds.assign_coords({f"{coord}": ds[coord].values[::-1]}) + ds[coord].attrs = attrs + + return ds diff --git a/daops/data_utils/var_utils.py b/daops/data_utils/var_utils.py index e69de29..86bab08 100644 --- a/daops/data_utils/var_utils.py +++ b/daops/data_utils/var_utils.py @@ -0,0 +1,15 @@ +from roocs_utils.xarray_utils import xarray_utils as xu + + +def reverse_2d_vars(ds, **operands): + var_ids = operands.get("var_ids") + + for var_id in var_ids: + + attrs = ds[var_id].attrs + dims = ds[var_id].dims + + ds = ds.assign({f"{var_id}": (dims, ds[var_id].values[::-1, ::-1])}) + ds[var_id].attrs = attrs + + return ds diff --git a/tests/conftest.py b/tests/conftest.py index eefd9b5..445eaa2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,13 +1,13 @@ import os import shutil -from git import Repo import numpy as np -import pandas as pd import pytest import xarray as xr +from git import Repo -from tests._common import MINI_ESGF_CACHE_DIR, write_roocs_cfg +from tests._common import MINI_ESGF_CACHE_DIR +from tests._common import write_roocs_cfg write_roocs_cfg() diff --git a/tests/test_data_utils/test_array_utils.py b/tests/test_data_utils/test_array_utils.py new file mode 100644 index 0000000..4723b58 --- /dev/null +++ b/tests/test_data_utils/test_array_utils.py @@ -0,0 +1,23 @@ +import numpy as np +import xarray as xr + +from daops.data_utils.array_utils import mask_data +from tests._common import MINI_ESGF_MASTER_DIR + + +def test_mask_data(load_esgf_test_data): + ds = xr.open_mfdataset( + f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC" + "/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", + combine="by_coords", + use_cftime=True, + ) + + assert ds.tas.values[0][0][0] == np.float32(246.3239) + assert np.isclose(ds.tas.values[0][0][0], 246.3239) + + operands = { + "value": "246.3239", + } + ds_mask_data = mask_data(ds, **operands) + np.testing.assert_equal(ds_mask_data.tas.values[0][0][0], np.nan) diff --git a/tests/test_data_utils/test_attr_utils.py b/tests/test_data_utils/test_attr_utils.py new file mode 100644 index 0000000..61e6760 --- /dev/null +++ b/tests/test_data_utils/test_attr_utils.py @@ -0,0 +1,51 @@ +import xarray as xr + +from daops.data_utils.attr_utils import fix_attr +from daops.data_utils.attr_utils import fix_attr_main_var +from daops.ops.subset import subset +from tests._common import MINI_ESGF_MASTER_DIR + + +def test_fix_attr_main_var(load_esgf_test_data): + ds = xr.open_mfdataset( + f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC" + "/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", + combine="by_coords", + use_cftime=True, + ) + + assert ds.tas.attrs["standard_name"] == "air_temperature" + assert ds.tas.attrs["long_name"] == "Near-Surface Air Temperature" + + operands = { + "attrs": { + "long_name": "False long name", + "standard_name": "fake_standard_name", + }, + } + ds_change_metadata = fix_attr_main_var(ds, **operands) + assert ds_change_metadata.tas.attrs["standard_name"] == "fake_standard_name" + assert ds_change_metadata.tas.attrs["long_name"] == "False long name" + + +def test_fix_attr_var(load_esgf_test_data): + ds = xr.open_mfdataset( + f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC" + "/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", + combine="by_coords", + use_cftime=True, + ) + + assert ds.lat.attrs["standard_name"] == "latitude" + assert ds.lat.attrs["long_name"] == "latitude" + + operands = { + "var_id": "lat", + "attrs": { + "long_name": "False long name", + "standard_name": "fake_standard_name", + }, + } + ds_change_metadata = fix_attr(ds, **operands) + assert ds_change_metadata.lat.attrs["standard_name"] == "fake_standard_name" + assert ds_change_metadata.lat.attrs["long_name"] == "False long name" diff --git a/tests/test_data_utils/test_coord_utils.py b/tests/test_data_utils/test_coord_utils.py index 08dcd20..0eecd4e 100644 --- a/tests/test_data_utils/test_coord_utils.py +++ b/tests/test_data_utils/test_coord_utils.py @@ -1,10 +1,28 @@ +import numpy as np import xarray as xr from daops.data_utils.coord_utils import add_scalar_coord - +from daops.data_utils.coord_utils import reverse_coords +from daops.data_utils.coord_utils import squeeze_dims from tests._common import MINI_ESGF_MASTER_DIR +def test_squeeze_dims(load_esgf_test_data): + ds = xr.open_mfdataset( + f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/INM/" + "inmcm4/rcp45/mon/ocean/Omon/r1i1p1/latest/zostoga/*.nc", + combine="by_coords", + use_cftime=True, + ) + + assert "lev" in ds.dims + + operands = {"dims": ["lev"]} + + ds_squeeze = squeeze_dims(ds, **operands) + assert "lev" not in ds_squeeze.dims + + def test_add_scalar_coord(load_esgf_test_data): ds_no_height = xr.open_mfdataset( @@ -19,9 +37,8 @@ def test_add_scalar_coord(load_esgf_test_data): ) operands = { "dtype": "float64", - "value": 2.0, + "value": "2.0", "id": "height", - "length": 1, "attrs": { "axis": "Z", "long_name": "height", @@ -33,3 +50,22 @@ def test_add_scalar_coord(load_esgf_test_data): ds_no_height = add_scalar_coord(ds_no_height, **operands) assert ds_no_height.height == ds_with_height.height assert ds_no_height.height.attrs == ds_with_height.height.attrs + + +def test_reverse_coords(load_esgf_test_data): + ds = xr.open_mfdataset( + f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC" + "/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/*.nc", + combine="by_coords", + use_cftime=True, + ) + + assert np.isclose(ds.lon.values[0], 0) + assert np.isclose(ds.lon.values[-1], 337.5) + + operands = {"coords": ["lon"]} + + ds_reverse_lon = reverse_coords(ds, **operands) + + assert np.isclose(ds_reverse_lon.lon.values[0], 337.5) + assert np.isclose(ds_reverse_lon.lon.values[-1], 0) diff --git a/tests/test_data_utils/test_var_utils.py b/tests/test_data_utils/test_var_utils.py new file mode 100644 index 0000000..d78d21a --- /dev/null +++ b/tests/test_data_utils/test_var_utils.py @@ -0,0 +1,31 @@ +import numpy as np +import xarray as xr + +from daops.data_utils.var_utils import reverse_2d_vars +from tests._common import MINI_ESGF_MASTER_DIR + + +def test_reverse_2d_vars(load_esgf_test_data): + ds = xr.open_mfdataset( + f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip6/data/CMIP6/CMIP/NCAR/CESM2/amip/r1i1p1f1/Amon/cl/gn/v20190319/*.nc", + combine="by_coords", + use_cftime=True, + ) + + assert np.isclose(ds.a_bnds.values[0][0], 0) + assert np.isclose(ds.a_bnds.values[-1][-1], 0.00225524) + + assert np.isclose(ds.b_bnds.values[0][0], 1) + assert np.isclose(ds.b_bnds.values[-1][-1], 0) + + operands = {"var_ids": ["a_bnds", "b_bnds"]} + + ds_reverse = reverse_2d_vars(ds, **operands) + + assert np.isclose(ds_reverse.a_bnds.values[0][0], 0.00225524) + assert np.isclose(ds_reverse.a_bnds.values[-1][-1], 0) + assert ds_reverse.a_bnds.attrs == ds.a_bnds.attrs + + assert np.isclose(ds_reverse.b_bnds.values[0][0], 0) + assert np.isclose(ds_reverse.b_bnds.values[-1][-1], 1) + assert ds_reverse.b_bnds.attrs == ds.b_bnds.attrs diff --git a/tests/test_func_chainer.py b/tests/test_func_chainer.py index 0d01b60..aa7edf2 100644 --- a/tests/test_func_chainer.py +++ b/tests/test_func_chainer.py @@ -5,7 +5,6 @@ from daops import utils from daops.utils.fixer import FuncChainer - from tests._common import MINI_ESGF_MASTER_DIR CMIP5_IDS = [ diff --git a/tests/test_operations/test_subset.py b/tests/test_operations/test_subset.py index 1226878..91fa73d 100644 --- a/tests/test_operations/test_subset.py +++ b/tests/test_operations/test_subset.py @@ -69,7 +69,29 @@ def test_subset_zostoga_with_apply_fixes_false(tmpdir, load_esgf_test_data): @pytest.mark.online -def test_subset_t(tmpdir, load_esgf_test_data): +def test_subset_with_several_fixes(tmpdir, load_esgf_test_data): + ds = xr.open_mfdataset( + f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip6/data/CMIP6/CMIP/NCAR/CESM2" + "/amip/r3i1p1f1/Amon/cl/gn/v20190319/*.nc", + use_cftime=True, + combine="by_coords", + ) + + result = subset( + "CMIP6.CMIP.NCAR.CESM2.amip.r3i1p1f1.Amon.cl.gn.v20190319", + output_dir=tmpdir, + file_namer="simple", + ) + _check_output_nc(result) + fixed_ds = xr.open_dataset(result.file_uris[0], use_cftime=True) + assert fixed_ds.lev.standard_name == "atmosphere_hybrid_sigma_pressure_coordinate" + assert fixed_ds.lev.formula_terms == "p0: p0 a: a b: b ps: ps" + assert fixed_ds.lev.values[0] == ds.lev.values[-1] + assert fixed_ds.b_bnds[0][0] == ds.b_bnds[-1][-1] + + +@pytest.mark.online +def test_subset_t(tmpdir): result = subset( CMIP5_IDS[1], time=("2085-01-16", "2120-12-16"),