Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply xcdat for mean climate metrics #884

Closed
wants to merge 67 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
67 commits
Select commit Hold shift + click to select a range
8d29964
new file to prototype with xcdat
gleckler1 Jul 14, 2022
a1d152d
working on xcdat fcn
gleckler1 Aug 18, 2022
d7908b5
added clim-xcdat to setup
gleckler1 Aug 18, 2022
5429b40
simplify seasonal clim calculation to avoid unnecessary repetition
lee1043 Aug 25, 2022
8131a60
minor
gleckler1 Aug 25, 2022
8b32025
unsure
gleckler1 Aug 25, 2022
469842d
got JW edits
gleckler1 Aug 25, 2022
d873a78
remove old backup file for obs_info_dict json
lee1043 Oct 20, 2022
d23c66f
clean up
lee1043 Oct 21, 2022
f0d1779
add xcdat and temporary function that can open xml using xcdat
lee1043 Oct 21, 2022
396e631
temporary function that can open xml using xcdat
lee1043 Oct 21, 2022
8fd0d4f
use xcdat for climatology calculation
lee1043 Oct 21, 2022
26761a5
allow dask generating large_chunks to silence large chunk warnings
lee1043 Oct 21, 2022
21fb586
remove cdms code from the part to generate annual cycle files; resul…
lee1043 Oct 21, 2022
6e07563
clean up
lee1043 Oct 21, 2022
3151092
add description as comment
lee1043 Oct 21, 2022
f07d8a9
clean up: simplify repeating lines
lee1043 Oct 21, 2022
acc86e2
Exclude yaml and script as default from pmp output json
lee1043 Oct 25, 2022
0da306a
re-org
lee1043 Oct 27, 2022
f85e36d
add example param files
lee1043 Oct 27, 2022
8ac56ad
re-org files
lee1043 Oct 27, 2022
4e887f8
re-org and clean up
lee1043 Oct 27, 2022
c655b1e
clean up
lee1043 Oct 27, 2022
850a45a
rename mean climate directory
lee1043 Oct 27, 2022
bf91c8b
change import accordingly to the reorganized directories
lee1043 Oct 27, 2022
14d6a5a
clean up
lee1043 Oct 27, 2022
98e7e40
merge lib directories
lee1043 Oct 27, 2022
829fc9e
clean up
lee1043 Oct 27, 2022
ff043f6
clean up
lee1043 Oct 27, 2022
ca9d624
clean up, chmod
lee1043 Oct 27, 2022
dc67c92
clean up
lee1043 Oct 27, 2022
47ccc20
clean up
lee1043 Oct 27, 2022
89eb591
clean up
lee1043 Oct 27, 2022
10b0aea
gather statistics compute functions in one file
lee1043 Oct 27, 2022
b32976b
clean up
lee1043 Oct 27, 2022
de78869
clean up
lee1043 Oct 27, 2022
8c00d84
pre-commit clean up
lee1043 Oct 27, 2022
bf8013a
import path correct
lee1043 Oct 27, 2022
f70f40d
clean up
lee1043 Oct 27, 2022
4b1e424
pre-commit clean up
lee1043 Oct 27, 2022
3dc3a58
Merge branch '859_pjg_xcdatclims_ljw_reorg' of github.com:PCMDI/pcmdi…
lee1043 Oct 27, 2022
de7e291
clean up
lee1043 Oct 27, 2022
ea5e880
bug fix
lee1043 Oct 27, 2022
1a52767
clean up
lee1043 Oct 27, 2022
393e627
clean up
lee1043 Oct 27, 2022
0d68f61
clean up
lee1043 Oct 27, 2022
06560d3
bug fix
lee1043 Oct 27, 2022
4857338
add readme and clean up
lee1043 Oct 27, 2022
709ffee
some progress toward using xcdat for step 2 mean clim metric calculat…
lee1043 Oct 29, 2022
de76499
conversion of metric calculations in progress
lee1043 Oct 30, 2022
3f94d4d
xcdat transition in progress
lee1043 Nov 1, 2022
1b0a585
metrics calculations using xcdat
lee1043 Nov 3, 2022
143f845
update correlation calculation -- use only xcdat/xarray and exclude x…
lee1043 Nov 3, 2022
c431bd8
more efficient calculation
lee1043 Nov 5, 2022
ad9406b
rms_0 clarification added
lee1043 Nov 7, 2022
1b0aa6a
update xcdat version
lee1043 Nov 30, 2022
cfbdf48
Enable statistics calculation -- now output saves in json
lee1043 Dec 1, 2022
0b57dc9
extract variable at specific level from 4d field
lee1043 Dec 1, 2022
d53a75a
load regions_specs as function, not exec. some clean ups
lee1043 Dec 5, 2022
70bccd0
load regions_specs like a function, for xcdat usage
lee1043 Dec 5, 2022
4d44d44
Merge pull request #885 from PCMDI/859_pjg_xcdatclims_ljw_reorg
lee1043 Dec 5, 2022
5cd347d
work toward add new driver ...
lee1043 Dec 7, 2022
5445172
in progress
lee1043 Dec 7, 2022
32fe331
in progress
lee1043 Dec 7, 2022
400f54f
in progress
lee1043 Dec 7, 2022
4de3285
in progress
lee1043 Dec 8, 2022
e8b8a29
clean up
lee1043 Dec 9, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions conda-env/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ dependencies:
- eofs=1.4.0
- seaborn=0.11.1
- enso_metrics=1.1.1
- xcdat=0.4.0
- xmltodict=0.13.0
# Testing
# ==================
- pre_commit=2.15.0
Expand Down
3 changes: 2 additions & 1 deletion pcmdi_metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@
plog.addHandler(ch)
plog.setLevel(LOG_LEVEL)
from . import io # noqa
from . import pcmdi # noqa
#from . import pcmdi # noqa
#from . import mean_climate # noqa
from .version import __git_sha1__, __git_tag_describe__, __version__ # noqa
3 changes: 3 additions & 0 deletions pcmdi_metrics/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# init for pcmdi_metrics.io
from .xcdat_openxml import xcdat_open # noqa # isort:skip
from . import base # noqa
from .base import MV2Json # noqa
from .default_regions_define import load_regions_specs # noqa
from .default_regions_define import region_subset # noqa
129 changes: 96 additions & 33 deletions pcmdi_metrics/io/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
import genutil
import MV2
import numpy
import xcdat

import pcmdi_metrics
from pcmdi_metrics import LOG_LEVEL
from pcmdi_metrics.io import xcdat_open

value = 0
cdms2.setNetcdfShuffleFlag(value) # where value is either 0 or 1
Expand Down Expand Up @@ -158,7 +160,17 @@ def __call__(self):
def read(self):
pass

def write(self, data, type="json", mode="w", *args, **kwargs):
def write(
self,
data,
type="json",
mode="w",
include_YAML=False,
include_history=False,
include_script=False,
*args,
**kwargs,
):
self.type = type.lower()
file_name = self()
dir_path = os.path.split(file_name)[0]
Expand Down Expand Up @@ -197,9 +209,17 @@ def write(self, data, type="json", mode="w", *args, **kwargs):
f = open(file_name, "w")
update_dict(out_dict, data)
if "yaml" in out_dict["provenance"]["conda"]:
out_dict["YAML"] = out_dict["provenance"]["conda"]["yaml"]
if include_YAML:
out_dict["YAML"] = out_dict["provenance"]["conda"]["yaml"]
del out_dict["provenance"]["conda"]["yaml"]
# out_dict = OrderedDict({"provenance": generateProvenance()})

if not include_script:
if "script" in out_dict["provenance"].keys():
del out_dict["provenance"]["script"]

if not include_history:
if "history" in out_dict["provenance"].keys():
del out_dict["provenance"]["history"]

json.dump(out_dict, f, cls=CDMSDomainsEncoder, *args, **kwargs)
f.close()
Expand All @@ -211,11 +231,14 @@ def write(self, data, type="json", mode="w", *args, **kwargs):
f.close()

elif self.type == "nc":
"""
f = cdms2.open(file_name, "w")
f.write(data, *args, **kwargs)
f.metrics_git_sha1 = pcmdi_metrics.__git_sha1__
f.uvcdat_version = cdat_info.get_version()
f.close()
"""
data.to_netcdf(file_name)

else:
logging.getLogger("pcmdi_metrics").error("Unknown type: %s" % type)
Expand Down Expand Up @@ -326,6 +349,7 @@ def get_dimensions(json_dict, json_structure):
)

def get(self, var, var_in_file=None, region={}, *args, **kwargs):
print('jwlee-test-get, var, var_in_file:', var, var_in_file)
self.variable = var
self.var_from_file = self.extract_var_from_file(
var, var_in_file, *args, **kwargs
Expand All @@ -339,7 +363,7 @@ def get(self, var, var_in_file=None, region={}, *args, **kwargs):
if self.is_masking():
self.var_from_file = self.mask_var(self.var_from_file)

self.var_from_file = self.set_target_grid_and_mask_in_var(self.var_from_file)
self.var_from_file = self.set_target_grid_and_mask_in_var(self.var_from_file, var)

self.var_from_file = self.set_domain_in_var(self.var_from_file, self.region)

Expand All @@ -348,13 +372,19 @@ def get(self, var, var_in_file=None, region={}, *args, **kwargs):
def extract_var_from_file(self, var, var_in_file, *args, **kwargs):
if var_in_file is None:
var_in_file = var
# self.extension = 'nc'
var_file = cdms2.open(self(), "r")
for att in ["var_in_file,", "varInFile"]:
if att in kwargs:
del kwargs[att]
extracted_var = var_file(var_in_file, *args, **kwargs)
var_file.close()

try:
ds = xcdat_open(self(), data_var=var_in_file, decode_times=True)
except Exception:
ds = xcdat_open(self(), data_var=var_in_file, decode_times=False) # Temporary part to read in cdms written obs4MIP AC files

if 'level' in list(kwargs.keys()):
print("jwlee-test extract_var_from_file kwargs['level']:", kwargs['level'])
level = kwargs['level']
ds = ds.sel(plev=level)

extracted_var = ds

return extracted_var

def is_masking(self):
Expand All @@ -364,46 +394,72 @@ def is_masking(self):
return False

def mask_var(self, var):
"""
self: <pcmdi_metrics.io.base.Base object at 0x7f24a0768a60>
var: <xarray.Dataset>
"""
print('jwlee-test-mask_var, self, var:', self, var)
print('jwlee-test-mask_var, type(self)', type(self))
print('jwlee-test-mask_var, type(var)', type(var))
print('jwlee-test-mask_var, self.mask', self.mask)
print('jwlee-test-mask_var, type(self.mask)', type(self.mask)) # cdms2.tvariable.TransientVariable
print('jwlee-test-mask_var, self.mask.shape', self.mask.shape)
print("jwlee-test-mask_var, tuple(var.dims[d] for d in ['lat', 'lon']):", tuple(var.dims[d] for d in ['lat', 'lon']))
var_shape = tuple(var.dims[d] for d in ['lat', 'lon'])

if self.mask is None:
self.set_file_mask_template()
self.mask = self.get_mask_from_var(var)
if self.mask.shape != var.shape:
#if self.mask.shape != var.shape:
if self.mask.shape != var_shape:
dummy, mask = genutil.grower(var, self.mask)
else:
mask = self.target_mask
mask = MV2.not_equal(mask, self.value)
return MV2.masked_where(mask, var)

def set_target_grid_and_mask_in_var(self, var):
def set_target_grid_and_mask_in_var(self, var, var_in_file):
"""
self: <class 'pcmdi_metrics.io.base.Base'> object
self(): string, path to input file
"""
print('jwlee-test-regrid, set_target_grid_and_mask_in_var start')
if self.target_grid is not None:
var = var.regrid(
self.target_grid,
regridTool=self.regrid_tool,
regridMethod=self.regrid_method,
coordSys="deg",
diag={},
periodicity=1,
)
print('jwlee-test-regrid, type(self):', type(self))
print('jwlee-test-regrid, type(self()):', type(self()))
print('jwlee-test-regrid, self():', self())
print('jwlee-test-regrid, regridder start, var_in_file:', var_in_file)
var.to_netcdf(self().split('/')[-1].split('.nc')[0] + '_test1-org.nc')
var = var.regridder.horizontal(var_in_file, self.target_grid, tool=self.regrid_tool)
print('jwlee-test-regrid, regridder done')
var.to_netcdf(self().split('/')[-1].split('.nc')[0] + '_test2-regridded.nc')
print('jwlee-test-regrid-2, var[var_in_file].shape:', var[var_in_file].shape)
print('jwlee-test-regrid-3, self.target_mask:', self.target_mask)

if self.target_mask is not None:
if self.target_mask.shape != var.shape:
# if self.target_mask.shape != var.shape:
if self.target_mask.shape != var[var_in_file].shape:
dummy, mask = genutil.grower(var, self.target_mask)
else:
mask = self.target_mask
var = MV2.masked_where(mask, var)

print('jwlee-test-regrid-4, set_target_grid_and_mask_in_var done')

return var

def set_domain_in_var(self, var, region):
domain = region.get("domain", None)
if domain is not None:
if isinstance(domain, dict):
var = var(**domain)
elif isinstance(domain, (list, tuple)):
var = var(*domain)
elif isinstance(domain, cdms2.selectors.Selector):
domain.id = region.get("id", "region")
var = var(*[domain])
"""
self: <class 'pcmdi_metrics.io.base.Base'>
var: <xarray.Dataset>
region: <class 'dict'>, e.g., {'domain': Selector(<cdutil.region.DomainComponent object at 0x7fdbe2b70760>), 'id': 'NHEX'}
"""
region_id = region['id']
from pcmdi_metrics.io import load_regions_specs, region_subset
regions_specs = load_regions_specs()
if region_id not in ['global', 'land', 'ocean']:
var = region_subset(var, regions_specs, region=region_id)

return var

def set_file_mask_template(self):
Expand All @@ -416,7 +472,12 @@ def set_file_mask_template(self):

def get_mask_from_var(self, var):
try:
o_mask = self.file_mask_template.get("sftlf")
print('jwlee-test-get_mask_from_var start')
#o_mask = self.file_mask_template.get("sftlf")
o_mask = self.file_mask_template.get("sftlf", var_in_file="sftlf")
print('jwlee-test-get_mask_from_var, self.file_mask_template:', self.file_mask_template)
print('jwlee-test-get_mask_from_var, type(o_mask):', type(o_mask))
print('jwlee-test-get_mask_from_var, o_mask.shape:', o_mask.shape)
except Exception:
o_mask = (
cdutil.generateLandSeaMask(var, regridTool=self.regrid_tool).filled(1.0)
Expand All @@ -431,7 +492,9 @@ def set_target_grid(self, target, regrid_tool="esmf", regrid_method="linear"):
self.regrid_tool = regrid_tool
self.regrid_method = regrid_method
if target == "2.5x2.5":
self.target_grid = cdms2.createUniformGrid(-88.875, 72, 2.5, 0, 144, 2.5)
print('jwlee-test, set_target_grid, start')
# self.target_grid = cdms2.createUniformGrid(-88.875, 72, 2.5, 0, 144, 2.5)
self.target_grid = xcdat.create_uniform_grid(-88.875, 88.625, 2.5, 0, 357.5, 2.5)
self.target_grid_name = target
elif cdms2.isGrid(target):
self.target_grid = target
Expand Down
89 changes: 89 additions & 0 deletions pcmdi_metrics/io/default_regions_define.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
def load_regions_specs():
    """Return the default region definitions keyed by region name.

    A fresh dictionary is built on every call, so callers may modify the
    result without affecting later calls.

    Each entry is a dict that may contain:

    * ``"domain"``: a dict with ``"latitude"`` and/or ``"longitude"`` keys,
      each holding a 2-tuple of bounds in degrees.
    * ``"value"``: a number attached to the ``land*`` (100) and ``ocean*``
      (0) regions -- presumably the land-fraction value used for masking;
      confirm against the caller.

    ``"global"`` is an empty dict (no restriction).

    Returns
    -------
    dict
        Mapping of region name to its specification.
    """
    regions_specs = {
        # Mean Climate
        "NHEX": {"domain": {"latitude": (30.0, 90)}},
        "SHEX": {"domain": {"latitude": (-90.0, -30)}},
        "TROPICS": {"domain": {"latitude": (-30.0, 30)}},
        "global": {},
        "90S50S": {"domain": {"latitude": (-90.0, -50)}},
        "50S20S": {"domain": {"latitude": (-50.0, -20)}},
        "20S20N": {"domain": {"latitude": (-20.0, 20)}},
        "20N50N": {"domain": {"latitude": (20.0, 50)}},
        "50N90N": {"domain": {"latitude": (50.0, 90)}},
        "land_NHEX": {"value": 100, "domain": {"latitude": (30.0, 90)}},
        "land_SHEX": {"value": 100, "domain": {"latitude": (-90.0, -30)}},
        "land_TROPICS": {"value": 100, "domain": {"latitude": (-30.0, 30)}},
        "land": {"value": 100},
        "ocean_NHEX": {"value": 0, "domain": {"latitude": (30.0, 90)}},
        "ocean_SHEX": {"value": 0, "domain": {"latitude": (-90.0, -30)}},
        # Same band as TROPICS / land_TROPICS; the lower bound was previously
        # written as 30.0, which collapsed the region to a single latitude.
        "ocean_TROPICS": {"value": 0, "domain": {"latitude": (-30.0, 30)}},
        "ocean": {"value": 0},
        # Modes of variability
        "NAM": {"domain": {"latitude": (20.0, 90), "longitude": (-180, 180)}},
        "NAO": {"domain": {"latitude": (20.0, 80), "longitude": (-90, 40)}},
        # NOTE: SAM latitude bounds are listed north-to-south.
        "SAM": {"domain": {"latitude": (-20.0, -90), "longitude": (0, 360)}},
        "PNA": {"domain": {"latitude": (20.0, 85), "longitude": (120, 240)}},
        "PDO": {"domain": {"latitude": (20.0, 70), "longitude": (110, 260)}},
        # Monsoon domains for Wang metrics
        # All monsoon domains
        "AllMW": {"domain": {"latitude": (-40.0, 45.0), "longitude": (0.0, 360.0)}},
        "AllM": {"domain": {"latitude": (-45.0, 45.0), "longitude": (0.0, 360.0)}},
        # North American Monsoon
        "NAMM": {"domain": {"latitude": (0.0, 45.0), "longitude": (210.0, 310.0)}},
        # South American Monsoon
        "SAMM": {"domain": {"latitude": (-45.0, 0.0), "longitude": (240.0, 330.0)}},
        # North African Monsoon
        "NAFM": {"domain": {"latitude": (0.0, 45.0), "longitude": (310.0, 60.0)}},
        # South African Monsoon
        "SAFM": {"domain": {"latitude": (-45.0, 0.0), "longitude": (0.0, 90.0)}},
        # Asian Summer Monsoon
        "ASM": {"domain": {"latitude": (0.0, 45.0), "longitude": (60.0, 180.0)}},
        # Australian Monsoon
        "AUSM": {"domain": {"latitude": (-45.0, 0.0), "longitude": (90.0, 160.0)}},
        # Monsoon domains for Sperber metrics
        # All India rainfall
        "AIR": {"domain": {"latitude": (7.0, 25.0), "longitude": (65.0, 85.0)}},
        # North Australian
        "AUS": {"domain": {"latitude": (-20.0, -10.0), "longitude": (120.0, 150.0)}},
        # Sahel
        "Sahel": {"domain": {"latitude": (13.0, 18.0), "longitude": (-10.0, 10.0)}},
        # Gulf of Guinea
        "GoG": {"domain": {"latitude": (0.0, 5.0), "longitude": (-10.0, 10.0)}},
        # North American monsoon
        "NAmo": {"domain": {"latitude": (20.0, 37.0), "longitude": (-112.0, -103.0)}},
        # South American monsoon
        "SAmo": {"domain": {"latitude": (-20.0, 2.5), "longitude": (-65.0, -40.0)}},
    }

    return regions_specs


def region_subset(d, regions_specs, region=None):
    """Restrict ``d`` to the latitude/longitude bounds of a named region.

    Parameters
    ----------
    d : xarray.Dataset
        Data to subset. Its latitude dimension is looked up as ``latitude``
        or ``lat`` and its longitude dimension as ``longitude`` or ``lon``.
    regions_specs : dict
        Region definitions; each entry may carry a ``"domain"`` dict with
        ``"latitude"`` and/or ``"longitude"`` 2-tuples of bounds.
    region : str, optional
        Key into ``regions_specs``.

    Returns
    -------
    The subset of ``d``. ``d`` is returned as-is when the region has no
    ``"domain"`` entry, or when ``region`` is ``None`` or unknown (in which
    case an error line is printed).
    """
    if region is None or region not in regions_specs:
        print('Error: region not defined')
        return d

    spec = regions_specs[region]
    if 'domain' not in spec:
        return d

    bounds_by_axis = spec['domain']
    # Latitude is applied before longitude; for each axis the long dimension
    # name takes precedence over the short one.
    for long_name, short_name in (('latitude', 'lat'), ('longitude', 'lon')):
        if long_name not in bounds_by_axis:
            continue
        start = bounds_by_axis[long_name][0]
        stop = bounds_by_axis[long_name][1]
        window = slice(start, stop)
        if long_name in d.coords.dims:
            d = d.sel(**{long_name: window})
        elif short_name in d.coords.dims:
            d = d.sel(**{short_name: window})

    return d
Loading