Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactored native model fixes by adding common base class NativeDatasetFix #1694

Merged
merged 7 commits into from
Aug 16, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/develop/fixing_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,11 @@ For example, a ``native6`` dataset fix for ERA5 is located `here
and the ``ICON`` fix is located `here
<https://github.com/ESMValGroup/ESMValCore/blob/main/esmvalcore/cmor/_fixes/icon/icon.py>`__.

ESMValTool also provides a base class ``NativeDatasetFix`` with
convenient functions that are useful for all native dataset fixes.
An example of its usage can be found `here
<https://github.com/ESMValGroup/ESMValCore/blob/main/esmvalcore/cmor/_fixes/icon/_base_fixes.py>`__.

.. _add_new_fix_native_datasets_extra_facets:

Extra facets for native datasets
Expand Down
26 changes: 14 additions & 12 deletions esmvalcore/cmor/_fixes/emac/_base_fixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,32 @@
from iris import NameConstraint
from iris.exceptions import ConstraintMismatchError

from ..fix import Fix
from ..native_datasets import NativeDatasetFix

logger = logging.getLogger(__name__)


class EmacFix(Fix):
class EmacFix(NativeDatasetFix):
"""Base class for all EMAC fixes."""

def get_cube(self, cubes, var_names=None):
def get_cube(self, cubes, var_name=None):
"""Extract single cube."""
# If no var_names given, use the CMOR short_name
if var_names is None:
var_names = self.extra_facets.get('raw_name',
self.vardef.short_name)
# If no var_name given, use the CMOR short_name
if var_name is None:
var_name = self.extra_facets.get('raw_name',
self.vardef.short_name)

# Convert var_names to list if only a single var_name is given
if isinstance(var_names, str):
var_names = [var_names]
# Convert to list if only a single var_name is given
if isinstance(var_name, str):
var_names = [var_name]
else:
var_names = var_name

# Try to extract the variable (prioritize variables as given by the
# list)
for var_name in var_names:
for v_name in var_names:
try:
return cubes.extract_cube(NameConstraint(var_name=var_name))
return cubes.extract_cube(NameConstraint(var_name=v_name))
except ConstraintMismatchError:
pass

Expand Down
188 changes: 41 additions & 147 deletions esmvalcore/cmor/_fixes/emac/emac.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,20 @@
from netCDF4 import Dataset
from scipy import constants

from ..shared import (
add_aux_coords_from_cubes,
add_scalar_height_coord,
add_scalar_lambda550nm_coord,
add_scalar_typesi_coord,
)
from ..shared import add_aux_coords_from_cubes
from ._base_fixes import EmacFix, NegateData, SetUnitsTo1

logger = logging.getLogger(__name__)


INVALID_UNITS = {
'kg/m**2s': 'kg m-2 s-1',
}


class AllVars(EmacFix):
"""Fixes for all variables."""

# Dictionary to map invalid units in the data to valid entries
INVALID_UNITS = {
'kg/m**2s': 'kg m-2 s-1',
}

def fix_file(self, filepath, output_dir):
"""Fix file.

Expand Down Expand Up @@ -69,9 +64,10 @@ def fix_metadata(self, cubes):
"""Fix metadata."""
cube = self.get_cube(cubes)

# Fix time
if 'time' in self.vardef.dimensions:
self._fix_time(cube)
# Fix time, latitude, and longitude coordinates
self.fix_regular_time(cube)
self.fix_regular_lat(cube)
self.fix_regular_lon(cube)

# Fix regular pressure levels (considers plev19, plev39, etc.)
for dim_name in self.vardef.dimensions:
Expand All @@ -83,38 +79,14 @@ def fix_metadata(self, cubes):
if 'alevel' in self.vardef.dimensions:
cube = self._fix_alevel(cube, cubes)

# Fix latitude
if 'latitude' in self.vardef.dimensions:
self._fix_lat(cube)

# Fix longitude
if 'longitude' in self.vardef.dimensions:
self._fix_lon(cube)

# Fix scalar coordinates
self._fix_scalar_coords(cube)
self.fix_scalar_coords(cube)

# Fix metadata of variable
self._fix_var_metadata(cube)
self.fix_var_metadata(cube)

return CubeList([cube])

@staticmethod
def _fix_time(cube):
"""Fix time coordinate of cube."""
time_coord = cube.coord('time')
time_coord.var_name = 'time'
time_coord.standard_name = 'time'
time_coord.long_name = 'time'

# Add bounds if possible (not possible if cube only contains single
# time point)
if not time_coord.has_bounds():
try:
time_coord.guess_bounds()
except ValueError:
pass

def _fix_plev(self, cube):
"""Fix regular pressure level coordinate of cube."""
for coord in cube.coords():
Expand All @@ -125,11 +97,7 @@ def _fix_plev(self, cube):
if not coord.units.is_convertible('Pa'):
continue

coord.var_name = 'plev'
coord.standard_name = 'air_pressure'
coord.long_name = 'pressure'
coord.convert_units('Pa')
coord.attributes['positive'] = 'down'
self.fix_plev_metadata(cube, coord)

return

Expand Down Expand Up @@ -227,80 +195,6 @@ def _fix_alevel(cube, cubes):

return cube

@staticmethod
def _fix_lat(cube):
"""Fix latitude coordinate of cube."""
lat = cube.coord('latitude')
lat.var_name = 'lat'
lat.standard_name = 'latitude'
lat.long_name = 'latitude'
lat.convert_units('degrees_north')

# Add bounds if possible (not possible if cube only contains single
# lat point)
if not lat.has_bounds():
try:
lat.guess_bounds()
except ValueError:
pass

@staticmethod
def _fix_lon(cube):
"""Fix longitude coordinate of cube."""
lon = cube.coord('longitude')
lon.var_name = 'lon'
lon.standard_name = 'longitude'
lon.long_name = 'longitude'
lon.convert_units('degrees_east')

# Add bounds if possible (not possible if cube only contains single
# lon point)
if not lon.has_bounds():
try:
lon.guess_bounds()
except ValueError:
pass

def _fix_scalar_coords(self, cube):
"""Fix scalar coordinates."""
if 'height2m' in self.vardef.dimensions:
add_scalar_height_coord(cube, 2.0)
if 'height10m' in self.vardef.dimensions:
add_scalar_height_coord(cube, 10.0)
if 'lambda550nm' in self.vardef.dimensions:
add_scalar_lambda550nm_coord(cube)
if 'typesi' in self.vardef.dimensions:
add_scalar_typesi_coord(cube, 'sea_ice')

def _fix_var_metadata(self, cube):
"""Fix metadata of variable."""
if self.vardef.standard_name == '':
cube.standard_name = None
else:
cube.standard_name = self.vardef.standard_name
cube.var_name = self.vardef.short_name
cube.long_name = self.vardef.long_name

# Fix units
if 'invalid_units' in cube.attributes:
invalid_units = cube.attributes.pop('invalid_units')
new_units = INVALID_UNITS.get(
invalid_units,
invalid_units.replace('**', '^'),
)
try:
cube.units = new_units
except ValueError as exc:
raise ValueError(
f"Failed to fix invalid units '{invalid_units}' for "
f"variable '{self.vardef.short_name}'") from exc
if cube.units != self.vardef.units:
cube.convert_units(self.vardef.units)

# Fix attributes
if self.vardef.positive != '':
cube.attributes['positive'] = self.vardef.positive


Cl = SetUnitsTo1

Expand All @@ -314,8 +208,8 @@ class Clwvi(EmacFix):
def fix_metadata(self, cubes):
"""Fix metadata."""
cube = (
self.get_cube(cubes, var_names=['xlvi_cav', 'xlvi_ave']) +
self.get_cube(cubes, var_names=['xivi_cav', 'xivi_ave'])
self.get_cube(cubes, var_name=['xlvi_cav', 'xlvi_ave']) +
self.get_cube(cubes, var_name=['xivi_cav', 'xivi_ave'])
)
cube.var_name = self.vardef.short_name
return CubeList([cube])
Expand Down Expand Up @@ -351,9 +245,9 @@ class Pr(EmacFix):
def fix_metadata(self, cubes):
"""Fix metadata."""
cube = (
self.get_cube(cubes, var_names=['aprl_cav', 'aprl_ave']) +
self.get_cube(cubes, var_names=['aprc_cav', 'aprc_ave']) +
self.get_cube(cubes, var_names=['aprs_cav', 'aprs_ave'])
self.get_cube(cubes, var_name=['aprl_cav', 'aprl_ave']) +
self.get_cube(cubes, var_name=['aprc_cav', 'aprc_ave']) +
self.get_cube(cubes, var_name=['aprs_cav', 'aprs_ave'])
)
cube.var_name = self.vardef.short_name
return CubeList([cube])
Expand All @@ -365,8 +259,8 @@ class Rlds(EmacFix):
def fix_metadata(self, cubes):
"""Fix metadata."""
cube = (
self.get_cube(cubes, var_names=['flxtbot_cav', 'flxtbot_ave']) -
self.get_cube(cubes, var_names=['tradsu_cav', 'tradsu_ave'])
self.get_cube(cubes, var_name=['flxtbot_cav', 'flxtbot_ave']) -
self.get_cube(cubes, var_name=['tradsu_cav', 'tradsu_ave'])
)
cube.var_name = self.vardef.short_name
return CubeList([cube])
Expand All @@ -387,8 +281,8 @@ class Rsds(EmacFix):
def fix_metadata(self, cubes):
"""Fix metadata."""
cube = (
self.get_cube(cubes, var_names=['flxsbot_cav', 'flxsbot_ave']) -
self.get_cube(cubes, var_names=['sradsu_cav', 'sradsu_ave'])
self.get_cube(cubes, var_name=['flxsbot_cav', 'flxsbot_ave']) -
self.get_cube(cubes, var_name=['sradsu_cav', 'sradsu_ave'])
)
cube.var_name = self.vardef.short_name
return CubeList([cube])
Expand All @@ -400,8 +294,8 @@ class Rsdt(EmacFix):
def fix_metadata(self, cubes):
"""Fix metadata."""
cube = (
self.get_cube(cubes, var_names=['flxstop_cav', 'flxstop_ave']) -
self.get_cube(cubes, var_names=['srad0u_cav', 'srad0u_ave'])
self.get_cube(cubes, var_name=['flxstop_cav', 'flxstop_ave']) -
self.get_cube(cubes, var_name=['srad0u_cav', 'srad0u_ave'])
)
cube.var_name = self.vardef.short_name
return CubeList([cube])
Expand All @@ -422,8 +316,8 @@ class Rtmt(EmacFix):
def fix_metadata(self, cubes):
"""Fix metadata."""
cube = (
self.get_cube(cubes, var_names=['flxttop_cav', 'flxttop_ave']) +
self.get_cube(cubes, var_names=['flxstop_cav', 'flxstop_ave'])
self.get_cube(cubes, var_name=['flxttop_cav', 'flxttop_ave']) +
self.get_cube(cubes, var_name=['flxstop_cav', 'flxstop_ave'])
)
cube.var_name = self.vardef.short_name
return CubeList([cube])
Expand Down Expand Up @@ -486,10 +380,10 @@ class MP_BC_tot(EmacFix): # noqa: N801
def fix_metadata(self, cubes):
"""Fix metadata."""
cube = (
self.get_cube(cubes, var_names=['MP_BC_ki_cav', 'MP_BC_ki_ave']) +
self.get_cube(cubes, var_names=['MP_BC_ks_cav', 'MP_BC_ks_ave']) +
self.get_cube(cubes, var_names=['MP_BC_as_cav', 'MP_BC_as_ave']) +
self.get_cube(cubes, var_names=['MP_BC_cs_cav', 'MP_BC_cs_ave'])
self.get_cube(cubes, var_name=['MP_BC_ki_cav', 'MP_BC_ki_ave']) +
self.get_cube(cubes, var_name=['MP_BC_ks_cav', 'MP_BC_ks_ave']) +
self.get_cube(cubes, var_name=['MP_BC_as_cav', 'MP_BC_as_ave']) +
self.get_cube(cubes, var_name=['MP_BC_cs_cav', 'MP_BC_cs_ave'])
)
cube.var_name = self.vardef.short_name
return CubeList([cube])
Expand All @@ -501,10 +395,10 @@ class MP_DU_tot(EmacFix): # noqa: N801
def fix_metadata(self, cubes):
"""Fix metadata."""
cube = (
self.get_cube(cubes, var_names=['MP_DU_ai_cav', 'MP_DU_ai_ave']) +
self.get_cube(cubes, var_names=['MP_DU_as_cav', 'MP_DU_as_ave']) +
self.get_cube(cubes, var_names=['MP_DU_ci_cav', 'MP_DU_ci_ave']) +
self.get_cube(cubes, var_names=['MP_DU_cs_cav', 'MP_DU_cs_ave'])
self.get_cube(cubes, var_name=['MP_DU_ai_cav', 'MP_DU_ai_ave']) +
self.get_cube(cubes, var_name=['MP_DU_as_cav', 'MP_DU_as_ave']) +
self.get_cube(cubes, var_name=['MP_DU_ci_cav', 'MP_DU_ci_ave']) +
self.get_cube(cubes, var_name=['MP_DU_cs_cav', 'MP_DU_cs_ave'])
)
cube.var_name = self.vardef.short_name
return CubeList([cube])
Expand All @@ -517,13 +411,13 @@ def fix_metadata(self, cubes):
"""Fix metadata."""
cube = (
self.get_cube(
cubes, var_names=['MP_SO4mm_ns_cav', 'MP_SO4mm_ns_ave']) +
cubes, var_name=['MP_SO4mm_ns_cav', 'MP_SO4mm_ns_ave']) +
self.get_cube(
cubes, var_names=['MP_SO4mm_ks_cav', 'MP_SO4mm_ks_ave']) +
cubes, var_name=['MP_SO4mm_ks_cav', 'MP_SO4mm_ks_ave']) +
self.get_cube(
cubes, var_names=['MP_SO4mm_as_cav', 'MP_SO4mm_as_ave']) +
cubes, var_name=['MP_SO4mm_as_cav', 'MP_SO4mm_as_ave']) +
self.get_cube(
cubes, var_names=['MP_SO4mm_cs_cav', 'MP_SO4mm_cs_ave'])
cubes, var_name=['MP_SO4mm_cs_cav', 'MP_SO4mm_cs_ave'])
)
cube.var_name = self.vardef.short_name
return CubeList([cube])
Expand All @@ -535,9 +429,9 @@ class MP_SS_tot(EmacFix): # noqa: N801
def fix_metadata(self, cubes):
"""Fix metadata."""
cube = (
self.get_cube(cubes, var_names=['MP_SS_ks_cav', 'MP_SS_ks_ave']) +
self.get_cube(cubes, var_names=['MP_SS_as_cav', 'MP_SS_as_ave']) +
self.get_cube(cubes, var_names=['MP_SS_cs_cav', 'MP_SS_cs_ave'])
self.get_cube(cubes, var_name=['MP_SS_ks_cav', 'MP_SS_ks_ave']) +
self.get_cube(cubes, var_name=['MP_SS_as_cav', 'MP_SS_as_ave']) +
self.get_cube(cubes, var_name=['MP_SS_cs_cav', 'MP_SS_cs_ave'])
)
cube.var_name = self.vardef.short_name
return CubeList([cube])
Loading