Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve cf conformance 5.6 grid mappings and projections #976

153 changes: 139 additions & 14 deletions compliance_checker/cf/cf_1_7.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,12 @@ def check_cell_boundaries(self, ds):
# Note that test does not check monotonicity
ret_val = []
reasoning = []

for variable_name, boundary_variable_name in cfutil.get_cell_boundary_map(
ds
).items():

print(boundary_variable_name, boundary_variable_name.dtype)
benjwadams marked this conversation as resolved.
Show resolved Hide resolved
variable = ds.variables[variable_name]
valid = True
reasoning = []
Expand Down Expand Up @@ -479,7 +482,7 @@ def _check_gmattr_existence_condition_geoid_name_geoptl_datum_name(self, var):
def _check_gmattr_existence_condition_ell_pmerid_hdatum(self, var):
"""
If one of reference_ellipsoid_name, prime_meridian_name, or
horizontal_datum_name are defined as grid_mapping attributes,
horizontal_datum_name, geographic_crs_name are defined as grid_mapping attributes,
they must all be defined.

:param netCDF4.Variable var
Expand All @@ -501,6 +504,7 @@ def _check_gmattr_existence_condition_ell_pmerid_hdatum(self, var):
"reference_ellipsoid_name",
"prime_meridian_name",
"horizontal_datum_name",
"geographic_crs_name",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add test coverage for added functionality.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in progress ...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do not understand " not covered by test" comment:

there is a test "_check_grid_mapping_attr_condition" that evaluates the 'geographic_crs_name' in the file

]
]
) and (
Expand All @@ -509,6 +513,7 @@ def _check_gmattr_existence_condition_ell_pmerid_hdatum(self, var):
"reference_ellipsoid_name",
"prime_meridian_name",
"horizontal_datum_name",
"geographic_crs_name",
]
).issubset(_ncattrs)
):
Expand Down Expand Up @@ -724,17 +729,107 @@ def _evaluate_towgs84(self, val):
def check_grid_mapping(self, ds):
super(CF1_7Check, self).check_grid_mapping.__doc__
prev_return = super(CF1_7Check, self).check_grid_mapping(ds)
grid_mapping_variables = cfutil.get_grid_mapping_variables(ds)
for var_name in sorted(grid_mapping_variables):
test_ctx = self.get_test_ctx(
BaseCheck.HIGH, self.section_titles["5.6"])
# Get the grid_mapping variable list for Requirements 5.6 [.../9]
grid_mapping_variable_list = []

for item in ds.get_variables_by_attributes(grid_mapping=lambda x: x is not None):
# [1/9] The type of the grid_mapping attribute is a string whose value
# is of the following form, in which brackets indicate optional
# text: grid_mapping_name[: coord_var [coord_var ...]][grid_mapping_name: [coord_var ... ]]
if not isinstance(item.grid_mapping, str):
test_ctx.messages.append("The grid_mapping attribute {} "
"is not a string".format(item.grid_mapping))
test_ctx.out_of += 1
if ' ' in item.grid_mapping:
for x in item.grid_mapping.replace(":", '').split():
grid_mapping_variable_list.append(x)
else:
# [2/9] Note that in its simplest form the attribute comprises just
# a grid_mapping_name as a single word.
grid_mapping_variable_list.append(item.grid_mapping)
#
# Get the CF grid_mapping variables list (== only if the variable exist in the Variables list)
grid_mapping_variables = cfutil.get_grid_mapping_variables(ds)
grid_mapping_variables_coordinates = cfutil.get_grid_mapping_variables_coordinates(ds)

#Check if all grid_mapping variables are listed in the file as expected
# [3/9] Each grid_mapping_name is the name of a variable (known as a
# grid mapping variable), which must exist in the file.
if len(grid_mapping_variable_list) != len(grid_mapping_variables) + len(grid_mapping_variables_coordinates):
test_ctx.messages.append("Not all of the grid_mapping variables exist in the file")
test_ctx.out_of += 1

test_ctx.score += 1

# Get the CF coordinate variables or auxiliary coordinate variables
auxiliary_coordinate_variables = cfutil.get_auxiliary_coordinate_variables(ds)
coordinate_variables = cfutil.get_coordinate_variables(ds)
# Check the grid_mapping variable coordinates if listed.
# [4/9] Each coord_var is the name of a coordinate variable or
# auxiliary coordinate variable, which must exist in the file. If it
# is an auxiliary coordinate variable, it must be listed in the
# coordinates attribute.
for coord_item in grid_mapping_variables_coordinates:
if coord_item not in coordinate_variables and coord_item not in auxiliary_coordinate_variables:
test_ctx.messages.append("{} is not listed as coordinate or auxiliary coord Variable.".format(coord_item))
test_ctx.out_of += 1

test_ctx.score += 1

# Check the gid_mapping variable attributes
for var_name in sorted(grid_mapping_variables):
var = ds.variables[var_name]
test_ctx = self.get_test_ctx(
BaseCheck.HIGH, self.section_titles["5.6"], var.name
)
BaseCheck.HIGH, self.section_titles["5.6"], var.name)

# Recommendation: The grid mapping variables should have 0 dimensions.
if var.dimensions:
test_ctx.messages.append("The grid mapping variable {} should have 0 dimension".format(var.name))
test_ctx.out_of += 1

test_ctx.score += 1

# [5/9] The grid mapping variables must have the grid_mapping_name attribute.
# The legal values for the grid_mapping_name attribute are contained in Appendix F.
if "grid_mapping_name" not in var.ncattrs():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if not hasattr(var, "grid_mapping_name") would be more Pythonic.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed... changed

test_ctx.messages.append("The grid mapping variable "
"{} doesn't have the grid_mapping_name attribute".format(var.name))
test_ctx.out_of += 1
else:
if var.grid_mapping_name not in grid_mapping_dict17:
test_ctx.messages.append("The legal values for the grid_mapping_name attribute "
"{} is not contained in Appendix F".format(var.grid_mapping_name))
test_ctx.out_of += 1

test_ctx.score += 1

# [6/9] The data types of the attributes of the
# grid mapping variable must be specified in Table 1 of Appendix F.
type_map = {"S": "str" , "N": "float64"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The typecheck for N is likely too strong. N simply means that the value has a numeric type, not just floating point.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed.... changed to:
type_map = {"S": "str", "N": ["float64", "int"]}

for attrs_name in var.ncattrs():
if attrs_name in grid_mapping_attr_types17:
attrs_type = np.dtype(type(getattr(var,attrs_name)))
attrs_type17 = type_map[grid_mapping_attr_types17[attrs_name]['type']]
if attrs_type17 != attrs_type:
test_ctx.messages.append("The data types of the attributes "
"{} and {} do not match".format(attrs_type17, attrs_type))
test_ctx.out_of += 1

else:
test_ctx.messages.append("The attribute {} "
"does not exist in Table 1 of Appendix F".format(attrs_name))
test_ctx.out_of += 1
test_ctx.score += 1

# TODO: check cases where crs_wkt provides part of a necessary
# grid_mapping attribute, or where a grid_mapping attribute
# overrides what has been provided in crs_wkt.
# attempt to parse crs_wkt if it is present

# [7/9] If present, the crs_wkt attribute must be a text string conforming to
# the CRS WKT specification described in reference [OGC_CTS].
if "crs_wkt" in var.ncattrs():
crs_wkt = var.crs_wkt
if not isinstance(crs_wkt, str):
Expand All @@ -749,23 +844,39 @@ def check_grid_mapping(self, ds):
str(crs_error)
)
)
else:
test_ctx.score += 1
test_ctx.out_of += 1

# existence_conditions
test_ctx.out_of += 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment about hasattr mentioned above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed ... changed

test_ctx.score += 1

# existence_conditions
exist_cond_1 = (
self._check_gmattr_existence_condition_geoid_name_geoptl_datum_name(var)
)
test_ctx.assert_true(exist_cond_1[0], exist_cond_1[1])
if exist_cond_1[0] == False:
test_ctx.messages.append("Both geoid_name and geopotential_datum_name "
benjwadams marked this conversation as resolved.
Show resolved Hide resolved
"should not exist for {}".format(var.name))
test_ctx.out_of += 1

test_ctx.score += 1
# test_ctx.assert_true(exist_cond_1[0], exist_cond_1[1])

# [8/9] reference_ellipsoid_name, prime_meridian_name, horizontal_datum_name and
# geographic_crs_name must be all defined if any one is defined.
exist_cond_2 = self._check_gmattr_existence_condition_ell_pmerid_hdatum(var)
test_ctx.assert_true(exist_cond_2[0], exist_cond_2[1])
if exist_cond_2[0] == False:
test_ctx.messages.append("reference_ellipsoid_name, prime_meridian_name, "
"horizontal_datum_name and geographic_crs_name must be all defined "
"if any one is defined")
test_ctx.out_of += 1

test_ctx.score += 1
# test_ctx.assert_true(exist_cond_2[0], exist_cond_2[1])

# handle vertical datum related grid_mapping attributes
vert_datum_attrs = {}
possible_vert_datum_attrs = {"geoid_name", "geopotential_datum_name"}
vert_datum_attrs = possible_vert_datum_attrs.intersection(var.ncattrs())
len_vdatum_name_attrs = len(vert_datum_attrs)
len_vdatum_name_attrs = len(vert_datum_attrs)

# check that geoid_name and geopotential_datum_name are not both
# present in the grid_mapping variable
if len_vdatum_name_attrs == 2:
Expand Down Expand Up @@ -800,8 +911,22 @@ def check_grid_mapping(self, ds):
"Error occurred while trying to query "
"Proj4 SQLite database at {}: {}".format(proj_db_path, str(e))
)
prev_return[var.name] = test_ctx.to_result()

# [9/9] Check If projected_crs_name is defined then geographic_crs_name must be also.
test_possible = {"projected_crs_name","geographic_crs_name"}
if "projected_crs_name" in var.ncattrs():
test_attr = test_possible.intersection(var.ncattrs())
len_test_attr = len(test_attr)

if len_test_attr != 2:
test_ctx.messages.append("We do not have both projected_crs_name "
"and geographic_crs_name defined")
test_ctx.out_of += 1

test_ctx.score += 1

prev_return[var.name] = test_ctx.to_result()

return prev_return

def check_standard_name_deprecated_modifiers(self, ds):
Expand Down
38 changes: 35 additions & 3 deletions compliance_checker/cfutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ def get_cell_boundary_map(ds):
for variable in ds.get_variables_by_attributes(bounds=lambda x: x is not None):
if variable.bounds in ds.variables:
boundary_map[variable.name] = variable.bounds

return boundary_map


Expand Down Expand Up @@ -776,11 +777,42 @@ def get_grid_mapping_variables(ds):
:param netCDF4.Dataset ds: An open netCDF4 Dataset
"""
grid_mapping_variables = set()
for ncvar in ds.get_variables_by_attributes(grid_mapping=lambda x: x is not None):
if ncvar.grid_mapping in ds.variables:
grid_mapping_variables.add(ncvar.grid_mapping)
for ncvar in ds.get_variables_by_attributes(grid_mapping=lambda x: x is not None and isinstance(x, str)):

if ' ' in ncvar.grid_mapping:
grid_mapping_list = [
x.split(':')[0] for x in ncvar.grid_mapping.split(' ') if ':' in x
]
if grid_mapping_list:
for item in grid_mapping_list:
if item in ds.variables:
grid_mapping_variables.add(item)
else:
if ncvar.grid_mapping in ds.variables:
grid_mapping_variables.add(ncvar.grid_mapping)

return grid_mapping_variables

def get_grid_mapping_variables_coordinates(ds):
"""
Returns a list of grid mapping variables' coordinates

:param netCDF4.Dataset ds: An open netCDF4 Dataset
"""

grid_mapping_variables_coordinates = set()
for ncvar in ds.get_variables_by_attributes(grid_mapping=lambda x: x is not None and isinstance(x, str)):

if ' ' in ncvar.grid_mapping:
coord_mapping_list = [
x.split(':')[0] for x in ncvar.grid_mapping.split(' ') if not ':' in x
]
if coord_mapping_list:
for coord_item in coord_mapping_list:
if coord_item in ds.variables:
grid_mapping_variables_coordinates.add(coord_item)

return grid_mapping_variables_coordinates

@lru_cache(128)
def get_axis_map(ds, variable):
Expand Down
98 changes: 88 additions & 10 deletions compliance_checker/tests/test_cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1293,16 +1293,18 @@ def test_check_reduced_horizontal_grid(self):
assert len([r for r in results if r.value[0] < r.value[1]]) == 1
assert all(r.name == "§5.3 Reduced Horizontal Grid" for r in results)

def test_check_grid_mapping(self):
dataset = self.load_dataset(STATIC_FILES["mapping"])
results = self.cf.check_grid_mapping(dataset)

assert len(results) == 6
assert len([r.value for r in results.values() if r.value[0] < r.value[1]]) == 0
expected_name = (
"§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections"
)
assert all(r.name == expected_name for r in results.values())
# def test_check_grid_mapping(self):
Copy link
Contributor

@benjwadams benjwadams Jan 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove commented function

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

commented function deleted.

# dataset = self.load_dataset(STATIC_FILES["mapping"])
# # print(dataset.variables)
# results = self.cf.check_grid_mapping(dataset)
# print(results.values)

# assert len(results) == 6
# assert len([r.value for r in results.values() if r.value[0] < r.value[1]]) == 0
# expected_name = (
# "§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections"
# )
# assert all(r.name == expected_name for r in results.values())

def test_is_geophysical(self):

Expand Down Expand Up @@ -1945,6 +1947,82 @@ def test_cell_measures(self):
message = "Cell measure variable box_area referred to by PS is not present in dataset variables"
assert message in messages

def test_check_grid_mapping(self):
# create cells with grid_apping variable
dataset = MockTimeSeries()

# add variable to the file as auxilary coordinate
# NOTE: Dimension variable It is a one-dimensional variable with the same name as its dimension [e.g.,
# time(time) ], and it is defined as a numeric data type with values that are
# ordered monotonically. Missing values are not allowed in coordinate variables.
#
dataset.createDimension("y", 500)
dataset.createDimension("x", 500)
dataset.createVariable("y", "d", ("y",))
dataset.createVariable("x", "d", ("x",))
y = dataset.variables["y"]
y.axis = "Y"
y.standard_name = "projection_y_coordinate"
x = dataset.variables["x"]
x.axis = "X"
x.standard_name = "projection_x_coordinate"

# add a variable with "grid_mapping" as an attribute
dataset.createVariable("temp", "d", ("time", "x", "y"))
dataset.createVariable("crsOSGB", "i")
dataset.createVariable("crsWGS84", "i")

temp = dataset.variables["temp"]
temp.standard_name = "air_temperature"
temp.units = "K"
temp.coordinates = "lat lon"
temp.grid_mapping = "crsOSGB: z y crsWGS84: lat lon"

# create grid_mapping crsOSGB ;
crsOSGB = dataset.variables["crsOSGB"]
crsOSGB.grid_mapping_name = "transverse_mercator"
crsOSGB.semi_major_axis = "6377563.396"
crsOSGB.inverse_flattening = 299.3249646
crsOSGB.longitude_of_prime_meridian = 0.0
crsOSGB.latitude_of_projection_origin = 49.0
crsOSGB.longitude_of_central_meridian = -2.0
crsOSGB.scale_factor_at_central_meridian = "0.9996012717"
crsOSGB.false_easting = 400000.0
crsOSGB.false_northing = -100000.0
crsOSGB.unit = "metre"

# create grid_mapping crsWGS84
crsWGS84 = dataset.variables["crsWGS84"]
crsWGS84.grid_mapping_name = "latitude_longitude"
crsWGS84.longitude_of_prime_meridian = 0.0
crsWGS84.semi_major_axis = 6378137.0
crsWGS84.inverse_flattening = 298.257223563
crsWGS84.reference_ellipsoid_name = "testing"
crsWGS84.prime_meridian_name = "testing"
crsWGS84.horizontal_datum_name = "testing"
crsWGS84.geographic_crs_name = "testing"
crsWGS84.projected_crs_name = "testing"

results = self.cf.check_grid_mapping(dataset)
assert len(results) == 3
assert len([r.value for r in results.values() if r.value[0] < r.value[1]]) == 1
expected_name = (
"§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections"
)
assert all(r.name == expected_name for r in results.values())

# test a different datastet
dataset = self.load_dataset(STATIC_FILES["mapping"])
# print(dataset.variables)
results = self.cf.check_grid_mapping(dataset)
assert len(results) == 6
assert len([r.value for r in results.values() if r.value[0] < r.value[1]]) == 1
expected_name = (
"§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections"
)
assert all(r.name == expected_name for r in results.values())


def test_variable_features(self):
with MockTimeSeries() as dataset:
# I hope to never see an attribute value like this, but since
Expand Down