Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve cf conformance 5.6 grid mappings and projections #976

150 changes: 126 additions & 24 deletions compliance_checker/cf/cf_1_7.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,11 @@
# Note that test does not check monotonicity
ret_val = []
reasoning = []

for variable_name, boundary_variable_name in cfutil.get_cell_boundary_map(
ds
).items():

variable = ds.variables[variable_name]
valid = True
reasoning = []
Expand Down Expand Up @@ -479,7 +481,7 @@
def _check_gmattr_existence_condition_ell_pmerid_hdatum(self, var):
"""
If one of reference_ellipsoid_name, prime_meridian_name, or
horizontal_datum_name are defined as grid_mapping attributes,
horizontal_datum_name, geographic_crs_name are defined as grid_mapping attributes,
they must all be defined.

:param netCDF4.Variable var
Expand All @@ -501,6 +503,7 @@
"reference_ellipsoid_name",
"prime_meridian_name",
"horizontal_datum_name",
"geographic_crs_name",

Check warning on line 506 in compliance_checker/cf/cf_1_7.py

View check run for this annotation

Codecov / codecov/patch

compliance_checker/cf/cf_1_7.py#L506

Added line #L506 was not covered by tests
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add test coverage for added functionality.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in progress ...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do not understand " not covered by test" comment:

there is a test "_check_grid_mapping_attr_condition" that evaluates the 'geographic_crs_name' in the file

]
]
) and (
Expand All @@ -509,6 +512,7 @@
"reference_ellipsoid_name",
"prime_meridian_name",
"horizontal_datum_name",
"geographic_crs_name",
]
).issubset(_ncattrs)
):
Expand Down Expand Up @@ -724,17 +728,105 @@
def check_grid_mapping(self, ds):
super(CF1_7Check, self).check_grid_mapping.__doc__
prev_return = super(CF1_7Check, self).check_grid_mapping(ds)
grid_mapping_variables = cfutil.get_grid_mapping_variables(ds)
for var_name in sorted(grid_mapping_variables):
test_ctx = self.get_test_ctx(
BaseCheck.HIGH, self.section_titles["5.6"])
# Get the grid_mapping variable list for Requirements 5.6 [.../9]
grid_mapping_variable_list = []

for item in ds.get_variables_by_attributes(grid_mapping=lambda x: x is not None):
# [1/9] The type of the grid_mapping attribute is a string whose value
# is of the following form, in which brackets indicate optional
# text: grid_mapping_name[: coord_var [coord_var ...]][grid_mapping_name: [coord_var ... ]]
if not isinstance(item.grid_mapping, str):
test_ctx.messages.append("The grid_mapping attribute {} "
"is not a string".format(item.grid_mapping))
test_ctx.out_of += 1
else:
# [2/9] Note that in its simplest form the attribute comprises just
# a grid_mapping_name as a single word.
grid_mapping_variable_list.append(regex.findall(r"(\w+)",item.grid_mapping))

#
# Get the CF grid_mapping variables list (== only if the variable exist in the Variables list)
grid_mapping_variables = cfutil.get_grid_mapping_variables(ds)
grid_mapping_variables_coordinates = cfutil.get_grid_mapping_variables_coordinates(ds)

#Check if all grid_mapping variables are listed in the file as expected
# [3/9] Each grid_mapping_name is the name of a variable (known as a
# grid mapping variable), which must exist in the file.
if len(grid_mapping_variable_list) != len(grid_mapping_variables) + len(grid_mapping_variables_coordinates):
test_ctx.messages.append("Not all of the grid_mapping variables exist in the file")
test_ctx.out_of += 1

test_ctx.score += 1

# Get the CF coordinate variables or auxiliary coordinate variables
auxiliary_coordinate_variables = cfutil.get_auxiliary_coordinate_variables(ds)
coordinate_variables = cfutil.get_coordinate_variables(ds)
# Check the grid_mapping variable coordinates if listed.
# [4/9] Each coord_var is the name of a coordinate variable or
# auxiliary coordinate variable, which must exist in the file. If it
# is an auxiliary coordinate variable, it must be listed in the
# coordinates attribute.
for coord_item in grid_mapping_variables_coordinates:
if coord_item not in coordinate_variables and coord_item not in auxiliary_coordinate_variables:
test_ctx.messages.append("{} is not listed as coordinate or auxiliary coord Variable.".format(coord_item))
test_ctx.out_of += 1

test_ctx.score += 1

# Check the gid_mapping variable attributes
for var_name in sorted(grid_mapping_variables):
var = ds.variables[var_name]
test_ctx = self.get_test_ctx(
BaseCheck.HIGH, self.section_titles["5.6"], var.name
)
BaseCheck.HIGH, self.section_titles["5.6"], var.name)

# Recommendation: The grid mapping variables should have 0 dimensions.
if var.dimensions:
test_ctx.messages.append("The grid mapping variable {} should have 0 dimension".format(var.name))
test_ctx.out_of += 1

test_ctx.score += 1

# [5/9] The grid mapping variables must have the grid_mapping_name attribute.
# The legal values for the grid_mapping_name attribute are contained in Appendix F.
if "grid_mapping_name" not in var.ncattrs():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if not hasattr(var, "grid_mapping_name") would be more Pythonic.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed... changed

test_ctx.messages.append("The grid mapping variable "
"{} doesn't have the grid_mapping_name attribute".format(var.name))
test_ctx.out_of += 1
else:
if var.grid_mapping_name not in grid_mapping_dict17:
test_ctx.messages.append("The legal values for the grid_mapping_name attribute "
"{} is not contained in Appendix F".format(var.grid_mapping_name))
test_ctx.out_of += 1

test_ctx.score += 1

# [6/9] The data types of the attributes of the
# grid mapping variable must be specified in Table 1 of Appendix F.
type_map = {"S": "str" , "N": "float64"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The typecheck for N is likely too strong. N simply means that the value has a numeric type, not just floating point.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed.... changed to:
type_map = {"S": "str", "N": ["float64", "int"]}

for attrs_name in var.ncattrs():
if attrs_name in grid_mapping_attr_types17:
attrs_type = np.dtype(type(getattr(var,attrs_name)))
attrs_type17 = type_map[grid_mapping_attr_types17[attrs_name]['type']]
if attrs_type17 != attrs_type:
test_ctx.messages.append("The data types of the attributes "
"{} and {} do not match".format(attrs_type17, attrs_type))
test_ctx.out_of += 1

else:
test_ctx.messages.append("The attribute {} "
"does not exist in Table 1 of Appendix F".format(attrs_name))
test_ctx.out_of += 1

Check warning on line 820 in compliance_checker/cf/cf_1_7.py

View check run for this annotation

Codecov / codecov/patch

compliance_checker/cf/cf_1_7.py#L820

Added line #L820 was not covered by tests
test_ctx.score += 1

# TODO: check cases where crs_wkt provides part of a necessary
# grid_mapping attribute, or where a grid_mapping attribute
# overrides what has been provided in crs_wkt.
# attempt to parse crs_wkt if it is present

# [7/9] If present, the crs_wkt attribute must be a text string conforming to
# the CRS WKT specification described in reference [OGC_CTS].
if "crs_wkt" in var.ncattrs():
crs_wkt = var.crs_wkt
if not isinstance(crs_wkt, str):
Expand All @@ -749,33 +841,29 @@
str(crs_error)
)
)
else:
test_ctx.score += 1
test_ctx.out_of += 1

test_ctx.out_of += 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment about hasattr mentioned above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed ... changed

test_ctx.score += 1

# existence_conditions
# Both geoid_name and geopotential_datum_name cannot exist
exist_cond_1 = (
self._check_gmattr_existence_condition_geoid_name_geoptl_datum_name(var)
)
test_ctx.assert_true(exist_cond_1[0], exist_cond_1[1])
test_ctx.messages.append(exist_cond_1)
test_ctx.score += 1

# [8/9] reference_ellipsoid_name, prime_meridian_name, horizontal_datum_name and
# geographic_crs_name must be all defined if any one is defined.
exist_cond_2 = self._check_gmattr_existence_condition_ell_pmerid_hdatum(var)
test_ctx.assert_true(exist_cond_2[0], exist_cond_2[1])
test_ctx.messages.append(exist_cond_2)
test_ctx.score += 1

# handle vertical datum related grid_mapping attributes
vert_datum_attrs = {}
possible_vert_datum_attrs = {"geoid_name", "geopotential_datum_name"}
vert_datum_attrs = possible_vert_datum_attrs.intersection(var.ncattrs())
len_vdatum_name_attrs = len(vert_datum_attrs)
# check that geoid_name and geopotential_datum_name are not both
# present in the grid_mapping variable
if len_vdatum_name_attrs == 2:
test_ctx.out_of += 1
test_ctx.messages.append(
"Cannot have both 'geoid_name' and "
"'geopotential_datum_name' attributes in "
"grid mapping variable '{}'".format(var.name)
)
elif len_vdatum_name_attrs == 1:
vert_datum_attrs = possible_vert_datum_attrs.intersection(var.ncattrs())

if exist_cond_1[0] == True and vert_datum_attrs:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just use if exist_cond_1[0] and vert_datum_attrs if exist_cond[0] is expected to be boolean.

# should be one or zero attrs
proj_db_path = os.path.join(pyproj.datadir.get_data_dir(), "proj.db")
try:
Expand All @@ -800,8 +888,22 @@
"Error occurred while trying to query "
"Proj4 SQLite database at {}: {}".format(proj_db_path, str(e))
)
prev_return[var.name] = test_ctx.to_result()

# [9/9] Check If projected_crs_name is defined then geographic_crs_name must be also.
test_possible = {"projected_crs_name","geographic_crs_name"}
if "projected_crs_name" in var.ncattrs():
test_attr = test_possible.intersection(var.ncattrs())

Check warning on line 895 in compliance_checker/cf/cf_1_7.py

View check run for this annotation

Codecov / codecov/patch

compliance_checker/cf/cf_1_7.py#L894-L895

Added lines #L894 - L895 were not covered by tests
len_test_attr = len(test_attr)

if len_test_attr != 2:
test_ctx.messages.append("We do not have both projected_crs_name "
"and geographic_crs_name defined")
test_ctx.out_of += 1

Check warning on line 902 in compliance_checker/cf/cf_1_7.py

View check run for this annotation

Codecov / codecov/patch

compliance_checker/cf/cf_1_7.py#L902

Added line #L902 was not covered by tests
test_ctx.score += 1

Check warning on line 904 in compliance_checker/cf/cf_1_7.py

View check run for this annotation

Codecov / codecov/patch

compliance_checker/cf/cf_1_7.py#L904

Added line #L904 was not covered by tests
prev_return[var.name] = test_ctx.to_result()

return prev_return

def check_standard_name_deprecated_modifiers(self, ds):
Expand Down
38 changes: 35 additions & 3 deletions compliance_checker/cfutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ def get_cell_boundary_map(ds):
for variable in ds.get_variables_by_attributes(bounds=lambda x: x is not None):
if variable.bounds in ds.variables:
boundary_map[variable.name] = variable.bounds

return boundary_map


Expand Down Expand Up @@ -776,11 +777,42 @@ def get_grid_mapping_variables(ds):
:param netCDF4.Dataset ds: An open netCDF4 Dataset
"""
grid_mapping_variables = set()
for ncvar in ds.get_variables_by_attributes(grid_mapping=lambda x: x is not None):
if ncvar.grid_mapping in ds.variables:
grid_mapping_variables.add(ncvar.grid_mapping)
for ncvar in ds.get_variables_by_attributes(grid_mapping=lambda x: x is not None and isinstance(x, str)):

if ' ' in ncvar.grid_mapping:
grid_mapping_list = [
x.split(':')[0] for x in ncvar.grid_mapping.split(' ') if ':' in x
]
if grid_mapping_list:
for item in grid_mapping_list:
if item in ds.variables:
grid_mapping_variables.add(item)
else:
if ncvar.grid_mapping in ds.variables:
grid_mapping_variables.add(ncvar.grid_mapping)

return grid_mapping_variables

def get_grid_mapping_variables_coordinates(ds):
"""
Returns a list of grid mapping variables' coordinates

:param netCDF4.Dataset ds: An open netCDF4 Dataset
"""

grid_mapping_variables_coordinates = set()
for ncvar in ds.get_variables_by_attributes(grid_mapping=lambda x: x is not None and isinstance(x, str)):

if ' ' in ncvar.grid_mapping:
coord_mapping_list = [
x.split(':')[0] for x in ncvar.grid_mapping.split(' ') if not ':' in x
]
if coord_mapping_list:
for coord_item in coord_mapping_list:
if coord_item in ds.variables:
grid_mapping_variables_coordinates.add(coord_item)

return grid_mapping_variables_coordinates

@lru_cache(128)
def get_axis_map(ds, variable):
Expand Down
96 changes: 76 additions & 20 deletions compliance_checker/tests/test_cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1293,17 +1293,6 @@ def test_check_reduced_horizontal_grid(self):
assert len([r for r in results if r.value[0] < r.value[1]]) == 1
assert all(r.name == "§5.3 Reduced Horizontal Grid" for r in results)

def test_check_grid_mapping(self):
dataset = self.load_dataset(STATIC_FILES["mapping"])
results = self.cf.check_grid_mapping(dataset)

assert len(results) == 6
assert len([r.value for r in results.values() if r.value[0] < r.value[1]]) == 0
expected_name = (
"§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections"
)
assert all(r.name == expected_name for r in results.values())

def test_is_geophysical(self):

# check whether string type variable, which are not `cf_role`, are
Expand Down Expand Up @@ -1470,15 +1459,6 @@ def test_compress_packed(self):
self.assertFalse(results[0].value)
self.assertFalse(results[1].value)

# def test_check_all_features_are_same_type(self):
# dataset = self.load_dataset(STATIC_FILES["rutgers"])
# result = self.cf.check_all_features_are_same_type(dataset)
# assert result

# dataset = self.load_dataset(STATIC_FILES["featureType"])
# result = self.cf.check_all_features_are_same_type(dataset)
# assert result

def test_featureType_is_case_insensitive(self):
"""
Tests that the featureType attribute is case insensitive
Expand Down Expand Up @@ -1945,6 +1925,82 @@ def test_cell_measures(self):
message = "Cell measure variable box_area referred to by PS is not present in dataset variables"
assert message in messages

def test_check_grid_mapping(self):
# create cells with grid_apping variable
dataset = MockTimeSeries()

# add variable to the file as auxilary coordinate
# NOTE: Dimension variable It is a one-dimensional variable with the same name as its dimension [e.g.,
# time(time) ], and it is defined as a numeric data type with values that are
# ordered monotonically. Missing values are not allowed in coordinate variables.
#
dataset.createDimension("y", 500)
dataset.createDimension("x", 500)
dataset.createVariable("y", "d", ("y",))
dataset.createVariable("x", "d", ("x",))
y = dataset.variables["y"]
y.axis = "Y"
y.standard_name = "projection_y_coordinate"
x = dataset.variables["x"]
x.axis = "X"
x.standard_name = "projection_x_coordinate"

# add a variable with "grid_mapping" as an attribute
dataset.createVariable("temp", "d", ("time", "x", "y"))
dataset.createVariable("crsOSGB", "i")
dataset.createVariable("crsWGS84", "i")

temp = dataset.variables["temp"]
temp.standard_name = "air_temperature"
temp.units = "K"
temp.coordinates = "lat lon"
temp.grid_mapping = "crsOSGB: z y crsWGS84: lat lon"

# create grid_mapping crsOSGB ;
crsOSGB = dataset.variables["crsOSGB"]
crsOSGB.grid_mapping_name = "transverse_mercator"
crsOSGB.semi_major_axis = "6377563.396"
crsOSGB.inverse_flattening = 299.3249646
crsOSGB.longitude_of_prime_meridian = 0.0
crsOSGB.latitude_of_projection_origin = 49.0
crsOSGB.longitude_of_central_meridian = -2.0
crsOSGB.scale_factor_at_central_meridian = "0.9996012717"
crsOSGB.false_easting = 400000.0
crsOSGB.false_northing = -100000.0
crsOSGB.unit = "metre"

# create grid_mapping crsWGS84
crsWGS84 = dataset.variables["crsWGS84"]
crsWGS84.grid_mapping_name = "latitude_longitude"
crsWGS84.longitude_of_prime_meridian = 0.0
crsWGS84.semi_major_axis = 6378137.0
crsWGS84.inverse_flattening = 298.257223563
crsWGS84.reference_ellipsoid_name = "testing"
crsWGS84.prime_meridian_name = "testing"
crsWGS84.horizontal_datum_name = "testing"
crsWGS84.geographic_crs_name = "testing"
crsWGS84.projected_crs_name = "testing"

results = self.cf.check_grid_mapping(dataset)
assert len(results) == 3
assert len([r.value for r in results.values() if r.value[0] < r.value[1]]) == 1
expected_name = (
"§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections"
)
assert all(r.name == expected_name for r in results.values())

# test a different datastet
dataset = self.load_dataset(STATIC_FILES["mapping"])
# print(dataset.variables)
results = self.cf.check_grid_mapping(dataset)
assert len(results) == 6
assert len([r.value for r in results.values() if r.value[0] < r.value[1]]) == 1
expected_name = (
"§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections"
)
assert all(r.name == expected_name for r in results.values())


def test_variable_features(self):
with MockTimeSeries() as dataset:
# I hope to never see an attribute value like this, but since
Expand Down