From 823b343b6d282062b43474d1bbadcc409b73b052 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Wed, 18 Oct 2023 14:38:30 -0300 Subject: [PATCH 1/5] pre-commits --- .gitignore | 2 +- LICENSE.txt | 1 - README.md | 2 +- cc_plugin_glider/glider_dac.py | 815 ++++++++++-------- cc_plugin_glider/required_var_attrs.py | 485 ++++++----- .../tests/data/IOOS_Glider_NetCDF_v3.0.cdl | 3 +- cc_plugin_glider/tests/data/no_qc.cdl | 3 +- cc_plugin_glider/tests/data/seanames.xml | 10 +- cc_plugin_glider/tests/resources.py | 34 +- cc_plugin_glider/tests/test_glidercheck.py | 270 +++--- cc_plugin_glider/util.py | 79 +- pyproject.toml | 1 - 12 files changed, 905 insertions(+), 800 deletions(-) diff --git a/.gitignore b/.gitignore index 89f37ed..ec1b0d2 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,4 @@ env .DS_Store .pytest_cache -cc_plugin_glider/_version.py \ No newline at end of file +cc_plugin_glider/_version.py diff --git a/LICENSE.txt b/LICENSE.txt index 8f71f43..8dada3e 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -199,4 +199,3 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. - diff --git a/README.md b/README.md index 5ae047e..7e7482d 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ See the [ioos/compliance-checker](https://github.com/ioos/compliance-checker) fo ## Summary of the Checks -The checks have been designed to help data providers submit the highest quality data to the GliderDAC. Submitting uncompliant data to the DAC may result in services not working. For example, not providing the correct Sea Name in the GLobal atttributes may put your glider depoyment into the wrong region on the GliderMap. Not providing proper metadata about the platform and instruments, and attribution may prevent NCEI from archiving your data. And not making your files CF compliant could prevent the files from showing up on ERDDAP and THREDDS servers all together. +The checks have been designed to help data providers submit the highest quality data to the GliderDAC. Submitting non-compliant data to the DAC may result in services not working. For example, not providing the correct Sea Name in the global attributes may put your glider deployment into the wrong region on the GliderMap. Not providing proper metadata about the platform, instruments, and attribution may prevent NCEI from archiving your data. Not making your files CF compliant could prevent the files from showing up on ERDDAP and THREDDS servers altogether. ### High priority checks Failures in these checks should be addressed before submitting to the GliderDAC!
diff --git a/cc_plugin_glider/glider_dac.py b/cc_plugin_glider/glider_dac.py index 0579aa7..f76b5d6 100644 --- a/cc_plugin_glider/glider_dac.py +++ b/cc_plugin_glider/glider_dac.py @@ -1,27 +1,26 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- -''' +""" cc_plugin_glider.glider_dac.py Compliance Test Suite for the IOOS National Glider Data Assembly Center https://github.com/ioos/ioosngdac/wiki -''' +""" + +import os +import warnings -from __future__ import (absolute_import, division, print_function) -from cc_plugin_glider import util -from compliance_checker import __version__ -from compliance_checker.base import BaseCheck, BaseNCCheck, Result, TestCtx -from compliance_checker.cf.cf import CF1_6Check -from six.moves.urllib.parse import urljoin import numpy as np import requests import six -import warnings -import os -from requests.exceptions import RequestException +from compliance_checker import __version__ +from compliance_checker.base import BaseCheck, BaseNCCheck, Result, TestCtx +from compliance_checker.cf.cf import CF1_6Check from lxml import etree +from requests.exceptions import RequestException +from six.moves.urllib.parse import urljoin +from cc_plugin_glider import util try: basestring @@ -31,53 +30,55 @@ class GliderCheck(BaseNCCheck): register_checker = True - _cc_spec = 'gliderdac' - _cc_spec_version = '3.0' + _cc_spec = "gliderdac" + _cc_spec_version = "3.0" _cc_checker_version = __version__ - _cc_url = 'https://github.com/ioos/ioosngdac/wiki/NGDAC-NetCDF-File-Format-Version-2' - _cc_display_headers = { - 3: 'Required', - 2: 'Recommended', - 1: 'Suggested' - } - acceptable_platform_types = { - 'Seaglider', - 'Spray Glider', - 'Slocum Glider' - } + _cc_url = ( + "https://github.com/ioos/ioosngdac/wiki/NGDAC-NetCDF-File-Format-Version-2" + ) + _cc_display_headers = {3: "Required", 2: "Recommended", 1: "Suggested"} + acceptable_platform_types = {"Seaglider", "Spray Glider", "Slocum Glider"} def __init__(self): # try to get the sea names table - ncei_base_table_url = 'https://gliders.ioos.us/ncei_authority_tables/' + ncei_base_table_url = "https://gliders.ioos.us/ncei_authority_tables/" # might refactor if the authority tables names change - table_type = {'project': 'projects.txt', - 'platform': 'platforms.txt', - 'instrument': 'instruments.txt', - 'institution': 'institutions.txt'} + table_type = { + "project": "projects.txt", + "platform": "platforms.txt", + "instrument": "instruments.txt", + "institution": "institutions.txt", + } # some top level attrs map to other things - var_remap = {'platform': 'id', - 'instrument': 'make_model'} + var_remap = {"platform": "id", "instrument": "make_model"} self.auth_tables = {} for global_att_name, table_file in six.iteritems(table_type): # instruments have to be handled specially since they aren't # global attributes table_loc = urljoin(ncei_base_table_url, table_file) - self.auth_tables[global_att_name] = GliderCheck.request_resource(table_loc, - os.environ.get("{}_TABLE".format(global_att_name.upper())), - lambda s: set(s.splitlines())) + self.auth_tables[global_att_name] = GliderCheck.request_resource( + table_loc, + os.environ.get(f"{global_att_name.upper()}_TABLE"), + lambda s: set(s.splitlines()), + ) # handle NCEI sea names table - sea_names_url = 'https://www.ncei.noaa.gov/data/oceans/ncei/vocabulary/seanames.xml' + sea_names_url = ( + "https://www.ncei.noaa.gov/data/oceans/ncei/vocabulary/seanames.xml" + ) + def sea_name_parse(text): """Helper function to handle utf-8 parsing of sea name XML table""" - utf8_parser = 
etree.XMLParser(encoding='utf-8') - tree = etree.fromstring(text.encode('utf-8'), parser=utf8_parser) - return set(tree.xpath('./seaname/seaname/text()')) + utf8_parser = etree.XMLParser(encoding="utf-8") + tree = etree.fromstring(text.encode("utf-8"), parser=utf8_parser) + return set(tree.xpath("./seaname/seaname/text()")) - self.auth_tables['sea_name'] = GliderCheck.request_resource(sea_names_url, - os.environ.get("SEA_NAME_TABLE"), - sea_name_parse) + self.auth_tables["sea_name"] = GliderCheck.request_resource( + sea_names_url, + os.environ.get("SEA_NAME_TABLE"), + sea_name_parse, + ) @classmethod def request_resource(cls, url, backup_resource, fn): @@ -86,11 +87,12 @@ def request_resource(cls, url, backup_resource, fn): fail_flag = False if backup_resource is not None: try: - with open(backup_resource, 'r') as f: + with open(backup_resource) as f: text_contents = f.read() - except IOError as e: - warnings.warn("Could not open {}, falling back to web " - "request".format(backup_resource)) + except OSError: + warnings.warn( + f"Could not open {backup_resource}, falling back to web " "request", + ) fail_flag = True elif backup_resource is None or fail_flag: @@ -98,13 +100,15 @@ def request_resource(cls, url, backup_resource, fn): resp = requests.get(url, timeout=10) resp.raise_for_status() text_contents = resp.text - except RequestException as e: - warnings.warn("Requests exception encountered while fetching data from {}".format(url)) + except RequestException: + warnings.warn( + f"Requests exception encountered while fetching data from {url}", + ) try: return fn(text_contents) except Exception as e: - warnings.warn("Could not deserialize input text: {}".format(str(e))) + warnings.warn(f"Could not deserialize input text: {str(e)}") return None cf_checks = CF1_6Check() @@ -113,11 +117,10 @@ def request_resource(cls, url, backup_resource, fn): def make_result(cls, level, score, out_of, name, messages): return Result(level, (score, out_of), name, messages) - def setup(self, dataset): self.dataset = dataset - ''' + """ HIGH priority checks: check_required_variables @@ -132,32 +135,33 @@ def setup(self, dataset): check_monotonically_increasing_time check_dim_no_data check_depth_array - ''' + """ + def check_required_variables(self, dataset): - ''' + """ Verifies the dataset has the required variables - ''' + """ required_variables = [ - 'trajectory', - 'time', - 'lat', - 'lon', - 'pressure', - 'depth', - 'temperature', - 'conductivity', - 'density', - 'profile_id', - 'profile_time', - 'profile_lat', - 'profile_lon', - 'time_uv', - 'lat_uv', - 'lon_uv', - 'u', - 'v', - 'platform', - 'instrument_ctd' + "trajectory", + "time", + "lat", + "lon", + "pressure", + "depth", + "temperature", + "conductivity", + "density", + "profile_id", + "profile_time", + "profile_lat", + "profile_lon", + "time_uv", + "lat_uv", + "lon_uv", + "u", + "v", + "platform", + "instrument_ctd", ] level = BaseCheck.HIGH @@ -168,114 +172,114 @@ def check_required_variables(self, dataset): test = variable in dataset.variables score += int(test) if not test: - messages.append('Variable {} is missing'.format(variable)) - return self.make_result(level, score, out_of, 'Required Variables', messages) + messages.append(f"Variable {variable} is missing") + return self.make_result(level, score, out_of, "Required Variables", messages) def check_dimensions(self, dataset): - ''' + """ NetCDF files submitted by the individual glider operators contain 2 dimension variables: - time - traj - ''' + """ level = BaseCheck.HIGH score = 0 messages 
= [] - required_dimensions = [ - 'time', - 'traj_strlen' - ] + required_dimensions = ["time", "traj_strlen"] out_of = len(required_dimensions) for dimension in required_dimensions: test = dimension in dataset.dimensions score += int(test) if not test: - messages.append('%s is not a valid dimension' % dimension) - return self.make_result(level, score, out_of, - 'Required Dimensions', messages) + messages.append("%s is not a valid dimension" % dimension) + return self.make_result(level, score, out_of, "Required Dimensions", messages) def check_lat_lon_attributes(self, dataset): - ''' + """ Validates that lat and lon have correct attributes TODO: Does this need to be its own check? Is it high priority? - ''' + """ level = BaseCheck.HIGH out_of = 0 score = 0 messages = [] - check_vars = ['lat', 'lon'] + check_vars = ["lat", "lon"] for var in check_vars: stat, num_checks, msgs = util._check_variable_attrs(dataset, var) score += int(stat) out_of += num_checks messages.extend(msgs) - return self.make_result(level, score, out_of, - 'Lat and Lon attributes', messages) + return self.make_result( + level, + score, + out_of, + "Lat and Lon attributes", + messages, + ) def check_time_attributes(self, dataset): - ''' + """ Verifies that the time coordinate variable is correct - ''' + """ level = BaseCheck.HIGH - score, out_of, messages = util._check_variable_attrs(dataset, 'time') + score, out_of, messages = util._check_variable_attrs(dataset, "time") - return self.make_result(level, score, out_of, - 'Time Variable', messages) + return self.make_result(level, score, out_of, "Time Variable", messages) def check_pressure_depth_attributes(self, dataset): - ''' + """ Verifies that the pressure coordinate/data variable is correct - ''' + """ level = BaseCheck.HIGH out_of = 0 score = 0 messages = [] - check_vars = ['pressure', 'depth'] + check_vars = ["pressure", "depth"] for var in check_vars: stat, num_checks, msgs = util._check_variable_attrs(dataset, var) score += int(stat) out_of += num_checks messages.extend(msgs) - return self.make_result(level, score, out_of, - 'Depth/Pressure variable attributes', messages) + return self.make_result( + level, + score, + out_of, + "Depth/Pressure variable attributes", + messages, + ) def check_ctd_variable_attributes(self, dataset): - ''' + """ Verifies that the CTD Variables are the correct data type and contain the correct metadata - ''' + """ level = BaseCheck.HIGH out_of = 0 score = 0 messages = [] - check_vars = [ - 'temperature', - 'conductivity', - 'salinity', - 'density' - ] + check_vars = ["temperature", "conductivity", "salinity", "density"] for var in check_vars: stat, num_checks, msgs = util._check_variable_attrs(dataset, var) score += int(stat) out_of += num_checks messages.extend(msgs) - return self.make_result(level, score, out_of, 'CTD Variables', messages) + return self.make_result(level, score, out_of, "CTD Variables", messages) def check_profile_variable_attributes_and_types(self, dataset): - ''' + """ Verifies that the profile variables are the of the correct data type and contain the correct metadata - ''' + """ level = BaseCheck.HIGH out_of = 0 @@ -283,14 +287,14 @@ def check_profile_variable_attributes_and_types(self, dataset): messages = [] check_vars = [ - 'profile_id', - 'profile_time', - 'profile_lat', - 'profile_lon', - 'lat_uv', - 'lon_uv', - 'u', - 'v' + "profile_id", + "profile_time", + "profile_lat", + "profile_lon", + "lat_uv", + "lon_uv", + "u", + "v", ] for var in check_vars: stat, num_checks, msgs = 
util._check_variable_attrs(dataset, var) @@ -298,50 +302,49 @@ def check_profile_variable_attributes_and_types(self, dataset): out_of += num_checks messages.extend(msgs) - return self.make_result(level, score, out_of, - 'Profile Variables', messages) + return self.make_result(level, score, out_of, "Profile Variables", messages) def check_global_attributes(self, dataset): - ''' + """ Verifies the base metadata in the global attributes TODO: update this check to use @check_has on next cc release - ''' + """ level = BaseCheck.HIGH global_attributes = [ - 'Conventions', - 'Metadata_Conventions', - 'comment', - 'contributor_name', - 'contributor_role', - 'creator_email', - 'creator_name', - 'creator_url', - 'date_created', - 'date_issued', - 'date_modified', - 'format_version', - 'history', - 'id', - 'institution', - 'keywords', - 'keywords_vocabulary', - 'license', - 'metadata_link', - 'naming_authority', + "Conventions", + "Metadata_Conventions", + "comment", + "contributor_name", + "contributor_role", + "creator_email", + "creator_name", + "creator_url", + "date_created", + "date_issued", + "date_modified", + "format_version", + "history", + "id", + "institution", + "keywords", + "keywords_vocabulary", + "license", + "metadata_link", + "naming_authority", # 'platform_type', # platform_type check is more involved below - 'processing_level', - 'project', - 'publisher_email', - 'publisher_name', - 'publisher_url', - 'references', + "processing_level", + "project", + "publisher_email", + "publisher_name", + "publisher_url", + "references", # 'sea_name', # sea_name check is more involved below - 'source', - 'standard_name_vocabulary', - 'summary', - 'title', - 'wmo_id' + "source", + "standard_name_vocabulary", + "summary", + "title", + "wmo_id", ] out_of = 0 @@ -353,9 +356,9 @@ def check_global_attributes(self, dataset): score += int(test) out_of += 1 if not test: - messages.append('Attr %s not present' % field) + messages.append("Attr %s not present" % field) continue - v = getattr(dataset, field, '') + v = getattr(dataset, field, "") if isinstance(v, basestring): test = len(v.strip()) > 0 else: @@ -363,36 +366,40 @@ def check_global_attributes(self, dataset): score += int(test) out_of += 1 if not test: - messages.append('Attr %s is empty' % field) + messages.append("Attr %s is empty" % field) - ''' + """ Verify that sea_name attribute exists and is valid - ''' - if self.auth_tables['sea_name'] is not None: - sea_names = {sn.lower() for sn in self.auth_tables['sea_name']} + """ + if self.auth_tables["sea_name"] is not None: + sea_names = {sn.lower() for sn in self.auth_tables["sea_name"]} else: raise RuntimeError("Was unable to fetch sea names table") - sea_name = getattr(dataset, 'sea_name', '').replace(', ', ',') + sea_name = getattr(dataset, "sea_name", "").replace(", ", ",") if sea_name: # Ok score a point for the fact that the attribute exists score += 1 out_of += 1 - sea_name = sea_name.split(',') + sea_name = sea_name.split(",") for sea in sea_name: test = sea.lower() in sea_names score += int(test) out_of += 1 if not test: - messages.append(('sea_name attribute should be from the NODC sea names list:' - ' {} is not a valid sea name').format(sea)) + messages.append( + ( + "sea_name attribute should be from the NODC sea names list:" + f" {sea} is not a valid sea name" + ), + ) else: out_of += 1 - messages.append('Attr sea_name not present') + messages.append("Attr sea_name not present") - ''' + """ Verify that platform_type attribute exists and is valid - ''' - platform_type = 
getattr(dataset, 'platform_type', '') + """ + platform_type = getattr(dataset, "platform_type", "") if platform_type: # Score a point for the fact that the attribute exists score += 1 @@ -402,17 +409,25 @@ def check_global_attributes(self, dataset): score += int(test) out_of += 1 if not test: - messages.append(('platform_type {} is not one of the NCEI accepted platforms for archiving: {}' - ).format(platform_type, ",".join(self.acceptable_platform_types))) + messages.append( + ( + "platform_type {} is not one of the NCEI accepted platforms for archiving: {}" + ).format(platform_type, ",".join(self.acceptable_platform_types)), + ) else: out_of += 1 - messages.append('Attr platform_type not present') + messages.append("Attr platform_type not present") - return self.make_result(level, score, out_of, - 'Required Global Attributes', messages) + return self.make_result( + level, + score, + out_of, + "Required Global Attributes", + messages, + ) def check_standard_names(self, dataset): - ''' + """ Check a variables's standard_name attribute to ensure that it meets CF compliance. @@ -422,53 +437,61 @@ def check_standard_names(self, dataset): :param netCDF4.Dataset dataset: An open netCDF dataset :return: List of results - ''' + """ results = self.cf_checks.check_standard_name(dataset) for dd in results: # Update the check name - dd.name = 'Standard Names' + dd.name = "Standard Names" return results def check_monotonically_increasing_time(self, ds): - ''' + """ Check if all times are monotonically increasing - ''' + """ # shouldn't this already be handled by CF trajectory featureType? - test_ctx = TestCtx(BaseCheck.HIGH, 'Profile data is valid') - test_ctx.assert_true(np.all(np.diff(ds.variables['time']) > 0), 'Time variable is not monotonically increasing') + test_ctx = TestCtx(BaseCheck.HIGH, "Profile data is valid") + test_ctx.assert_true( + np.all(np.diff(ds.variables["time"]) > 0), + "Time variable is not monotonically increasing", + ) return test_ctx.to_result() def check_dim_no_data(self, dataset): - ''' + """ Checks that cartesian product of the depth and time variables have more than 2 valid values. 
- ''' - test_ctx = TestCtx(BaseCheck.HIGH, 'Profile data is valid') + """ + test_ctx = TestCtx(BaseCheck.HIGH, "Profile data is valid") # check that cartesian product of non-nodata/_FillValue values >= 2 # count here checks the count of non-masked data - if 'time' in dataset.variables and 'depth' in dataset.variables: - test = (dataset.variables['time'][:].count() * - dataset.variables['depth'][:].count()) >= 2 - test_ctx.assert_true(test, "Time and depth " - "variables must have at least " - "two valid data points together") + if "time" in dataset.variables and "depth" in dataset.variables: + test = ( + dataset.variables["time"][:].count() + * dataset.variables["depth"][:].count() + ) >= 2 + test_ctx.assert_true( + test, + "Time and depth " + "variables must have at least " + "two valid data points together", + ) return test_ctx.to_result() def check_depth_array(self, dataset): - ''' + """ Checks that the profile data is valid (abs sum of diff > 0 for depth data) - ''' - test_ctx = TestCtx(BaseCheck.HIGH, 'Profile data is valid') - if 'depth' in dataset.variables: - depth = dataset.variables['depth'][:] - test_ctx.assert_true(np.abs(np.diff(depth[~depth.mask]).sum()) > - 1e-4, - "Depth array must be valid, ie abs(Z0 - Zend) > 0" - ) + """ + test_ctx = TestCtx(BaseCheck.HIGH, "Profile data is valid") + if "depth" in dataset.variables: + depth = dataset.variables["depth"][:] + test_ctx.assert_true( + np.abs(np.diff(depth[~depth.mask]).sum()) > 1e-4, + "Depth array must be valid, ie abs(Z0 - Zend) > 0", + ) return test_ctx.to_result() - ''' + """ MEDIUM priority checks: check_qc_variables @@ -480,53 +503,60 @@ def check_depth_array(self, dataset): check_dtype check_valid_min_dtype check_valid_max_dtype - ''' + """ + def check_qc_variables(self, dataset): - ''' + """ Verifies the dataset has all the required QC variables - ''' + """ level = BaseCheck.MEDIUM score = 0 out_of = 0 - qc_variables = ["{}_qc".format(s) for s in [ - 'time', - 'lat', - 'lon', - 'pressure', - 'depth', - 'temperature', - 'conductivity', - 'density', - 'profile_time', - 'profile_lat', - 'profile_lon', - 'time_uv', - 'lat_uv', - 'lon_uv', - 'u', - 'v' - ]] + qc_variables = [ + f"{s}_qc" + for s in [ + "time", + "lat", + "lon", + "pressure", + "depth", + "temperature", + "conductivity", + "density", + "profile_time", + "profile_lat", + "profile_lon", + "time_uv", + "lat_uv", + "lon_uv", + "u", + "v", + ] + ] # The None means just check the attribute exists, not a value required_attributes = { - 'flag_meanings': None, - 'flag_values': None, - 'long_name': None, - 'standard_name': None, - 'valid_max': None, - 'valid_min': None + "flag_meanings": None, + "flag_values": None, + "long_name": None, + "standard_name": None, + "valid_max": None, + "valid_min": None, } messages = [] for qc_var in qc_variables: - pass_stat, num_checks, msgs = util._check_variable_attrs(dataset, qc_var, - required_attributes) + pass_stat, num_checks, msgs = util._check_variable_attrs( + dataset, + qc_var, + required_attributes, + ) out_of += num_checks score += int(pass_stat) messages.extend(msgs) - return self.make_result(level, score, out_of, 'QC Variables', messages) + return self.make_result(level, score, out_of, "QC Variables", messages) def check_trajectory_variables(self, dataset): - ''' + """ The trajectory variable stores a character array that identifies the deployment during which the data was gathered. 
This variable is used by the DAC to aggregate all individual NetCDF profiles containing the same @@ -534,33 +564,35 @@ def check_trajectory_variables(self, dataset): should be a character array that uniquely identifies the deployment and each individual NetCDF file from the deployment data set should have the same value. - ''' + """ level = BaseCheck.MEDIUM out_of = 0 score = 0 messages = [] - if 'trajectory' not in dataset.variables: + if "trajectory" not in dataset.variables: return - test = dataset.variables['trajectory'].dimensions == ('traj_strlen',) + test = dataset.variables["trajectory"].dimensions == ("traj_strlen",) score += int(test) out_of += 1 if not test: - messages.append('trajectory has an invalid dimension') + messages.append("trajectory has an invalid dimension") - pass_stat, num_checks, attr_msgs = util._check_variable_attrs(dataset, 'trajectory') + pass_stat, num_checks, attr_msgs = util._check_variable_attrs( + dataset, + "trajectory", + ) score += int(pass_stat) out_of += num_checks messages.extend(attr_msgs) - return self.make_result(level, score, out_of, - 'Trajectory Variable', messages) + return self.make_result(level, score, out_of, "Trajectory Variable", messages) def check_container_variables(self, dataset): - ''' + """ Verifies that the dimensionless container variables are the correct data type and contain the required metadata - ''' + """ level = BaseCheck.MEDIUM out_of = 0 @@ -568,8 +600,8 @@ def check_container_variables(self, dataset): messages = [] check_vars = [ - 'platform', - 'instrument_ctd', + "platform", + "instrument_ctd", ] for var in check_vars: stat, num_checks, msgs = util._check_variable_attrs(dataset, var) @@ -577,62 +609,79 @@ def check_container_variables(self, dataset): out_of += num_checks messages.extend(msgs) - return self.make_result(level, score, out_of, - 'Container Variables', messages) + return self.make_result(level, score, out_of, "Container Variables", messages) def check_qartod(self, dataset): - ''' + """ If the qartod variables exist, check the attributes - ''' - test_ctx = TestCtx(BaseCheck.MEDIUM, 'QARTOD Variables') + """ + test_ctx = TestCtx(BaseCheck.MEDIUM, "QARTOD Variables") qartod_variables = [ - 'qartod_{}_climatological_flag', - 'qartod_{}_flat_line_flag', - 'qartod_{}_gross_range_flag', - 'qartod_{}_rate_of_chagne_flag', - 'qartod_{}_spike_flag' + "qartod_{}_climatological_flag", + "qartod_{}_flat_line_flag", + "qartod_{}_gross_range_flag", + "qartod_{}_rate_of_chagne_flag", + "qartod_{}_spike_flag", ] # Iterate through each physical variable and each qartod variable name # and check the attributes of all variables if they exist - for param in ('temperature', 'conductivity', 'density', 'pressure'): + for param in ("temperature", "conductivity", "density", "pressure"): for qartod in qartod_variables: qartod_var = qartod.format(param) if qartod_var not in dataset.variables: continue ncvar = dataset.variables[qartod_var] - valid_min = getattr(ncvar, 'valid_min', None) - valid_max = getattr(ncvar, 'valid_max', None) - flag_values = getattr(ncvar, 'flag_values', None) - test_ctx.assert_true(getattr(ncvar, '_FillValue', None) == np.int8(9), - 'variable {} must have a _FillValue of 9b'.format(qartod_var)) - - test_ctx.assert_true(getattr(ncvar, 'long_name', ''), - 'attribute {}:long_name must be a non-empty string'.format(qartod_var)) - - test_ctx.assert_true(getattr(ncvar, 'flag_meanings', ''), - 'attribute {}:flag_meanings must be a non-empty string'.format(qartod_var)) - - test_ctx.assert_true(isinstance(flag_values, 
np.ndarray), - 'attribute {}:flag_values must be defined as an array of bytes'.format(qartod_var)) + valid_min = getattr(ncvar, "valid_min", None) + valid_max = getattr(ncvar, "valid_max", None) + flag_values = getattr(ncvar, "flag_values", None) + test_ctx.assert_true( + getattr(ncvar, "_FillValue", None) == np.int8(9), + f"variable {qartod_var} must have a _FillValue of 9b", + ) + + test_ctx.assert_true( + getattr(ncvar, "long_name", ""), + "attribute {}:long_name must be a non-empty string".format( + qartod_var, + ), + ) + + test_ctx.assert_true( + getattr(ncvar, "flag_meanings", ""), + "attribute {}:flag_meanings must be a non-empty string".format( + qartod_var, + ), + ) + + test_ctx.assert_true( + isinstance(flag_values, np.ndarray), + "attribute {}:flag_values must be defined as an array of bytes".format( + qartod_var, + ), + ) if isinstance(flag_values, np.ndarray): dtype = flag_values.dtype - test_ctx.assert_true(util.compare_dtype(dtype, np.dtype('|i1')), - 'attribute {}:flag_values has an illegal data-type, must ' - 'be byte'.format(qartod_var)) - - valid_min_dtype = getattr(valid_min, 'dtype', None) - test_ctx.assert_true(util.compare_dtype(valid_min_dtype, - np.dtype('|i1')), - 'attribute {}:valid_min must be of type byte'.format(qartod_var)) - - valid_max_dtype = getattr(valid_max, 'dtype', None) - test_ctx.assert_true(util.compare_dtype(valid_max_dtype, - np.dtype('|i1')), - 'attribute {}:valid_max must be of type byte'.format(qartod_var)) + test_ctx.assert_true( + util.compare_dtype(dtype, np.dtype("|i1")), + "attribute {}:flag_values has an illegal data-type, must " + "be byte".format(qartod_var), + ) + + valid_min_dtype = getattr(valid_min, "dtype", None) + test_ctx.assert_true( + util.compare_dtype(valid_min_dtype, np.dtype("|i1")), + f"attribute {qartod_var}:valid_min must be of type byte", + ) + + valid_max_dtype = getattr(valid_max, "dtype", None) + test_ctx.assert_true( + util.compare_dtype(valid_max_dtype, np.dtype("|i1")), + f"attribute {qartod_var}:valid_max must be of type byte", + ) if test_ctx.out_of == 0: return None @@ -640,9 +689,9 @@ def check_qartod(self, dataset): return test_ctx.to_result() def check_ancillary_variables(self, dataset): - ''' + """ Check that the variables defined in ancillary_variables attribute exist - ''' + """ level = BaseCheck.MEDIUM out_of = 0 score = 0 @@ -650,24 +699,25 @@ def check_ancillary_variables(self, dataset): check_vars = dataset.variables for var in check_vars: - if hasattr(dataset.variables[var], 'ancillary_variables'): + if hasattr(dataset.variables[var], "ancillary_variables"): ancillary_variables = dataset.variables[var].ancillary_variables for acv in ancillary_variables.split(): out_of += 1 test = acv in dataset.variables score += int(test) if not test: - msg = ('Invalid ancillary_variables attribute for {}, ' - '{} is not a variable'.format(var, acv)) + msg = ( + "Invalid ancillary_variables attribute for {}, " + "{} is not a variable".format(var, acv) + ) messages.append(msg) - return self.make_result(level, score, out_of, - 'Ancillary Variables', messages) + return self.make_result(level, score, out_of, "Ancillary Variables", messages) def check_dtype(self, dataset): - ''' + """ Check that variables are of the correct datatype - ''' + """ level = BaseCheck.MEDIUM out_of = 0 score = 0 @@ -680,144 +730,165 @@ def check_dtype(self, dataset): out_of += num_checks messages.extend(msgs) - return self.make_result(level, score, out_of, - 'Correct variable data types', messages) + return self.make_result( + level, + score, 
+ out_of, + "Correct variable data types", + messages, + ) def check_valid_min_dtype(self, dataset): - ''' + """ Check that the valid attributes are valid data types - ''' - test_ctx = TestCtx(BaseCheck.MEDIUM, 'Correct valid_min data types') + """ + test_ctx = TestCtx(BaseCheck.MEDIUM, "Correct valid_min data types") for var_name in dataset.variables: ncvar = dataset.variables[var_name] - valid_min = getattr(dataset.variables[var_name], 'valid_min', None) + valid_min = getattr(dataset.variables[var_name], "valid_min", None) if isinstance(valid_min, basestring): - valid_min_dtype = 'string' + valid_min_dtype = "string" elif isinstance(valid_min, float): - valid_min_dtype = 'float64' + valid_min_dtype = "float64" elif isinstance(valid_min, int): - valid_min_dtype = 'int64' + valid_min_dtype = "int64" else: - valid_min_dtype = str(getattr(valid_min, 'dtype', None)) + valid_min_dtype = str(getattr(valid_min, "dtype", None)) if valid_min is not None: - test_ctx.assert_true(util.compare_dtype(np.dtype(valid_min_dtype), ncvar.dtype), - '{}:valid_min has a different data type, {}, than variable {}, ' - '{}'.format(var_name, valid_min_dtype, var_name, - str(ncvar.dtype))) + test_ctx.assert_true( + util.compare_dtype(np.dtype(valid_min_dtype), ncvar.dtype), + "{}:valid_min has a different data type, {}, than variable {}, " + "{}".format(var_name, valid_min_dtype, var_name, str(ncvar.dtype)), + ) return test_ctx.to_result() def check_valid_max_dtype(self, dataset): - ''' + """ Check that the valid attributes are valid data types - ''' - test_ctx = TestCtx(BaseCheck.MEDIUM, 'Correct valid_max data types') + """ + test_ctx = TestCtx(BaseCheck.MEDIUM, "Correct valid_max data types") for var_name in dataset.variables: ncvar = dataset.variables[var_name] - valid_max = getattr(dataset.variables[var_name], 'valid_max', None) + valid_max = getattr(dataset.variables[var_name], "valid_max", None) if isinstance(valid_max, basestring): - valid_max_dtype = 'string' + valid_max_dtype = "string" elif isinstance(valid_max, float): - valid_max_dtype = 'float64' + valid_max_dtype = "float64" elif isinstance(valid_max, int): - valid_max_dtype = 'int64' + valid_max_dtype = "int64" else: - valid_max_dtype = str(getattr(valid_max, 'dtype', None)) + valid_max_dtype = str(getattr(valid_max, "dtype", None)) if valid_max is not None: - test_ctx.assert_true(util.compare_dtype(np.dtype(valid_max_dtype), ncvar.dtype), - '{}:valid_max has a different data type, {}, than variable {} ' - '{}'.format(var_name, valid_max_dtype, str(ncvar.dtype), - var_name)) + test_ctx.assert_true( + util.compare_dtype(np.dtype(valid_max_dtype), ncvar.dtype), + "{}:valid_max has a different data type, {}, than variable {} " + "{}".format(var_name, valid_max_dtype, str(ncvar.dtype), var_name), + ) return test_ctx.to_result() - ''' + """ LOW priority checks: check_ioos_ra check_valid_lon check_ncei_tables - ''' + """ + def check_ioos_ra(self, dataset): - ''' + """ Check if the ioos_regional_association attribute exists, if it does check that it's not empty - ''' - test_ctx = TestCtx(BaseCheck.LOW, 'IOOS Regional Association Attribute') + """ + test_ctx = TestCtx(BaseCheck.LOW, "IOOS Regional Association Attribute") - ioos_ra = getattr(dataset, 'ioos_regional_association', None) + ioos_ra = getattr(dataset, "ioos_regional_association", None) - test_ctx.assert_true(ioos_ra, - 'ioos_regional_association global attribute should be defined') + test_ctx.assert_true( + ioos_ra, + "ioos_regional_association global attribute should be defined", + ) return 
test_ctx.to_result() def check_valid_lon(self, dataset): - ''' + """ Check the valid_min and valid max for longitude variable - ''' - test_ctx = TestCtx(BaseCheck.LOW, 'Longitude valid_min valid_max not [-90, 90]') + """ + test_ctx = TestCtx(BaseCheck.LOW, "Longitude valid_min valid_max not [-90, 90]") - if 'lon' not in dataset.variables: + if "lon" not in dataset.variables: return - longitude = dataset.variables['lon'] - valid_min = getattr(longitude, 'valid_min') - valid_max = getattr(longitude, 'valid_max') - test_ctx.assert_true(not(valid_min == -90 and valid_max == 90), - "Longitude's valid_min and valid_max are [-90, 90], it's likely this was a mistake") + longitude = dataset.variables["lon"] + valid_min = longitude.valid_min + valid_max = longitude.valid_max + test_ctx.assert_true( + not (valid_min == -90 and valid_max == 90), + "Longitude's valid_min and valid_max are [-90, 90], it's likely this was a mistake", + ) return test_ctx.to_result() def check_ncei_tables(self, dataset): - ''' + """ Checks the project, platform id, instrument make_model, and institution against lists of values provided by NCEI - ''' - test_ctx = TestCtx(BaseCheck.LOW, "File has NCEI approved project, " - "institution, platform_type, and " - "instrument") - ncei_base_table_url = 'https://gliders.ioos.us/ncei_authority_tables/' + """ + test_ctx = TestCtx( + BaseCheck.LOW, + "File has NCEI approved project, " + "institution, platform_type, and " + "instrument", + ) + ncei_base_table_url = "https://gliders.ioos.us/ncei_authority_tables/" # might refactor if the authority tables names change - table_type = {'project': 'projects.txt', - 'platform': 'platforms.txt', - 'instrument': 'instruments.txt', - 'institution': 'institutions.txt'} + table_type = { + "project": "projects.txt", + "platform": "platforms.txt", + "instrument": "instruments.txt", + "institution": "institutions.txt", + } # some top level attrs map to other things - var_remap = {'platform': 'id', - 'instrument': 'make_model'} + var_remap = {"platform": "id", "instrument": "make_model"} for global_att_name, table_file in six.iteritems(table_type): # instruments have to be handled specially since they aren't # global attributes - if global_att_name not in {'instrument', 'platform'}: + if global_att_name not in {"instrument", "platform"}: global_att_present = hasattr(dataset, global_att_name) - test_ctx.assert_true(global_att_present, - "Attribute {} not in dataset".format(global_att_name)) + test_ctx.assert_true( + global_att_present, + f"Attribute {global_att_name} not in dataset", + ) if not global_att_present: continue - if global_att_name not in {'instrument', 'platform'}: + if global_att_name not in {"instrument", "platform"}: global_att_present = hasattr(dataset, global_att_name) - test_ctx.assert_true(global_att_present, - "Attribute {} not in dataset".format(global_att_name)) + test_ctx.assert_true( + global_att_present, + f"Attribute {global_att_name} not in dataset", + ) if not global_att_present: continue if self.auth_tables[global_att_name] is not None: check_set = self.auth_tables[global_att_name] else: - raise RuntimeError("Was unable to fetch {} table" - .format(global_att_name)) + raise RuntimeError( + f"Was unable to fetch {global_att_name} table", + ) # not truly a global attribute here, needs special handling for # instrument case - if global_att_name in {'instrument', 'platform'}: + if global_att_name in {"instrument", "platform"}: # variables which contain an instrument attribute, # which should point to an instrument variable 
kwargs = {global_att_name: lambda v: v is not None} @@ -826,39 +897,49 @@ def check_ncei_tables(self, dataset): var_name_set = {getattr(v, global_att_name) for v in att_vars} # treat no instruments/platforms defined as an error - test_ctx.assert_true(len(var_name_set) > 0, - "Cannot find any {} attributes " - "in dataset".format(global_att_name)) + test_ctx.assert_true( + len(var_name_set) > 0, + f"Cannot find any {global_att_name} attributes " "in dataset", + ) for var_name in var_name_set: if var_name not in dataset.variables: - msg = "Referenced {} variable {} does not exist".format(global_att_name, - var_name) + msg = "Referenced {} variable {} does not exist".format( + global_att_name, + var_name, + ) test_ctx.assert_true(False, msg) continue var = dataset.variables[var_name] # have to use .ncattrs, hangs if using `in var` ? - var_attr_exists = (var_remap[global_att_name] in var.ncattrs()) - msg = "Attribute {} should exist in variable {}".format(var_remap[global_att_name], - var_name) + var_attr_exists = var_remap[global_att_name] in var.ncattrs() + msg = "Attribute {} should exist in variable {}".format( + var_remap[global_att_name], + var_name, + ) test_ctx.assert_true(var_attr_exists, msg) if not var_attr_exists: continue search_attr = getattr(var, var_remap[global_att_name]) - msg = "Attribute {} '{}' for variable {} not contained in {} authority table".format(var_remap[global_att_name], - search_attr, var_name, - global_att_name) + msg = "Attribute {} '{}' for variable {} not contained in {} authority table".format( + var_remap[global_att_name], + search_attr, + var_name, + global_att_name, + ) test_ctx.assert_true(search_attr in check_set, msg) else: # check for global attribute existence already handled above global_att_value = getattr(dataset, global_att_name) - msg = "Global attribute {} value '{}' not contained in {} authority table".format(global_att_name, - global_att_value, - global_att_name) + msg = "Global attribute {} value '{}' not contained in {} authority table".format( + global_att_name, + global_att_value, + global_att_name, + ) test_ctx.assert_true(global_att_value in check_set, msg) return test_ctx.to_result() diff --git a/cc_plugin_glider/required_var_attrs.py b/cc_plugin_glider/required_var_attrs.py index 22cf297..94c9745 100644 --- a/cc_plugin_glider/required_var_attrs.py +++ b/cc_plugin_glider/required_var_attrs.py @@ -1,273 +1,268 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- -''' +""" cc_plugin_glider/required_var_attrs.py Dictionary of required variables and their attributes Attributes with values set to None mean we only check that the attribute exists, not whether the value matches -''' +""" required_var_attrs = { - 'time': { - 'dtype': 'f8', - 'standard_name': 'time', - 'units': 'seconds since 1970-01-01T00:00:00Z', - 'calendar': 'gregorian', - 'long_name': 'Time', - 'observation_type': 'measured', + "time": { + "dtype": "f8", + "standard_name": "time", + "units": "seconds since 1970-01-01T00:00:00Z", + "calendar": "gregorian", + "long_name": "Time", + "observation_type": "measured", }, - 'lat': { - 'standard_name': 'latitude', - 'units': 'degrees_north', - '_FillValue': None, - 'ancillary_variables': None, - 'comment': None, - 'coordinate_reference_frame': None, - 'long_name': None, - 'observation_type': None, - 'platform': None, - 'reference': None, - 'valid_max': None, - 'valid_min': None + "lat": { + "standard_name": "latitude", + "units": "degrees_north", + "_FillValue": None, + "ancillary_variables": None, + "comment": None, + 
"coordinate_reference_frame": None, + "long_name": None, + "observation_type": None, + "platform": None, + "reference": None, + "valid_max": None, + "valid_min": None, }, - 'lon': { - 'standard_name': 'longitude', - 'units': 'degrees_east', - '_FillValue': None, - 'ancillary_variables': None, - 'comment': None, - 'coordinate_reference_frame': None, - 'long_name': None, - 'observation_type': None, - 'platform': None, - 'reference': None, - 'valid_max': None, - 'valid_min': None + "lon": { + "standard_name": "longitude", + "units": "degrees_east", + "_FillValue": None, + "ancillary_variables": None, + "comment": None, + "coordinate_reference_frame": None, + "long_name": None, + "observation_type": None, + "platform": None, + "reference": None, + "valid_max": None, + "valid_min": None, }, - 'trajectory': { - 'cf_role': None, - 'comment': None, - 'long_name': None + "trajectory": {"cf_role": None, "comment": None, "long_name": None}, + "profile_id": { + "dtype": " Southern Ocean 67 - Sea region south of 60 degrees South latitude, circumnavigating the globe in the southern hemisphere. + Sea region south of 60 degrees South latitude, circumnavigating the globe in the southern hemisphere. For details, see http://marineregions.org/gazetteer.php?p=details&id=1907, last accessed on 2018-04-20. @@ -834,7 +834,7 @@ approximately 63-65N, 161.5-165.0W Bo Hai Bo Hai may also be identified as Bo Sea, Bohai Sea, Bohai Gulf, Bohai. -According to the IHO Working Group in 2012, the following proposals regarding 'Bo Hai' from China were accepted: +According to the IHO Working Group in 2012, the following proposals regarding 'Bo Hai' from China were accepted: - Proposal: "To consider Bo Hai a separate body from Yellow Sea." Comment: "The following to be inserted in the introductory pages of S-23 (Important Notice): "The limits prescribed in this publication are not IHO endorsements of a coastal State‟s legal position with regard to the Law of the Sea"." @@ -950,7 +950,7 @@ The Irminger Sea was named after Danish vice-admiral Carl Ludvig Christian Irmin On the West. A line running westerly from Black Rock Point (50°44',5N) in Vancouver Island through the Scott Islands in such a way that all the narrow waters between these islands are included in the Coastal Waters, thence to Cape St. James (Southern extremity of Queen Charlotte Islands), through this group in the same way, then from Cape Knox (54°10′N 133°06′W) Northward to the Western extreme of Langara Island and on to Point Cornwallis (132°52'W) in the Prince of Wales group, thence along the Western shores of this group, of Baranof, Kruzof, Chicagof, and Yakobi Islands, so that all the narrow waters between them are included in the coastal waters, and, finally, from Cape Bingham (58°04'N) in Yakobi Island to Cape Spencer (58°12′N 136°39′W). All the narrow waters between the islands in the Scott Islands, the Kerouard Islands, the Queen Charlotte Islands and the Alexander Archipelago, are included in the Coastal Waters. Limits of Oceans and Seas, 3rd Ed., 1953. IHB Special Publication 23, International Hydrographic Bureau, p. 34 at http://www.iho-ohi.net/iho_pubs/standard/S-23/S23_1953.pdf. (Last accessed on 2015-02-10.) -These waterbodies include the Salish Sea (Strait of Juan de Fuca, Puget Sound, Strait of Georgia, and connecting waterbodies), Queen Charlotte Strait, Queen Charlotte Sound, Hecate Strait, Dixon Entrance, Clarence Strait, Sumner Strait, Chatham Strait, Sitka Sound, Frederick Sound, Stephens Passage and Icy Strait. 
+These waterbodies include the Salish Sea (Strait of Juan de Fuca, Puget Sound, Strait of Georgia, and connecting waterbodies), Queen Charlotte Strait, Queen Charlotte Sound, Hecate Strait, Dixon Entrance, Clarence Strait, Sumner Strait, Chatham Strait, Sitka Sound, Frederick Sound, Stephens Passage and Icy Strait. Wikipedia entry at http://en.wikipedia.org/wiki/Coastal_Waters_of_Southeast_Alaska_and_British_Columbia. (Last accessed on 2015-02-10.) @@ -1017,11 +1017,11 @@ In the Sound: A line joining Stevns Lighthouse (55°17' N, 12°27' E) and Falste 52 The limits of the Japan Sea: -On the Southwest: The Northeastern limit of the Eastern China Sea (50) and the Western limit of the Inland Sea (53). +On the Southwest: The Northeastern limit of the Eastern China Sea (50) and the Western limit of the Inland Sea (53). On the Southeast (In Simonoseki Kaikyo): A line running from Nagoya Saki (130°49.5'E) in Kyūshū through the islands of Uma Sima and Muture Sima (33°58.5'N) to Murasaki Hana (34°01' N) in Honsyū. -On the East (In the Tsugaru Kaiko): From the extremity of Siriya Saki (141°28' E) to the extremity of Esan Saki (41°48' N). +On the East (In the Tsugaru Kaiko): From the extremity of Siriya Saki (141°28' E) to the extremity of Esan Saki (41°48' N). On the Northeast (In La Perouse Strait - Soya Kaikyo): A line joining Soni Misaki and Nishi Notaro Misaki (45°55' N). diff --git a/cc_plugin_glider/tests/resources.py b/cc_plugin_glider/tests/resources.py index 527d8f5..657eb3e 100644 --- a/cc_plugin_glider/tests/resources.py +++ b/cc_plugin_glider/tests/resources.py @@ -1,35 +1,35 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- -''' +""" cc_plugin_glider/tests/resources.py -''' -from pkg_resources import resource_filename +""" import os import subprocess +from pkg_resources import resource_filename + def get_filename(path): - ''' + """ Returns the path to a valid dataset - ''' - filename = resource_filename('cc_plugin_glider', path) - nc_path = filename.replace('.cdl', '.nc') + """ + filename = resource_filename("cc_plugin_glider", path) + nc_path = filename.replace(".cdl", ".nc") if not os.path.exists(nc_path): generate_dataset(filename, nc_path) return nc_path def generate_dataset(cdl_path, nc_path): - subprocess.call(['ncgen', '-o', nc_path, cdl_path]) + subprocess.call(["ncgen", "-o", nc_path, cdl_path]) STATIC_FILES = { - 'bad_metadata': get_filename('tests/data/bad_metadata.cdl'), - 'glider_std': get_filename('tests/data/IOOS_Glider_NetCDF_v2.0.cdl'), - 'glider_std3': get_filename('tests/data/IOOS_Glider_NetCDF_v3.0.cdl'), - 'bad_location': get_filename('tests/data/bad_location.cdl'), - 'bad_qc': get_filename('tests/data/bad_qc.cdl'), - 'no_qc': get_filename('tests/data/no_qc.cdl'), - 'bad_standard_name': get_filename('tests/data/bad_standard_name.cdl'), - 'bad_units': get_filename('tests/data/bad_units.cdl'), + "bad_metadata": get_filename("tests/data/bad_metadata.cdl"), + "glider_std": get_filename("tests/data/IOOS_Glider_NetCDF_v2.0.cdl"), + "glider_std3": get_filename("tests/data/IOOS_Glider_NetCDF_v3.0.cdl"), + "bad_location": get_filename("tests/data/bad_location.cdl"), + "bad_qc": get_filename("tests/data/bad_qc.cdl"), + "no_qc": get_filename("tests/data/no_qc.cdl"), + "bad_standard_name": get_filename("tests/data/bad_standard_name.cdl"), + "bad_units": get_filename("tests/data/bad_units.cdl"), } diff --git a/cc_plugin_glider/tests/test_glidercheck.py b/cc_plugin_glider/tests/test_glidercheck.py index 4a6e0ff..3b4d7a4 100644 --- a/cc_plugin_glider/tests/test_glidercheck.py +++ 
b/cc_plugin_glider/tests/test_glidercheck.py @@ -1,20 +1,20 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- -''' +""" cc_plugin_glider/tests/test_glidercheck.py -''' -from __future__ import (absolute_import, division, print_function) -from ..glider_dac import GliderCheck -from cc_plugin_glider.tests.resources import STATIC_FILES -from compliance_checker.tests.helpers import MockTimeSeries -from cc_plugin_glider import util -from netCDF4 import Dataset -from six.moves.urllib.parse import urljoin -import six +""" import os +import unittest + import numpy as np import requests_mock -import unittest +from compliance_checker.tests.helpers import MockTimeSeries +from netCDF4 import Dataset +from six.moves.urllib.parse import urljoin + +from cc_plugin_glider import util +from cc_plugin_glider.tests.resources import STATIC_FILES + +from ..glider_dac import GliderCheck try: basestring @@ -33,26 +33,30 @@ def shortDescription(self): # ion.module:TestClassName.test_function_name def __repr__(self): name = self.id() - name = name.split('.') + name = name.split(".") if name[0] not in ["ion", "pyon"]: - return "%s (%s)" % (name[-1], '.'.join(name[:-1])) + return "%s (%s)" % (name[-1], ".".join(name[:-1])) else: - return "%s ( %s )" % (name[-1], '.'.join(name[:-2]) + ":" + '.'.join(name[-2:])) # noqa + return "%s ( %s )" % ( + name[-1], + ".".join(name[:-2]) + ":" + ".".join(name[-2:]), + ) # noqa + __str__ = __repr__ def get_dataset(self, nc_dataset): - ''' + """ Return a pairwise object for the dataset - ''' + """ if isinstance(nc_dataset, basestring): - nc_dataset = Dataset(nc_dataset, 'r') + nc_dataset = Dataset(nc_dataset, "r") self.addCleanup(nc_dataset.close) return nc_dataset def setUp(self): # set up authority tables to prevent needing to fetch resources over # network, deal with changes, etc - ncei_base_table_url = 'https://gliders.ioos.us/ncei_authority_tables/' + ncei_base_table_url = "https://gliders.ioos.us/ncei_authority_tables/" # this is only a small subset institutions = """MARACOOS University of Delaware @@ -67,92 +71,99 @@ def setUp(self): Sea-Bird GCTD Seabird GPCTD""" with requests_mock.Mocker() as mock: - mock.get(urljoin(ncei_base_table_url, 'institutions.txt'), - text=institutions) - mock.get(urljoin(ncei_base_table_url, 'projects.txt'), - text=projects) - mock.get(urljoin(ncei_base_table_url, 'platforms.txt'), - text=platforms) - mock.get(urljoin(ncei_base_table_url, 'instruments.txt'), - text=instrument_makes) - with open(os.path.join(os.path.dirname(__file__), 'data', 'seanames.xml'), 'r', encoding="utf8") as seanames_file: + mock.get( + urljoin(ncei_base_table_url, "institutions.txt"), + text=institutions, + ) + mock.get(urljoin(ncei_base_table_url, "projects.txt"), text=projects) + mock.get(urljoin(ncei_base_table_url, "platforms.txt"), text=platforms) + mock.get( + urljoin(ncei_base_table_url, "instruments.txt"), + text=instrument_makes, + ) + with open( + os.path.join(os.path.dirname(__file__), "data", "seanames.xml"), + encoding="utf8", + ) as seanames_file: seanames_content = seanames_file.read() - mock.get("https://www.ncei.noaa.gov/data/oceans/ncei/vocabulary/seanames.xml", - text=seanames_content) + mock.get( + "https://www.ncei.noaa.gov/data/oceans/ncei/vocabulary/seanames.xml", + text=seanames_content, + ) self.check = GliderCheck() def test_location(self): - ''' + """ Checks that a file with the proper lat and lon do work - ''' - dataset = self.get_dataset(STATIC_FILES['glider_std']) + """ + dataset = self.get_dataset(STATIC_FILES["glider_std"]) result = 
self.check.check_lat_lon_attributes(dataset) self.assertTrue(result.value) def test_location_fail(self): - ''' + """ Ensures the checks fail for location - ''' - dataset = self.get_dataset(STATIC_FILES['bad_location']) + """ + dataset = self.get_dataset(STATIC_FILES["bad_location"]) result = self.check.check_lat_lon_attributes(dataset) # File has no lat lon vars, so the test should skip self.assertEqual(result.value, (0, 0)) def test_ctd_fail(self): - ''' + """ Ensures the ctd checks fail for temperature - ''' - dataset = self.get_dataset(STATIC_FILES['bad_qc']) + """ + dataset = self.get_dataset(STATIC_FILES["bad_qc"]) self.check.setup(dataset) result = self.check.check_ctd_variable_attributes(dataset) self.assertEqual(result.value, (38, 52)) def test_ctd_vars(self): - ''' + """ Ensures the ctd checks for the correct file - ''' - dataset = self.get_dataset(STATIC_FILES['glider_std']) + """ + dataset = self.get_dataset(STATIC_FILES["glider_std"]) self.check.setup(dataset) result = self.check.check_ctd_variable_attributes(dataset) - self.assertIn('Variable temperature attribute accuracy is empty', result.msgs) + self.assertIn("Variable temperature attribute accuracy is empty", result.msgs) def test_global_fail(self): - ''' + """ Tests that the global checks fail where appropriate - ''' - dataset = self.get_dataset(STATIC_FILES['bad_qc']) + """ + dataset = self.get_dataset(STATIC_FILES["bad_qc"]) self.check.setup(dataset) result = self.check.check_global_attributes(dataset) self.assertEqual(result.value, (42, 64)) def test_global(self): - ''' + """ Tests that the global checks work - ''' - dataset = self.get_dataset(STATIC_FILES['glider_std']) + """ + dataset = self.get_dataset(STATIC_FILES["glider_std"]) result = self.check.check_global_attributes(dataset) self.assertEqual(result.value[0], result.value[1]) def test_metadata(self): - ''' + """ Tests that empty attributes fail - ''' - dataset = self.get_dataset(STATIC_FILES['bad_metadata']) + """ + dataset = self.get_dataset(STATIC_FILES["bad_metadata"]) result = self.check.check_global_attributes(dataset) self.assertEqual(result.value, (41, 64)) def test_standard_names(self): - ''' + """ Tests that a file with an invalid standard name is caught (temperature) - ''' + """ # This one should pass - dataset = self.get_dataset(STATIC_FILES['glider_std']) + dataset = self.get_dataset(STATIC_FILES["glider_std"]) results = self.check.check_standard_names(dataset) for each in results: self.assertEqual(each.value[0], each.value[1]) # This will fail - dataset = self.get_dataset(STATIC_FILES['bad_standard_name']) + dataset = self.get_dataset(STATIC_FILES["bad_standard_name"]) results = self.check.check_standard_names(dataset) score = 0 out_of = 0 @@ -164,73 +175,75 @@ def test_standard_names(self): assert len(results) == 10 def test_units(self): - ''' + """ Tests that a fie with invalid units is caught (temperature) - ''' - dataset = self.get_dataset(STATIC_FILES['bad_units']) + """ + dataset = self.get_dataset(STATIC_FILES["bad_units"]) results = self.check.check_ctd_variable_attributes(dataset) - self.assertIn('Variable temperature units attribute must be convertible to degrees_C', - results.msgs) + self.assertIn( + "Variable temperature units attribute must be convertible to degrees_C", + results.msgs, + ) def test_valid_lon(self): - dataset = self.get_dataset(STATIC_FILES['bad_metadata']) + dataset = self.get_dataset(STATIC_FILES["bad_metadata"]) result = self.check.check_valid_lon(dataset) assert result.value == (0, 1) - dataset = 
self.get_dataset(STATIC_FILES['glider_std']) + dataset = self.get_dataset(STATIC_FILES["glider_std"]) result = self.check.check_valid_lon(dataset) assert result.value == (1, 1) def test_ioos_ra(self): - dataset = self.get_dataset(STATIC_FILES['glider_std']) + dataset = self.get_dataset(STATIC_FILES["glider_std"]) result = self.check.check_ioos_ra(dataset) assert result.value == (0, 1) - dataset = self.get_dataset(STATIC_FILES['glider_std3']) + dataset = self.get_dataset(STATIC_FILES["glider_std3"]) result = self.check.check_ioos_ra(dataset) assert result.value == (1, 1) def test_valid_min_dtype(self): - dataset = self.get_dataset(STATIC_FILES['glider_std']) + dataset = self.get_dataset(STATIC_FILES["glider_std"]) result = self.check.check_valid_min_dtype(dataset) assert result.value == (30, 32) - dataset = self.get_dataset(STATIC_FILES['glider_std3']) + dataset = self.get_dataset(STATIC_FILES["glider_std3"]) result = self.check.check_valid_min_dtype(dataset) assert result.value == (58, 58) def test_valid_max_dtype(self): - dataset = self.get_dataset(STATIC_FILES['glider_std']) + dataset = self.get_dataset(STATIC_FILES["glider_std"]) result = self.check.check_valid_max_dtype(dataset) assert result.value == (30, 32) - dataset = self.get_dataset(STATIC_FILES['glider_std3']) + dataset = self.get_dataset(STATIC_FILES["glider_std3"]) result = self.check.check_valid_max_dtype(dataset) assert result.value == (58, 58) def test_qc_variables(self): - dataset = self.get_dataset(STATIC_FILES['glider_std']) + dataset = self.get_dataset(STATIC_FILES["glider_std"]) self.check.setup(dataset) result = self.check.check_qc_variables(dataset) self.assertEqual(result.value[0], result.value[1]) - dataset = self.get_dataset(STATIC_FILES['glider_std3']) + dataset = self.get_dataset(STATIC_FILES["glider_std3"]) self.check.setup(dataset) result = self.check.check_qc_variables(dataset) self.assertEqual(result.value[0], result.value[1]) - dataset = self.get_dataset(STATIC_FILES['bad_qc']) + dataset = self.get_dataset(STATIC_FILES["bad_qc"]) self.check.setup(dataset) result = self.check.check_qc_variables(dataset) assert sorted(result.msgs) == [ - 'Variable depth_qc must contain attribute: flag_meanings', - 'Variable depth_qc must contain attribute: flag_values', - 'Variable pressure_qc must contain attribute: long_name', - 'Variable pressure_qc must contain attribute: standard_name' + "Variable depth_qc must contain attribute: flag_meanings", + "Variable depth_qc must contain attribute: flag_values", + "Variable pressure_qc must contain attribute: long_name", + "Variable pressure_qc must contain attribute: standard_name", ] - dataset = self.get_dataset(STATIC_FILES['no_qc']) + dataset = self.get_dataset(STATIC_FILES["no_qc"]) self.check.setup(dataset) result = self.check.check_qc_variables(dataset) self.assertEqual(result.value[0], result.value[1]) @@ -242,11 +255,11 @@ def test_compare_string_var_dtype(self): string returns Python `str` type instead of a NumPy dtype """ ts = MockTimeSeries() - str_var = ts.createVariable('str_var', str, ('time',)) + str_var = ts.createVariable("str_var", str, ("time",)) self.assertTrue(util.compare_dtype(str_var.dtype, str)) def test_time_series_variables(self): - dataset = self.get_dataset(STATIC_FILES['no_qc']) + dataset = self.get_dataset(STATIC_FILES["no_qc"]) result = self.check.check_time_attributes(dataset) self.assertEqual(result.value[0], result.value[1]) @@ -254,11 +267,11 @@ def test_time_monotonically_increasing(self): """Checks that the time variable is monotonically 
increasing""" ts = MockTimeSeries() # first check failure case - ts.variables['time'][:] = np.zeros(500) + ts.variables["time"][:] = np.zeros(500) result = self.check.check_monotonically_increasing_time(ts) self.assertLess(result.value[0], result.value[1]) # now make a monotonically increasing time variable - ts.variables['time'][:] = np.linspace(1, 500, 500) + ts.variables["time"][:] = np.linspace(1, 500, 500) result = self.check.check_monotonically_increasing_time(ts) self.assertEqual(result.value[0], result.value[1]) @@ -268,14 +281,14 @@ def test_time_depth_non_nan(self): have at least two non-NaN combinations """ ts = MockTimeSeries() - ts.variables['time'][0] = 0 - ts.variables['depth'][0] = 5 + ts.variables["time"][0] = 0 + ts.variables["depth"][0] = 5 # cartesian product should only contain one element and fail result = self.check.check_dim_no_data(ts) self.assertLess(result.value[0], result.value[1]) # adding one more coordinate variable should make the number of passing # combinations equal to two, which should pass this check - ts.variables['time'][1] = 1 + ts.variables["time"][1] = 1 result = self.check.check_dim_no_data(ts) self.assertEqual(result.value[0], result.value[1]) @@ -285,46 +298,53 @@ def test_depth_diff(self): the end time is non-negligible """ ts = MockTimeSeries() - ts.variables['depth'][:] = np.ma.array(np.zeros(500)) + ts.variables["depth"][:] = np.ma.array(np.zeros(500)) result = self.check.check_depth_array(ts) self.assertLess(result.value[0], result.value[1]) - depth_arr = ts.variables['depth'][:] = np.ma.array(np.linspace(1, 500, - 500)) + depth_arr = ts.variables["depth"][:] = np.ma.array(np.linspace(1, 500, 500)) result = self.check.check_depth_array(ts) # mark every other value as bad/_FillValue. Diff should exclude the # masked points and result should pass depth_arr[:-1:2] = np.ma.masked - ts.variables['depth'][:] = depth_arr + ts.variables["depth"][:] = depth_arr self.assertEqual(result.value[0], result.value[1]) # set all to flagged, which should max of empty array, which seems # to be zero under numpy's implementation. 
depth_arr[:] = np.ma.masked - ts.variables['depth'][:] = depth_arr + ts.variables["depth"][:] = depth_arr self.assertLessEqual(result.value[0], result.value[1]) def test_seanames(self): - ''' + """ Tests that sea names error message appears - ''' - dataset = self.get_dataset(STATIC_FILES['bad_metadata']) + """ + dataset = self.get_dataset(STATIC_FILES["bad_metadata"]) result = self.check.check_global_attributes(dataset) - self.assertIn(('sea_name attribute should be from the NODC sea names list:' - ' is not a valid sea name'), result.msgs) + self.assertIn( + ( + "sea_name attribute should be from the NODC sea names list:" + " is not a valid sea name" + ), + result.msgs, + ) def test_platform_type(self): - ''' + """ Tests that platform_type check is applied - ''' - dataset = self.get_dataset(STATIC_FILES['bad_metadata']) + """ + dataset = self.get_dataset(STATIC_FILES["bad_metadata"]) result = self.check.check_global_attributes(dataset) - self.assertIn(('platform_type Slocum is not one of the NCEI accepted platforms for archiving: {}' - ).format(",".join(self.check.acceptable_platform_types)), result.msgs) + self.assertIn( + ( + "platform_type Slocum is not one of the NCEI accepted platforms for archiving: {}" + ).format(",".join(self.check.acceptable_platform_types)), + result.msgs, + ) def test_ncei_compliance(self): """Tests that the NCEI compliance suite works""" - - ncei_base_table_url = 'https://gliders.ioos.us/ncei_authority_tables/' + ncei_base_table_url = "https://gliders.ioos.us/ncei_authority_tables/" # this is only a small subset institutions = """MARACOOS University of Delaware @@ -339,30 +359,30 @@ def test_ncei_compliance(self): Sea-Bird GCTD Seabird GPCTD""" mock_nc_file = MockTimeSeries() - mock_nc_file.project = 'MARACOOS' - mock_nc_file.institution = 'MARACOOS' - instrument_var = mock_nc_file.createVariable('instrument', 'i', ()) - instrument_var.make_model = 'Sea-Bird GCTD' - mock_nc_file.variables['depth'].instrument = 'instrument' - mock_nc_file.variables['depth'].platform = 'platform' - platform_var = mock_nc_file.createVariable('platform', 'i', ()) + mock_nc_file.project = "MARACOOS" + mock_nc_file.institution = "MARACOOS" + instrument_var = mock_nc_file.createVariable("instrument", "i", ()) + instrument_var.make_model = "Sea-Bird GCTD" + mock_nc_file.variables["depth"].instrument = "instrument" + mock_nc_file.variables["depth"].platform = "platform" + platform_var = mock_nc_file.createVariable("platform", "i", ()) platform_var.id = "Test123" result = self.check.check_ncei_tables(mock_nc_file) # everything should pass here self.assertEqual(result.value[0], result.value[1]) # now change to values that should fail - mock_nc_file.project = 'N/A' - mock_nc_file.institution = 'N/A' + mock_nc_file.project = "N/A" + mock_nc_file.institution = "N/A" # set instrument_var make_model to something not contained in the # list - instrument_var.make_model = 'Unknown' - platform_var.id = 'No platform' + instrument_var.make_model = "Unknown" + platform_var.id = "No platform" # create a dummy variable which points to an instrument that doesn't # exist - dummy_var = mock_nc_file.createVariable('dummy', 'i', ()) - dummy_var.instrument = 'nonexistent_var_name' - dummy_var.platform = 'nonexistent_var_name_2' + dummy_var = mock_nc_file.createVariable("dummy", "i", ()) + dummy_var.instrument = "nonexistent_var_name" + dummy_var.platform = "nonexistent_var_name_2" result_fail = self.check.check_ncei_tables(mock_nc_file) expected_msg_set = { "Global attribute project value 'N/A' not contained 
in project authority table", @@ -370,27 +390,29 @@ def test_ncei_compliance(self): "Attribute make_model 'Unknown' for variable instrument not contained in instrument authority table", "Referenced instrument variable nonexistent_var_name does not exist", "Attribute id 'No platform' for variable platform not contained in platform authority table", - "Referenced platform variable nonexistent_var_name_2 does not exist" + "Referenced platform variable nonexistent_var_name_2 does not exist", } self.assertSetEqual(expected_msg_set, set(result_fail.msgs)) # remove attributes which need to be checked to see if properly # detected - del (instrument_var.make_model, platform_var.id, - mock_nc_file.project, mock_nc_file.institution) + del ( + instrument_var.make_model, + platform_var.id, + mock_nc_file.project, + mock_nc_file.institution, + ) missing_attr_results = self.check.check_ncei_tables(mock_nc_file) expected_missing_msgs = { "Attribute project not in dataset", "Attribute institution not in dataset", "Attribute make_model should exist in variable instrument", - "Attribute id should exist in variable platform" + "Attribute id should exist in variable platform", } # check that all the missing attribute messages are contained in the # test results - self.assertTrue(expected_missing_msgs <= - set(missing_attr_results.msgs)) + self.assertTrue(expected_missing_msgs <= set(missing_attr_results.msgs)) - for name in ('institution', 'instrument', 'platform', 'project'): + for name in ("institution", "instrument", "platform", "project"): self.check.auth_tables[name] = None - self.assertRaises(RuntimeError, self.check.check_ncei_tables, - mock_nc_file) + self.assertRaises(RuntimeError, self.check.check_ncei_tables, mock_nc_file) diff --git a/cc_plugin_glider/util.py b/cc_plugin_glider/util.py index c77f506..ee90585 100644 --- a/cc_plugin_glider/util.py +++ b/cc_plugin_glider/util.py @@ -1,31 +1,30 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- -''' +""" cc_plugin_glider/util.py -''' -import csv +""" +from operator import eq + import numpy as np -from cc_plugin_glider.required_var_attrs import required_var_attrs from cf_units import Unit -from operator import eq -from pkg_resources import resource_filename + +from cc_plugin_glider.required_var_attrs import required_var_attrs + def compare_dtype(dt1, dt2): - ''' + """ Helper function to compare two numpy dtypes to see if they are equivalent aside from endianness. Returns True if the two are equivalent, False otherwise. - ''' + """ # string types map directly to Python string. 
Decompose into numpy types # otherwise - return eq(*(dt if dt == str else dt.kind + str(dt.itemsize) - for dt in (dt1, dt2))) + return eq(*(dt if dt == str else dt.kind + str(dt.itemsize) for dt in (dt1, dt2))) def _check_dtype(dataset, var_name): - ''' + """ Convenience method to check a variable datatype validity - ''' + """ score = 0 out_of = 0 messages = [] @@ -35,32 +34,35 @@ def _check_dtype(dataset, var_name): var = dataset.variables[var_name] var_dict = required_var_attrs.get(var_name, {}) - expected_dtype = var_dict.get('dtype', None) + expected_dtype = var_dict.get("dtype", None) if expected_dtype is not None: out_of += 1 score += 1 if not compare_dtype(var.dtype, np.dtype(expected_dtype)): - messages.append('Variable {} is expected to have a dtype of ' - '{}, instead has a dtype of {}' - ''.format(var_name, var.dtype, expected_dtype)) + messages.append( + f"Variable {var_name} is expected to have a dtype of " + f"{var.dtype}, instead has a dtype of {expected_dtype}" + "", + ) score -= 1 # check that the fill value is of the expected dtype as well - if hasattr(var, '_FillValue') and hasattr(var._FillValue, 'dtype'): + if hasattr(var, "_FillValue") and hasattr(var._FillValue, "dtype"): if not compare_dtype(var.dtype, var._FillValue.dtype): - messages.append('Variable {} _FillValue dtype does not ' - 'match variable dtype' - ''.format(var_name, var._FillValue.dtype, - var.dtype)) + messages.append( + f"Variable {var_name} _FillValue dtype does not " + "match variable dtype" + "", + ) out_of += 1 return (score, out_of, messages) def _check_variable_attrs(dataset, var_name, required_attributes=None): - ''' + """ Convenience method to check a variable attributes based on the expected_vars dict - ''' + """ score = 0 out_of = 0 messages = [] @@ -74,15 +76,16 @@ def _check_variable_attrs(dataset, var_name, required_attributes=None): check_attrs = required_attributes or required_var_attrs.get(var_name, {}) var_attrs = set(var.ncattrs()) for attr in check_attrs: - if attr == 'dtype': + if attr == "dtype": # dtype check is special, see above continue out_of += 1 score += 1 # Check if the attribute is present if attr not in var_attrs: - messages.append('Variable {} must contain attribute: {}' - ''.format(var_name, attr)) + messages.append( + f"Variable {var_name} must contain attribute: {attr}" "", + ) score -= 1 continue @@ -90,9 +93,11 @@ def _check_variable_attrs(dataset, var_name, required_attributes=None): if check_attrs[attr] is not None: if getattr(var, attr) != check_attrs[attr]: # No match, this may be an error, but first an exception for units - if attr == 'units': - msg = ('Variable {} units attribute must be ' - 'convertible to {}'.format(var_name, check_attrs[attr])) + if attr == "units": + msg = ( + f"Variable {var_name} units attribute must be " + f"convertible to {check_attrs[attr]}" + ) try: cur_unit = Unit(var.units) comp_unit = Unit(check_attrs[attr]) @@ -103,7 +108,13 @@ def _check_variable_attrs(dataset, var_name, required_attributes=None): messages.append(msg) score -= 1 else: - messages.append('Variable {} attribute {} must be {}'.format(var_name, attr, check_attrs[attr])) + messages.append( + "Variable {} attribute {} must be {}".format( + var_name, + attr, + check_attrs[attr], + ), + ) score -= 1 else: # Final check to make sure the attribute isn't an empty string @@ -111,11 +122,11 @@ def _check_variable_attrs(dataset, var_name, required_attributes=None): # try stripping whitespace, and return an error if empty att_strip = getattr(var, attr).strip() if not 
att_strip: - messages.append('Variable {} attribute {} is empty' - ''.format(var_name, attr)) + messages.append( + f"Variable {var_name} attribute {attr} is empty" "", + ) score -= 1 except AttributeError: pass return (score, out_of, messages) - diff --git a/pyproject.toml b/pyproject.toml index 99a0e0a..6b53453 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,6 @@ dynamic = [ documentation = "http://ioos.github.io/compliance-checker/" homepage = "https://github.com/ioos/cc-plugin-glider" repository = "https://github.com/ioos/cc-plugin-glider" - [project.entry-points."compliance_checker.suites"] "gliderdac" = "cc_plugin_glider.glider_dac:GliderCheck" From 0a07bd7653ddaf39ec20fa9a2fa0307cb9c1e15a Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Wed, 18 Oct 2023 14:50:01 -0300 Subject: [PATCH 2/5] fix pkg_resources deprecation --- cc_plugin_glider/tests/resources.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cc_plugin_glider/tests/resources.py b/cc_plugin_glider/tests/resources.py index 657eb3e..1b2c8d7 100644 --- a/cc_plugin_glider/tests/resources.py +++ b/cc_plugin_glider/tests/resources.py @@ -5,18 +5,18 @@ import os import subprocess -from pkg_resources import resource_filename +import importlib def get_filename(path): """ Returns the path to a valid dataset """ - filename = resource_filename("cc_plugin_glider", path) - nc_path = filename.replace(".cdl", ".nc") - if not os.path.exists(nc_path): + filename = importlib.resources.files("cc_plugin_glider") / path + nc_path = filename.with_suffix(".nc") + if not nc_path.exists(): generate_dataset(filename, nc_path) - return nc_path + return str(nc_path) def generate_dataset(cdl_path, nc_path): From 559c1a1ba6a0062113624d3c2fcd2e5dbb4d5f23 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Wed, 18 Oct 2023 15:17:18 -0300 Subject: [PATCH 3/5] remove six, py2k syntax, fix remaining lints --- cc_plugin_glider/glider_dac.py | 29 +++++++------------- cc_plugin_glider/required_var_attrs.py | 3 --- cc_plugin_glider/tests/resources.py | 5 +--- cc_plugin_glider/tests/test_glidercheck.py | 31 +++------------------- cc_plugin_glider/util.py | 1 - 5 files changed, 15 insertions(+), 54 deletions(-) diff --git a/cc_plugin_glider/glider_dac.py b/cc_plugin_glider/glider_dac.py index f76b5d6..da4f16f 100644 --- a/cc_plugin_glider/glider_dac.py +++ b/cc_plugin_glider/glider_dac.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - """ cc_plugin_glider.glider_dac.py @@ -9,24 +7,18 @@ import os import warnings +from urllib.parse import urljoin import numpy as np import requests -import six from compliance_checker import __version__ from compliance_checker.base import BaseCheck, BaseNCCheck, Result, TestCtx from compliance_checker.cf.cf import CF1_6Check from lxml import etree from requests.exceptions import RequestException -from six.moves.urllib.parse import urljoin from cc_plugin_glider import util -try: - basestring -except NameError: - basestring = str - class GliderCheck(BaseNCCheck): register_checker = True @@ -49,11 +41,9 @@ def __init__(self): "instrument": "instruments.txt", "institution": "institutions.txt", } - # some top level attrs map to other things - var_remap = {"platform": "id", "instrument": "make_model"} self.auth_tables = {} - for global_att_name, table_file in six.iteritems(table_type): + for global_att_name, table_file in table_type.items(): # instruments have to be handled specially since they aren't # global attributes table_loc = urljoin(ncei_base_table_url, table_file) @@ -91,7 +81,8 @@ def 
request_resource(cls, url, backup_resource, fn): text_contents = f.read() except OSError: warnings.warn( - f"Could not open {backup_resource}, falling back to web " "request", + f"Could not open {backup_resource}, falling back to web request", + stacklevel=2, ) fail_flag = True @@ -103,12 +94,13 @@ def request_resource(cls, url, backup_resource, fn): except RequestException: warnings.warn( f"Requests exception encountered while fetching data from {url}", + stacklevel=2, ) try: return fn(text_contents) except Exception as e: - warnings.warn(f"Could not deserialize input text: {str(e)}") + warnings.warn(f"Could not deserialize input text: {str(e)}", stacklevel=2) return None cf_checks = CF1_6Check() @@ -359,7 +351,7 @@ def check_global_attributes(self, dataset): messages.append("Attr %s not present" % field) continue v = getattr(dataset, field, "") - if isinstance(v, basestring): + if isinstance(v, str): test = len(v.strip()) > 0 else: test = True @@ -748,7 +740,7 @@ def check_valid_min_dtype(self, dataset): ncvar = dataset.variables[var_name] valid_min = getattr(dataset.variables[var_name], "valid_min", None) - if isinstance(valid_min, basestring): + if isinstance(valid_min, str): valid_min_dtype = "string" elif isinstance(valid_min, float): valid_min_dtype = "float64" @@ -776,7 +768,7 @@ def check_valid_max_dtype(self, dataset): ncvar = dataset.variables[var_name] valid_max = getattr(dataset.variables[var_name], "valid_max", None) - if isinstance(valid_max, basestring): + if isinstance(valid_max, str): valid_max_dtype = "string" elif isinstance(valid_max, float): valid_max_dtype = "float64" @@ -847,7 +839,6 @@ def check_ncei_tables(self, dataset): "institution, platform_type, and " "instrument", ) - ncei_base_table_url = "https://gliders.ioos.us/ncei_authority_tables/" # might refactor if the authority tables names change table_type = { "project": "projects.txt", @@ -858,7 +849,7 @@ def check_ncei_tables(self, dataset): # some top level attrs map to other things var_remap = {"platform": "id", "instrument": "make_model"} - for global_att_name, table_file in six.iteritems(table_type): + for global_att_name, _ in table_type.items(): # instruments have to be handled specially since they aren't # global attributes if global_att_name not in {"instrument", "platform"}: diff --git a/cc_plugin_glider/required_var_attrs.py b/cc_plugin_glider/required_var_attrs.py index 94c9745..29fba0d 100644 --- a/cc_plugin_glider/required_var_attrs.py +++ b/cc_plugin_glider/required_var_attrs.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ cc_plugin_glider/required_var_attrs.py @@ -101,7 +100,6 @@ "precision": None, "reference_datum": None, "resolution": None, - "standard_name": None, "valid_max": None, "valid_min": None, }, @@ -120,7 +118,6 @@ "precision": None, "reference_datum": None, "resolution": None, - "standard_name": None, "valid_max": None, "valid_min": None, }, diff --git a/cc_plugin_glider/tests/resources.py b/cc_plugin_glider/tests/resources.py index 1b2c8d7..78de724 100644 --- a/cc_plugin_glider/tests/resources.py +++ b/cc_plugin_glider/tests/resources.py @@ -1,11 +1,8 @@ -#!/usr/bin/env python """ cc_plugin_glider/tests/resources.py """ -import os -import subprocess - import importlib +import subprocess def get_filename(path): diff --git a/cc_plugin_glider/tests/test_glidercheck.py b/cc_plugin_glider/tests/test_glidercheck.py index 3b4d7a4..4332cc0 100644 --- a/cc_plugin_glider/tests/test_glidercheck.py +++ b/cc_plugin_glider/tests/test_glidercheck.py @@ -1,26 +1,20 @@ -#!/usr/bin/env python """ 
cc_plugin_glider/tests/test_glidercheck.py """ import os import unittest +from urllib.parse import urljoin import numpy as np import requests_mock from compliance_checker.tests.helpers import MockTimeSeries from netCDF4 import Dataset -from six.moves.urllib.parse import urljoin from cc_plugin_glider import util from cc_plugin_glider.tests.resources import STATIC_FILES from ..glider_dac import GliderCheck -try: - basestring -except NameError: - basestring = str - class TestGliderCheck(unittest.TestCase): # @see @@ -35,12 +29,9 @@ def __repr__(self): name = self.id() name = name.split(".") if name[0] not in ["ion", "pyon"]: - return "%s (%s)" % (name[-1], ".".join(name[:-1])) + return f"{name[-1]} ({'.'.join(name[:-1])})" else: - return "%s ( %s )" % ( - name[-1], - ".".join(name[:-2]) + ":" + ".".join(name[-2:]), - ) # noqa + return f"{name[-1]} ({'.'.join(name[:-2])} : {'.'.join(name[-2:])})" __str__ = __repr__ @@ -48,7 +39,7 @@ def get_dataset(self, nc_dataset): """ Return a pairwise object for the dataset """ - if isinstance(nc_dataset, basestring): + if isinstance(nc_dataset, str): nc_dataset = Dataset(nc_dataset, "r") self.addCleanup(nc_dataset.close) return nc_dataset @@ -344,20 +335,6 @@ def test_platform_type(self): def test_ncei_compliance(self): """Tests that the NCEI compliance suite works""" - ncei_base_table_url = "https://gliders.ioos.us/ncei_authority_tables/" - # this is only a small subset - institutions = """MARACOOS -University of Delaware -Woods Hole Oceanographic Institution""" - - projects = """MARACOOS""" - - platforms = """Test123""" - - instrument_makes = """Seabird GCTD -Sea-Bird 41CP -Sea-Bird GCTD -Seabird GPCTD""" mock_nc_file = MockTimeSeries() mock_nc_file.project = "MARACOOS" mock_nc_file.institution = "MARACOOS" diff --git a/cc_plugin_glider/util.py b/cc_plugin_glider/util.py index ee90585..4e90c09 100644 --- a/cc_plugin_glider/util.py +++ b/cc_plugin_glider/util.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ cc_plugin_glider/util.py """ From ac72646e3b0de77b44c73a3184640c18c345a8e0 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Wed, 18 Oct 2023 15:21:41 -0300 Subject: [PATCH 4/5] add .pre-commit-config.yaml --- .pre-commit-config.yaml | 62 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..c6f22c8 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,62 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: check-ast + - id: debug-statements + - id: end-of-file-fixer + - id: check-docstring-first + - id: check-added-large-files + - id: requirements-txt-fixer + - id: file-contents-sorter + files: requirements-dev.txt + +- repo: https://github.com/psf/black + rev: 23.10.0 + hooks: + - id: black + language_version: python3 + +- repo: https://github.com/keewis/blackdoc + rev: v0.3.8 + hooks: + - id: blackdoc + +- repo: https://github.com/codespell-project/codespell + rev: v2.2.6 + hooks: + - id: codespell + exclude: > + (?x)^( + .*\.xml| + .*\.cdl + )$ + args: + - --ignore-words-list=pres + +- repo: https://github.com/asottile/add-trailing-comma + rev: v3.1.0 + hooks: + - id: add-trailing-comma + +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.0 + hooks: + - id: ruff + +- repo: https://github.com/tox-dev/pyproject-fmt + rev: 1.2.0 + hooks: + - id: pyproject-fmt + +ci: + autofix_commit_msg: | + 
[pre-commit.ci] auto fixes from pre-commit.com hooks + + for more information, see https://pre-commit.ci + autofix_prs: false + autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' + autoupdate_schedule: monthly + skip: [] + submodules: false From 11d558584490277efaf635c4d82824d604ca87a5 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Thu, 19 Oct 2023 18:56:02 -0300 Subject: [PATCH 5/5] drop py38 --- .github/workflows/tests.yml | 4 ++-- pyproject.toml | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 66f458e..3fc3134 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -10,7 +10,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: [ "3.9", "3.10", "3.11" ] os: [windows-latest, ubuntu-latest, macos-latest] fail-fast: false @@ -37,7 +37,7 @@ jobs: - name: Test latest compliance-checker shell: bash -l {0} - if: ${{ (matrix.os == 'ubuntu-latest') && (matrix.python-version == '3.10') }} + if: ${{ (matrix.os == 'ubuntu-latest') && (matrix.python-version == '3.11') }} run: > micromamba remove compliance-checker --yes && pip install git+https://github.com/ioos/compliance-checker.git diff --git a/pyproject.toml b/pyproject.toml index 6b53453..a7cac82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,10 +13,9 @@ license = {text = "Apache-2.0"} maintainers = [ {name = "Robert Fratantonio", email="robert.fratantonio@rpsgroup.com"}, ] -requires-python = ">=3.8" +requires-python = ">=3.9" classifiers = [ "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11",