From c2fce233cd8d35f2b622d143e4b524426fe1dcc9 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 22 Mar 2023 17:18:18 -0500 Subject: [PATCH 1/3] Add squre bracket in column validation for spreadsheets. Update error handling slightly(error list is now sorted by context always) --- hed/errors/__init__.py | 2 +- hed/errors/error_messages.py | 25 ++++- hed/errors/error_reporter.py | 71 ++++++++++--- hed/errors/error_types.py | 7 ++ hed/models/base_input.py | 56 +++++++---- hed/models/sidecar.py | 3 +- hed/schema/schema_compliance.py | 2 +- hed/validator/sidecar_validator.py | 5 +- hed/validator/spreadsheet_validator.py | 99 +++++++++++++++++-- tests/errors/test_error_reporter.py | 8 +- tests/models/test_base_input.py | 48 ++------- tests/schema/test_convert_tags.py | 2 +- tests/validator/test_onset_validator.py | 4 +- tests/validator/test_spreadsheet_validator.py | 57 +++++++++++ tests/validator/test_tag_validator_base.py | 2 +- 15 files changed, 298 insertions(+), 93 deletions(-) create mode 100644 tests/validator/test_spreadsheet_validator.py diff --git a/hed/errors/__init__.py b/hed/errors/__init__.py index 0583dd562..c2f58a07c 100644 --- a/hed/errors/__init__.py +++ b/hed/errors/__init__.py @@ -1,4 +1,4 @@ -from .error_reporter import ErrorHandler, get_exception_issue_string, get_printable_issue_string +from .error_reporter import ErrorHandler, get_exception_issue_string, get_printable_issue_string, sort_issues from .error_types import DefinitionErrors, OnsetErrors, SchemaErrors, SchemaWarnings, SidecarErrors, ValidationErrors from .error_types import ErrorContext, ErrorSeverity from .exceptions import HedExceptions, HedFileError diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index 9ae9557f3..fffc549dd 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -6,7 +6,7 @@ from hed.errors.error_reporter import hed_error, hed_tag_error from hed.errors.error_types import ValidationErrors, SchemaErrors, \ - SidecarErrors, SchemaWarnings, ErrorSeverity, DefinitionErrors, OnsetErrors + SidecarErrors, SchemaWarnings, ErrorSeverity, DefinitionErrors, OnsetErrors, ColumnErrors @hed_tag_error(ValidationErrors.HED_UNITS_INVALID) @@ -342,3 +342,26 @@ def onset_wrong_placeholder(tag, has_placeholder): if has_placeholder: return f"Onset/offset def tag {tag} expects a placeholder value, but does not have one." return f"Onset/offset def tag {tag} should not have a placeholder, but has one." + + +@hed_error(ColumnErrors.INVALID_COLUMN_REF) +def invalid_column_ref(bad_refs): + return f"Bad column references found(columns do not exist): {bad_refs}" + + +@hed_error(ColumnErrors.SELF_COLUMN_REF) +def self_column_ref(self_ref): + return f"Column references itself: {self_ref}" + + +@hed_error(ColumnErrors.NESTED_COLUMN_REF) +def nested_column_ref(column_name, ref_column): + return f"Column {column_name} has a nested reference to {ref_column}. " \ + f"Column reference columns cannot contain other column references." + + +@hed_error(ColumnErrors.MALFORMED_COLUMN_REF) +def nested_column_ref(column_name, index, symbol): + return f"Column {column_name} has a malformed column reference. Improper symbol {symbol} found at index {index}." + + diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index 4a7fd91a9..cb1a959d5 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -10,6 +10,27 @@ error_functions = {} +# Controls if the default issue printing skips adding indentation for this context +no_tab_context = {ErrorContext.HED_STRING, ErrorContext.SCHEMA_ATTRIBUTE} + +# Default sort ordering for issues list +default_sort_list = [ + ErrorContext.CUSTOM_TITLE, + ErrorContext.FILE_NAME, + ErrorContext.SIDECAR_COLUMN_NAME, + ErrorContext.SIDECAR_KEY_NAME, + ErrorContext.ROW, + ErrorContext.COLUMN, + ErrorContext.HED_STRING, + ErrorContext.SCHEMA_SECTION, + ErrorContext.SCHEMA_TAG, + ErrorContext.SCHEMA_ATTRIBUTE, +] + +# ErrorContext which is expected to be int based. +int_sort_list = [ + ErrorContext.ROW, +] def _register_error_function(error_type, wrapper_func): if error_type in error_functions: @@ -153,19 +174,23 @@ def __init__(self, check_for_warnings=True): self.error_context = [] self._check_for_warnings = check_for_warnings - def push_error_context(self, context_type, context, increment_depth_after=True): + def push_error_context(self, context_type, context): """ Push a new error context to narrow down error scope. Parameters: context_type (ErrorContext): A value from ErrorContext representing the type of scope. context (str, int, or HedString): The main value for the context_type. - increment_depth_after (bool): If True, add an extra tab to any subsequent errors in the scope. Notes: The context depends on the context_type. For ErrorContext.FILE_NAME this would be the actual filename. """ - self.error_context.append((context_type, context, increment_depth_after)) + if context is None: + if context_type in int_sort_list: + context = 0 + else: + context_type = "" + self.error_context.append((context_type, context)) def pop_error_context(self): """ Remove the last scope from the error context. @@ -292,8 +317,8 @@ def _add_context_to_errors(error_object, error_context_to_add): """ if error_object is None: error_object = {} - for (context_type, context, increment_count) in error_context_to_add: - error_object[context_type] = (context, increment_count) + for (context_type, context) in error_context_to_add: + error_object[context_type] = context return error_object @@ -330,7 +355,7 @@ def _get_tag_span_to_error_object(error_object): else: return None, None - hed_string = error_object[ErrorContext.HED_STRING][0] + hed_string = error_object[ErrorContext.HED_STRING] span = hed_string._get_org_span(source_tag) return span @@ -385,6 +410,7 @@ def filter_issues_by_severity(issues_list, severity): def get_exception_issue_string(issues, title=None): """ Return a string with issues list flatted into single string, one issue per line. + Possibly being deprecated. Parameters: issues (list): A list of strings containing issues to print. @@ -410,6 +436,29 @@ def get_exception_issue_string(issues, title=None): return issue_str +def sort_issues(issues, reverse=False): + """Sorts a list of issues by the error context values. + + Parameters: + issues (list): A list of dictionaries representing the issues to be sorted. + reverse (bool, optional): If True, sorts the list in descending order. Default is False. + + Returns: + list: The sorted list of issues.""" + def _get_keys(d): + result = [] + for key in default_sort_list: + if key in int_sort_list: + result.append(d.get(key, -1)) + else: + result.append(d.get(key, "")) + return tuple(result) + + issues = sorted(issues, key=_get_keys, reverse=reverse) + + return issues + + def get_printable_issue_string(issues, title=None, severity=None, skip_filename=True): """ Return a string with issues list flatted into single string, one per line. @@ -471,7 +520,7 @@ def _get_context_from_issue(val_issue, skip_filename=True): if skip_filename and key == ErrorContext.FILE_NAME: continue if key.startswith("ec_"): - single_issue_context.append((key, *val_issue[key])) + single_issue_context.append((key, val_issue[key])) return single_issue_context @@ -512,7 +561,7 @@ def _get_context_string(single_issue_context, last_used_context): """ Convert a single context list into the final human readable output form. Parameters: - single_issue_context (list): A list of tuples containing the context(context_type, context, increment_tab) + single_issue_context (list): A list of tuples containing the context(context_type, context) last_used_context (list): A list of tuples containing the last drawn context. Returns: @@ -528,18 +577,18 @@ def _get_context_string(single_issue_context, last_used_context): tab_count = 0 found_difference = False for i, context_tuple in enumerate(single_issue_context): - (context_type, context, increment_tab) = context_tuple + (context_type, context) = context_tuple if len(last_used_context) > i and not found_difference: last_drawn = last_used_context[i] # Was drawn, and hasn't changed. if last_drawn == context_tuple: - if increment_tab: + if context_type not in no_tab_context: tab_count += 1 continue context_string += _format_single_context_string(context_type, context, tab_count) found_difference = True - if increment_tab: + if context_type not in no_tab_context: tab_count += 1 tab_string = '\t' * tab_count diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index ac76f6992..66cfaecd1 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -117,3 +117,10 @@ class OnsetErrors: ONSET_PLACEHOLDER_WRONG = "ONSET_PLACEHOLDER_WRONG" ONSET_TOO_MANY_DEFS = "ONSET_TOO_MANY_DEFS" ONSET_TAG_OUTSIDE_OF_GROUP = "ONSET_TAG_OUTSIDE_OF_GROUP" + + +class ColumnErrors: + INVALID_COLUMN_REF = "INVALID_COLUMN_REF" + SELF_COLUMN_REF = "SELF_COLUMN_REF" + NESTED_COLUMN_REF = "NESTED_COLUMN_REF" + MALFORMED_COLUMN_REF = "MALFORMED_COLUMN_REF" diff --git a/hed/models/base_input.py b/hed/models/base_input.py index af6249f56..f0e4209c2 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -354,33 +354,45 @@ def _dataframe_has_names(dataframe): return True return False - def assemble(self, mapper=None): + def assemble(self, mapper=None, skip_square_brackets=False): """ Assembles the hed strings Parameters: mapper(ColumnMapper or None): Generally pass none here unless you want special behavior. - + skip_square_brackets (bool): If True, don't plug in square bracket values into columns. Returns: Dataframe: the assembled dataframe """ if mapper is None: mapper = self._mapper + all_columns = self._handle_transforms(mapper) + if skip_square_brackets: + return all_columns + transformers, _ = mapper.get_transformers() + + return self._handle_square_brackets(all_columns, list(transformers)) + + def _handle_transforms(self, mapper): transformers, need_categorical = mapper.get_transformers() - if not transformers: - return self._dataframe - all_columns = self._dataframe - if need_categorical: - all_columns[need_categorical] = all_columns[need_categorical].astype('category') + if transformers: + all_columns = self._dataframe + if need_categorical: + all_columns[need_categorical] = all_columns[need_categorical].astype('category') - all_columns = all_columns.transform(transformers) + all_columns = all_columns.transform(transformers) + + if need_categorical: + all_columns[need_categorical] = all_columns[need_categorical].astype('str') + else: + all_columns = self._dataframe - return self._insert_columns(all_columns, list(transformers.keys())) + return all_columns @staticmethod - def _find_column_refs(df): + def _find_column_refs(df, column_names): found_column_references = [] - for column_name in df: + for column_name in column_names: df_temp = df[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) u_vals = pd.Series([j for i in df_temp if isinstance(i, list) for j in i], dtype=str) u_vals = u_vals.unique() @@ -391,21 +403,23 @@ def _find_column_refs(df): return found_column_references @staticmethod - def _insert_columns(df, known_columns=None): - if known_columns is None: - known_columns = list(df.columns) - possible_column_references = [f"{column_name}" for column_name in df.columns if + def _handle_square_brackets(df, known_columns=None): + """ + Plug in square brackets with other columns + + If known columns is passed, only use those columns to find or replace references. + """ + if known_columns is not None: + column_names = list(known_columns) + else: + column_names = list(df.columns) + possible_column_references = [f"{column_name}" for column_name in column_names if isinstance(column_name, str) and column_name.lower() != "hed"] - found_column_references = BaseInput._find_column_refs(df) + found_column_references = BaseInput._find_column_refs(df, column_names) - invalid_replacements = [col for col in found_column_references if col not in possible_column_references] - if invalid_replacements: - # todo: This check may be moved to validation - raise ValueError(f"Bad column references found(columns do not exist): {invalid_replacements}") valid_replacements = [col for col in found_column_references if col in possible_column_references] # todo: break this into a sub function(probably) - column_names = known_columns for column_name in valid_replacements: column_names.remove(column_name) saved_columns = df[valid_replacements] diff --git a/hed/models/sidecar.py b/hed/models/sidecar.py index 280eba77d..958cadfba 100644 --- a/hed/models/sidecar.py +++ b/hed/models/sidecar.py @@ -255,8 +255,7 @@ def extract_definitions(self, hed_schema=None, error_handler=None): if hed_schema: for hed_string, column_data, _ in self.hed_string_iter(error_handler): hed_string_obj = HedString(hed_string, hed_schema) - error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj, - increment_depth_after=False) + error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj) self._extract_definition_issues += def_dict.check_for_definitions(hed_string_obj, error_handler) error_handler.pop_error_context() diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py index 84c2accbf..c0b821723 100644 --- a/hed/schema/schema_compliance.py +++ b/hed/schema/schema_compliance.py @@ -60,7 +60,7 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl for attribute_name in tag_entry.attributes: validator = schema_attribute_validators.get(attribute_name) if validator: - error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name, False) + error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name) new_issues = validator(hed_schema, tag_entry, tag_entry.attributes[attribute_name]) error_handler.add_context_and_filter(new_issues) issues_list += new_issues diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index af12005b1..daa71fb07 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -4,6 +4,7 @@ from hed import HedString from hed import Sidecar from hed.models.column_metadata import ColumnMetadata +from hed.errors.error_reporter import sort_issues class SidecarValidator: @@ -49,8 +50,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) for hed_string, column_data, position in sidecar.hed_string_iter(error_handler): hed_string_obj = HedString(hed_string, hed_schema=self._schema, def_dict=sidecar_def_dict) - error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj, - increment_depth_after=False) + error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj) new_issues = hed_validator.run_basic_checks(hed_string_obj, allow_placeholders=True) if not new_issues: new_issues = hed_validator.run_full_string_checks(hed_string_obj) @@ -61,6 +61,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) error_handler.pop_error_context() error_handler.pop_error_context() + issues = sort_issues(issues) return issues def validate_structure(self, sidecar, error_handler): diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index ba1f341ac..8b8aa9b1f 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -1,9 +1,12 @@ import pandas as pd +import re from hed import BaseInput from hed.errors import ErrorHandler, ValidationErrors, ErrorContext +from hed.errors.error_types import ColumnErrors from hed.models import ColumnType from hed import HedString from hed.models.hed_string_group import HedStringGroup +from hed.errors.error_reporter import sort_issues PANDAS_COLUMN_PREFIX_TO_IGNORE = "Unnamed: " @@ -25,6 +28,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): Parameters: data (BaseInput or pd.DataFrame): Input data to be validated. + If a dataframe, it is assumed to be assembled already. def_dicts(list of DefDict or DefDict): all definitions to use for validation name(str): The name to report errors from this file as error_handler (ErrorHandler): Error context to use. Creates a new one if None @@ -41,31 +45,32 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): # Check the structure of the input data, if it's a BaseInput if isinstance(data, BaseInput): issues += self._validate_column_structure(data, error_handler) - # todo ian: Add more checks here for column inserters + issues += self._validate_square_brackets(data.assemble(skip_square_brackets=True), error_handler) data = data.dataframe_a # Check the rows of the input data issues += self._run_checks(data, error_handler) error_handler.pop_error_context() + + issues = sort_issues(issues) return issues def _run_checks(self, data, error_handler): issues = [] + columns = list(data.columns) for row_number, text_file_row in enumerate(data.itertuples(index=False)): error_handler.push_error_context(ErrorContext.ROW, row_number) row_strings = [] new_column_issues = [] - # todo: make this report the correct column numbers(somehow - it almost surely doesn't right now) for column_number, cell in enumerate(text_file_row): if not cell or cell == "n/a": continue - error_handler.push_error_context(ErrorContext.COLUMN, column_number) + error_handler.push_error_context(ErrorContext.COLUMN, columns[column_number]) column_hed_string = HedString(cell) row_strings.append(column_hed_string) - error_handler.push_error_context(ErrorContext.HED_STRING, column_hed_string, - increment_depth_after=False) + error_handler.push_error_context(ErrorContext.HED_STRING, column_hed_string) new_column_issues = self._hed_validator.run_basic_checks(column_hed_string, allow_placeholders=False) error_handler.add_context_and_filter(new_column_issues) @@ -77,7 +82,7 @@ def _run_checks(self, data, error_handler): continue else: row_string = HedStringGroup(row_strings) - error_handler.push_error_context(ErrorContext.HED_STRING, row_string, increment_depth_after=False) + error_handler.push_error_context(ErrorContext.HED_STRING, row_string) new_column_issues = self._hed_validator.run_full_string_checks(row_string) error_handler.add_context_and_filter(new_column_issues) @@ -113,3 +118,85 @@ def _validate_column_structure(self, base_input, error_handler): error_handler.pop_error_context() return issues + + @staticmethod + def _validate_column_refs(df, error_handler): + possible_column_references = [f"{column_name}" for column_name in df.columns if + isinstance(column_name, str) and column_name.lower() != "hed"] + + issues = [] + found_column_references = {} + for column_name in df: + matches = df[column_name].str.findall("\[([a-z_\-\s0-9]+)(? Date: Wed, 22 Mar 2023 20:23:19 -0500 Subject: [PATCH 2/3] Further fix shrinking/expanding(with some test cases). Start updating errors to spec names. --- hed/errors/error_messages.py | 35 +++-- hed/errors/error_types.py | 20 ++- hed/models/definition_dict.py | 2 + hed/models/definition_entry.py | 5 +- hed/models/hed_group.py | 3 - hed/models/hed_string.py | 2 + hed/models/hed_tag.py | 13 ++ hed/validator/def_validator.py | 24 +++- hed/validator/tag_validator.py | 6 +- tests/validator/test_def_validator.py | 177 ++++++++++++++++++++++++++ tests/validator/test_tag_validator.py | 14 +- 11 files changed, 266 insertions(+), 35 deletions(-) diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index fffc549dd..ca379992f 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -31,14 +31,14 @@ def val_error_tag_extended(tag, problem_tag): return f"Hed tag is extended. '{problem_tag}' in {tag}" -@hed_error(ValidationErrors.HED_CHARACTER_INVALID) +@hed_error(ValidationErrors.CHARACTER_INVALID) def val_error_invalid_char(source_string, char_index): character = source_string[char_index] return f'Invalid character "{character}" at index {char_index}"' @hed_tag_error(ValidationErrors.INVALID_TAG_CHARACTER, has_sub_tag=True, - actual_code=ValidationErrors.HED_CHARACTER_INVALID) + actual_code=ValidationErrors.CHARACTER_INVALID) def val_error_invalid_tag_character(tag, problem_tag): return f"Invalid character '{problem_tag}' in {tag}" @@ -49,7 +49,7 @@ def val_error_tildes_not_supported(source_string, char_index): return f"Tildes not supported. Replace (a ~ b ~ c) with (a, (b, c)). '{character}' at index {char_index}'" -@hed_error(ValidationErrors.HED_COMMA_MISSING) +@hed_error(ValidationErrors.COMMA_MISSING) def val_error_comma_missing(tag): return f"Comma missing after - '{tag}'" @@ -143,27 +143,44 @@ def val_error_sidecar_key_missing(invalid_key, category_keys): return f"Category key '{invalid_key}' does not exist in column. Valid keys are: {category_keys}" -@hed_tag_error(ValidationErrors.HED_DEF_UNMATCHED) -def val_error_def_unmatched(tag): - return f"A data-recording’s Def tag cannot be matched to definition. Tag: '{tag}'" -@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_INVALID) +@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_INVALID, actual_code=ValidationErrors.DEF_EXPAND_INVALID) def val_error_bad_def_expand(tag, actual_def, found_def): return f"A data-recording’s Def-expand tag does not match the given definition." + \ f"Tag: '{tag}'. Actual Def: {actual_def}. Found Def: {found_def}" -@hed_tag_error(ValidationErrors.HED_DEF_VALUE_MISSING, actual_code=ValidationErrors.HED_DEF_VALUE_INVALID) +@hed_tag_error(ValidationErrors.HED_DEF_UNMATCHED, actual_code=ValidationErrors.DEF_INVALID) +def val_error_def_unmatched(tag): + return f"A data-recording’s Def tag cannot be matched to definition. Tag: '{tag}'" + + +@hed_tag_error(ValidationErrors.HED_DEF_VALUE_MISSING, actual_code=ValidationErrors.DEF_INVALID) def val_error_def_value_missing(tag): return f"A def tag requires a placeholder value, but was not given one. Definition: '{tag}'" -@hed_tag_error(ValidationErrors.HED_DEF_VALUE_EXTRA, actual_code=ValidationErrors.HED_DEF_VALUE_INVALID) +@hed_tag_error(ValidationErrors.HED_DEF_VALUE_EXTRA, actual_code=ValidationErrors.DEF_INVALID) def val_error_def_value_extra(tag): return f"A def tag does not take a placeholder value, but was given one. Definition: '{tag}" +@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_UNMATCHED, actual_code=ValidationErrors.DEF_EXPAND_INVALID) +def val_error_def_expand_unmatched(tag): + return f"A data-recording’s Def-expand tag cannot be matched to definition. Tag: '{tag}'" + + +@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_VALUE_MISSING, actual_code=ValidationErrors.DEF_EXPAND_INVALID) +def val_error_def_expand_value_missing(tag): + return f"A Def-expand tag requires a placeholder value, but was not given one. Definition: '{tag}'" + + +@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_VALUE_EXTRA, actual_code=ValidationErrors.DEF_EXPAND_INVALID) +def val_error_def_expand_value_extra(tag): + return f"A Def-expand tag does not take a placeholder value, but was given one. Definition: '{tag}" + + @hed_tag_error(ValidationErrors.HED_TOP_LEVEL_TAG, actual_code=ValidationErrors.HED_TAG_GROUP_ERROR) def val_error_top_level_tag(tag): return f"A tag that must be in a top level group was found in another location. {str(tag)}" diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index 66cfaecd1..c4fb5df5f 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -21,11 +21,22 @@ class ErrorContext: class ValidationErrors: # General validation errors - HED_CHARACTER_INVALID = 'HED_CHARACTER_INVALID' - HED_COMMA_MISSING = 'HED_COMMA_MISSING' + CHARACTER_INVALID = 'CHARACTER_INVALID' + COMMA_MISSING = 'COMMA_MISSING' + DEF_EXPAND_INVALID = "DEF_EXPAND_INVALID" + DEF_INVALID = "DEF_INVALID" + + # NOT OFFICIAL HED_DEF_UNMATCHED = "HED_DEF_UNMATCHED" + HED_DEF_VALUE_MISSING = "HED_DEF_VALUE_MISSING" + HED_DEF_VALUE_EXTRA = "HED_DEF_VALUE_EXTRA" + HED_DEF_EXPAND_INVALID = "HED_DEF_EXPAND_INVALID" - HED_DEF_VALUE_INVALID = "HED_DEF_VALUE_INVALID" + HED_DEF_EXPAND_UNMATCHED = "HED_DEF_EXPAND_UNMATCHED" + HED_DEF_EXPAND_VALUE_MISSING = "HED_DEF_EXPAND_VALUE_MISSING" + HED_DEF_EXPAND_VALUE_EXTRA = "HED_DEF_EXPAND_VALUE_EXTRA" + # END NOT OFFICIAL + HED_DEFINITION_INVALID = "HED_DEFINITION_INVALID" HED_NODE_NAME_EMPTY = 'HED_NODE_NAME_EMPTY' HED_ONSET_OFFSET_ERROR = 'HED_ONSET_OFFSET_ERROR' @@ -70,8 +81,7 @@ class ValidationErrors: HED_MULTIPLE_TOP_TAGS = "HED_MULTIPLE_TOP_TAGS" HED_TAG_GROUP_TAG = "HED_TAG_GROUP_TAG" - HED_DEF_VALUE_MISSING = "HED_DEF_VALUE_MISSING" - HED_DEF_VALUE_EXTRA = "HED_DEF_VALUE_EXTRA" + class SidecarErrors: diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index ca3b06b34..04cbfc440 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -184,7 +184,9 @@ def construct_def_tag(self, hed_tag): hed_tag(HedTag): The hed tag to identify definition contents in """ if hed_tag.short_base_tag in {DefTagNames.DEF_ORG_KEY, DefTagNames.DEF_EXPAND_ORG_KEY}: + save_parent = hed_tag._parent def_contents = self._get_definition_contents(hed_tag) + hed_tag._parent = save_parent if def_contents is not None: hed_tag._expandable = def_contents hed_tag._expanded = hed_tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py index cb7581fa3..27c89d33b 100644 --- a/hed/models/definition_entry.py +++ b/hed/models/definition_entry.py @@ -26,13 +26,14 @@ def __init__(self, name, contents, takes_value, source_context): if contents: add_group_to_dict(contents, self.tag_dict) - def get_definition(self, replace_tag, placeholder_value=None): + def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag=False): """ Return a copy of the definition with the tag expanded and the placeholder plugged in. Parameters: replace_tag (HedTag): The def hed tag to replace with an expanded version placeholder_value (str or None): If present and required, will replace any pound signs in the definition contents. + return_copy_of_tag(bool): Set to true for validation Returns: str: The expanded def tag name @@ -45,6 +46,8 @@ def get_definition(self, replace_tag, placeholder_value=None): if self.takes_value == (placeholder_value is None): return None, [] + if return_copy_of_tag: + replace_tag = replace_tag.copy() output_contents = [replace_tag] name = self.name if self.contents: diff --git a/hed/models/hed_group.py b/hed/models/hed_group.py index 6df911801..7273d956c 100644 --- a/hed/models/hed_group.py +++ b/hed/models/hed_group.py @@ -132,9 +132,6 @@ def copy(self): Returns: HedGroup: The copied group. - Notes: - - The parent tag is removed. - """ save_parent = self._parent self._parent = None diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py index db16833f1..7be20fb5d 100644 --- a/hed/models/hed_string.py +++ b/hed/models/hed_string.py @@ -96,6 +96,7 @@ def shrink_defs(self): expanded_parent = def_expand_group._parent if expanded_parent: def_expand_tag.short_base_tag = DefTagNames.DEF_ORG_KEY + def_expand_tag._parent = expanded_parent expanded_parent.replace(def_expand_group, def_expand_tag) return self @@ -118,6 +119,7 @@ def expand_defs(self): for tag, group in replacements: tag_parent = tag._parent tag_parent.replace(tag, group) + tag._parent = group tag.short_base_tag = DefTagNames.DEF_EXPAND_KEY return self diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index 29bcf8cf6..689eeac1d 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -54,6 +54,19 @@ def __init__(self, hed_string, span=None, hed_schema=None, def_dict=None): if def_dict: def_dict.construct_def_tag(self) + def copy(self): + """ Return a deep copy of this tag. + + Returns: + HedTag: The copied group. + + """ + save_parent = self._parent + self._parent = None + return_copy = copy.deepcopy(self) + self._parent = save_parent + return return_copy + @property def schema_prefix(self): """ Library prefix for this tag if one exists. diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py index 24a3d8e5b..5b18cd466 100644 --- a/hed/validator/def_validator.py +++ b/hed/validator/def_validator.py @@ -51,7 +51,7 @@ def _validate_def_contents(self, def_tag, def_expand_group): issues """ def_issues = [] - + is_def_tag = def_expand_group is not def_tag is_label_tag = def_tag.extension_or_value_portion placeholder = None found_slash = is_label_tag.find("/") @@ -62,17 +62,27 @@ def _validate_def_contents(self, def_tag, def_expand_group): label_tag_lower = is_label_tag.lower() def_entry = self.defs.get(label_tag_lower) if def_entry is None: - def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_UNMATCHED, tag=def_tag) + error_code = ValidationErrors.HED_DEF_UNMATCHED + if is_def_tag: + error_code = ValidationErrors.HED_DEF_EXPAND_UNMATCHED + def_issues += ErrorHandler.format_error(error_code, tag=def_tag) else: - def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder) + def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, + return_copy_of_tag=True) if def_tag_name: - if def_expand_group is not def_tag and def_expand_group != def_contents: + if is_def_tag and def_expand_group != def_contents: def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID, tag=def_tag, actual_def=def_contents, found_def=def_expand_group) elif def_entry.takes_value: - def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_VALUE_MISSING, tag=def_tag) + error_code = ValidationErrors.HED_DEF_VALUE_MISSING + if is_def_tag: + error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_MISSING + def_issues += ErrorHandler.format_error(error_code, tag=def_tag) else: - def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_VALUE_EXTRA, tag=def_tag) + error_code = ValidationErrors.HED_DEF_VALUE_EXTRA + if is_def_tag: + error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_EXTRA + def_issues += ErrorHandler.format_error(error_code, tag=def_tag) - return def_issues + return def_issues \ No newline at end of file diff --git a/hed/validator/tag_validator.py b/hed/validator/tag_validator.py index 2d08eae62..4ecadfc5a 100644 --- a/hed/validator/tag_validator.py +++ b/hed/validator/tag_validator.py @@ -201,13 +201,13 @@ def check_delimiter_issues_in_hed_string(self, hed_string): if current_tag.strip() == self.OPENING_GROUP_CHARACTER: current_tag = '' else: - issues += ErrorHandler.format_error(ValidationErrors.HED_COMMA_MISSING, tag=current_tag) + issues += ErrorHandler.format_error(ValidationErrors.COMMA_MISSING, tag=current_tag) elif last_non_empty_valid_character == "," and current_character == self.CLOSING_GROUP_CHARACTER: issues += ErrorHandler.format_error(ValidationErrors.HED_TAG_EMPTY, source_string=hed_string, char_index=i) elif TagValidator._comma_is_missing_after_closing_parentheses(last_non_empty_valid_character, current_character): - issues += ErrorHandler.format_error(ValidationErrors.HED_COMMA_MISSING, tag=current_tag[:-1]) + issues += ErrorHandler.format_error(ValidationErrors.COMMA_MISSING, tag=current_tag[:-1]) break last_non_empty_valid_character = current_character last_non_empty_valid_index = i @@ -495,7 +495,7 @@ def _report_invalid_character_error(self, hed_string, index): list: A singleton list with a dictionary representing the error. """ - error_type = ValidationErrors.HED_CHARACTER_INVALID + error_type = ValidationErrors.CHARACTER_INVALID character = hed_string[index] if character == "~": error_type = ValidationErrors.HED_TILDES_UNSUPPORTED diff --git a/tests/validator/test_def_validator.py b/tests/validator/test_def_validator.py index f889b36f1..6bef321a7 100644 --- a/tests/validator/test_def_validator.py +++ b/tests/validator/test_def_validator.py @@ -117,3 +117,180 @@ def test_duplicate_def(self): self.assertEqual(len(def_dict.issues), 2) self.assertTrue('ec_row' in def_dict.issues[0]) + + +class TestDefErrors(unittest.TestCase): + basic_hed_string_with_def_first_paren = None + + @classmethod + def setUpClass(cls): + cls.base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') + hed_xml_file = os.path.realpath(os.path.join(cls.base_data_dir, "schema_tests/HED8.0.0t.xml")) + cls.hed_schema = schema.load_schema(hed_xml_file) + cls.def_contents_string = "(Item/TestDef1,Item/TestDef2)" + cls.basic_definition_string = f"(Definition/TestDef,{cls.def_contents_string})" + cls.basic_definition_string_no_paren = f"Definition/TestDef,{cls.def_contents_string}" + cls.label_def_string = "Def/TestDef" + cls.expanded_def_string = f"(Def-expand/TestDef,{cls.def_contents_string})" + cls.basic_hed_string = "Item/BasicTestTag1,Item/BasicTestTag2" + cls.basic_hed_string_with_def = f"{cls.basic_hed_string},{cls.label_def_string}" + cls.basic_hed_string_with_def_first = f"{cls.label_def_string},{cls.basic_hed_string}" + cls.basic_hed_string_with_def_first_paren = f"({cls.label_def_string},{cls.basic_hed_string})" + cls.placeholder_label_def_string = "Def/TestDefPlaceholder/2471" + cls.placeholder_definition_contents = "(Item/TestDef1/#,Item/TestDef2)" + cls.placeholder_definition_string = f"(Definition/TestDefPlaceholder/#,{cls.placeholder_definition_contents})" + cls.placeholder_definition_string_no_paren = \ + f"Definition/TestDefPlaceholder/#,{cls.placeholder_definition_contents}" + cls.placeholder_expanded_def_string = "(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2))" + + cls.placeholder_hed_string_with_def = f"{cls.basic_hed_string},{cls.placeholder_label_def_string}" + cls.placeholder_hed_string_with_def_first = f"{cls.placeholder_label_def_string},{cls.basic_hed_string}" + cls.placeholder_hed_string_with_def_first_paren = f"({cls.placeholder_label_def_string},{cls.basic_hed_string})" + + def base_def_validator(self, test_strings, result_strings, expand_defs, shrink_defs, + remove_definitions, extra_ops=None, + basic_definition_string=None): + if not basic_definition_string: + basic_definition_string = self.basic_definition_string + def_dict = DefValidator(basic_definition_string, hed_schema=self.hed_schema) + + + for key in test_strings: + string, expected_result = test_strings[key], result_strings[key] + test_string = HedString(string, self.hed_schema, def_dict) + def_issues = def_dict.validate_def_tags(test_string) + if remove_definitions: + test_string.remove_definitions() + if expand_defs: + test_string.expand_defs() + if shrink_defs: + test_string.shrink_defs() + self.assertEqual(False, bool(def_issues)) + self.assertEqual(test_string.get_as_short(), expected_result) + + def test_expand_def_tags(self): + basic_def_strings = { + 'str_no_defs': self.basic_definition_string, + 'str2': self.basic_definition_string_no_paren, + 'str3': self.basic_hed_string + "," + self.basic_definition_string, + 'str4': self.basic_definition_string + "," + self.basic_hed_string, + 'str5': self.basic_hed_string_with_def, + 'str6': self.basic_hed_string_with_def_first, + 'str7': self.basic_hed_string_with_def_first_paren, + 'str8': "(" + self.basic_hed_string_with_def_first_paren + ")", + } + expanded_def_strings = { + 'str_no_defs': "", + 'str2': self.basic_definition_string_no_paren, + 'str3': self.basic_hed_string, + 'str4': self.basic_hed_string, + 'str5': self.basic_hed_string + "," + self.expanded_def_string, + 'str6': self.expanded_def_string + "," + self.basic_hed_string, + 'str7': "(" + self.expanded_def_string + "," + self.basic_hed_string + ")", + 'str8': "((" + self.expanded_def_string + "," + self.basic_hed_string + "))", + } + expanded_def_strings_with_definition = { + 'str_no_defs': self.basic_definition_string, + 'str2': self.basic_definition_string_no_paren, + 'str3': self.basic_hed_string + "," + self.basic_definition_string, + 'str4': self.basic_definition_string + "," + self.basic_hed_string, + 'str5': self.basic_hed_string + "," + self.expanded_def_string, + 'str6': self.expanded_def_string + "," + self.basic_hed_string, + 'str7': "(" + self.expanded_def_string + "," + self.basic_hed_string + ")", + 'str8': "((" + self.expanded_def_string + "," + self.basic_hed_string + "))", + } + + self.base_def_validator(basic_def_strings, expanded_def_strings_with_definition, + expand_defs=True, + shrink_defs=False, remove_definitions=False) + self.base_def_validator(basic_def_strings, basic_def_strings, + expand_defs=False, shrink_defs=False, remove_definitions=False) + self.base_def_validator(basic_def_strings, basic_def_strings, + expand_defs=False, shrink_defs=True, remove_definitions=False) + self.base_def_validator(expanded_def_strings_with_definition, basic_def_strings, + expand_defs=False, shrink_defs=True, + remove_definitions=False) + self.base_def_validator(expanded_def_strings_with_definition, expanded_def_strings_with_definition, + expand_defs=True, shrink_defs=False, + remove_definitions=False) + self.base_def_validator(basic_def_strings, expanded_def_strings, + expand_defs=True, shrink_defs=False, remove_definitions=True) + + def test_expand_def_tags_placeholder(self): + basic_def_strings = { + 'str_no_defs': self.placeholder_definition_string, + 'str2': self.placeholder_definition_string_no_paren, + 'str3': self.basic_hed_string + "," + self.placeholder_definition_string, + 'str4': self.placeholder_definition_string + "," + self.basic_hed_string, + 'str5': self.placeholder_hed_string_with_def, + 'str6': self.placeholder_hed_string_with_def_first, + 'str7': self.placeholder_hed_string_with_def_first_paren, + } + expanded_def_strings = { + 'str_no_defs': "", + 'str2': self.placeholder_definition_string_no_paren, + 'str3': self.basic_hed_string, + 'str4': self.basic_hed_string, + 'str5': self.basic_hed_string + "," + self.placeholder_expanded_def_string, + 'str6': self.placeholder_expanded_def_string + "," + self.basic_hed_string, + 'str7': "(" + self.placeholder_expanded_def_string + "," + self.basic_hed_string + ")", + } + expanded_def_strings_with_definition = { + 'str_no_defs': self.placeholder_definition_string, + 'str2': self.placeholder_definition_string_no_paren, + 'str3': self.basic_hed_string + "," + self.placeholder_definition_string, + 'str4': self.placeholder_definition_string + "," + self.basic_hed_string, + 'str5': self.basic_hed_string + "," + self.placeholder_expanded_def_string, + 'str6': self.placeholder_expanded_def_string + "," + self.basic_hed_string, + 'str7': "(" + self.placeholder_expanded_def_string + "," + self.basic_hed_string + ")", + } + + self.base_def_validator(basic_def_strings, expanded_def_strings_with_definition, + expand_defs=True, shrink_defs=False, + remove_definitions=False, basic_definition_string=self.placeholder_definition_string) + + self.base_def_validator(basic_def_strings, basic_def_strings, + expand_defs=False, shrink_defs=False, + remove_definitions=False, basic_definition_string=self.placeholder_definition_string) + + self.base_def_validator(basic_def_strings, basic_def_strings, + expand_defs=False, shrink_defs=True, + remove_definitions=False, basic_definition_string=self.placeholder_definition_string) + + self.base_def_validator(expanded_def_strings_with_definition, basic_def_strings, + expand_defs=False, shrink_defs=True, + remove_definitions=False, basic_definition_string=self.placeholder_definition_string) + + self.base_def_validator(basic_def_strings, expanded_def_strings, + expand_defs=True, shrink_defs=False, + remove_definitions=True, basic_definition_string=self.placeholder_definition_string) + + + # todo ian: finish updating these + # # special case test + # def test_changing_tag_then_def_mapping(self): + # def_dict = DefinitionDict() + # def_string = HedString(self.basic_definition_string) + # def_string.convert_to_canonical_forms(None) + # def_dict.check_for_definitions(def_string) + # def_mapper = DefMapper(def_dict) + # validator = HedValidator(self.hed_schema) + # hed_ops = [validator, def_mapper] + # + # test_string = HedString(self.label_def_string) + # tag = test_string.children[0] + # tag.tag = "Organizational-property/" + str(tag) + # def_issues = test_string.validate(hed_ops, expand_defs=True) + # self.assertFalse(def_issues) + # self.assertEqual(test_string.get_as_short(), f"{self.expanded_def_string}") + # + # test_string = HedString(self.label_def_string) + # tag = test_string.children[0] + # tag.tag = "Organizational-property22/" + str(tag) + # def_issues = test_string.validate(hed_ops, expand_defs=True) + # self.assertTrue(def_issues) + + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index dc0fb910a..939423638 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -621,9 +621,9 @@ def test_malformed_delimiters(self): # 'emptyGroup': False } expected_issues = { - 'missingOpeningComma': self.format_error(ValidationErrors.HED_COMMA_MISSING, + 'missingOpeningComma': self.format_error(ValidationErrors.COMMA_MISSING, tag="Action/Reach/To touch("), - 'missingClosingComma': self.format_error(ValidationErrors.HED_COMMA_MISSING, + 'missingClosingComma': self.format_error(ValidationErrors.COMMA_MISSING, tag="Participant/Effect/Body part/Arm)"), 'extraOpeningComma': self.format_error(ValidationErrors.HED_TAG_EMPTY, source_string=test_strings['extraOpeningComma'], @@ -674,7 +674,7 @@ def test_malformed_delimiters(self): 'validNestedParentheses2': [], 'validNestedParentheses3': [], 'validNestedParentheses4': [], - 'invalidNestedParentheses': self.format_error(ValidationErrors.HED_COMMA_MISSING, + 'invalidNestedParentheses': self.format_error(ValidationErrors.COMMA_MISSING, tag="Thing)) "), # 'emptyGroup': [] } @@ -698,13 +698,13 @@ def test_invalid_characters(self): 'closingBracket': False } expected_issues = { - 'openingBrace': self.format_error(ValidationErrors.HED_CHARACTER_INVALID, char_index=45, + 'openingBrace': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45, source_string=test_strings['openingBrace']), - 'closingBrace': self.format_error(ValidationErrors.HED_CHARACTER_INVALID, char_index=45, + 'closingBrace': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45, source_string=test_strings['closingBrace']), - 'openingBracket': self.format_error(ValidationErrors.HED_CHARACTER_INVALID, char_index=45, + 'openingBracket': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45, source_string=test_strings['openingBracket']), - 'closingBracket': self.format_error(ValidationErrors.HED_CHARACTER_INVALID, char_index=45, + 'closingBracket': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45, source_string=test_strings['closingBracket']) } self.validator_semantic(test_strings, expected_results, expected_issues, False) From 77ea23c6f23266529a71131db1eb453e89af33ea Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 23 Mar 2023 18:05:31 -0500 Subject: [PATCH 3/3] Block HED from appearing in sidecars --- hed/errors/error_messages.py | 7 ++++++- hed/errors/error_types.py | 3 ++- hed/validator/sidecar_validator.py | 23 ++++++++++++++++++++++- spec_tests/test_errors.py | 24 ++++++++++++++++++++---- 4 files changed, 50 insertions(+), 7 deletions(-) diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index ca379992f..7fd609a64 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -277,7 +277,12 @@ def sidecar_error_unknown_column(column_name): @hed_error(SidecarErrors.SIDECAR_HED_USED, actual_code=SidecarErrors.SIDECAR_INVALID) -def sidecar_hed_used(): +def SIDECAR_HED_USED(): + return "'HED' is a reserved name and cannot be used as a sidecar except in expected places." + + +@hed_error(SidecarErrors.SIDECAR_HED_USED_COLUMN, actual_code=SidecarErrors.SIDECAR_INVALID) +def SIDECAR_HED_USED_COLUMN(): return "'HED' is a reserved name and cannot be used as a sidecar column name" diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index c4fb5df5f..272bfe299 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -92,8 +92,9 @@ class SidecarErrors: INVALID_POUND_SIGNS_VALUE = 'invalidNumberPoundSigns' INVALID_POUND_SIGNS_CATEGORY = 'tooManyPoundSigns' UNKNOWN_COLUMN_TYPE = 'sidecarUnknownColumn' - SIDECAR_HED_USED = 'SIDECAR_HED_USED' + SIDECAR_HED_USED_COLUMN = 'SIDECAR_HED_USED_COLUMN' SIDECAR_NA_USED = 'SIDECAR_NA_USED' + SIDECAR_HED_USED = 'SIDECAR_HED_USED' class SchemaErrors: HED_SCHEMA_DUPLICATE_NODE = 'HED_SCHEMA_DUPLICATE_NODE' diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index daa71fb07..8c68808e8 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -81,6 +81,23 @@ def validate_structure(self, sidecar, error_handler): error_handler.pop_error_context() return all_validation_issues + @staticmethod + def _check_for_key(key, data): + if isinstance(data, dict): + if key in data: + return bool(data[key]) + else: + for sub_data in data.values(): + result = SidecarValidator._check_for_key(key, sub_data) + if result is not None: + return result + elif isinstance(data, list): + for sub_data in data: + result = SidecarValidator._check_for_key(key, sub_data) + if result is not None: + return result + return None + def _validate_column_structure(self, column_name, dict_for_entry, error_handler): """ Checks primarily for type errors such as expecting a string and getting a list in a json sidecar. @@ -93,13 +110,17 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler) """ val_issues = [] if column_name in self.reserved_column_names: - val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED) + val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED_COLUMN) return val_issues column_type = Sidecar._detect_column_type(dict_for_entry=dict_for_entry) if column_type is None: val_issues += error_handler.format_error_with_context(SidecarErrors.UNKNOWN_COLUMN_TYPE, column_name=column_name) + elif column_type == ColumnType.Ignore: + found_hed = self._check_for_key("HED", dict_for_entry) + if found_hed: + val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED) elif column_type == ColumnType.Categorical: raw_hed_dict = dict_for_entry["HED"] if not raw_hed_dict: diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index 9c80d4d98..81942a915 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -11,6 +11,13 @@ from hed.errors import ErrorHandler, get_printable_issue_string +known_errors = [ + 'SIDECAR_INVALID', + 'CHARACTER_INVALID', + 'COMMA_MISSING', + "DEF_EXPAND_INVALID", + "DEF_INVALID", +] skip_tests = ["VERSION_DEPRECATED", "CHARACTER_INVALID", "STYLE_WARNING"] @@ -30,6 +37,12 @@ def run_single_test(self, test_file): test_info = json.load(fp) for info in test_info: error_code = info['error_code'] + verify_code = False + if error_code in known_errors: + verify_code = True + + # To be deprecated once we add this to all tests + self._verify_code = verify_code if error_code in skip_tests: print(f"Skipping {error_code} test") continue @@ -62,6 +75,13 @@ def report_result(self, expected_result, issues, error_code, description, name, print(f"Passed '{test_type}' (which should fail) '{name}': {test}") print(get_printable_issue_string(issues)) self.fail_count.append(name) + elif self._verify_code: + if any(issue['code'] == error_code for issue in issues): + return + print(f"{error_code}: {description}") + print(f"Failed '{test_type}' (unexpected errors found) '{name}': {test}") + print(get_printable_issue_string(issues)) + self.fail_count.append(name) else: if issues: print(f"{error_code}: {description}") @@ -75,9 +95,6 @@ def _run_single_string_test(self, info, schema, def_dict, error_code, descriptio for test in tests: test_string = HedString(test, schema) - # This expand should not be required here. - def_dict.expand_def_tags(test_string) - issues = string_validator.run_basic_checks(test_string, False) issues += string_validator.run_full_string_checks(test_string) error_handler.add_context_and_filter(issues) @@ -86,7 +103,6 @@ def _run_single_string_test(self, info, schema, def_dict, error_code, descriptio def _run_single_sidecar_test(self, info, schema, def_dict, error_code, description, name, error_handler): for result, tests in info.items(): for test in tests: - # Well this is a disaster buffer = io.BytesIO(json.dumps(test).encode("utf-8")) sidecar = Sidecar(buffer) issues = sidecar.validate(hed_schema=schema, extra_def_dicts=def_dict, error_handler=error_handler)