From 9e8671d6bc07c0605fd58211335a5a1ed5f5f5af Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 9 Jun 2022 15:53:44 -0500 Subject: [PATCH] Rewrite sidecar to have a baseclass, and also takeover sidecar merging from column_mapper. Add new generic functions to hed_ops for mass updating a list/dict of hed strings --- hed/models/column_mapper.py | 62 ++-- hed/models/column_metadata.py | 315 ++--------------- hed/models/hed_ops.py | 142 +++++++- hed/models/sidecar.py | 329 ++++++++---------- hed/models/sidecar_base.py | 266 ++++++++++++++ hed/models/tabular_input.py | 17 +- hed/tools/bids/bids_sidecar_file.py | 2 +- .../data/sidecar_tests/json_errors_minor.json | 33 ++ tests/data/sidecar_tests/test_merged1.json | 14 + tests/data/sidecar_tests/test_merged2.json | 25 ++ .../sidecar_tests/test_merged_merged.json | 31 ++ tests/models/test_column_mapper.py | 12 +- tests/models/test_sidecar.py | 33 +- tests/tools/analysis/test_annotation_util.py | 6 +- tests/validator/test_hed_validator.py | 12 +- 15 files changed, 760 insertions(+), 539 deletions(-) create mode 100644 hed/models/sidecar_base.py create mode 100644 tests/data/sidecar_tests/json_errors_minor.json create mode 100644 tests/data/sidecar_tests/test_merged1.json create mode 100644 tests/data/sidecar_tests/test_merged2.json create mode 100644 tests/data/sidecar_tests/test_merged_merged.json diff --git a/hed/models/column_mapper.py b/hed/models/column_mapper.py index 82cd98b50..f8b3198a2 100644 --- a/hed/models/column_mapper.py +++ b/hed/models/column_mapper.py @@ -17,14 +17,12 @@ class ColumnMapper: Notes: - Functions and variables column and row indexing starts at 0. """ - def __init__(self, sidecars=None, tag_columns=None, column_prefix_dictionary=None, + def __init__(self, sidecar=None, tag_columns=None, column_prefix_dictionary=None, attribute_columns=None, optional_tag_columns=None): """ Constructor for ColumnMapper. 
Args: - sidecars (Sidecar, string, or list of these): A list of Sidecars or - filenames to gather ColumnDefinitions from. - Sidecars later in the list override those earlier in the list. + sidecar (Sidecar): A sidecar to gather column data from. tag_columns: (list): A list of ints or strings containing the columns that contain the HED tags. Sidecar column definitions will take precedent if there is a conflict with tag_columns. column_prefix_dictionary (dict): Dictionary with keys that are column numbers and values are HED tag @@ -55,9 +53,8 @@ def __init__(self, sidecars=None, tag_columns=None, column_prefix_dictionary=Non self._na_patterns = ["n/a", "nan"] self._finalize_mapping_issues = [] - self._has_sidecars = False - if sidecars: - self.add_sidecars(sidecars) + self._sidecar = None + self._set_sidecar(sidecar) self.add_columns(attribute_columns) self.set_tag_columns(tag_columns, optional_tag_columns, False) @@ -66,18 +63,23 @@ def __init__(self, sidecars=None, tag_columns=None, column_prefix_dictionary=Non # finalize the column map based on initial settings with no header self._finalize_mapping() - def add_sidecars(self, sidecars): - """ Add sidecar column info. + def _set_sidecar(self, sidecar): + """ Set the sidecar this column mapper uses Args: - sidecars (list): A list of filenames or loaded sidecar files in any mix. 
+ sidecar (Sidecar or None): the sidecar to use + + Returns: """ - self._has_sidecars = True - sidecars = Sidecar.load_multiple_sidecars(sidecars) - for sidecar in sidecars: - for column_data in sidecar: - self._add_column_data(column_data) + if self._sidecar: + raise ValueError("Trying to set a second sidecar on a column mapper.") + if not sidecar: + return None + for column_data in sidecar.column_data: + self._add_column_data(column_data) + + self._sidecar = sidecar def set_column_prefix_dict(self, column_prefix_dictionary, finalize_mapping=True): """ Replace the column prefix dictionary @@ -128,7 +130,7 @@ def set_column_map(self, new_column_map=None): Args: new_column_map (list or dict): Either an ordered list of the column names or column_number:column name - dictionary. In both cases column numbers start at 0 + dictionary. In both cases, column numbers start at 0 Returns: list: List of issues. Each issue is a dictionary. @@ -309,7 +311,7 @@ def _finalize_mapping(self): found_named_tag_columns[column_name] = column_number elif column_name.startswith(PANDAS_COLUMN_PREFIX_TO_IGNORE): continue - elif self._has_sidecars: + elif self._sidecar: if column_number not in all_tag_columns: self._finalize_mapping_issues += ErrorHandler.format_error(ValidationErrors.HED_UNKNOWN_COLUMN, extra_column_name=column_name) @@ -347,8 +349,9 @@ def get_def_dicts(self): list: A list of DefinitionDict objects corresponding to each column entry. """ - def_dicts = [entry.def_dict for entry in self.column_data.values()] - return def_dicts + if self._sidecar: + return self._sidecar.get_def_dicts() + return [] def get_column_mapping_issues(self): """ Get all the issues with finalizing column mapping. Primarily a missing required column. @@ -358,24 +361,3 @@ def get_column_mapping_issues(self): """ return self._finalize_mapping_issues - - def validate_column_data(self, hed_ops, error_handler=None, **kwargs): - """ Validate the column data. 
- - Args: - hed_ops (list, func, or HedOps): A func, a HedOps or a list of these to apply to the - hed strings in the sidecars. - error_handler (ErrorHandler or None): Used to report errors. Uses a default one if none passed in. - kwargs: See models.hed_ops.translate_ops or the specific hed_ops for additional options. - - Returns: - list: A list of syntax and semantic issues found in the definitions. Each issue is a dictionary. - - """ - if error_handler is None: - error_handler = ErrorHandler() - all_validation_issues = [] - for column_data in self.column_data.values(): - all_validation_issues += column_data.validate_column(hed_ops, error_handler=error_handler, **kwargs) - - return all_validation_issues diff --git a/hed/models/column_metadata.py b/hed/models/column_metadata.py index 80e5604bf..2eab23516 100644 --- a/hed/models/column_metadata.py +++ b/hed/models/column_metadata.py @@ -1,17 +1,14 @@ from enum import Enum from hed.models.hed_string import HedString -from hed.models.definition_dict import DefinitionDict -from hed.errors.error_types import SidecarErrors, ErrorContext, ValidationErrors -from hed.errors import error_reporter +from hed.errors.error_types import SidecarErrors, ValidationErrors from hed.errors.error_reporter import ErrorHandler -from hed.models.hed_ops import translate_ops -import copy class ColumnType(Enum): - """The overall column_type of a column in column mapper, eg treat it as HED tags. + """ The overall column_type of a column in column mapper, eg treat it as HED tags. - Mostly internal to column mapper related code""" + Mostly internal to column mapper related code + """ Unknown = None # Do not return this column at all Ignore = "ignore" @@ -26,28 +23,25 @@ class ColumnType(Enum): class ColumnMetadata: - """ Column in a ColumnMapper or top-level Sidecar dict. """ + """ Column in a ColumnMapper. 
""" - def __init__(self, column_type=None, name=None, hed_dict=None, column_prefix=None, error_handler=None): + def __init__(self, column_type=None, name=None, hed_dict=None, column_prefix=None): """ A single column entry in the column mapper. Args: column_type (ColumnType or None): How to treat this column when reading data. name (str, int, or None): The column_name or column number identifying this column. If name is a string, you'll need to use a column map to set the number later. - hed_dict (dict or None): The loaded data (usually from json) for the given def - At a minimum, this needs "HED" in the dict for several ColumnType + hed_dict (dict or str or None): The loaded data (usually from json) for the given def + For category columns, this is a dict. + For value columns, it's a string. column_prefix (str or None): If present, prepend the given column_prefix to all hed tags in the columns. Only works on ColumnType HedTags. - error_handler (ErrorHandler or None): Used to report errors. Uses a default if None. Notes: - Each column from which data is retrieved must have a ColumnMetadata representing its contents. - The column_prefix dictionaries are used when the column is processed. """ - if column_type is None or column_type == ColumnType.Unknown: - column_type = ColumnMetadata._detect_column_type(hed_dict) - if hed_dict is None: hed_dict = {} @@ -55,129 +49,17 @@ def __init__(self, column_type=None, name=None, hed_dict=None, column_prefix=Non self.column_name = name self.column_prefix = column_prefix self._hed_dict = hed_dict - self._def_removed_hed_dict = {} - self._def_dict = self.extract_definitions(error_handler=error_handler) - - @property - def def_dict(self): - """ Return the definition dictionary for this column. - - Returns: - DefinitionDict: Contains all the definitions located in the column. - - """ - return self._def_dict @property def hed_dict(self): - """ The loaded dict for any given entry. + """ The hed strings for any given entry. 
Returns: - dict: A dict which generally contains a "HED" entry and optional others like description. + dict or str: A string or dict of strings for this column """ return self._hed_dict - def hed_string_iter(self, hed_ops=None, error_handler=None, **kwargs): - """ Iterator yielding column hed strings. - - Args: - hed_ops (func, HedOps, or list of these): The HedOps or funcs to apply to the hed strings before returning. - error_handler (ErrorHandler): The error handler to use for context, uses a default one if none. - kwargs: See models.hed_ops.translate_ops or the specific hed_ops for additional options - - Yields: - tuple: - - HedString: The hed string at a given column and key position. - - str: Indication of the where hed string was loaded from so it can be later set by the user. - - list: Issues found applying hed_ops. Each issue is a dictionary. - - """ - if error_handler is None: - error_handler = ErrorHandler() - - if not isinstance(self._hed_dict, dict): - return - - tag_funcs = [] - if hed_ops: - tag_funcs = translate_ops(hed_ops, error_handler=error_handler, **kwargs) - - for hed_string_obj, key_name in self._hed_iter(): - new_col_issues = [] - error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) - if not hed_string_obj: - new_col_issues += ErrorHandler.format_error(SidecarErrors.BLANK_HED_STRING) - error_handler.add_context_to_issues(new_col_issues) - yield hed_string_obj, key_name, new_col_issues - else: - error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj, - increment_depth_after=False) - if tag_funcs: - new_col_issues += hed_string_obj.apply_funcs(tag_funcs) - - error_handler.add_context_to_issues(new_col_issues) - yield hed_string_obj, key_name, new_col_issues - error_handler.pop_error_context() - error_handler.pop_error_context() - - def _hed_iter(self, also_return_bad_types=False): - """ Iterate over the hed string entries. 
- - Args: - also_return_bad_types (bool): If true, this can yield types other than HedString, otherwise skips these. - - Yields: - tuple: - - HedString: Individual hed strings for different entries. - - str: The position to pass back to set this string. - - """ - hed_strings = self._hed_dict.get("HED", None) - if isinstance(hed_strings, dict): - for key, hed_string in hed_strings.items(): - if isinstance(hed_string, str): - hed_string = HedString(hed_string) - elif not also_return_bad_types: - continue - - yield hed_string, key - elif isinstance(hed_strings, str): - hed_string = HedString(hed_strings) - yield hed_string, None - - def set_hed_string(self, new_hed_string, position=None, set_def_removed=False): - """ Set a hed string for a category key/etc. - - Args: - new_hed_string (str or HedString): The new hed_string to replace the value at position. - position (str, optional): This should only be a value returned from hed_string_iter. - set_def_removed (bool): If True, set the version with definitions removed, rather than the normal version. - - Raises: - TypeError: If the mapping cannot occur. 
- - """ - hed_strings = self._hed_dict.get("HED", None) - if isinstance(hed_strings, dict): - if position is None: - raise TypeError("Error: Trying to set a category HED string with no category") - if position not in self._hed_dict["HED"]: - raise TypeError("Error: Not allowed to add new categories to a column") - if set_def_removed: - self._def_removed_hed_dict[position] = str(new_hed_string) - else: - self._hed_dict["HED"][position] = str(new_hed_string) - elif isinstance(hed_strings, (str, HedString)): - if position is not None: - raise TypeError("Error: Trying to set a value HED string with a category") - if set_def_removed: - self._def_removed_hed_dict = str(new_hed_string) - else: - self._hed_dict["HED"] = str(new_hed_string) - else: - raise TypeError("Error: Trying to set a HED string on a column_type that doesn't support it.") - def _get_category_hed_string(self, category): """ Fetch the hed string for a category key. @@ -191,7 +73,7 @@ def _get_category_hed_string(self, category): if self.column_type != ColumnType.Categorical: return None - return self._def_removed_hed_dict.get(category, None) + return self._hed_dict.get(category, None) def _get_value_hed_string(self): """ Fetch the hed string in a value column. @@ -203,7 +85,7 @@ def _get_value_hed_string(self): if self.column_type != ColumnType.Value: return None - return self._def_removed_hed_dict + return self._hed_dict def expand(self, input_text): """ Expand text using the rules for this column. 
@@ -228,7 +110,7 @@ def expand(self, input_text): return HedString(final_text), False else: return None, ErrorHandler.format_error(ValidationErrors.HED_SIDECAR_KEY_MISSING, invalid_key=input_text, - category_keys=list(self._hed_dict["HED"].keys())) + category_keys=list(self._hed_dict.keys())) elif column_type == ColumnType.Value: prelim_text = self._get_value_hed_string() final_text = prelim_text.replace("#", input_text) @@ -280,168 +162,23 @@ def remove_prefix(self, original_tag, current_tag_text): return current_tag_text @staticmethod - def _detect_column_type(dict_for_entry): - """ Determine the ColumnType of a given json entry. - - Args: - dict_for_entry (dict): The loaded json entry a specific column. - Generally has a "HED" entry among other optional ones. - - Returns: - ColumnType: The determined type of given column. Returns None if unknown. - - """ - if not dict_for_entry or not isinstance(dict_for_entry, dict): - return ColumnType.Attribute - - minimum_required_keys = ("HED",) - if not set(minimum_required_keys).issubset(dict_for_entry.keys()): - return ColumnType.Attribute - - hed_entry = dict_for_entry["HED"] - if isinstance(hed_entry, dict): - return ColumnType.Categorical - - if not isinstance(hed_entry, str): - return None - - if "#" not in dict_for_entry["HED"]: - return None - - return ColumnType.Value - - def get_definition_issues(self): - """ Return the issues found extracting definitions. - - Returns: - list: A list of issues found when parsing definitions. Individual issues are dictionaries. - - """ - return self._def_dict.get_definition_issues() - - def validate_column(self, hed_ops, error_handler, **kwargs): - """ Run the given hed_ops on this column. - - Args: - hed_ops (list or func or HedOps) A list of HedOps of funcs or a HedOps or func to apply - to the hed strings in the columns. - error_handler (ErrorHandler or None): Used to report errors. Uses a default one if none passed in. 
- kwargs: See models.hed_ops.translate_ops or the specific hed_ops for additional options. - - Returns: - list: Issues found by the given hed_ops. Each issue is a dictionary. - - """ - if error_handler is None: - error_handler = error_reporter.ErrorHandler() - - if not isinstance(hed_ops, list): - hed_ops = [hed_ops] - hed_ops = hed_ops.copy() - hed_ops.append(self._validate_pound_sign_count) - error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, self.column_name) - - col_validation_issues = self._run_ops(hed_ops, allow_placeholders=True, - error_handler=error_handler, **kwargs) - col_validation_issues += self._validate_column_structure(error_handler) - col_validation_issues += self.get_definition_issues() - error_handler.pop_error_context() - return col_validation_issues - - def _validate_column_structure(self, error_handler): - """ Checks primarily for type errors such as expecting a string and getting a list in a json sidecar. + def expected_pound_sign_count(column_type): + """ Return how many pound signs a column string should have. Args: - error_handler (ErrorHandler) Sets the context for the error reporting. Cannot be None. + column_type(ColumnType): The type of the column Returns: - list: Issues in performing the operations. Each issue is a dictionary. 
- - """ - val_issues = [] - if self.column_type is None: - val_issues += ErrorHandler.format_error(SidecarErrors.UNKNOWN_COLUMN_TYPE, - column_name=self.column_name) - elif self.column_type == ColumnType.Categorical: - raw_hed_dict = self._hed_dict["HED"] - if not raw_hed_dict: - val_issues += ErrorHandler.format_error(SidecarErrors.BLANK_HED_STRING) - - error_handler.add_context_to_issues(val_issues) - - for hed_string_obj, key_name in self._hed_iter(also_return_bad_types=True): - new_col_issues = [] - error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) - if not isinstance(hed_string_obj, HedString): - new_col_issues += ErrorHandler.format_error(SidecarErrors.WRONG_HED_DATA_TYPE, - given_type=type(hed_string_obj), - expected_type="str") - error_handler.add_context_to_issues(new_col_issues) - val_issues += new_col_issues - - return val_issues - - def _run_ops(self, hed_ops, error_handler, **kwargs): - col_validation_issues = [] - for _, _, col_issues in self.hed_string_iter(hed_ops, error_handler=error_handler, **kwargs): - col_validation_issues += col_issues - - return col_validation_issues - - def _validate_pound_sign_count(self, hed_string): - """ Check if a given hed string in the column has the correct number of pound signs. - - Args: - hed_string (str or HedString): HED string to be checked. - - Returns: - list: Issues due to pound sign errors. Each issue is a dictionary. - - Notes: - Normally the number of # should be either 0 or 1, but sometimes will be higher due to the - presence of definition tags. - + tuple: + expected_count(int): The expected count. 
0 or 1 + error_type(str): The type of the error we should issue """ - if self.column_type == ColumnType.Value or self.column_type == ColumnType.Attribute: - expected_pound_sign_count = 1 + if column_type == ColumnType.Value or column_type == ColumnType.Attribute: + expected_count = 1 error_type = SidecarErrors.INVALID_POUND_SIGNS_VALUE - elif self.column_type == ColumnType.HEDTags or self.column_type == ColumnType.Categorical: - expected_pound_sign_count = 0 + elif column_type == ColumnType.HEDTags or column_type == ColumnType.Categorical: + expected_count = 0 error_type = SidecarErrors.INVALID_POUND_SIGNS_CATEGORY else: - return [] - - # Make a copy without definitions to check placeholder count. - hed_string_copy = copy.deepcopy(hed_string) - hed_string_copy.remove_definitions() - - if hed_string_copy.lower().count("#") != expected_pound_sign_count: - return ErrorHandler.format_error(error_type, pound_sign_count=str(hed_string_copy).count("#")) - - return [] - - def extract_definitions(self, error_handler=None): - """ Gather and validate definitions in metadata. - - Args: - error_handler (ErrorHandler): The error handler to use for context, uses a default one if None. - - Returns: - DefinitionDict: Contains all the definitions located in the column. - issues: List of issues encountered in extracting the definitions. Each issue is a dictionary. 
- - """ - if error_handler is None: - error_handler = ErrorHandler() - new_def_dict = DefinitionDict() - hed_ops = [] - hed_ops.append(new_def_dict) - hed_ops.append(HedString.remove_definitions) - - all_issues = [] - for hed_string, key_name, issues in self.hed_string_iter(hed_ops=hed_ops, allow_placeholders=True, - error_handler=error_handler): - self.set_hed_string(hed_string, key_name, set_def_removed=True) - all_issues += issues - - return new_def_dict + return 0, None + return expected_count, error_type diff --git a/hed/models/hed_ops.py b/hed/models/hed_ops.py index 07cc87f8d..68f5bbdc7 100644 --- a/hed/models/hed_ops.py +++ b/hed/models/hed_ops.py @@ -2,6 +2,8 @@ from functools import partial from hed.schema import HedSchema, HedSchemaGroup +from hed.errors.error_types import ErrorContext, SidecarErrors +from hed.errors import ErrorHandler # These are the defaults if you pass in nothing. Most built in routes will have other default values. @@ -22,8 +24,7 @@ def translate_ops(hed_ops, split_ops=False, **kwargs): Args: hed_ops (list): A list of func or HedOps or HedSchema to apply to hed strings. split_ops (bool): If true, will split the operations into separate lists of tag and string operations. - - kwargs (dict): An optional dictionary of name-value pairs representing parameters passed to each HedOps + kwargs (dict): An optional dictionary of name-value pairs representing parameters passed to each HedOps Returns: list or tuple: A list of functions to apply or a tuple containing separate lists of tag and string ops. @@ -74,6 +75,143 @@ def translate_ops(hed_ops, split_ops=False, **kwargs): return tag_funcs + string_funcs +def apply_ops(hed_strings, hed_ops, **kwargs): + """ Convenience function to update a list/dict of hed strings + + Args: + hed_strings(str, dict, list): A list/dict/str to update + hed_ops (list or HedOps or func): A list of func or HedOps or HedSchema to apply to hed strings. 
+ kwargs (dict): An optional dictionary of name-value pairs representing parameters passed to each HedOps + + Returns: + tuple: + hed_strings(str, dict, list): Same type as input + issues(list): A list of issues found applying the hed_ops + """ + from hed.models.hed_string import HedString + + if not hed_strings: + return hed_strings, [] + issues = [] + tag_funcs = translate_ops(hed_ops, **kwargs) + if isinstance(hed_strings, str): + hed_string_obj = HedString(hed_strings) + issues += hed_string_obj.apply_funcs(tag_funcs) + return str(hed_string_obj), issues + elif isinstance(hed_strings, dict): + return_dict = {} + for key, hed_string in hed_strings.items(): + hed_string_obj = HedString(hed_string) + issues += hed_string_obj.apply_funcs(tag_funcs) + return_dict[key] = str(hed_string_obj) + return return_dict, issues + elif isinstance(hed_strings, list): + return_list = [] + for hed_string in hed_strings: + hed_string_obj = HedString(hed_string) + issues += hed_string_obj.apply_funcs(tag_funcs) + return_list.append(str(hed_string_obj)) + return return_list, issues + + raise ValueError("Unaccounted for type in apply_ops") + + +def hed_string_iter(hed_strings, tag_funcs, error_handler): + """ Iterate over the given dict of strings, returning HedStrings + + Also gives issues for blank strings + + Args: + hed_strings(dict or str): A hed_string or dict of hed strings + tag_funcs (list of funcs): The functions to apply before returning + error_handler (ErrorHandler): The error handler to use for context, uses a default one if none. + + Yields: + tuple: + - HedString: The hed string at a given column and key position. + - str: Indication of the where hed string was loaded from so it can be later set by the user. + - list: Issues found applying hed_ops. Each issue is a dictionary. 
+ + """ + for hed_string_obj, key_name in _hed_iter_low(hed_strings): + new_col_issues = [] + error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) + if not hed_string_obj: + new_col_issues += ErrorHandler.format_error(SidecarErrors.BLANK_HED_STRING) + error_handler.add_context_to_issues(new_col_issues) + yield hed_string_obj, key_name, new_col_issues + else: + error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj, + increment_depth_after=False) + if tag_funcs: + new_col_issues += hed_string_obj.apply_funcs(tag_funcs) + + error_handler.add_context_to_issues(new_col_issues) + yield hed_string_obj, key_name, new_col_issues + error_handler.pop_error_context() + error_handler.pop_error_context() + + +def _hed_iter_low(hed_strings): + """ Iterate over the hed string entries. + + Used by hed_string_iter + + Args: + hed_strings(dict or str): A hed_string or dict of hed strings + + Yields: + tuple: + - HedString: Individual hed strings for different entries. + - str: The position to pass back to set this string. + + """ + from hed.models.hed_string import HedString + + if isinstance(hed_strings, dict): + for key, hed_string in hed_strings.items(): + if isinstance(hed_string, str): + hed_string = HedString(hed_string) + else: + continue + yield hed_string, key + elif isinstance(hed_strings, str): + hed_string = HedString(hed_strings) + yield hed_string, None + + +def set_hed_string(new_hed_string, hed_strings, position=None): + """ Set a hed string for a category key/etc. + + Args: + new_hed_string (str or HedString): The new hed_string to replace the value at position. + hed_strings(dict or str or HedString): The hed strings we want to update + position (str, optional): This should only be a value returned from hed_string_iter. + + Returns: + updated_string (str or dict): The newly updated string/dict. + Raises: + TypeError: If the mapping cannot occur. 
+ + """ + from hed.models.hed_string import HedString + + if isinstance(hed_strings, dict): + if position is None: + raise TypeError("Error: Trying to set a category HED string with no category") + if position not in hed_strings: + raise TypeError("Error: Not allowed to add new categories to a column") + hed_strings[position] = str(new_hed_string) + elif isinstance(hed_strings, (str, HedString)): + if position is not None: + raise TypeError("Error: Trying to set a value HED string with a category") + hed_strings = str(new_hed_string) + else: + raise TypeError("Error: Trying to set a HED string on a column_type that doesn't support it.") + + return hed_strings + + class HedOps: """ Base class to support HedOps. diff --git a/hed/models/sidecar.py b/hed/models/sidecar.py index 18c9fa0a5..2db19e034 100644 --- a/hed/models/sidecar.py +++ b/hed/models/sidecar.py @@ -1,42 +1,88 @@ import json from hed.models.column_metadata import ColumnMetadata -from hed.errors.error_types import ErrorContext -from hed.errors import error_reporter +from hed.errors.error_types import ErrorContext, SidecarErrors from hed.errors import ErrorHandler from hed.errors.exceptions import HedFileError, HedExceptions from hed.models.hed_string import HedString -from hed.models.def_mapper import DefMapper +from hed.models.column_metadata import ColumnType +from hed.models.hed_ops import apply_ops, hed_string_iter, set_hed_string +from hed.models.sidecar_base import SidecarBase -class Sidecar: +class Sidecar(SidecarBase): """ Contents of a JSON file or merged file. - Notes: - - The Sidecar maintains its own definition dictionaries. - """ - def __init__(self, file, name=None): + def __init__(self, files, name=None): """ Construct a Sidecar object representing a JSON file. Args: - file (str or FileLike): A string or file-like object representing a JSON file. + files (str or FileLike or list): A string or file-like object representing a JSON file, or a list of such. 
name (str or None): Optional name identifying this sidecar, generally a filename. """ - self._column_data = {} - self.name = name - if file: - self.load_sidecar_file(file) + super().__init__(name) + self.loaded_dict = self.load_sidecar_files(files) + self.def_dict = self.extract_definitions() - def __iter__(self): - """ An iterator to go over the individual column metadata. + @property + def column_data(self): + """ Generates the list of ColumnMetadata for this sidecar Returns: - iterator: An iterator over the column metadata values. + list(ColumnMetadata): the list of column metadata defined by this sidecar + """ + for col_name, col_dict in self.loaded_dict.items(): + yield self._generate_single_column(col_name, col_dict) + + def _hed_string_iter(self, tag_funcs, error_handler): + """ Low level function to retrieve hed string in sidecar + + Args: + tag_funcs(list): A list of functions to apply to returned strings + error_handler(ErrorHandler): Error handler to use for context + + Yields: + tuple: + string(HedString): The retrieved and modified string + position(tuple): The location of this hed string. Black box. + issues(list): A list of issues running the tag_funcs. + """ + for column_name, dict_for_entry in self.loaded_dict.items(): + error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) + hed_dict = dict_for_entry.get("HED", {}) + for (hed_string_obj, position, issues) in hed_string_iter(hed_dict, tag_funcs, error_handler): + yield hed_string_obj, (column_name, position), issues + + error_handler.pop_error_context() + + def _set_hed_string(self, new_hed_string, position): + """ Low level function to update hed string in sidecar + + Args: + new_hed_string (str or HedString): The new hed_string to replace the value at position. + position (tuple): The value returned from hed_string_iter. 
+ """ + column_name, position = position + hed_dict = self.loaded_dict[column_name] + hed_dict["HED"] = set_hed_string(new_hed_string, hed_dict["HED"], position) + + def validate_structure(self, error_handler): + """ Validate the raw structure of this sidecar. + + Args: + error_handler(ErrorHandler): The error handler to use for error context + Returns: + issues(list): A list of issues found with the structure """ - return iter(self._column_data.values()) + all_validation_issues = [] + for column_name, dict_for_entry in self.loaded_dict.items(): + error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) + all_validation_issues += self._validate_column_structure(column_name, dict_for_entry, error_handler) + error_handler.pop_error_context() + return all_validation_issues def save_as_json(self, save_filename): """ Save column metadata to a JSON file. @@ -45,11 +91,8 @@ def save_as_json(self, save_filename): save_filename (str): Path to save file """ - output_dict = {} - for entry in self._column_data.values(): - output_dict[entry.column_name] = entry.hed_dict with open(save_filename, "w") as fp: - json.dump(output_dict, fp, indent=4) + json.dump(self.loaded_dict, fp, indent=4) def get_as_json_string(self): """ Return this sidecar's column metadata as a string. @@ -58,10 +101,7 @@ def get_as_json_string(self): str: The json string representing this sidecar. """ - output_dict = {} - for entry in self._column_data.values(): - output_dict[entry.column_name] = entry.hed_dict - return json.dumps(output_dict, indent=4) + return json.dumps(self.loaded_dict, indent=4) def load_sidecar_file(self, file): """ Load column metadata from a given json file. @@ -71,140 +111,56 @@ def load_sidecar_file(self, file): Raises: HedFileError: If the file was not found or could not be parsed into JSON. - - Notes: - - Multiple files can be loaded into one Sidecar, but it is discouraged. 
- """ - if isinstance(file, str): + if not file: + return {} + elif isinstance(file, str): try: with open(file, "r") as fp: if not self.name: self.name = file - self._load_json_columns(fp) + return self._load_json_file(fp) except FileNotFoundError as e: raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror, file) except TypeError as e: raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), file) else: - self._load_json_columns(file) - - def _load_json_columns(self, fp): - """ Parse a JSON file into columns and load in the column entry dictionary. - - Args: - fp (File-like): The JSON source stream. + return self._load_json_file(file) - Raises: - HedFileError: If the file cannot be parsed. - - """ - try: - loaded_defs = json.load(fp) - for col_name, col_dict in loaded_defs.items(): - self._add_single_column(col_name, col_dict) - except json.decoder.JSONDecodeError as e: - raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), self.name) - - @staticmethod - def load_multiple_sidecars(input_list): - """ Utility for loading multiple json files. + def load_sidecar_files(self, files): + """ Load json from a given file or list Args: - input_list (list): A list of filenames or Sidecar files in any mix. - - Returns: - list: A list sidecars. - + files (str or FileLike or list): A string or file-like object representing a JSON file, or a list of such. Raises: - HedFileError: If any of the files are not found. - - """ - if not isinstance(input_list, list): - input_list = [input_list] - - loaded_files = [] - for json_file in input_list: - if isinstance(json_file, str): - json_file = Sidecar(json_file) - loaded_files.append(json_file) - return loaded_files - - def hed_string_iter(self, hed_ops=None, error_handler=None, expand_defs=False, remove_definitions=False, - allow_placeholders=True, extra_def_dicts=None, **kwargs): - """ Iterator over hed strings in columns. 
- - Args: - hed_ops (func, HedOps, list): A HedOps, funcs or list of these to apply to the hed strings - before returning - error_handler (ErrorHandler): The error handler to use for context, uses a default one if none. - expand_defs (bool): If True, expand all def tags located in the strings. - remove_definitions (bool): If True, remove all definitions found in the string. - allow_placeholders (bool): If False, placeholders will be marked as validation warnings. - extra_def_dicts (DefinitionDict, list, None): Extra dicts to add to the list. - kwargs: See models.hed_ops.translate_ops or the specific hed_ops for additional options. - - Yields: - tuple: - - HedString: A HedString at a given column and key position. - - tuple: Indicates where hed_string was loaded from so it can be later set by the user - - list: A list of issues found performing ops. Each issue is a dictionary. - + HedFileError: If the file was not found or could not be parsed into JSON. """ - if error_handler is None: - error_handler = ErrorHandler() - hed_ops = self._standardize_ops(hed_ops) - if expand_defs or remove_definitions: - self._add_definition_mapper(hed_ops, extra_def_dicts) - for column_name, entry in self._column_data.items(): - error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) - for (hed_string_obj, position, issues) in entry.hed_string_iter(hed_ops=hed_ops, - error_handler=error_handler, - expand_defs=expand_defs, - allow_placeholders=allow_placeholders, - remove_definitions=remove_definitions, - **kwargs): - yield hed_string_obj, (column_name, position), issues - - error_handler.pop_error_context() - - def set_hed_string(self, new_hed_string, position): - """ Set a provided column/category key/etc. + if not files: + return {} + if not isinstance(files, list): + files = [files] - Args: - new_hed_string (str or HedString): The new hed_string to replace the value at position. 
- position (tuple): The (HedString, str, list) tuple returned from hed_string_iter. - - """ - column_name, position = position - entry = self._column_data[column_name] - entry.set_hed_string(new_hed_string, position) + merged_dict = {} + for file in files: + loaded_json = self.load_sidecar_file(file) + merged_dict.update(loaded_json) + return merged_dict - def _add_definition_mapper(self, hed_ops, extra_def_dicts=None): - """ Add a DefMapper if the hed_ops list doesn't have one. + def _load_json_file(self, fp): + """ Load the raw json of a given file Args: - hed_ops (list): A list of HedOps - extra_def_dicts (list): DefDicts from outside. - - Returns: - DefMapper: A shallow copy of the hed_ops list with a DefMapper added if there wasn't one. + fp (File-like): The JSON source stream. + Raises: + HedFileError: If the file cannot be parsed. """ - if not any(isinstance(hed_op, DefMapper) for hed_op in hed_ops): - def_dicts = self.get_def_dicts(extra_def_dicts) - def_mapper = DefMapper(def_dicts) - hed_ops.append(def_mapper) - return def_mapper - return None - - @staticmethod - def _standardize_ops(hed_ops): - if not isinstance(hed_ops, list): - hed_ops = [hed_ops] - return hed_ops.copy() + try: + return json.load(fp) + except json.decoder.JSONDecodeError as e: + raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), self.name) - def _add_single_column(self, column_name, dict_for_entry, column_type=None): + def _generate_single_column(self, column_name, dict_for_entry, column_type=None): """ Create a single column metadata entry and add to this sidecar. Args: @@ -214,56 +170,77 @@ def _add_single_column(self, column_name, dict_for_entry, column_type=None): This overrides auto-detection from the dict_for_entry. 
""" - column_entry = ColumnMetadata(column_type, column_name, dict_for_entry) - self._column_data[column_name] = column_entry + if column_type is None: + column_type = self._detect_column_type(dict_for_entry) + if dict_for_entry: + hed_dict = dict_for_entry.get("HED") + else: + hed_dict = None + def_removed_dict, _ = apply_ops(hed_dict, HedString.remove_definitions) + column_entry = ColumnMetadata(column_type, column_name, def_removed_dict) + return column_entry - def get_def_dicts(self, extra_def_dicts=None): - """ Return DefinitionDicts for the columns in this sidecar. + @staticmethod + def _detect_column_type(dict_for_entry): + """ Determine the ColumnType of a given json entry. Args: - extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. + dict_for_entry (dict): The loaded json entry a specific column. + Generally has a "HED" entry among other optional ones. Returns: - list: A list of definition dicts for each column plus any found in extra_def_dicts. + ColumnType: The determined type of given column. Returns None if unknown. """ - def_dicts = [column_entry.def_dict for column_entry in self] - if extra_def_dicts: - if not isinstance(extra_def_dicts, list): - extra_def_dicts = [extra_def_dicts] - def_dicts += extra_def_dicts - return def_dicts + if not dict_for_entry or not isinstance(dict_for_entry, dict): + return ColumnType.Attribute + + minimum_required_keys = ("HED",) + if not set(minimum_required_keys).issubset(dict_for_entry.keys()): + return ColumnType.Attribute + + hed_entry = dict_for_entry["HED"] + if isinstance(hed_entry, dict): + return ColumnType.Categorical + + if not isinstance(hed_entry, str): + return None + + if "#" not in dict_for_entry["HED"]: + return None + + return ColumnType.Value - def validate_entries(self, hed_ops=None, name=None, extra_def_dicts=None, - error_handler=None, **kwargs): - """ Run the given hed_ops on all columns in this sidecar. 
+ def _validate_column_structure(self, column_name, dict_for_entry, error_handler): + """ Checks primarily for type errors such as expecting a string and getting a list in a json sidecar. Args: - hed_ops (list, func, or HedOps): A HedOps, func or list of these to apply to hed strings in this sidecar. - name (str): If present, will use this as the filename for context, rather than using the actual filename - Useful for temp filenames. - extra_def_dicts: (DefinitionDict, list, or None): If present use these in addition to sidecar's def dicts. - error_handler (ErrorHandler or None): Used to report errors. Uses a default one if none passed in. - kwargs: See models.hed_ops.translate_ops or the specific hed_ops for additional options. + error_handler (ErrorHandler) Sets the context for the error reporting. Cannot be None. Returns: - list: The list of validation issues found. Individual issues are in the form of a dict. + list: Issues in performing the operations. Each issue is a dictionary. """ - if error_handler is None: - error_handler = error_reporter.ErrorHandler() - if not name: - name = self.name - if name: - error_handler.push_error_context(ErrorContext.FILE_NAME, name, False) - - hed_ops = self._standardize_ops(hed_ops) - def_mapper = self._add_definition_mapper(hed_ops, extra_def_dicts) - - all_validation_issues = [] - all_validation_issues += def_mapper.issues - for column_data in self: - all_validation_issues += column_data.validate_column(hed_ops, error_handler=error_handler, **kwargs) - if name: - error_handler.pop_error_context() - return all_validation_issues + val_issues = [] + column_type = self._detect_column_type(dict_for_entry=dict_for_entry) + if column_type is None: + val_issues += ErrorHandler.format_error(SidecarErrors.UNKNOWN_COLUMN_TYPE, + column_name=column_name) + elif column_type == ColumnType.Categorical: + raw_hed_dict = dict_for_entry["HED"] + if not raw_hed_dict: + val_issues += ErrorHandler.format_error(SidecarErrors.BLANK_HED_STRING) + 
class SidecarBase:
    """ Baseclass for specialized spreadsheet sidecars.

    To subclass this class, you'll want to override at the minimum:
        _hed_string_iter
        _set_hed_string
        validate_structure
        column_data property <- This is the only truly mandatory one

    Subclasses are also expected to assign ``self.def_dict`` (normally the result of
    ``extract_definitions()``) once their data is loaded; ``get_def_dicts`` and
    ``validate_entries`` read that attribute.

    """
    def __init__(self, name=None):
        """ Initialize a sidecar baseclass.

        Args:
            name (str or None): Optional name identifying this sidecar, generally a filename.

        """
        self.name = name
        # NOTE: self.def_dict is deliberately not created here.  The subclass must set it
        # (self.def_dict = self.extract_definitions()) after its data has been loaded.

    @property
    def column_data(self):
        """ Generate the list of ColumnMetadata for this sidecar.

        Returns:
            list(ColumnMetadata): The list of column metadata defined by this sidecar.
                The base class defines no columns.

        """
        return []

    def _hed_string_iter(self, tag_funcs, error_handler):
        """ Low level function to retrieve hed strings in the sidecar.

        Args:
            tag_funcs (list): A list of functions to apply to returned strings.
            error_handler (ErrorHandler): Error handler to use for context.

        Yields:
            tuple:
                - HedString: The retrieved and modified string.
                - tuple: The location of this hed string.  Black box.
                - list: A list of issues running the tag_funcs.

        Notes:
            The base class has no strings, so nothing is yielded.

        """
        # A bare `yield` would emit a single None, which every consumer in this class
        # then fails to unpack into (hed_string, position, issues).
        yield from ()

    def _set_hed_string(self, new_hed_string, position):
        """ Low level function to update a hed string in the sidecar.

        Args:
            new_hed_string (str or HedString): The new hed_string to replace the value at position.
            position (tuple): The position value yielded by hed_string_iter.

        """
        return

    def validate_structure(self, error_handler):
        """ Validate the raw structure of this sidecar.

        Args:
            error_handler (ErrorHandler): The error handler to use for error context.

        Returns:
            list: A list of issues found with the structure.  Always empty in the base class.

        """
        return []

    def __iter__(self):
        """ An iterator to go over the individual column metadata.

        Returns:
            iterator: An iterator over the column metadata values.

        """
        return iter(self.column_data)

    def hed_string_iter(self, hed_ops=None, error_handler=None, expand_defs=False, remove_definitions=False,
                        allow_placeholders=True, extra_def_dicts=None, **kwargs):
        """ Iterator over hed strings in columns.

        Args:
            hed_ops (func, HedOps, list): A HedOps, funcs or list of these to apply to the hed strings
                                          before returning.
            error_handler (ErrorHandler): The error handler to use for context, uses a default one if none.
            expand_defs (bool): If True, expand all def tags located in the strings.
            remove_definitions (bool): If True, remove all definitions found in the string.
            allow_placeholders (bool): If False, placeholders will be marked as validation warnings.
            extra_def_dicts (DefinitionDict, list, None): Extra dicts to add to the list.
            kwargs: See models.hed_ops.translate_ops or the specific hed_ops for additional options.

        Yields:
            tuple:
                - HedString: A HedString at a given column and key position.
                - tuple: Indicates where hed_string was loaded from so it can be later set by the user.
                - list: A list of issues found performing ops. Each issue is a dictionary.

        """
        if error_handler is None:
            error_handler = ErrorHandler()
        # Work on a private copy so the caller's list never gains a DefMapper.
        hed_ops = self._standardize_ops(hed_ops)
        if expand_defs or remove_definitions:
            self._add_definition_mapper(hed_ops, extra_def_dicts)
        tag_funcs = translate_ops(hed_ops, error_handler=error_handler, expand_defs=expand_defs,
                                  allow_placeholders=allow_placeholders, remove_definitions=remove_definitions,
                                  **kwargs)

        return self._hed_string_iter(tag_funcs, error_handler)

    def set_hed_string(self, new_hed_string, position):
        """ Set a provided column/category key/etc.

        Args:
            new_hed_string (str or HedString): The new hed_string to replace the value at position.
            position (tuple): The position element of a tuple yielded by hed_string_iter.

        """
        return self._set_hed_string(new_hed_string, position)

    def _add_definition_mapper(self, hed_ops, extra_def_dicts=None):
        """ Add a DefMapper if the hed_ops list doesn't have one.

        Args:
            hed_ops (list): A list of HedOps.  Modified in place when a DefMapper has to be added.
            extra_def_dicts (list): DefDicts from outside.

        Returns:
            DefMapper: The first DefMapper already in hed_ops, or the newly created and appended one.

        """
        existing_mapper = next((hed_op for hed_op in hed_ops if isinstance(hed_op, DefMapper)), None)
        if existing_mapper is not None:
            return existing_mapper

        def_mapper = DefMapper(self.get_def_dicts(extra_def_dicts))
        hed_ops.append(def_mapper)
        return def_mapper

    @staticmethod
    def _standardize_ops(hed_ops):
        """ Return hed_ops as a new list that is safe to modify.

        Args:
            hed_ops (list, func, HedOps, or None): A single op, a list of ops, or None.

        Returns:
            list: A shallow copy of the list, a one element list for a single op, or an empty list for None.

        """
        if hed_ops is None:
            # "No ops" must not turn into a list holding a None op.
            return []
        if not isinstance(hed_ops, list):
            return [hed_ops]
        return hed_ops.copy()

    def get_def_dicts(self, extra_def_dicts=None):
        """ Return the definition dict for this sidecar.

        Args:
            extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

        Returns:
            list: A new list with the sidecar def_dict plus any found in extra_def_dicts.

        """
        def_dicts = [self.def_dict]
        if extra_def_dicts:
            if not isinstance(extra_def_dicts, list):
                extra_def_dicts = [extra_def_dicts]
            def_dicts += extra_def_dicts
        return def_dicts

    def validate_entries(self, hed_ops=None, name=None, extra_def_dicts=None,
                         error_handler=None, **kwargs):
        """ Run the given hed_ops on all columns in this sidecar.

        Args:
            hed_ops (list, func, or HedOps): A HedOps, func or list of these to apply to hed strings in this sidecar.
            name (str): If present, will use this as the filename for context, rather than using the actual filename.
                Useful for temp filenames.
            extra_def_dicts: (DefinitionDict, list, or None): If present use these in addition to sidecar's def dicts.
            error_handler (ErrorHandler or None): Used to report errors.  Uses a default one if none passed in.
            kwargs: See models.hed_ops.translate_ops or the specific hed_ops for additional options.

        Returns:
            list: The list of validation issues found. Individual issues are in the form of a dict.

        Notes:
            - Structural issues are returned on their own; no hed_ops are run when any are found.
            - Each string produced by the ops is written back with set_hed_string.

        """
        if error_handler is None:
            error_handler = ErrorHandler()
        if not name:
            name = self.name
        if name:
            error_handler.push_error_context(ErrorContext.FILE_NAME, name, False)

        try:
            all_validation_issues = self.validate_structure(error_handler)

            # Early out major errors so the rest of our code can assume they won't happen.
            if all_validation_issues:
                return all_validation_issues

            hed_ops = self._standardize_ops(hed_ops)
            def_mapper = self._add_definition_mapper(hed_ops, extra_def_dicts)
            all_validation_issues += def_mapper.issues

            for hed_string, key_name, issues in self.hed_string_iter(hed_ops=hed_ops, allow_placeholders=True,
                                                                     error_handler=error_handler, **kwargs):
                self.set_hed_string(hed_string, key_name)
                all_validation_issues += issues

            # Finally check what requires the final mapped data to check
            for column_data in self.column_data:
                validate_pound_func = partial(self._validate_pound_sign_count, column_type=column_data.column_type)
                _, issues = apply_ops(column_data.hed_dict, validate_pound_func)
                all_validation_issues += issues
            all_validation_issues += self.def_dict.get_definition_issues()
            return all_validation_issues
        finally:
            # Pop on every exit path (early return or exception) so a shared error handler
            # is never left with this sidecar's file name on its context stack.
            if name:
                error_handler.pop_error_context()

    def extract_definitions(self, error_handler=None):
        """ Gather and validate definitions in metadata.

        Args:
            error_handler (ErrorHandler): The error handler to use for context, uses a default one if None.

        Returns:
            DefinitionDict: The definition dict that was run as a hed op over every hed string in the sidecar.

        Notes:
            Issues yielded while iterating are not returned here.  validate_entries later reads
            get_definition_issues() from the stored def_dict.

        """
        if error_handler is None:
            error_handler = ErrorHandler()
        new_def_dict = DefinitionDict()

        # The iterator is lazy: it has to be exhausted for the op to see every string.
        for _ in self.hed_string_iter(hed_ops=[new_def_dict], allow_placeholders=True,
                                      error_handler=error_handler):
            pass

        return new_def_dict

    def _validate_pound_sign_count(self, hed_string, column_type):
        """ Check if a given hed string in the column has the correct number of pound signs.

        Args:
            hed_string (HedString): HED string to be checked.  It is not modified.
            column_type (ColumnType): Type of the column the string belongs to; selects the expected count.

        Returns:
            list: Issues due to pound sign errors. Each issue is a dictionary.

        Notes:
            Normally the number of # should be either 0 or 1, but sometimes will be higher due to the
            presence of definition tags.

        """
        # Make a copy without definitions to check placeholder count.
        expected_count, error_type = ColumnMetadata.expected_pound_sign_count(column_type)
        hed_string_copy = copy.deepcopy(hed_string)
        hed_string_copy.remove_definitions()

        # Count once so the comparison and the reported number cannot disagree.
        pound_sign_count = str(hed_string_copy).count("#")
        if pound_sign_count != expected_count:
            return ErrorHandler.format_error(error_type, pound_sign_count=pound_sign_count)

        return []
self._also_gather_defs = also_gather_defs self._extra_def_dicts = extra_def_dicts def_mapper = self.create_def_mapper(new_mapper, extra_def_dicts) @@ -73,16 +72,16 @@ def mapper (DefMapper): A class to validate or expand definitions with the given return def_mapper - def reset_column_mapper(self, sidecars=None, attribute_columns=None): + def reset_column_mapper(self, sidecar=None, attribute_columns=None): """ Change the sidecars and settings. Args: - sidecars (str or [str] or Sidecar or [Sidecar]): A list of json filenames to pull sidecar info from. + sidecar (str or [str] or Sidecar or [Sidecar]): A list of json filenames to pull sidecar info from. attribute_columns (str or int or [str] or [int]): Column names or numbers to treat as attributes. Default: ["duration", "onset"] """ - new_mapper = ColumnMapper(sidecars=sidecars, optional_tag_columns=[self.HED_COLUMN_NAME], + new_mapper = ColumnMapper(sidecar=sidecar, optional_tag_columns=[self.HED_COLUMN_NAME], attribute_columns=attribute_columns) self._def_mapper = self.create_def_mapper(new_mapper, self._extra_def_dicts) @@ -106,4 +105,4 @@ def validate_file_sidecars(self, hed_ops=None, error_handler=None, **kwargs): if not isinstance(hed_ops, list): hed_ops = [hed_ops] hed_ops.append(self._def_mapper) - return self._mapper.validate_column_data(hed_ops, error_handler=error_handler, **kwargs) + return self._sidecar.validate_entries(hed_ops, error_handler=error_handler, **kwargs) diff --git a/hed/tools/bids/bids_sidecar_file.py b/hed/tools/bids/bids_sidecar_file.py index fa2db933a..a30500570 100644 --- a/hed/tools/bids/bids_sidecar_file.py +++ b/hed/tools/bids/bids_sidecar_file.py @@ -66,7 +66,7 @@ def set_contents(self, content_info=None, overwrite=False): raise HedFileError("InvalidJSONSidecarContents", f"Attempt to set {self.file_path} to invalid {str(content_info)}", "") self.has_hed = self.is_hed(file_contents) - self.contents = Sidecar(file=io.StringIO(json.dumps(file_contents)), + self.contents = 
Sidecar(files=io.StringIO(json.dumps(file_contents)), name=os.path.realpath(os.path.basename(self.file_path))) @staticmethod diff --git a/tests/data/sidecar_tests/json_errors_minor.json b/tests/data/sidecar_tests/json_errors_minor.json new file mode 100644 index 000000000..d4f5e8f4b --- /dev/null +++ b/tests/data/sidecar_tests/json_errors_minor.json @@ -0,0 +1,33 @@ +{ + "onset": { + "Description": "Onset of the go signal which is the disappearance of the cue bar", + "Units": "s" + }, + "error_blank_hed_string": { + "Description": "Subject presses both buttons causing cue bar to start count-down display", + "Levels" : { + "cue": "Subject presses both buttons causing cue bar to start count-down display", + "go": "Bar disappears cueing the subject to make a choice about which hand to raise", + "right-raised": "Subject releases right button and raises right hand", + "left-raised": "Subject releases left button and raises left hand", + "left-raised-match": "Subject raised left hand and computer presented image of left hand.", + "left-raised-nomatch": "Subject raised left hand and computer presented image of right hand.", + "right-raised-match": "Subject raised right hand and computer presented image of right hand.", + "right-raised-nomatch": "Subject raised right hand and computer presented image of left hand." 
+ }, + "HED": { + "cue": "", + "go": "Description/Onset of the go signal which is the disappearance of the cue bar, Sensory-event, (Visual, Experimental-stimulus, Instructional, Rectangle/#)", + "right-raised": "Description/Subject raised right hand thereby releasing right button, (Agent-action, Experimental-participant, (Lift, (Right-side, Hand)), (Release, (Right-side, Button)))", + "left-raised": "Description/Subject raised left hand thereby releasing left button, (Agent-action, Experimental-participant, (Lift, (Left-side, Hand)), (Release, (Left-side, Button)))", + "left-raised-match": "Description/Computer presented image of left hand thereby matching a previously raised left hand, (Sensory-event, Visual, (Drawing, ID/left_hand.png,(Left-side, Hand)), (Feedback, (Intended-effect, Penalty)))", + "left-raised-nomatch": "Description/Computer presented image of left hand thereby not matching previously raised right hand, (Sensory-event, Visual, (Drawing, ID/right_hand.png, (Left-side, Hand)), (Feedback, (Intended-effect, Reward)))", + "right-raised-match": "Description/Computer presented image of right hand thereby matching a previously raised right hand, (Sensory-event, Visual, (Drawing, ID/right_hand.png,(Right-side, Hand)), (Feedback, (Intended-effect, Penalty)))", + "right-raised-nomatch": "Description/Computer presented image of right hand thereby not matching a previously raised left hand, (Sensory-event, Visual, (Drawing, ID/right_hand.png, (Right-side, Hand)), (Feedback, (Intended-effect, Reward)))" + } + }, + "trial": { + "Description": "The number of the trial in the experiment.", + "HED": "Description/The number of the trial in the experiment, (Experimental-trial, Item-count/#)" + } +} \ No newline at end of file diff --git a/tests/data/sidecar_tests/test_merged1.json b/tests/data/sidecar_tests/test_merged1.json new file mode 100644 index 000000000..29284b083 --- /dev/null +++ b/tests/data/sidecar_tests/test_merged1.json @@ -0,0 +1,14 @@ +{ + 
"response_time": { + "LongName": "Response time after stimulus", + "Description": "Time from stimulus presentation until subject presses button", + "Units": "ms", + "HED": "Action/Button press/#" + }, + "response_time2": { + "LongName": "Response time after stimulus", + "Description": "Time from stimulus presentation until subject presses button", + "Units": "ms", + "HED": "Action/Button press/#" + } +} \ No newline at end of file diff --git a/tests/data/sidecar_tests/test_merged2.json b/tests/data/sidecar_tests/test_merged2.json new file mode 100644 index 000000000..c1900b4f1 --- /dev/null +++ b/tests/data/sidecar_tests/test_merged2.json @@ -0,0 +1,25 @@ +{ + "trial_type": { + "LongName": "Event category", + "Description": "Indicator of type of action that is expected", + "Levels": { + "go": "A red square is displayed to indicate starting", + "stop": "A blue square is displayed to indicate stopping" + }, + "HED": { + "go": "Item/ItemTag1", + "stop": "Item/ItemTag2" + } + }, + "stim_file": { + "LongName": "Stimulus file name", + "Description": "Relative path of the stimulus image file", + "HED": "Attribute/File/#" + }, + "response_time2": { + "LongName": "Response time after stimulus", + "Description": "Time from stimulus presentation until subject presses button", + "Units": "ms", + "HED": "Action/Button press/ValueReplaced/#" + } +} \ No newline at end of file diff --git a/tests/data/sidecar_tests/test_merged_merged.json b/tests/data/sidecar_tests/test_merged_merged.json new file mode 100644 index 000000000..8e8047585 --- /dev/null +++ b/tests/data/sidecar_tests/test_merged_merged.json @@ -0,0 +1,31 @@ +{ + "trial_type": { + "LongName": "Event category", + "Description": "Indicator of type of action that is expected", + "Levels": { + "go": "A red square is displayed to indicate starting", + "stop": "A blue square is displayed to indicate stopping" + }, + "HED": { + "go": "Item/ItemTag1", + "stop": "Item/ItemTag2" + } + }, + "response_time": { + "LongName": 
"Response time after stimulus", + "Description": "Time from stimulus presentation until subject presses button", + "Units": "ms", + "HED": "Action/Button press/#" + }, + "stim_file": { + "LongName": "Stimulus file name", + "Description": "Relative path of the stimulus image file", + "HED": "Attribute/File/#" + }, + "response_time2": { + "LongName": "Response time after stimulus", + "Description": "Time from stimulus presentation until subject presses button", + "Units": "ms", + "HED": "Action/Button press/ValueReplaced/#" + } +} \ No newline at end of file diff --git a/tests/models/test_column_mapper.py b/tests/models/test_column_mapper.py index 30587d88a..5c7dae967 100644 --- a/tests/models/test_column_mapper.py +++ b/tests/models/test_column_mapper.py @@ -4,6 +4,8 @@ from hed.models import ColumnMapper, ColumnType, ColumnMetadata, HedString, model_constants from hed.schema import load_schema from hed import HedFileError +from hed.models.sidecar import Sidecar + class Test(unittest.TestCase): schema_file = '../data/schema_test_data/HED8.0.0t.xml' @@ -77,12 +79,12 @@ def test_optional_column(self): def test_add_json_file_events(self): mapper = ColumnMapper() - mapper.add_sidecars(self.basic_events_json) + mapper._set_sidecar(Sidecar(self.basic_events_json)) self.assertTrue(len(mapper.column_data) >= 2) def test__detect_event_type(self): mapper = ColumnMapper() - mapper.add_sidecars(self.basic_events_json) + mapper._set_sidecar(Sidecar(self.basic_events_json)) self.assertTrue(mapper.column_data[self.basic_event_name].column_type == self.basic_event_type) def test_add_attribute_columns(self): @@ -118,14 +120,14 @@ def test__finalize_mapping(self): def test_expand_column(self): mapper = ColumnMapper() - mapper.add_sidecars(self.basic_events_json) + mapper._set_sidecar(Sidecar(self.basic_events_json)) mapper.set_column_map(self.basic_column_map) expanded_column = mapper._expand_column(2, "go") self.assertTrue(isinstance(expanded_column[0], HedString)) def 
test_expand_row_tags(self): mapper = ColumnMapper() - mapper.add_sidecars(self.basic_events_json) + mapper._set_sidecar(Sidecar(self.basic_events_json)) mapper.add_columns(self.basic_attribute_column) mapper.set_column_map(self.basic_column_map) expanded_row = mapper.expand_row_tags(self.basic_event_row) @@ -134,7 +136,7 @@ def test_expand_row_tags(self): def test_expansion_issues(self): mapper = ColumnMapper() - mapper.add_sidecars(self.basic_events_json) + mapper._set_sidecar(Sidecar(self.basic_events_json)) mapper.add_columns(self.basic_attribute_column) mapper.set_column_map(self.basic_column_map) expanded_row = mapper.expand_row_tags(self.basic_event_row_invalid) diff --git a/tests/models/test_sidecar.py b/tests/models/test_sidecar.py index 1037aae9a..c88f9f692 100644 --- a/tests/models/test_sidecar.py +++ b/tests/models/test_sidecar.py @@ -21,9 +21,11 @@ def setUpClass(cls): cls.json_without_definitions_filename = \ os.path.join(cls.base_data_dir, "sidecar_tests/both_types_events_without_definitions.json") cls.json_errors_filename = os.path.join(cls.base_data_dir, "sidecar_tests/json_errors.json") + cls.json_errors_filename_minor = os.path.join(cls.base_data_dir, "sidecar_tests/json_errors_minor.json") cls.default_sidecar = Sidecar(cls.json_filename) cls.json_def_sidecar = Sidecar(cls.json_def_filename) cls.errors_sidecar = Sidecar(cls.json_errors_filename) + cls.errors_sidecar_minor = Sidecar(cls.json_errors_filename_minor) cls.json_without_definitions_sidecar = Sidecar(cls.json_without_definitions_filename) cls.base_output_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") @@ -43,14 +45,14 @@ def test_invalid_filenames(self): json_dict = Sidecar(None) except HedFileError: pass - self.assertTrue(len(json_dict._column_data) == 0) + self.assertTrue(len(json_dict.loaded_dict) == 0) json_dict = None try: json_dict = Sidecar("") except HedFileError: pass - self.assertTrue(len(json_dict._column_data) == 0) + 
self.assertTrue(len(json_dict.loaded_dict) == 0) def test_name(self): invalid_json = "invalidxmlfile.json" @@ -78,14 +80,17 @@ def test__iter__(self): def test_validate_column_group(self): validator = HedValidator(hed_schema=None) - validation_issues = self.json_def_sidecar.validate_entries(validator, check_for_warnings=True) - self.assertEqual(len(validation_issues), 0) - - validation_issues = self.default_sidecar.validate_entries(validator, check_for_warnings=True) - self.assertEqual(len(validation_issues), 0) + # validation_issues = self.json_def_sidecar.validate_entries(validator, check_for_warnings=True) + # self.assertEqual(len(validation_issues), 0) + # + # validation_issues = self.default_sidecar.validate_entries(validator, check_for_warnings=True) + # self.assertEqual(len(validation_issues), 0) validation_issues = self.errors_sidecar.validate_entries(validator, check_for_warnings=True) - self.assertEqual(len(validation_issues), 15) + self.assertEqual(len(validation_issues), 4) + + validation_issues2 = self.errors_sidecar_minor.validate_entries(validator, check_for_warnings=True) + self.assertEqual(len(validation_issues2), 10) validation_issues = self.json_without_definitions_sidecar.validate_entries(validator, check_for_warnings=True) self.assertEqual(len(validation_issues), 1) @@ -125,5 +130,17 @@ def test_save_load2(self): for str1, str2 in zip(sidecar.hed_string_iter(), reloaded_sidecar.hed_string_iter()): self.assertEqual(str1, str2) + def test_merged_sidecar(self): + base_folder = self.base_data_dir + "sidecar_tests/" + combined_sidecar_json = base_folder + "test_merged_merged.json" + sidecar_json1 = base_folder + "test_merged1.json" + sidecar_json2 = base_folder + "test_merged2.json" + + sidecar = Sidecar([sidecar_json1, sidecar_json2]) + sidecar2 = Sidecar(combined_sidecar_json) + + self.assertEqual(sidecar.loaded_dict, sidecar2.loaded_dict) + + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/analysis/test_annotation_util.py 
b/tests/tools/analysis/test_annotation_util.py index bc06ce899..b6be63a66 100644 --- a/tests/tools/analysis/test_annotation_util.py +++ b/tests/tools/analysis/test_annotation_util.py @@ -211,17 +211,17 @@ def test_hed_to_df_with_definitions(self): def test_hed_to_df_to_hed(self): validator = HedValidator(self.hed_schema) - side1 = Sidecar(file=self.json_path, name="sidecar_face.json") + side1 = Sidecar(files=self.json_path, name="sidecar_face.json") issues1 = side1.validate_entries(validator, check_for_warnings=True) self.assertFalse(issues1, "hed_to_df_to_hed is starting with a valid JSON sidecar") df1 = hed_to_df(self.sidecar_face) self.assertIsInstance(df1, DataFrame, "hed_to_df_to_hed starting sidecar can be converted to df") hed2 = df_to_hed(df1, description_tag=True) - side2 = Sidecar(file=io.StringIO(json.dumps(hed2)), name='JSON_Sidecar2') + side2 = Sidecar(files=io.StringIO(json.dumps(hed2)), name='JSON_Sidecar2') issues2 = side2.validate_entries(validator, check_for_warnings=True) self.assertFalse(issues2, "hed_to_df_to_hed is valid after conversion back and forth with description True") hed3 = df_to_hed(df1, description_tag=False) - side3 = Sidecar(file=io.StringIO(json.dumps(hed3)), name='JSON_Sidecar2') + side3 = Sidecar(files=io.StringIO(json.dumps(hed3)), name='JSON_Sidecar2') issues3 = side3.validate_entries(validator, check_for_warnings=True) self.assertFalse(issues3, "hed_to_df_to_hed is valid after conversion back and forth with description False") diff --git a/tests/validator/test_hed_validator.py b/tests/validator/test_hed_validator.py index d5b6b0137..cacf94a24 100644 --- a/tests/validator/test_hed_validator.py +++ b/tests/validator/test_hed_validator.py @@ -108,12 +108,12 @@ def test_complex_file_validation_invalid(self): "../data/validator_tests/bids_events_bad_defs.json") validator = HedValidator(hed_schema=hed_schema) sidecar = Sidecar(json_path) - # issues = sidecar.validate_entries(hed_ops=validator, check_for_warnings=True) - # 
self.assertEqual(len(issues), 4) + issues = sidecar.validate_entries(hed_ops=validator, check_for_warnings=True) + self.assertEqual(len(issues), 4) input_file = TabularInput(events_path, sidecar=sidecar) - # - # validation_issues = input_file.validate_file_sidecars(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 4) + + validation_issues = input_file.validate_file_sidecars(validator, check_for_warnings=True) + self.assertEqual(len(validation_issues), 4) validation_issues = input_file.validate_file(validator, check_for_warnings=True) self.assertEqual(len(validation_issues), 42) @@ -137,7 +137,7 @@ def test_complex_file_validation_invalid_definitions_removed(self): self.assertEqual(len(validation_issues1), 4) validation_issues = input_file.validate_file(validator) - self.assertEqual(len(validation_issues), 42) + self.assertEqual(len(validation_issues), 21) def test_file_bad_defs_in_spreadsheet(self): schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),