diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index d40c29b9d..a36195435 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -19,17 +19,13 @@ def _register_error_function(error_type, wrapper_func): def hed_error(error_type, default_severity=ErrorSeverity.ERROR, actual_code=None): - """ - Decorator for errors in error handler or inherited classes. - - Parameters - ---------- - error_type : str - This should be a value from error_types, but doesn't strictly have to be. - default_severity: ErrorSeverity - The default severity for the decorated error - actual_code: str - The actual error to report to the outside world + """ Decorator for errors in error handler or inherited classes. + + Args: + error_type (str): A value from error_types or optionally another value. + default_severity (ErrorSeverity): The default severity for the decorated error. + actual_code (str): The actual error to report to the outside world. + """ if actual_code is None: actual_code = error_type @@ -37,21 +33,16 @@ def hed_error(error_type, default_severity=ErrorSeverity.ERROR, actual_code=None def inner_decorator(func): @wraps(func) def wrapper(*args, severity=default_severity, **kwargs): + """ Wrapper function for error handling non-tag errors. + + Args: + args (args): non keyword args. + severity (ErrorSeverity): Will override the default error value if passed. + kwargs (**kwargs): Any keyword args to be passed down to error message function. + + Returns: + list: A list of dict with the errors.= """ - Wrapper function for error handling non-tag errors - - Parameters - ---------- - args: - Any other non keyword args - severity: ErrorSeverity, Optional - Will override the default error value if passed.(If you want to turn a warning into an error) - kwargs: - Any keyword args to be passed down to error message function - Returns - ------- - error_list: [{}] - """ base_message, error_vars = func(*args, **kwargs) error_object = ErrorHandler._create_error_object(actual_code, base_message, severity, **error_vars) return error_object @@ -63,19 +54,14 @@ def wrapper(*args, severity=default_severity, **kwargs): def hed_tag_error(error_type, default_severity=ErrorSeverity.ERROR, has_sub_tag=False, actual_code=None): - """ - Decorator for errors in error handler or inherited classes. - - Parameters - ---------- - error_type : str - This should be a value from error_types, but doesn't strictly have to be. - default_severity: ErrorSeverity - The default severity for the decorated error - has_sub_tag : bool - Determines if this error message also wants a sub_tag passed down. eg "This" in "This/Is/A/Tag" - actual_code: str - The actual error to report to the outside world + """ Decorator for errors in error handler or inherited classes. + + Args: + error_type (str): A value from error_types or optionally another value. + default_severity (ErrorSeverity): The default severity for the decorated error. + has_sub_tag (bool): If true, this error message also wants a sub_tag passed down. eg "This" in "This/Is/A/Tag" + actual_code (str): The actual error to report to the outside world. + """ if actual_code is None: actual_code = error_type @@ -83,28 +69,20 @@ def hed_tag_error(error_type, default_severity=ErrorSeverity.ERROR, has_sub_tag= def inner_decorator(func): if has_sub_tag: @wraps(func) - def wrapper(tag, index_in_tag, index_in_tag_end, *args, severity=default_severity, - **kwargs): - """ - Wrapper function for error handling tag errors with sub tags. 
- - Parameters - ---------- - tag: HedTag - The hed tag object with the problem - index_in_tag: int, - The index into the tag with a problem(usually 0) - index_in_tag_end: int - the last index into the tag with a problem(usually len(tag) - args: - Any other non keyword args - severity: ErrorSeverity, Optional - Will override the default error value if passed.(If you want to turn a warning into an error) - kwargs: - Any keyword args to be passed down to error message function - Returns - ------- - error_list: [{}] + def wrapper(tag, index_in_tag, index_in_tag_end, *args, severity=default_severity, **kwargs): + """ Wrapper function for error handling tag errors with sub tags. + + Args: + tag (HedTag): The hed tag object with the problem, + index_in_tag (int): The index into the tag with a problem(usually 0), + index_in_tag_end (int): The last index into the tag with a problem(usually len(tag), + args (args): Any other non keyword args. + severity (ErrorSeverity): Used to include warnings as well as errors. + kwargs (**kwargs): Any keyword args to be passed down to error message function. + + Returns: + list: A list of dict with the errors. + """ try: tag_as_string = tag.tag @@ -131,22 +109,17 @@ def wrapper(tag, index_in_tag, index_in_tag_end, *args, severity=default_severit else: @wraps(func) def wrapper(tag, *args, severity=default_severity, **kwargs): - """ - Wrapper function for error handling tag errors - - Parameters - ---------- - tag: HedTag or HedGroup - The hed tag object with the problem - args: - Any other non keyword args - severity: ErrorSeverity, Optional - Will override the default error value if passed.(If you want to turn a warning into an error) - kwargs: - Any keyword args to be passed down to error message function - Returns - ------- - error_list: [{}] + """ Wrapper function for error handling tag errors. + + Args: + tag (HedTag or HedGroup): The hed tag object with the problem. + args (non keyword args): Any other non keyword args. + severity (ErrorSeverity): For including warnings. + kwargs (keyword args): Any keyword args to be passed down to error message function. + + Returns: + list: A list of dict with the errors. + """ from hed.models.hed_tag import HedTag from hed.models.hed_group import HedGroup @@ -180,35 +153,36 @@ def __init__(self): self.error_context = [] def push_error_context(self, context_type, context, increment_depth_after=True): - """ - Pushes a new error context to the end of the stack to narrow down error scope. - - Parameters - ---------- - context_type : ErrorContext - This should be a value from ErrorContext representing the type of scope. - context : str or int or HedString - The main value for the context_type. eg for ErrorContext.FILE_NAME this would be the actual filename. - increment_depth_after : bool - If True, this adds an extra tab to any subsequent errors in the scope. - Returns - ------- + """ Push a new error context to narrow down error scope. + + Args: + context_type (ErrorContext): A value from ErrorContext representing the type of scope. + context (str, int, or HedString): The main value for the context_type. + increment_depth_after (bool): If True, add an extra tab to any subsequent errors in the scope. + + Notes: + The context depends on the context_type. For ErrorContext.FILE_NAME this would be the actual filename. + """ self.error_context.append((context_type, context, increment_depth_after)) def pop_error_context(self): - """ - Removes the last scope from the error context. 
+ """ Remove the last scope from the error context. + + Notes: + Modifies the error context of this reporter. - Returns - ------- """ + self.error_context.pop(-1) def reset_error_context(self): - """Reset all error context information to defaults + """ Reset all error context information to defaults. + + Notes: + This function is mainly for testing and should not be needed with proper usage. - This function should not be needed with proper usage.""" + """ self.error_context = [] def get_error_context_copy(self): @@ -224,23 +198,20 @@ def format_error_with_context(self, *args, **kwargs): @staticmethod def format_error(error_type, *args, actual_error=None, **kwargs): - """ - The parameters vary based on what type of error this is. - - Parameters - ---------- - error_type : str - The type of error for this. Registered with @hed_error or @hed_tag_error. - args: args - Any remaining non keyword args. - actual_error: str or None - The code to actually add to report out. Useful for errors that are shared like invalid character. - kwargs : - The other parameters to pass down to the error handling func. - Returns - ------- - error: [{}] - A single error + """ Format an error based on the parameters, which vary based on what type of error this is. + + Args: + error_type (str): The type of error for this. Registered with @hed_error or @hed_tag_error. + args (args): Any remaining non keyword args after those required by the error type. + actual_error (str or None): Code to actually add to report out. + kwargs (dict): The other keyword args to pass down to the error handling func. + + Returns: + list: A list containing a single dictionary representing a single error. + + Notes: + The actual error is useful for errors that are shared like invalid character. + """ error_func = error_functions.get(error_type) if not error_func: @@ -260,16 +231,14 @@ def add_context_to_issues(self, issues): self._update_error_with_char_pos(error_object) def format_error_list(self, issue_params): - """ - Convert an issue params list to an issues list. This means adding the error context primarily. - - Parameters - ---------- - issue_params : [{}] - The unformatted issues list - Returns - ------- - issues_list: [{}] + """ Convert an issue params list to an issues list. This means adding the error context primarily. + + Args: + issue_params (list): A list of dict containing the unformatted issues list. + + Returns: + list: A list of dict containing unformatted errors. + """ formatted_issues = [] for issue in issue_params: @@ -278,25 +247,22 @@ def format_error_list(self, issue_params): @staticmethod def format_error_from_context(error_type, error_context, *args, actual_error=None, **kwargs): - """ - The parameters vary based on what type of error this is. - - Parameters - ---------- - error_type : str - The type of error for this. Registered with @hed_error or @hed_tag_error. - error_context: [] - A list containing the error context to use for this error. Generally returned from _add_context_to_errors - args: args - Any remaining non keyword args. - actual_error: str or None - The code to actually add to report out. Useful for errors that are shared like invalid character. - kwargs : - The other parameters to pass down to the error handling func. - Returns - ------- - error: [{}] - A single error + """ Format an error based on the error type. + + Args: + error_type (str): The type of error. Registered with @hed_error or @hed_tag_error. + error_context (list): Contains the error context to use for this error. 
+ args (args): Any remaining non keyword args. + actual_error (str or None): Error code to actually add to report out. + kwargs (kwargs): Keyword parameters to pass down to the error handling func. + + Returns: + list: A list containing a single dictionary + + Notes: + - Generally the error_context is returned from _add_context_to_errors. + - The actual_error is useful for errors that are shared like invalid character. + """ error_func = error_functions.get(error_type) if not error_func: @@ -315,19 +281,15 @@ def format_error_from_context(error_type, error_context, *args, actual_error=Non @staticmethod def _add_context_to_errors(error_object, error_context_to_add): - """ - Takes an error object and adds relevant context around it, such as row number, or column name. - - Parameters - ---------- - error_object : {} - Generated error containing at least a code and message entry. - error_context_to_add: [] - Source context to use. If none, gets it from the error handler directly at this time. - Returns - ------- - error_object_list: [{}] - The passed in error with any needed context strings added to the start. + """ Add relevant context such as row number or column name around an error object. + + Args: + error_object (dict): Generated error containing at least a code and message entry. + error_context_to_add (list): Source context to use. If none, the error handler context is used. + + Returns: + list: A list of dict with needed context strings added at the beginning of the list. + """ if error_object is None: error_object = {} @@ -394,53 +356,44 @@ def _update_error_with_char_pos(error_object): @hed_error("Unknown") def val_error_unknown(*args, **kwargs): - """ - Default error handler if no error of this type was registered. + """ Default error handler if no error of this type was registered. + + Args: + args (args): List of non-keyword parameters (varies). + kwargs (kwargs): Keyword parameters (varies) - Parameters - ---------- - args : varies - kwargs : varies + Returns: + str: The error message. + dict: The extra args. - Returns - ------- - error_message, extra_error_args: str, dict """ return f"Unknown error. Args: {str(args)}", kwargs @staticmethod def filter_issues_by_severity(issues_list, severity): - """ - Gathers all issues matching or below a given severity. - - Parameters - ---------- - issues_list : [{}] - The full issue list - severity : int - The level of issue you're interested in - - Returns - ------- - filtered_issues_list: [{}] - The list with all other severities removed. + """ Gather all issues matching or below a given severity. + + Args: + issues_list (list): A list of dictionaries containing the full issue list. + severity (int): The level of issues to keep. + + Returns: + list: A list of dictionaries containing the issue list after filtering by severity. + """ return [issue for issue in issues_list if issue['severity'] <= severity] def get_exception_issue_string(issues, title=None): - """Return a string with issues list flatted into single string, one per line - - Parameters - ---------- - issues: [] - Issues to print - title: str - Optional title that will always show up first if present(even if there are no validation issues) - Returns - ------- - str - A str containing printable version of the issues or ''. + """ Return a string with issues list flatted into single string, one issue per line. + + Args: + issues (list) A list of strings containing issues to print. + title (str or None): An optional title that will always show up first if present. 
+ + Returns: + str: A str containing printable version of the issues or ''. + """ issue_str = '' @@ -458,22 +411,16 @@ def get_exception_issue_string(issues, title=None): def get_printable_issue_string(issues, title=None, severity=None, skip_filename=True): - """Return a string with issues list flatted into single string, one per line - - Parameters - ---------- - issues: [] - Issues to print - title: str - Optional title that will always show up first if present(even if there are no validation issues) - severity: int - Return only warnings >= severity - skip_filename: bool - If true, don't add the filename context to the printable string. - Returns - ------- - str - A str containing printable version of the issues or ''. + """ Return a string with issues list flatted into single string, one per line. + + Args: + issues (list) Issues to print. + title (str) Optional title that will always show up first if present(even if there are no validation issues). + severity (int) Return only warnings >= severity. + skip_filename (bool) If true, don't add the filename context to the printable string. + + Returns: + str: A string containing printable version of the issues or ''. """ last_used_error_context = [] @@ -509,18 +456,15 @@ def check_for_any_errors(issues_list): def _get_context_from_issue(val_issue, skip_filename=True): - """ - Extract all the context values from the given issue - Parameters - ---------- - val_issue : {} - A dictionary a representing a single error - skip_filename: bool - If true, don't gather the filename context. - Returns - ------- - context_list: [] - A list of tuples containing the context_type and context for the given issue + """ Extract all the context values from the given issue. + + Args: + val_issue (dict): A dictionary a representing a single error. + skip_filename (bool): If true, don't gather the filename context. + + Returns: + list: A list of tuples containing the context_type and context for the given issue. + """ single_issue_context = [] for key in val_issue: @@ -533,22 +477,16 @@ def _get_context_from_issue(val_issue, skip_filename=True): def _format_single_context_string(context_type, context, tab_count=0): - """ - Takes a single context tuple and returns the human readable form. - - Parameters - ---------- - context_type : str - The context type of this entry - context : str or HedString - The value of this context - tab_count : int - Number of tabs to name_prefix each line with. - - Returns - ------- - context_string: str - A string containing the context, including tabs. + """ Return the human readable form of a single context tuple. + + Args: + context_type (str): The context type of this entry. + context (str or HedString): The value of this context + tab_count (int): Number of tabs to name_prefix each line with. + + Returns: + str: A string containing the context, including tabs. + """ tab_string = tab_count * '\t' if context_type == ErrorContext.HED_STRING: @@ -571,23 +509,20 @@ def _format_single_context_string(context_type, context, tab_count=0): def _get_context_string(single_issue_context, last_used_context): - """ - Converts a single context list into the final human readable output form. - - Parameters - ---------- - single_issue_context : [()] - A list of tuples containing the context(context_type, context, increment_tab) - last_used_context : [()] - A list of tuples containing the last drawn context, so it can only add the parts that have changed. - This is always the same format as single_issue_context. 
- - Returns - ------- - context_string: str - The full string of context(potentially multiline) to add before the error - tab_string: str - The name_prefix to add to any message line with this context. + """ Convert a single context list into the final human readable output form. + + Args: + single_issue_context (list): A list of tuples containing the context(context_type, context, increment_tab) + last_used_context (list): A list of tuples containing the last drawn context. + + Returns: + str: The full string of context(potentially multiline) to add before the error. + str: The tab string to add to the front of any message line with this context. + + Notes: + The last used context is always the same format as single_issue_context and used + so that the error handling can only add the parts that have changed. + """ context_string = "" tab_count = 0 diff --git a/hed/tools/__init__.py b/hed/tools/__init__.py index 1c5543a05..c82a9422a 100644 --- a/hed/tools/__init__.py +++ b/hed/tools/__init__.py @@ -4,10 +4,11 @@ from .analysis.hed_type_variable import HedTypeVariable from .analysis.hed_type_factors import HedTypeFactors from .analysis.hed_variable_summary import HedVariableCounts, HedVariableSummary -from .analysis.definition_manager import DefinitionManager +from .analysis.hed_definition_manager import HedDefinitionManager from .analysis.file_dictionary import FileDictionary from .analysis.key_map import KeyMap from .analysis.hed_context_manager import OnsetGroup, HedContextManager +from .analysis.tabular_summary import TabularSummary from .analysis.tag_summary import TagSummary from .analysis.annotation_util import \ check_df_columns, extract_tags, generate_sidecar_entry, hed_to_df, df_to_hed, merge_hed_dict @@ -21,9 +22,7 @@ from .bids.bids_sidecar_file import BidsSidecarFile from .bids.bids_tabular_dictionary import BidsTabularDictionary from .bids.bids_tabular_file import BidsTabularFile -from .bids.bids_tabular_summary import BidsTabularSummary -from .analysis.tabular_reports import report_diffs from .util.hed_logger import HedLogger from .util.data_util import get_new_dataframe, get_value_dict, replace_values, reorder_columns from .util.io_util import check_filename, generate_filename, extract_suffix_path, get_file_list, make_path diff --git a/hed/tools/analysis/file_dictionary.py b/hed/tools/analysis/file_dictionary.py index ef0a0da8e..1df1b714f 100644 --- a/hed/tools/analysis/file_dictionary.py +++ b/hed/tools/analysis/file_dictionary.py @@ -32,7 +32,7 @@ def __init__(self, collection_name, file_list, key_indices=(0, 2), separator='_' """ self.collection_name = collection_name - self.file_dict = {} + self._file_dict = {} self.create_file_dict(file_list, key_indices, separator) @property @@ -43,12 +43,17 @@ def name(self): @property def key_list(self): """ Keys in this dictionary. """ - return list(self.file_dict.keys()) + return list(self._file_dict.keys()) + + @property + def file_dict(self): + """ Dictionary of path values in this dictionary. """ + return self._file_dict @property def file_list(self): """ List of path values in this dictionary. """ - return list(self.file_dict.values()) + return list(self._file_dict.values()) def create_file_dict(self, file_list, key_indices, separator): """ Create new dict based on key indices. 
@@ -60,7 +65,7 @@ def create_file_dict(self, file_list, key_indices, separator): """ if key_indices: - self.file_dict = self.make_file_dict(file_list, key_indices=key_indices, separator=separator) + self._file_dict = self.make_file_dict(file_list, key_indices=key_indices, separator=separator) def get_file_path(self, key): """ Return file path corresponding to key. @@ -72,7 +77,7 @@ def get_file_path(self, key): str: File path. """ - return self.file_dict.get(key, None) + return self._file_dict.get(key, None) def iter_files(self): """ Iterator over the files in this dictionary. @@ -82,7 +87,7 @@ def iter_files(self): - file: File path. """ - for key, file in self.file_dict.items(): + for key, file in self._file_dict.items(): yield key, file def key_diffs(self, other_dict): @@ -95,7 +100,7 @@ def key_diffs(self, other_dict): list: The symmetric difference of the keys in this dictionary and the other one. """ - diffs = set(self.file_dict.keys()).symmetric_difference(set(other_dict.file_dict.keys())) + diffs = set(self._file_dict.keys()).symmetric_difference(set(other_dict._file_dict.keys())) return list(diffs) def output_files(self, title=None, logger=None): @@ -115,7 +120,7 @@ def output_files(self, title=None, logger=None): output_list = [] if title: output_list.append(f"{title} ({len(self.key_list)} files)") - for key, value in self.file_dict.items(): + for key, value in self._file_dict.items(): basename = os.path.basename(self.get_file_path(key)) output_list.append(f"{key}: {basename}") if logger: diff --git a/hed/tools/analysis/definition_manager.py b/hed/tools/analysis/hed_definition_manager.py similarity index 88% rename from hed/tools/analysis/definition_manager.py rename to hed/tools/analysis/hed_definition_manager.py index cec76c28f..6f12f0640 100644 --- a/hed/tools/analysis/definition_manager.py +++ b/hed/tools/analysis/hed_definition_manager.py @@ -1,15 +1,15 @@ from hed.schema import load_schema_version -from hed.models import HedString, HedTag, DefinitionEntry +from hed.models import HedString, HedTag, DefinitionEntry, DefMapper from hed.tools.analysis.hed_context_manager import HedContextManager -class DefinitionManager: +class HedDefinitionManager: def __init__(self, definitions, hed_schema, variable_type='condition-variable'): """ Create a definition manager for a type of variable. Args: - definitions (dict): A dictionary of DefinitionEntry objects. + definitions (dict or DefMapper): A dictionary of DefinitionEntry objects. hed_schema (Hedschema or HedSchemaGroup): The schema used for parsing. variable_type (str): Lower-case string giving the type of HED variable. @@ -17,7 +17,12 @@ def __init__(self, definitions, hed_schema, variable_type='condition-variable'): self.variable_type = variable_type.lower() self.hed_schema = hed_schema - self.definitions = definitions + if isinstance(definitions, DefMapper): + self.definitions = definitions.gathered_defs + elif isinstance(definitions, dict): + self.definitions = definitions + else: + self.definitions = {} self.variable_map = {} # maps def names to conditions. 
self._extract_variable_map() @@ -87,7 +92,7 @@ def get_def_names(item, no_value=True): names = [tag.extension_or_value_portion.lower() for tag in item.get_all_tags() if 'def' in tag.tag_terms] if no_value: for index, name in enumerate(names): - name, name_value = DefinitionManager.split_name(name) + name, name_value = HedDefinitionManager.split_name(name) names[index] = name return names @@ -130,7 +135,7 @@ def remove_defs(hed_strings): for i in range(len(hed_strings)): def_groups[i] = [] for i, hed in enumerate(hed_strings): - def_groups[i] = DefinitionManager.extract_defs(hed) + def_groups[i] = HedDefinitionManager.extract_defs(hed) return def_groups @staticmethod @@ -169,7 +174,7 @@ def extract_defs(hed): 'cond2': DefinitionEntry('Cond2', def2, False, None), 'cond3': DefinitionEntry('Cond3', def3, True, None), 'cond4': DefinitionEntry('Cond4', def4, False, None)} - def_man = DefinitionManager(definitions, schema) + def_man = HedDefinitionManager(definitions, schema) a = def_man.get_def_names(HedTag('Def/Cond3/4', hed_schema=schema)) b = def_man.get_def_names(HedString('(Def/Cond3/5,(Red, Blue))', hed_schema=schema)) c = def_man.get_def_names(HedString('(Def/Cond3/6,(Red, Blue, Def/Cond1), Def/Cond2)', hed_schema=schema)) diff --git a/hed/tools/analysis/hed_filters.py b/hed/tools/analysis/hed_filters.py index 726e0fdfc..d7ee64fe0 100644 --- a/hed/tools/analysis/hed_filters.py +++ b/hed/tools/analysis/hed_filters.py @@ -1,8 +1,6 @@ -from hed.models import DefMapper -from hed import HedTag -from hed.models.definition_dict import DefTagNames from hed.errors import get_printable_issue_string + class StringOp: def __init__(self, filter_name): self.filter_name = filter_name diff --git a/hed/tools/analysis/query_manager.py b/hed/tools/analysis/hed_query_manager.py similarity index 81% rename from hed/tools/analysis/query_manager.py rename to hed/tools/analysis/hed_query_manager.py index 52ef2208c..d8f66faa0 100644 --- a/hed/tools/analysis/query_manager.py +++ b/hed/tools/analysis/hed_query_manager.py @@ -3,7 +3,7 @@ import pandas as pd -class Query: +class HedQuery: def __init__(self, query): self.name = query['name'] self.query_type = query['query_type'] @@ -14,7 +14,7 @@ def evaluate(self, hed_string_obj): return self.expression.search_hed_string(hed_string_obj) -class QueryManager: +class HedQueryManager: def __init__(self, query_list): self.query_list = query_list @@ -34,14 +34,14 @@ def parse(self, hed_string_obj): if __name__ == '__main__': - qlist = [Query({'name': 'cond_1', 'query_type': 'condition', 'query_str': 'Condition-variable'}), - Query({'name': 'tag_1', 'query_type': 'get_tag', 'query_str': 'Sensory-presentation'})] + qlist = [HedQuery({'name': 'cond_1', 'query_type': 'condition', 'query_str': 'Condition-variable'}), + HedQuery({'name': 'tag_1', 'query_type': 'get_tag', 'query_str': 'Sensory-presentation'})] schema = load_schema_version(xml_version="8.0.0") test_strings = [HedString('Condition-variable/Test-cond', hed_schema=schema), HedString('Visual-presentation', hed_schema=schema), HedString('Agent-action, (Move, Hand)', hed_schema=schema)] - q_parser = QueryManager(qlist) + q_parser = HedQueryManager(qlist) col_names = q_parser.get_column_names() print(f"Column names:{str(col_names)}") @@ -50,4 +50,3 @@ def parse(self, hed_string_obj): result[index] = q_parser.parse(obj) df = pd.DataFrame(result, columns=col_names) - print("toHere") diff --git a/hed/tools/analysis/hed_type_factors.py b/hed/tools/analysis/hed_type_factors.py index 604575d83..eb7680b43 100644 --- 
a/hed/tools/analysis/hed_type_factors.py +++ b/hed/tools/analysis/hed_type_factors.py @@ -3,38 +3,38 @@ class HedTypeFactors: - """ Holds index of positions for type_variables, defined and otherwise. """ + """ Holds index of positions for a variable type for one tabular file. """ ALLOWED_ENCODINGS = ("categorical", "one-hot") - def __init__(self, name, number_elements, variable_type="condition-variable"): + def __init__(self, variable_value, number_elements, variable_type="condition-variable"): """ Constructor for HedTypeFactors. Args: - name (str): Name of the variable summarized by this class. + variable_value (str): The value of the type summarized by this class. number_elements (int): Number of elements in the data column variable_type (str): Lowercase string corresponding to a HED tag which has a takes value child. """ - self.variable_name = name + self.variable_value = variable_value self.number_elements = number_elements self.variable_type = variable_type.lower() self.levels = {} self.direct_indices = {} def __str__(self): - return f"{self.variable_name}[{self.variable_type}]: {self.number_elements} elements " + \ + return f"{self.variable_value}[{self.variable_type}]: {self.number_elements} elements " + \ f"{str(self.levels)} levels {len(self.direct_indices)} references" def get_factors(self, factor_encoding="one-hot"): - df = pd.DataFrame(0, index=range(self.number_elements), columns=[self.variable_name]) - df.loc[list(self.direct_indices.keys()), [self.variable_name]] = 1 + df = pd.DataFrame(0, index=range(self.number_elements), columns=[self.variable_value]) + df.loc[list(self.direct_indices.keys()), [self.variable_value]] = 1 if not self.levels: return df levels = list(self.levels.keys()) - levels_list = [f"{self.variable_name}.{level}" for level in levels] + levels_list = [f"{self.variable_value}.{level}" for level in levels] df_levels = pd.DataFrame(0, index=range(self.number_elements), columns=levels_list) for index, level in enumerate(levels): index_keys = list(self.levels[level].keys()) @@ -45,22 +45,22 @@ def get_factors(self, factor_encoding="one-hot"): sum_factors = factors.sum(axis=1) if sum_factors.max() > 1: raise HedFileError("MultipleFactorSameEvent", - f"{self.variable_name} has multiple occurrences at index{sum_factors.idxmax()}", "") + f"{self.variable_value} has multiple occurrences at index{sum_factors.idxmax()}", "") if factor_encoding == "categorical": return self.factors_to_vector(factors, levels) else: raise ValueError("BadFactorEncoding", - f"{factor_encoding} is not in the allowed encodings: {str(self.ALLOWED_ENDCODINGS)}") + f"{factor_encoding} is not in the allowed encodings: {str(self.ALLOWED_ENCODINGS)}") def factors_to_vector(self, factors, levels): - df = pd.DataFrame('n/a', index=range(len(factors.index)), columns=[self.variable_name]) + df = pd.DataFrame('n/a', index=range(len(factors.index)), columns=[self.variable_value]) for index, row in factors.iterrows(): - if row[self.variable_name]: - df.at[index, self.variable_name] = self.variable_name + if row[self.variable_value]: + df.at[index, self.variable_value] = self.variable_value continue for level in levels: - if row[f"{self.variable_name}.{level}"]: - df.at[index, self.variable_name] = level + if row[f"{self.variable_value}.{level}"]: + df.at[index, self.variable_value] = level break return df @@ -72,7 +72,7 @@ def get_summary(self, full=True): for index, item in cond.items(): count_list[index] = count_list[index] + 1 number_events, number_multiple, max_multiple = 
self.count_events(count_list) - summary = {'name': self.variable_name, 'variable_type': self.variable_type, 'levels': len(self.levels.keys()), + summary = {'name': self.variable_value, 'variable_type': self.variable_type, 'levels': len(self.levels.keys()), 'direct_references': len(self.direct_indices.keys()), 'total_events': self.number_elements, 'number_type_events': number_events, 'number_multiple_events': number_multiple, 'multiple_event_maximum': max_multiple} diff --git a/hed/tools/analysis/hed_type_variable.py b/hed/tools/analysis/hed_type_variable.py index 8eedf077c..2d50762e8 100644 --- a/hed/tools/analysis/hed_type_variable.py +++ b/hed/tools/analysis/hed_type_variable.py @@ -4,18 +4,18 @@ from hed import HedTag from hed.models import HedGroup from hed.schema import load_schema_version -from hed.tools.analysis.definition_manager import DefinitionManager +from hed.tools.analysis.hed_definition_manager import HedDefinitionManager from hed.tools.analysis.hed_context_manager import HedContextManager from hed.tools.analysis.hed_type_factors import HedTypeFactors class HedTypeVariable: - def __init__(self, onset_manager, hed_schema, hed_definitions, variable_type="condition-variable"): - """ Create a variable manager for an events file. + def __init__(self, context_manager, hed_schema, hed_definitions, variable_type="condition-variable"): + """ Create a variable manager for one type-variable for one tabular file. Args: - onset_manager (HedContextManager): A list of HED strings. + context_manager (HedContextManager): A list of HED strings. hed_schema (HedSchema or HedSchemaGroup): The HED schema to use for processing. hed_definitions (dict): A dictionary of DefinitionEntry objects. variable_type (str): Lowercase short form of the variable to be managed. 
@@ -25,9 +25,9 @@ def __init__(self, onset_manager, hed_schema, hed_definitions, variable_type="co """ self.variable_type = variable_type.lower() - self.definitions = DefinitionManager(hed_definitions, hed_schema, variable_type=variable_type) - hed_strings = onset_manager.hed_strings - hed_contexts = onset_manager.contexts + self.definitions = HedDefinitionManager(hed_definitions, hed_schema, variable_type=variable_type) + hed_strings = context_manager.hed_strings + hed_contexts = context_manager.contexts self.number_events = len(hed_strings) self._variable_map = {} self._extract_variables(hed_strings, hed_contexts) @@ -38,7 +38,7 @@ def get_variable(self, var_name): @property def type_variables(self): - return list(self._variable_map.keys()) + return set(self._variable_map.keys()) def get_variable_def_names(self): tag_list = [] @@ -52,7 +52,6 @@ def get_variable_type_map(self, type_name): def get_variable_names(self): return list(self._variable_map.keys()) - def summarize(self, as_json=False): summary = self._variable_map.copy() for var_name, var_sum in summary.items(): @@ -229,4 +228,3 @@ def _update_variables(self, tag_list, index): df_no_hot.to_csv("D:/wh_conditions_no_hot.csv", sep='\t', index=False) with open('d:/wh_summarylong.json', 'w') as f: json.dump(summary, f, indent=4) - print("to here") \ No newline at end of file diff --git a/hed/tools/analysis/hed_variable_manager.py b/hed/tools/analysis/hed_variable_manager.py index 48d1ff0fa..10ff9298a 100644 --- a/hed/tools/analysis/hed_variable_manager.py +++ b/hed/tools/analysis/hed_variable_manager.py @@ -1,27 +1,26 @@ import pandas as pd import json from hed.schema import load_schema_version -from hed.models import DefMapper from hed.tools.analysis.hed_type_variable import HedTypeVariable from hed.tools.analysis.hed_context_manager import HedContextManager class HedVariableManager: - def __init__(self, hed_strings, hed_schema, def_mapper): - """ Create a variable manager for an events file. + def __init__(self, hed_strings, hed_schema, definitions): + """ Create a variable manager for one tabular file for all type variables. Args: hed_strings (list): A list of HED strings. hed_schema (HedSchema or HedSchemaGroup): The HED schema to use for processing. - def_mapper (DefMapper): A dictionary of DefinitionEntry objects. + definitions (dict): A dictionary of DefinitionEntry objects. Raises: HedFileError: On errors such as unmatched onsets or missing definitions. 
""" self.hed_schema = hed_schema - self.def_mapper = def_mapper + self.definitions = definitions self.context_manager = HedContextManager(hed_strings, hed_schema) self._variable_type_map = {} # a map of type variable into HedTypeVariable objects @@ -33,23 +32,23 @@ def add_type_variable(self, type_name): if type_name.lower() in self._variable_type_map: return self._variable_type_map[type_name.lower()] = HedTypeVariable(self.context_manager, self.hed_schema, - self.def_mapper.gathered_defs, + self.definitions, variable_type=type_name) def get_factor_vectors(self, type_name, type_variables=None, factor_encoding="one-hot"): - this_map = self.get_type_variable_map(type_name) - if this_map is None: + this_var = self.get_type_variable(type_name) + if this_var is None: return None - variables = this_map.get_variable_names() + variables = this_var.get_variable_names() if variables is None: variables = type_variables df_list = [0]*len(variables) for index, variable in enumerate(variables): - var_sum = this_map._variable_map[variable] + var_sum = this_var._variable_map[variable] df_list[index] = var_sum.get_factors(factor_encoding=factor_encoding) return pd.concat(df_list, axis=1) - def get_type_variable_map(self, type_name): + def get_type_variable(self, type_name): return self._variable_type_map.get(type_name.lower(), None) def get_type_variable_factor(self, var_type, var_name): @@ -77,102 +76,12 @@ def summarize_all(self, as_json=False): def __str__(self): return f"Type_variables: {str(list(self._variable_type_map.keys()))}" - # def _extract_definition_variables(self, item, index): - # """ Extract the definition uses from a HedTag, HedGroup, or HedString. - # - # Args: - # item (HedTag, HedGroup, or HedString): The item to extract variable information from. - # index (int): Position of this item in the object's hed_strings. - # - # Notes: - # This updates the HedTypeFactors information. - # - # """ - # - # if isinstance(item, HedTag): - # tags = [item] - # else: - # tags = item.get_all_tags() - # for tag in tags: - # if tag.short_base_tag.lower() != "def": - # continue - # hed_vars = self.definitions.get_vars(tag) - # if not hed_vars: - # continue - # self._update_definition_variables(tag, hed_vars, index) - # - # def _update_definition_variables(self, tag, hed_vars, index): - # """Update the HedTypeFactors map with information from Def tag. - # - # Args: - # tag (HedTag): A HedTag that is a Def tag. - # hed_vars (list): A list of names of the hed type_variables - # index (ind): The event number associated with this. - # - # Notes: - # This modifies the HedTypeFactors map. - # - # """ - # level = tag.extension_or_value_portion.lower() - # for var_name in hed_vars: - # hed_var = self._variable_map.get(var_name, None) - # if hed_var is None: - # hed_var = HedTypeFactors(var_name, len(self.hed_strings)) - # self._variable_map[var_name] = hed_var - # var_levels = hed_var.levels.get(level, {index: 0}) - # var_levels[index] = 0 - # hed_var.levels[level] = var_levels - # - # def _extract_variables(self): - # """ Extract all condition type_variables from hed_strings and event_contexts. 
""" - # for index, hed in enumerate(self.hed_strings): - # self._extract_direct_variables(hed, index) - # self._extract_definition_variables(hed, index) - # for item in self._contexts[index]: - # self._extract_direct_variables(item, index) - # self._extract_definition_variables(item, index) - # - # def _extract_direct_variables(self, item, index): - # """ Extract the condition type_variables from a HedTag, HedGroup, or HedString. - # - # Args: - # item (HedTag or HedGroup): The item from which to extract condition type_variables. - # index (int): Position in the array. - # - # """ - # if isinstance(item, HedTag) and item.short_base_tag.lower() == self.variable_type: - # tag_list = [item] - # elif isinstance(item, HedGroup) and item.children: - # tag_list = item.find_tags_with_term(self.variable_type, recursive=True, include_groups=0) - # else: - # tag_list = [] - # self._update_variables(tag_list, index) - # - # def _update_variables(self, tag_list, index): - # """ Update the HedTypeFactors based on tags in the list. - # - # Args: - # tag_list (list): A list of Condition-variable HedTags. - # index (int): An integer representing the position in an array - # - # """ - # for tag in tag_list: - # name = tag.extension_or_value_portion.lower() - # if not name: - # name = self.variable_type - # hed_var = self._variable_map.get(name, None) - # if hed_var is None: - # hed_var = HedTypeFactors(name, len(self.hed_strings)) - # self._variable_map[name] = hed_var - # hed_var.direct_indices[index] = '' - if __name__ == '__main__': import os - from hed import Sidecar, TabularInput, HedString - from hed.models import DefinitionEntry + from hed import Sidecar, TabularInput from hed.tools.analysis.analysis_util import get_assembled_strings - hed_schema = load_schema_version(xml_version="8.1.0") + schema = load_schema_version(xml_version="8.1.0") bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../tests/data/bids/eeg_ds003654s_hed')) @@ -181,156 +90,10 @@ def __str__(self): sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") - hed_strings = get_assembled_strings(input_data, hed_schema=hed_schema, expand_defs=False) - def_mapper = input_data.get_definitions() - var_manager = HedVariableManager(hed_strings, hed_schema, def_mapper) + assembled_strings = get_assembled_strings(input_data, hed_schema=schema, expand_defs=False) + definitions = input_data.get_definitions() + var_manager = HedVariableManager(assembled_strings, schema, definitions) var_manager.add_type_variable("condition-variable") - var_type = var_manager.get_type_variable("condition-variable") - summary = var_type.summarize_variables() - with open('d:/wh_summary.json', 'w') as f: - json.dump(summary, f, indent=4) - df = var_type.get_variable_factors() - df.to_csv("D:/wh_conditionslong.csv", sep='\t', index=False) - df_no_hot = var_type.get_variable_factors(factor_encoding="categorical") - df_no_hot.to_csv("D:/wh_conditions_no_hot.csv", sep='\t', index=False) - with open('d:/wh_summarylong.json', 'w') as f: - json.dump(summary, f, indent=4) + var_cond = var_manager.get_type_variable("condition-variable") + var_summary = var_cond.summarize() summary_total = var_manager.summarize_all() - print("to here") - # - # df = var_manager.get_variable_factors(factor_encoding="categorical") - # 
df.to_csv("D:/wh_conditions_direct.csv", sep='\t', index=False) - - # df = pd.read_csv(events_path, sep='\t') - # df = df.replace('n/a', np.NaN) - # input_data = TabularInput(df, hed_schema=hed_schema, sidecar=sidecar_path) - # hed_strings = get_assembled_strings(input_data, hed_schema=hed_schema, expand_defs=False) - # definitions = input_data.get_definitions(as_strings=False) - # var_manager = HedVariableManager(hed_strings, hed_schema, definitions) - # df = var_manager.get_variable_factors() - # summary = var_manager.summarize() - # print("to here") - # - # df_factors = var_manager.get_variable_factors(factor_encoding="categorical") - # print("to there") - # print(conditions) - # test_var = conditions.get_variable('var2') - # s = test_var.get_summary() - # print("s") - # test_sum = test_var.get_summary() - # print(f"{test_sum}") - # test_lumber = conditions.get_variable('lumber') - # test_sum_lumber = test_lumber.get_summary() - # - # lumber_factor = test_lumber.get_factors() - # print(f"lumber_factor: {lumber_factor.to_string()}") - # - # test_fast = conditions.get_variable('fast') - # fast_factor = test_fast.get_factors() - # print(f"fast_factor: {fast_factor.to_string()}") - # test_strings1 = [HedString(f"Sensory-event,(Def/Cond1,(Red, Blue, Condition-variable/Trouble),Onset)," - # f"(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", hed_schema=schema), - # HedString('(Def/Cond1, Offset)', hed_schema=schema), - # HedString('White, Black, Condition-variable/Wonder, Condition-variable/Fast', hed_schema=schema), - # HedString('', hed_schema=schema), - # HedString('(Def/Cond2, Onset)', hed_schema=schema), - # HedString('(Def/Cond3/4.3, Onset)', hed_schema=schema), - # HedString('Arm, Leg, Condition-variable/Fast, Def/Cond6/7.2', hed_schema=schema)] - # - # test_strings2 = [HedString(f"Def/Cond2, Def/Cond6/4, Def/Cond6/7.8, Def/Cond6/Alpha", hed_schema=schema), - # HedString("Yellow", hed_schema=schema), - # HedString("Def/Cond2", hed_schema=schema), - # HedString("Def/Cond2, Def/Cond6/5.2", hed_schema=schema)] - # test_strings3 = [HedString(f"Def/Cond2, (Def/Cond6/4, Onset), (Def/Cond6/7.8, Onset), Def/Cond6/Alpha", - # hed_schema=schema), - # HedString("Yellow", hed_schema=schema), - # HedString("Def/Cond2, (Def/Cond6/4, Onset)", hed_schema=schema), - # HedString("Def/Cond2, Def/Cond6/5.2 (Def/Cond6/7.8, Offset)", hed_schema=schema), - # HedString("Def/Cond2, Def/Cond6/4", hed_schema=schema)] - # def1 = HedString('(Condition-variable/Var1, Circle, Square)', hed_schema=schema) - # def2 = HedString('(condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere)', hed_schema=schema) - # def3 = HedString('(Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross)', - # hed_schema=schema) - # def4 = HedString('(Condition-variable, Apple, Banana)', hed_schema=schema) - # def5 = HedString('(Condition-variable/Lumber, Apple, Banana)', hed_schema=schema) - # def6 = HedString('(Condition-variable/Lumber, Label/#, Apple, Banana)', hed_schema=schema) - # defs = {'Cond1': DefinitionEntry('Cond1', def1, False, None), - # 'Cond2': DefinitionEntry('Cond2', def2, False, None), - # 'Cond3': DefinitionEntry('Cond3', def3, True, None), - # 'Cond4': DefinitionEntry('Cond4', def4, False, None), - # 'Cond5': DefinitionEntry('Cond5', def5, False, None), - # 'Cond6': DefinitionEntry('Cond6', def6, True, None) - # } - # - # conditions = HedVariableManager(test_strings1, schema, defs) - # print("to here") - # for man_var in conditions.type_variables: - # var_sum = 
conditions.get_variable(man_var) - # s = var_sum.get_summary() - # print(json.dumps(s)) - # s = var_sum.get_summary(full=False) - # print(json.dumps(s)) - - # bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - # '../../../tests/data/bids/eeg_ds003654s_hed')) - # events_path = os.path.realpath(os.path.join(bids_root_path, - # 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - # sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - # sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') - # input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") - # hed_strings = get_assembled_strings(input_data, hed_schema=hed_schema, expand_defs=False) - # definitions = input_data.get_definitions(as_strings=False) - # var_manager = HedVariableManager(hed_strings, hed_schema, definitions) - # - # for man_var in var_manager.type_variables: - # var_sum = var_manager.get_variable(man_var) - # factors = var_sum.get_factors(factor_encoding="categorical") - # s = var_sum.get_summary() - # print(json.dumps(s)) - # # s = var_sum.get_summary(full=False) - # # print(json.dumps(s)) - # list1 = conditions1.get_variable_tags() - # print(f"List1: {str(list1)}") - # list2 = conditions2.get_variable_tags() - # print(f"List2: {str(list2)}") - # list3 = conditions2.get_variable_tags() - # print(f"List3: {str(list3)}") - - # test_strings1 = [HedString(f"Sensory-event,(Def/Cond1,(Red, Blue, Condition-variable/Trouble),Onset)," - # f"(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", hed_schema=hed_schema), - # HedString('(Def/Cond1, Offset)', hed_schema=hed_schema), - # HedString('White, Black, Condition-variable/Wonder, Condition-variable/Fast', - # hed_schema=hed_schema), - # HedString('', hed_schema=hed_schema), - # HedString('(Def/Cond2, Onset)', hed_schema=hed_schema), - # HedString('(Def/Cond3/4.3, Onset)', hed_schema=hed_schema), - # HedString('Arm, Leg, Condition-variable/Fast, Def/Cond6/7.2', hed_schema=hed_schema)] - # - # test_strings2 = [HedString(f"Def/Cond2, Def/Cond6/4, Def/Cond6/7.8, Def/Cond6/Alpha", hed_schema=hed_schema), - # HedString("Yellow", hed_schema=hed_schema), - # HedString("Def/Cond2", hed_schema=hed_schema), - # HedString("Def/Cond2, Def/Cond6/5.2", hed_schema=hed_schema)] - # test_strings3 = [HedString(f"Def/Cond2, (Def/Cond6/4, Onset), (Def/Cond6/7.8, Onset), Def/Cond6/Alpha", - # hed_schema=hed_schema), - # HedString("Yellow", hed_schema=hed_schema), - # HedString("Def/Cond2, (Def/Cond6/4, Onset)", hed_schema=hed_schema), - # HedString("Def/Cond2, Def/Cond6/5.2 (Def/Cond6/7.8, Offset)", hed_schema=hed_schema), - # HedString("Def/Cond2, Def/Cond6/4", hed_schema=hed_schema)] - # def1 = HedString('(Condition-variable/Var1, Circle, Square)', hed_schema=hed_schema) - # def2 = HedString('(condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere)', hed_schema=hed_schema) - # def3 = HedString('(Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross)', - # hed_schema=hed_schema) - # def4 = HedString('(Condition-variable, Apple, Banana)', hed_schema=hed_schema) - # def5 = HedString('(Condition-variable/Lumber, Apple, Banana)', hed_schema=hed_schema) - # def6 = HedString('(Condition-variable/Lumber, Label/#, Apple, Banana)', hed_schema=hed_schema) - # defs = {'Cond1': DefinitionEntry('Cond1', def1, False, None), - # 'Cond2': DefinitionEntry('Cond2', def2, False, None), - # 'Cond3': DefinitionEntry('Cond3', def3, True, None), - # 
'Cond4': DefinitionEntry('Cond4', def4, False, None), - # 'Cond5': DefinitionEntry('Cond5', def5, False, None), - # 'Cond6': DefinitionEntry('Cond6', def6, True, None) - # } - # - # conditions1 = HedVariableManager(test_strings1, hed_schema, defs) - # conditions2 = HedVariableManager(test_strings2, hed_schema, defs) - # conditions3 = HedVariableManager(test_strings3, hed_schema, defs) diff --git a/hed/tools/analysis/hed_variable_summary.py b/hed/tools/analysis/hed_variable_summary.py index ce311e6ce..2725a165d 100644 --- a/hed/tools/analysis/hed_variable_summary.py +++ b/hed/tools/analysis/hed_variable_summary.py @@ -1,8 +1,20 @@ +import json + class HedVariableCounts: + """ Keeps a summary of one value of one type of variable. + + Args: + variable_value (str) The value of the variable to be counted + variable_type (str) The type of variable. + + Examples: + HedVariableCounts('SymmetricCond', 'condition-variable') keeps counts of Condition-variable/Symmetric + + """ - def __init__(self, name, variable_type="condition-variable"): - self.variable_name = name + def __init__(self, variable_value, variable_type): + self.variable_value = variable_value self.variable_type = variable_type.lower() self.direct_references = 0 self.total_events = 0 @@ -11,8 +23,14 @@ def __init__(self, name, variable_type="condition-variable"): self.multiple_event_maximum = 0 self.level_counts = {} - def update(self, var_counts): - var_sum = var_counts.get_summary(full=True) + def update(self, variable_info): + """ Update the counts from a HedTypeVariable. + + Args: + variable_info (HedTypeFactor) information about the contents for a particular data file. + + """ + var_sum = variable_info.get_summary(full=True) self.direct_references += var_sum['direct_references'] self.total_events += var_sum['total_events'] self.number_type_events += var_sum['number_type_events'] @@ -28,7 +46,8 @@ def _update_levels(self, level_dict): self.level_counts[key]['events'] = self.level_counts[key]['events'] + item def get_summary(self, as_json=False): - summary = {'name': self.variable_name, 'variable_type': self.variable_type, + summary = {'variable_value': self.variable_value, + 'variable_type': self.variable_type, 'levels': len(self.level_counts.keys()), 'direct_references': self.direct_references, 'total_events': self.total_events, @@ -42,9 +61,10 @@ def get_summary(self, as_json=False): class HedVariableSummary: + """ Holds a consolidated summary for one type variable. """ - def __init__(self, variable_type="condition-variable"): - """ Constructor for HedVariableSummary. + def __init__(self, variable_type, name=''): + """ Constructor for HedVariableSummary for a particular type of variable. 
Args: variable_type (str) Tag representing the type in this summary @@ -52,34 +72,41 @@ def __init__(self, variable_type="condition-variable"): """ self.variable_type = variable_type.lower() - self.summaries = {} + self.name = name + self.summary = {} def __str__(self): - return f"{self.variable_type}[{self.variable_type}]: {len(self.summaries)} type_variables " + return f"Summary {self.name} for HED {self.variable_type} [{len(self.summary)} values]:" + '\n' + \ + self.get_summary(as_json=True) - def get_summaries(self, as_json=True): + def get_summary(self, as_json=True): sum_dict = {} - for var_name, var_counts in self.summaries.items(): - sum_dict[var_name] = var_counts.get_summary(as_json=False) + for var_value, var_counts in self.summary.items(): + sum_dict[var_value] = var_counts.get_summary(as_json=False) if as_json: return json.dumps(sum_dict, indent=4) else: return sum_dict - def update_summary(self, var_counts): - if var_counts.variable_name not in self.summaries: - self.summaries[var_counts.variable_name] = HedVariableCounts(var_counts.variable_name, - var_counts.variable_type) - summary = self.summaries[var_counts.variable_name] - summary.update(var_counts) + def update_summary(self, variable): + """ Update this summary based on the type variable map. + + Args: + variable (HedTypeVariable): Contains the information about + """ + + for type_var in variable.type_variables: + if type_var not in self.summary: + self.summary[type_var] = HedVariableCounts(type_var, self.variable_type) + var_counts = self.summary[type_var] + var_counts.update(variable.get_variable(type_var)) if __name__ == '__main__': import os - import json from hed.tools.analysis.hed_variable_manager import HedVariableManager from hed.schema import load_schema_version - from hed.models import HedString, DefinitionEntry, TabularInput, Sidecar + from hed.models import TabularInput, Sidecar from hed.tools.analysis.analysis_util import get_assembled_strings schema = load_schema_version(xml_version="8.1.0") bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), @@ -92,11 +119,12 @@ def update_summary(self, var_counts): hed_strings = get_assembled_strings(input_data, hed_schema=schema, expand_defs=False) def_mapper = input_data.get_definitions() var_manager = HedVariableManager(hed_strings, schema, def_mapper) - var_summary = HedVariableSummary(variable_type="condition-variable") - + var_manager.add_type_variable("condition-variable") + var_summary = HedVariableSummary("condition-variable") for man_var in var_manager.type_variables: - var_map = var_manager.get_variable(man_var) + var_map = var_manager.get_type_variable(man_var) var_summary.update_summary(var_map) - summary = var_summary.get_summaries(as_json=False) - print(f"Variable summary\n{var_summary.get_summaries()}") + final_summary = var_summary.get_summary(as_json=False) + print(f"Variable summary\n{final_summary}") + print(f"\n\n{str(var_summary)}") diff --git a/hed/tools/analysis/tabular_reports.py b/hed/tools/analysis/tabular_reports.py deleted file mode 100644 index 0b6bf2992..000000000 --- a/hed/tools/analysis/tabular_reports.py +++ /dev/null @@ -1,58 +0,0 @@ - - -def report_diffs(tsv_dict1, tsv_dict2, logger): - """ Reports and logs the contents and differences of two equivalent BidsTabularDictionary objects - - Args: - tsv_dict1 (BidsTabularDictionary): A dictionary representing BIDS-keyed tsv files. - tsv_dict2 (BidsTabularDictionary): A dictionary representing BIDS-keyed tsv files. 
- logger (HedLogger): A HedLogger object for reporting the values by key. - - Returns: - str: A string with the differences. - - """ - report_list = [f"{tsv_dict1.name} has {len(tsv_dict1.file_list)} event files"] - logger.add("overall", f"{report_list[-1]}") - report_list.append(f"{tsv_dict2.name} has {len(tsv_dict2.file_list)} event files") - logger.add("overall", f"{report_list[-1]}") - - report_list.append(tsv_dict1.output_files(title=f"\n{tsv_dict1.name} event files", logger=logger)) - report_list.append(tsv_dict2.output_files(title=f"\n{tsv_dict2.name} event files", logger=logger)) - - # Make sure there are the same number of files in both collections - if len(tsv_dict1.key_list) != len(tsv_dict2.key_list): - report_list.append(f"{tsv_dict1.name} has {len(tsv_dict1.file_list)} files and " + - f"{tsv_dict2.name} has {len(tsv_dict2.file_list)} files") - logger.add("overall", f"{report_list[-1]}", level="ERROR") - - # Compare keys from the two dictionaries to make sure they have the same keys - key_diff = tsv_dict1.key_diffs(tsv_dict2) - if key_diff: - report_list.append(f"File key differences {str(key_diff)}") - logger.add("overall", f"{report_list[-1]}", level="ERROR") - - # Output the column names for each type of event file - report_list.append(f"\n{tsv_dict1.name} event file columns:") - for key, file, rowcount, columns in tsv_dict1.iter_files(): - report_list.append(f"{tsv_dict1.name}: [{rowcount} events] {str(columns)}") - logger.add(key, f"{report_list[-1]}") - - for key, file, rowcount, columns in tsv_dict2.iter_files(): - report_list.append(f"{tsv_dict2.name}: [{rowcount} events] {str(columns)}") - logger.add(key, f"{report_list[-1]}") - - # Output keys for files in which the BIDS and EEG.events have different numbers of events - count_diffs = tsv_dict1.count_diffs(tsv_dict2) - if count_diffs: - report_list.append(f"\nThe number of {tsv_dict1.name} events and {tsv_dict2.name} events" + - f"differ for the following files:") - for item in count_diffs: - report_list.append(f"The {tsv_dict1.name} file has {item[1]} rows and " + - f"the {tsv_dict2.name} event file has {item[2]} rows") - logger.add(item[0], f"{report_list[-1]}", level="ERROR") - else: - report_list.append(f"\nThe {tsv_dict1.name} and {tsv_dict2.name} files have the same number of rows") - logger.add("overall", f"{report_list[-1]}") - - return "\n".join(report_list) diff --git a/hed/tools/bids/bids_tabular_summary.py b/hed/tools/analysis/tabular_summary.py similarity index 84% rename from hed/tools/bids/bids_tabular_summary.py rename to hed/tools/analysis/tabular_summary.py index 1e0325bf5..f2a66da17 100644 --- a/hed/tools/bids/bids_tabular_summary.py +++ b/hed/tools/analysis/tabular_summary.py @@ -1,10 +1,10 @@ - +import json from hed.errors import HedFileError from hed.tools.util.data_util import get_new_dataframe from hed.tools.analysis.annotation_util import generate_sidecar_entry -class BidsTabularSummary: +class TabularSummary: """ Summarize the contents of BIDS tabular files. 
""" def __init__(self, value_cols=None, skip_cols=None, name=''): @@ -63,6 +63,26 @@ def extract_sidecar_template(self): side_dict[column_name] = generate_sidecar_entry(column_name, []) return side_dict + def get_summary(self, as_json=False): + sorted_keys = sorted(self.categorical_info.keys()) + categorical_cols = {} + for key in sorted_keys: + cat_dict = self.categorical_info[key] + sorted_v_keys = sorted(list(cat_dict)) + val_dict = {} + for v_key in sorted_v_keys: + val_dict[v_key] = cat_dict[v_key] + categorical_cols[f"{key} [categorical column] values"] = val_dict + sorted_cols = sorted(map(str, list(self.value_info))) + value_cols = {} + for key in sorted_cols: + value_cols[f"{key} [value_column]"] = f"{self.value_info[key]} values" + summary = {"Summary name": self.name, "Categorical columns": categorical_cols, "Value columns": value_cols} + if as_json: + return json.dumps(summary, indent=4) + else: + return summary + def get_number_unique(self, column_names=None): """ Return the number of unique values in columns. @@ -207,16 +227,16 @@ def make_combined_dicts(file_dictionary, skip_cols=None): Returns: tuple: - - BidsTabularSummary: Summary of the file dictionary. - - dict: of individual BidsTabularSummary objects. + - TabularSummary: Summary of the file dictionary. + - dict: of individual TabularSummary objects. """ - summary_all = BidsTabularSummary(skip_cols=skip_cols) + summary_all = TabularSummary(skip_cols=skip_cols) summary_dict = {} - for key, file in file_dictionary.file_dict.items(): - orig_dict = BidsTabularSummary(skip_cols=skip_cols) - df = get_new_dataframe(file.file_path) + for key, file_path in file_dictionary.items(): + orig_dict = TabularSummary(skip_cols=skip_cols) + df = get_new_dataframe(file_path) orig_dict.update(df) summary_dict[key] = orig_dict summary_all.update_summary(orig_dict) diff --git a/hed/tools/analysis/tag_summary.py b/hed/tools/analysis/tag_summary.py index 9bfe798b9..ebca7bc2d 100644 --- a/hed/tools/analysis/tag_summary.py +++ b/hed/tools/analysis/tag_summary.py @@ -123,4 +123,3 @@ def extract_summary_info(entry_dict, tag_name): event_group = bids.get_tabular_group(obj_type="events") summary = TagSummary(event_group, schema=bids.schema, breakout_list=breakouts) designs, others, errors = summary.get_design_matrices() - print("to here") \ No newline at end of file diff --git a/hed/tools/bids/bids_dataset.py b/hed/tools/bids/bids_dataset.py index f1206f620..0c8900da9 100644 --- a/hed/tools/bids/bids_dataset.py +++ b/hed/tools/bids/bids_dataset.py @@ -1,6 +1,6 @@ import os import json -from hed.errors.error_reporter import get_printable_issue_string +from hed.errors.error_reporter import ErrorHandler, get_printable_issue_string from hed.schema.hed_schema import HedSchema from hed.schema.hed_schema_io import load_schema, load_schema_version from hed.schema.hed_schema_group import HedSchemaGroup @@ -21,7 +21,8 @@ class BidsDataset: """ - def __init__(self, root_path, schema=None, tabular_types=None, exclude_dirs=['sourcedata', 'derivatives', 'code']): + def __init__(self, root_path, schema=None, tabular_types=None, + exclude_dirs=['sourcedata', 'derivatives', 'code', 'stimuli']): """ Constructor for a BIDS dataset. 
Args: @@ -77,13 +78,17 @@ def validate(self, types=None, check_for_warnings=True): """ validator = HedValidator(hed_schema=self.schema) + error_handler = ErrorHandler() if not types: types = list(self.tabular_files.keys()) issues = [] for tab_type in types: files = self.tabular_files[tab_type] - issues += files.validate_sidecars(hed_ops=[validator], check_for_warnings=check_for_warnings) - issues += files.validate_datafiles(hed_ops=[validator], check_for_warnings=check_for_warnings) + issues += files.validate_sidecars(hed_ops=[validator], + check_for_warnings=check_for_warnings, error_handler=error_handler) + issues += files.validate_datafiles(hed_ops=[validator], + check_for_warnings=check_for_warnings, + error_handler=error_handler) return issues def get_summary(self): @@ -113,37 +118,39 @@ def get_schema_versions(self): if __name__ == '__main__': - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../../tests/data/bids/eeg_ds003654s_hed_library') + # path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + # '../../../tests/data/bids/eeg_ds003654s_hed_library') # path = os.path.join(os.path.dirname(os.path.realpath(__file__)), # '../../../tests/data/bids/eeg_ds003654s_hed_inheritance') # path = os.path.join(os.path.dirname(os.path.realpath(__file__)), # '../../../tests/data/bids/eeg_ds003654s_hed') # + # path = 'Q:\WakemanHensonON' + path = 'G:\WakemanHenson\WH_Released' bids = BidsDataset(path) issue_list = bids.validate(check_for_warnings=False) if issue_list: - issue_str = get_printable_issue_string(issue_list, "HED validation errors:") + issue_str = get_printable_issue_string(issue_list, "HED validation errors:", skip_filename=False) else: issue_str = "No issues" print(issue_str) warnings = False - path = '/XXX/bids-examples/xeeg_hed_score/' - bids = BidsDataset(path) - # summary1 = bids.get_summary() - # print(json.dumps(summary1, indent=4)) - print("\nNow validating with the prerelease schema.") - base_version = '8.1.0' - score_url = f"https://raw.githubusercontent.com/hed-standard/hed-schema-library/main/library_schemas/" \ - f"score/prerelease/HED_score_1.0.0.xml" - - schema_base = load_schema_version(xml_version="8.1.0") - schema_score = load_schema(score_url, schema_prefix="sc") - bids.schema = HedSchemaGroup([schema_base, schema_score]) - - issue_list2 = bids.validate(check_for_warnings=warnings) - if issue_list2: - issue_str2 = get_printable_issue_string(issue_list2, "HED validation errors: ", skip_filename=False) - else: - issue_str2 = "No HED validation errors" - print(issue_str2) + # path = '/XXX/bids-examples/xeeg_hed_score/' + # bids = BidsDataset(path) + # # summary1 = bids.get_summary() + # # print(json.dumps(summary1, indent=4)) + # print("\nNow validating with the prerelease schema.") + # base_version = '8.1.0' + # score_url = f"https://raw.githubusercontent.com/hed-standard/hed-schema-library/main/library_schemas/" \ + # f"score/prerelease/HED_score_1.0.0.xml" + # + # schema_base = load_schema_version(xml_version="8.1.0") + # schema_score = load_schema(score_url, schema_prefix="sc") + # bids.schema = HedSchemaGroup([schema_base, schema_score]) + # + # issue_list2 = bids.validate(check_for_warnings=warnings) + # if issue_list2: + # issue_str2 = get_printable_issue_string(issue_list2, "HED validation errors: ", skip_filename=False) + # else: + # issue_str2 = "No HED validation errors" + # print(issue_str2) diff --git a/hed/tools/bids/bids_file_dictionary.py b/hed/tools/bids/bids_file_dictionary.py index 417702945..da77fdf27 100644 --- 
a/hed/tools/bids/bids_file_dictionary.py +++ b/hed/tools/bids/bids_file_dictionary.py @@ -30,17 +30,22 @@ def __init__(self, collection_name, files, entities=('sub', 'ses', 'task', 'run' """ super().__init__(collection_name, None, None, separator='_') self.entities = entities - self.file_dict = self.make_dict(files, entities) + self._file_dict = self.make_dict(files, entities) @property def key_list(self): """ The dictionary keys. """ - return list(self.file_dict.keys()) + return list(self._file_dict.keys()) + + @property + def file_dict(self): + """ Dictionary of keys and paths. """ + return {key:file.file_path for key, file in self._file_dict.items()} @property def file_list(self): - """ Files contained in this dictionary. """ - return list(self.file_dict.values()) + """ Paths of the files in the list. """ + return [file.file_path for file in self._file_dict.values()] def correct_file(self, the_file): """ Transform to BidsFile if needed. @@ -74,8 +79,8 @@ def get_file_path(self, key): - None is returned if the key is not present. """ - if key in self.file_dict.keys(): - return self.file_dict[key].file_path + if key in self._file_dict.keys(): + return self._file_dict[key].file_path return None def iter_files(self): @@ -87,7 +92,7 @@ def iter_files(self): - BidsFile: The next BidsFile. """ - for key, file in self.file_dict.items(): + for key, file in self._file_dict.items(): yield key, file def key_diffs(self, other_dict): @@ -100,7 +105,7 @@ def key_diffs(self, other_dict): list: The symmetric difference of the keys in this dictionary and the other one. """ - diffs = set(self.file_dict.keys()).symmetric_difference(set(other_dict.file_dict.keys())) + diffs = set(self._file_dict.keys()).symmetric_difference(set(other_dict._file_dict.keys())) return list(diffs) def get_new_dict(self, name, files): @@ -165,7 +170,7 @@ def make_query(self, query_dict={'sub': '*'}): """ response_dict = {} - for key, file in self.file_dict.items(): + for key, file in self._file_dict.items(): if self.match_query(query_dict, file.entity_dict): response_dict[key] = file return response_dict @@ -185,7 +190,7 @@ def split_by_entity(self, entity): - This function is used for analysis where a single subject or single type of task is being analyzed. 
""" - split_dict, leftovers = self._split_dict_by_entity(self.file_dict, entity) + split_dict, leftovers = self._split_dict_by_entity(self._file_dict, entity) for entity_value, entity_dict in split_dict.items(): split_dict[entity_value] = self.get_new_dict(f"{self.name}_{entity_value}", entity_dict) if leftovers: diff --git a/hed/tools/bids/bids_file_group.py b/hed/tools/bids/bids_file_group.py index b798e7aef..c0534b1af 100644 --- a/hed/tools/bids/bids_file_group.py +++ b/hed/tools/bids/bids_file_group.py @@ -1,10 +1,10 @@ import os -from hed.errors.error_reporter import get_printable_issue_string +from hed.errors.error_reporter import ErrorContext, ErrorHandler, get_printable_issue_string from hed.schema.hed_schema_io import load_schema_version +from hed.tools.analysis.tabular_summary import TabularSummary from hed.tools.bids.bids_timeseries_file import BidsTimeseriesFile from hed.tools.bids.bids_tabular_file import BidsTabularFile from hed.tools.bids.bids_sidecar_file import BidsSidecarFile -from hed.tools.bids.bids_tabular_summary import BidsTabularSummary from hed.tools.util.io_util import get_dir_dictionary, get_file_list, get_path_components from hed.validator.hed_validator import HedValidator @@ -24,7 +24,7 @@ class BidsFileGroup: """ def __init__(self, root_path, suffix="_events", obj_type="tabular", - exclude_dirs=['sourcedata', 'derivatives', 'code']): + exclude_dirs=['sourcedata', 'derivatives', 'code', 'stimuli']): """ Constructor for a BidsFileGroup. Args: @@ -98,7 +98,7 @@ def summarize(self, value_cols=None, skip_cols=None): skip_cols (list): Column names designated as columns to skip. Returns: - BidsTabularSummary or None: A summary of the number of values in different columns if tabular group. + TabularSummary or None: A summary of the number of values in different columns if tabular group. Notes: - The columns that are not value_cols or skip_col are summarized by counting @@ -107,42 +107,54 @@ def summarize(self, value_cols=None, skip_cols=None): """ if self.obj_type != 'tabular': return None - info = BidsTabularSummary(value_cols=value_cols, skip_cols=skip_cols) - for obj in self.datafile_dict.values(): - info.update(obj.file_path) + info = TabularSummary(value_cols=value_cols, skip_cols=skip_cols) + info.update(list(self.datafile_dict.keys())) return info - def validate_sidecars(self, hed_ops, check_for_warnings=True): + def validate_sidecars(self, hed_ops, check_for_warnings=True, error_handler=None): """ Validate merged sidecars. Args: hed_ops ([func or HedOps], func, HedOps): Validation functions to apply. check_for_warnings (bool): If True, include warnings in the check. + error_handler (ErrorHandler): The common error handler for the dataset. Returns: list: A list of validation issues found. Each issue is a dictionary. """ + + if not error_handler: + error_handler = ErrorHandler() issues = [] for sidecar in self.sidecar_dict.values(): + error_handler.push_error_context(ErrorContext.FILE_NAME, sidecar.file_path) if sidecar.has_hed: - issues += sidecar.contents.validate_entries(hed_ops=hed_ops, check_for_warnings=check_for_warnings) + issues += sidecar.contents.validate_entries(hed_ops=hed_ops, + name=sidecar.file_path, + check_for_warnings=check_for_warnings) + error_handler.pop_error_context() return issues - def validate_datafiles(self, hed_ops, check_for_warnings=True, keep_contents=False): + def validate_datafiles(self, hed_ops, check_for_warnings=True, keep_contents=False, error_handler=None): """ Validate the datafiles and return an error list. 
Args: hed_ops ([func or HedOps], func, HedOps): Validation functions to apply. check_for_warnings (bool): If True, include warnings in the check. keep_contents (bool): If True, the underlying data files are read and their contents retained. + error_handler (ErrorHandler): The common error handler to use for the dataset. Returns: list: A list of validation issues found. Each issue is a dictionary. """ + + if not error_handler: + error_handler = ErrorHandler() issues = [] for data_obj in self.datafile_dict.values(): + error_handler.push_error_context(ErrorContext.FILE_NAME, data_obj.file_path) data_obj.set_contents(overwrite=False) if not data_obj.has_hed: continue @@ -150,6 +162,7 @@ def validate_datafiles(self, hed_ops, check_for_warnings=True, keep_contents=Fal issues += data.validate_file(hed_ops=hed_ops, check_for_warnings=check_for_warnings) if not keep_contents: data_obj.clear_contents() + error_handler.pop_error_context() return issues def _make_datafile_dict(self): @@ -205,23 +218,26 @@ def _make_sidecar_dir_dict(self): if __name__ == '__main__': - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../../tests/data/bids/eeg_ds003654s_hed_inheritance') + # path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + # '../../../tests/data/bids/eeg_ds003654s_hed_inheritance') + path = 'G:\WakemanHenson\WH_Released' bids = BidsFileGroup(path) - for file_obj in bids.sidecar_dict.values(): - print(file_obj) - - for file_obj in bids.datafile_dict.values(): - print(file_obj) + # for file_obj in bids.sidecar_dict.values(): + # print(file_obj.file_path) + # + # for file_obj in bids.datafile_dict.values(): + # print(file_obj.file_path) schema = load_schema_version(xml_version="8.0.0") validator = HedValidator(hed_schema=schema) issues_side = bids.validate_sidecars(hed_ops=[validator], check_for_warnings=False) + print(f"Side issues {str(issues_side)}\n") issues_data = bids.validate_datafiles(hed_ops=[validator], check_for_warnings=False) + print(f"Data issues {str(issues_data)}\n") if issues_side: - print(get_printable_issue_string(issues_side, "Sidecar errors")) + print(get_printable_issue_string(issues_side, "Sidecar errors", skip_filename=False)) else: print("No validation errors in the sidecars") diff --git a/hed/tools/bids/bids_tabular_dictionary.py b/hed/tools/bids/bids_tabular_dictionary.py index 5c15a2b89..6cf506524 100644 --- a/hed/tools/bids/bids_tabular_dictionary.py +++ b/hed/tools/bids/bids_tabular_dictionary.py @@ -71,7 +71,7 @@ def count_diffs(self, other_dict): """ self._set_tsv_info() diff_list = [] - for key in self.file_dict.keys(): + for key in self._file_dict.keys(): if self.rowcount_dict[key] != other_dict.rowcount_dict[key]: diff_list.append((key, self.rowcount_dict[key], other_dict.rowcount_dict[key])) return diff_list @@ -121,7 +121,7 @@ def iter_files(self): """ self._set_tsv_info() - for key, file in self.file_dict.items(): + for key, file in self._file_dict.items(): yield key, file, self.rowcount_dict[key], self.column_dict[key] def make_new(self, name, files): @@ -141,8 +141,65 @@ def _set_tsv_info(self): if self._info_set: return - for key, file in self.file_dict.items(): + for key, file in self._file_dict.items(): df = get_new_dataframe(file.file_path) self.rowcount_dict[key] = len(df.index) self.column_dict[key] = list(df.columns.values) self.info_set = True + + def report_diffs(self, tsv_dict, logger=None): + """ Reports and logs the contents and differences between this tabular dictionary and another + + Args: + tsv_dict 
(BidsTabularDictionary): A dictionary representing BIDS-keyed tsv files.
+            logger (HedLogger): A HedLogger object for reporting the values by key (no logging if None).
+
+        Returns:
+            str: A string with the differences.
+
+        """
+        report_list = [f"{self.name} has {len(self.file_list)} event files"]
+        if logger:
+            logger.add("overall", f"{report_list[-1]}")
+        report_list.append(f"{tsv_dict.name} has {len(tsv_dict.file_list)} event files")
+        if logger:
+            logger.add("overall", f"{report_list[-1]}")
+
+        report_list.append(self.output_files(title=f"\n{self.name} event files", logger=logger))
+        report_list.append(tsv_dict.output_files(title=f"\n{tsv_dict.name} event files", logger=logger))
+
+        # Make sure there are the same number of files in both collections
+        if len(self.key_list) != len(tsv_dict.key_list):
+            report_list.append(f"{self.name} has {len(self.file_list)} files and " +
+                               f"{tsv_dict.name} has {len(tsv_dict.file_list)} files")
+            if logger:
+                logger.add("overall", f"{report_list[-1]}", level="ERROR")
+
+        # Compare keys from the two dictionaries to make sure they have the same keys
+        key_diff = self.key_diffs(tsv_dict)
+        if key_diff:
+            report_list.append(f"File key differences {str(key_diff)}")
+            if logger:
+                logger.add("overall", f"{report_list[-1]}", level="ERROR")
+
+        # Output the column names for each type of event file
+        report_list.append(f"\n{self.name} event file columns:")
+        for key, file, rowcount, columns in self.iter_files():
+            report_list.append(f"{self.name}: [{rowcount} events] {str(columns)}")
+            if logger:
+                logger.add(key, f"{report_list[-1]}")
+
+        for key, file, rowcount, columns in tsv_dict.iter_files():
+            report_list.append(f"{tsv_dict.name}: [{rowcount} events] {str(columns)}")
+            if logger:
+                logger.add(key, f"{report_list[-1]}")
+
+        # Output keys for files in which the BIDS and EEG.events have different numbers of events
+        count_diffs = self.count_diffs(tsv_dict)
+        if count_diffs:
+            report_list.append(f"\nThe number of {self.name} events and {tsv_dict.name} events " +
+                               f"differ for the following files:")
+            for item in count_diffs:
+                report_list.append(f"The {self.name} file has {item[1]} rows and " +
+                                   f"the {tsv_dict.name} event file has {item[2]} rows")
+                if logger:
+                    logger.add(item[0], f"{report_list[-1]}", level="ERROR")
+        else:
+            report_list.append(f"\nThe {self.name} and {tsv_dict.name} files have the same number of rows")
+            if logger:
+                logger.add("overall", f"{report_list[-1]}")
+
+        return "\n".join(report_list)
diff --git a/hed/tools/bids/bids_tabular_file.py b/hed/tools/bids/bids_tabular_file.py
index dca5d6a1f..ca059e1a0 100644
--- a/hed/tools/bids/bids_tabular_file.py
+++ b/hed/tools/bids/bids_tabular_file.py
@@ -1,6 +1,6 @@
 import os
 from hed.tools.bids.bids_file import BidsFile
-from hed.models import TabularInput
+from hed.models.tabular_input import TabularInput


 class BidsTabularFile(BidsFile):
diff --git a/tests/data/curation/task-FacePerception_events.json b/tests/data/curation/task-FacePerception_events.json
new file mode 100644
index 000000000..fa018c473
--- /dev/null
+++ b/tests/data/curation/task-FacePerception_events.json
@@ -0,0 +1,138 @@
+{
+    "onset": {
+        "Description": "Position of event marker in seconds relative to the start.",
+        "Units": "s"
+    },
+    "duration": {
+        "Description": "Duration of the event in seconds.",
+        "Units": "s"
+    },
+    "event_type": {
+        "LongName": "Event category",
+        "Description": "The main category of the event.",
+        "Levels": {
+            "show_face": "Display a face to mark end of pre-stimulus and start of blink-inhibition.",
+            "show_face_initial": "Display a face at the beginning of the recording.",
+            "show_circle": "Display a white 
circle to mark end of the stimulus and blink inhibition.", + "show_cross": "Display only a white cross to mark start of trial and fixation.", + "left_press": "Experiment participant presses a key with left index finger.", + "right_press": "Experiment participant presses a key with right index finger.", + "setup_left_sym": "Setup for experiment with pressing key with left index finger means a face with above average symmetry.", + "setup_right_sym": "Setup for experiment with pressing key with right index finger means a face with above average symmetry.", + "double_press": "Experiment participant presses both keys ." + }, + "HED": { + "show_face": "Sensory-event, Experimental-stimulus, (Def/Face-image, Onset), (Def/Blink-inhibition-task,Onset),(Def/Cross-only, Offset)", + "show_face_initial": "Sensory-event, Experimental-stimulus, (Def/Face-image, Onset), (Def/Blink-inhibition-task,Onset), (Def/Fixation-task, Onset)", + "show_circle": "Sensory-event, (Intended-effect, Cue), (Def/Circle-only, Onset), (Def/Face-image, Offset), (Def/Blink-inhibition-task, Offset), (Def/Fixation-task, Offset)", + "show_cross": "Sensory-event, (Intended-effect, Cue), (Def/Cross-only, Onset), (Def/Fixation-task, Onset), (Def/Circle-only, Offset)", + "left_press": "Agent-action, Participant-response, Def/Press-left-finger", + "right_press": "Agent-action, Participant-response, Def/Press-right-finger", + "setup_left_sym": "Experiment-structure, (Def/Left-sym-cond, Onset), (Def/Initialize-recording, Onset)", + "setup_right_sym": "Experiment-structure, (Def/Right-sym-cond, Onset), (Def/Initialize-recording, Onset)", + "double_press": "Agent-action, Indeterminate-action, (Press, Keyboard-key)" + } + }, + "face_type": { + "Description": "Factor indicating type of face image being displayed.", + "Levels": { + "famous_face": "A face that should be recognized by the participants.", + "unfamiliar_face": "A face that should not be recognized by the participants.", + "scrambled_face": "A scrambled face image generated by taking face 2D FFT." + }, + "HED": { + "famous_face": "Def/Famous-face-cond", + "unfamiliar_face": "Def/Unfamiliar-face-cond", + "scrambled_face": "Def/Scrambled-face-cond" + } + }, + "rep_status": { + "Description": "Factor indicating whether this image has been already seen.", + "Levels": { + "first_show": "Factor level indicating the first display of this face.", + "immediate_repeat": "Factor level indicating this face was the same as previous one.", + "delayed_repeat": "Factor level indicating face was seen 5 to 15 trials ago." 
+ }, + "HED": { + "first_show": "Def/First-show-cond", + "immediate_repeat": "Def/Immediate-repeat-cond", + "delayed_repeat": "Def/Delayed-repeat-cond" + } + }, + "trial": { + "Description": "Indicates which trial this event belongs to.", + "HED": "Experimental-trial/#" + }, + "rep_lag": { + "Description": "How face images before this one was the image was previously presented.", + "HED": "(Face, Item-interval/#)" + }, + "stim_file": { + "Description": "Path of the stimulus file in the stimuli directory.", + "HED": "(Image, Pathname/#)" + }, + "hed_def_sensory": { + "Description": "Metadata dictionary for gathering sensory definitions", + "HED": { + "cross_only_def": "(Definition/Cross-only, (Visual-presentation, (Foreground-view, (White, Cross), (Center-of, Computer-screen)), (Background-view, Black), Description/A white fixation cross on a black background in the center of the screen.))", + "face_image_def": "(Definition/Face-image, (Visual-presentation, (Foreground-view, ((Image, Face, Hair), Color/Grayscale), ((White, Cross), (Center-of, Computer-screen))), (Background-view, Black), Description/A happy or neutral face in frontal or three-quarters frontal pose with long hair cropped presented as an achromatic foreground image on a black background with a white fixation cross superposed.))", + "circle_only_def": "(Definition/Circle-only, (Visual-presentation, (Foreground-view, ((White, Circle), (Center-of, Computer-screen))), (Background-view, Black), Description/A white circle on a black background in the center of the screen.))" + } + }, + "hed_def_actions": { + "Description": "Metadata dictionary for gathering participant action definitions", + "HED": { + "press_left_finger_def": "(Definition/Press-left-finger, ((Index-finger, (Left-side-of, Experiment-participant)), (Press, Keyboard-key), Description/The participant presses a key with the left index finger to indicate a face symmetry judgment.))", + "press_right_finger_def": "(Definition/Press-right-finger, ((Index-finger, (Right-side-of, Experiment-participant)), (Press, Keyboard-key), Description/The participant presses a key with the right index finger to indicate a face symmetry evaluation.))" + } + }, + "hed_def_conds": { + "Description": "Metadata dictionary for gathering experimental condition definitions", + "HED": { + "famous_face_cond_def": "(Definition/Famous-face-cond, (Condition-variable/Face-type, (Image, (Face, Famous)), Description/A face that should be recognized by the participants))", + "unfamiliar_face_cond_def": "(Definition/Unfamiliar-face-cond, (Condition-variable/Face-type, (Image, (Face, Unfamiliar)), Description/A face that should not be recognized by the participants.))", + "scrambled_face_cond_def": "(Definition/Scrambled-face-cond, (Condition-variable/Face-type, (Image, (Face, Disordered)), Description/A scrambled face image generated by taking face 2D FFT.))", + "first_show_cond_def": "(Definition/First-show-cond, ((Condition-variable/Repetition-type, (Item-count/1, Face), Item-interval/0), Description/Factor level indicating the first display of this face.))", + "immediate_repeat_cond_def": "(Definition/Immediate-repeat-cond, ((Condition-variable/Repetition-type, (Item-count/2, Face), Item-interval/1), Description/Factor level indicating this face was the same as previous one.))", + "delayed_repeat_cond_def": "(Definition/Delayed-repeat-cond, (Condition-variable/Repetition-type, (Item-count/2, Face), (Item-interval, (Greater-than-or-equal-to, Item-interval/5)), Description/Factor level indicating face 
was seen 5 to 15 trials ago.))", + "left_sym_cond_def": "(Definition/Left-sym-cond, (Condition-variable/Key-assignment, ((Index-finger, (Left-side-of, Experiment-participant)), (Behavioral-evidence, Symmetrical)), ((Index-finger, (Right-side-of, Experiment-participant)), (Behavioral-evidence, Asymmetrical)), Description/Left index finger key press indicates a face with above average symmetry.))", + "right_sym_cond_def": "(Definition/Right-sym-cond, (Condition-variable/Key-assignment, ((Index-finger, (Right-side-of, Experiment-participant)), (Behavioral-evidence, Symmetrical)), ((Index-finger, (Left-side-of, Experiment-participant)), (Behavioral-evidence, Asymmetrical)), Description/Right index finger key press indicates a face with above average symmetry.))" + } + }, + "hed_def_tasks": { + "Description": "Metadata dictionary for gathering task definitions", + "HED": { + "face_symmetry_evaluation_task_def": "(Definition/Face-symmetry-evaluation-task, (Task, Experiment-participant, (See, Face), (Discriminate, (Face, Symmetrical)), (Press, Keyboard-key), Description/Evaluate degree of image symmetry and respond with key press evaluation.))", + "blink_inhibition_task_def": "(Definition/Blink-inhibition-task, (Task, Experiment-participant, Inhibit-blinks, Description/Do not blink while the face image is displayed.))", + "fixation_task_def": "(Definition/Fixation-task, (Task, Experiment-participant, (Fixate, Cross), Description/Fixate on the cross at the screen center.))" + } + }, + "hed_def_setup": { + "Description": "Metadata dictionary for gathering setup definitions", + "HED": { + "setup_def": "(Definition/Initialize-recording, (Recording))" + } + + }, + "value": { + "Description": "Numerical event marker", + "Levels": { + "x0": "Disappearance of face image and display of the inter-stimulus circle simultaneously", + "x1": "Disappearance of face image and display of the inter-stimulus circle simultaneously", + "x2": "Initial setup with left finger key press indicating above average symmetry", + "x3": "Initial setup with right finger key press indicating above average symmetry", + "x5": "Initial presentation of famous face", + "x6": "Immediate repeated presentation of famous face", + "x7": "Delayed repeated presentation of famous face", + "x13": "Initial presentation of unfamiliar face", + "x14": "Immediate repeated presentation of unfamiliar face", + "x15": "Delayed repeated presentation of unfamiliar face", + "x17": "Initial presentation of scrambled face", + "x18": "Immediate repeated presentation of scrambled face", + "x19": "Delayed repeated presentation of scrambled face", + "x256": "Left finger key press", + "x4096": "Right finger key press", + "x4352": "Left and right finger key presses" + } + } +} diff --git a/tests/errors/test_error_reporter.py b/tests/errors/test_error_reporter.py index e90dff80e..28c7fbffe 100644 --- a/tests/errors/test_error_reporter.py +++ b/tests/errors/test_error_reporter.py @@ -91,3 +91,25 @@ def test_printable_issue_string(self): self.assertTrue(len(printable_issues3) > len(printable_issues2)) self.error_handler.reset_error_context() + + def test_printable_issue_string_with_filenames(self): + myfile = 'my_file.txt' + self.error_handler.push_error_context(ErrorContext.CUSTOM_TITLE, "Default Custom Title") + self.error_handler.push_error_context(ErrorContext.FILE_NAME, myfile) + error_list = self.error_handler.format_error_with_context(ValidationErrors.HED_TAG_NOT_UNIQUE, "") + error_list += 
self.error_handler.format_error_with_context(SchemaWarnings.INVALID_CAPITALIZATION, + "dummy", problem_char="#", char_index=0) + + printable_issues = get_printable_issue_string(error_list, skip_filename=False) + self.assertTrue(len(printable_issues) > 10) + self.assertEqual(printable_issues.count(myfile), 1) + + printable_issues2 = get_printable_issue_string(error_list, severity=ErrorSeverity.ERROR, skip_filename=False) + self.assertTrue(len(printable_issues) > len(printable_issues2)) + self.assertEqual(printable_issues2.count(myfile), 1) + printable_issues3 = get_printable_issue_string(error_list, severity=ErrorSeverity.ERROR, skip_filename=False, + title="Later added custom title that is longer") + self.assertTrue(len(printable_issues3) > len(printable_issues2)) + self.assertEqual(printable_issues3.count(myfile), 1) + + self.error_handler.reset_error_context() diff --git a/tests/tools/analysis/test_annotation_util.py b/tests/tools/analysis/test_annotation_util.py index 24b621645..a9902feeb 100644 --- a/tests/tools/analysis/test_annotation_util.py +++ b/tests/tools/analysis/test_annotation_util.py @@ -5,7 +5,7 @@ from pandas import DataFrame from hed import schema as hedschema from hed.models import Sidecar -from hed.tools import BidsTabularSummary, check_df_columns, df_to_hed, extract_tags, hed_to_df, merge_hed_dict +from hed.tools import TabularSummary, check_df_columns, df_to_hed, extract_tags, hed_to_df, merge_hed_dict from hed.tools.analysis.annotation_util import _find_last_pos, _find_first_pos, \ _flatten_cat_col, _flatten_val_col, _get_value_entry, trim_back, trim_front, _tag_list_to_str, _update_cat_dict, \ generate_sidecar_entry @@ -256,7 +256,7 @@ def test_merge_hed_dict_full(self): value_columns = ["rep_lag", "stim_file", "value"] event_files = get_file_list(self.bids_root_path, extensions=[".tsv"], name_suffix="_events", exclude_dirs=exclude_dirs) - value_sum = BidsTabularSummary(value_cols=value_columns, skip_cols=skip_columns) + value_sum = TabularSummary(value_cols=value_columns, skip_cols=skip_columns) value_sum.update(event_files) sidecar_template = value_sum.extract_sidecar_template() example_spreadsheet = hed_to_df(sidecar_template) diff --git a/tests/tools/analysis/test_definition_summary.py b/tests/tools/analysis/test_definition_summary.py deleted file mode 100644 index 5acb0fc5a..000000000 --- a/tests/tools/analysis/test_definition_summary.py +++ /dev/null @@ -1,25 +0,0 @@ -import os -import unittest -from hed.models.sidecar import Sidecar -from hed.tools.bids.bids_sidecar_file import BidsSidecarFile - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.test_json = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/bids/eeg_ds003654s_hed/task-FacePerception_events.json') - - def test_constructor(self): - sidecar1 = BidsSidecarFile(self.test_json) - self.assertEqual(sidecar1.suffix, 'events', "BidsSidecarFile should have correct name_suffix") - self.assertEqual(sidecar1.ext, '.json', "BidsSidecarFile should have correct ext") - self.assertEqual(len(sidecar1.entity_dict), 1, "BidsSidecarFile should have right number of entity_dict") - self.assertFalse(sidecar1.contents, "BidsSidecarFile should not have contents on construction.") - sidecar1.set_contents() - self.assertIsInstance(sidecar1.contents, Sidecar, "BidsSidecarFile should contain a Sidecar after set_contents") - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/tools/analysis/test_definition_manager.py 
b/tests/tools/analysis/test_hed_definition_manager.py similarity index 87% rename from tests/tools/analysis/test_definition_manager.py rename to tests/tools/analysis/test_hed_definition_manager.py index a4f9300fa..d861d0346 100644 --- a/tests/tools/analysis/test_definition_manager.py +++ b/tests/tools/analysis/test_hed_definition_manager.py @@ -2,7 +2,7 @@ import unittest from hed import HedString, HedTag, load_schema_version, Sidecar, TabularInput from hed.models import DefinitionEntry -from hed.tools import DefinitionManager +from hed.tools import HedDefinitionManager class Test(unittest.TestCase): @@ -54,24 +54,24 @@ def setUpClass(cls): cls.schema = schema def test_constructor(self): - def_man = DefinitionManager(self.definitions1, self.schema) - self.assertIsInstance(def_man, DefinitionManager, - "Constructor should create a DefinitionManager directly from a dict") + def_man = HedDefinitionManager(self.definitions1, self.schema) + self.assertIsInstance(def_man, HedDefinitionManager, + "Constructor should create a HedDefinitionManager directly from a dict") self.assertEqual(len(def_man.variable_map), 6, "Constructor condition_map should have the right length") self.assertEqual(len(def_man.variable_map), len(def_man.definitions), "Constructor condition_map should be the same length as the definitions dictionary") def test_constructor_from_tabular_input(self): definitions = self.input_data.get_definitions(as_strings=False).gathered_defs - def_man = DefinitionManager(definitions, self.schema) - self.assertIsInstance(def_man, DefinitionManager, - "Constructor should create a DefinitionManager from a tabular input") + def_man = HedDefinitionManager(definitions, self.schema) + self.assertIsInstance(def_man, HedDefinitionManager, + "Constructor should create a HedDefinitionManager from a tabular input") self.assertEqual(len(def_man.variable_map), 17, "Constructor condition_map should have the right length") self.assertEqual(len(def_man.variable_map), len(def_man.definitions), "Constructor condition_map should be the same length as the definitions dictionary") def test_get_vars(self): - def_man = DefinitionManager(self.definitions1, self.schema) + def_man = HedDefinitionManager(self.definitions1, self.schema) item1 = HedString(f"Sensory-event,((Red,Blue)),", self.schema) vars1 = def_man.get_vars(item1) self.assertFalse(vars1, "get_vars should return None if no condition type_variables") @@ -84,7 +84,7 @@ def test_get_vars(self): self.assertEqual(len(vars3), 5, "get_vars should return multiple condition type_variables") def test_get_def_names(self): - def_man = DefinitionManager(self.definitions1, self.schema) + def_man = HedDefinitionManager(self.definitions1, self.schema) a = def_man.get_def_names(HedTag('Def/Cond3/4', hed_schema=self.schema)) self.assertEqual(len(a), 1, "get_def_names returns 1 item if single tag") self.assertEqual(a[0], 'cond3', "get_def_names returns the correct item if single tag") @@ -102,16 +102,16 @@ def test_get_def_names(self): self.assertFalse(e, "get_def_names returns no items if no defs") def test_split_name(self): - name1, val1 = DefinitionManager.split_name('') + name1, val1 = HedDefinitionManager.split_name('') self.assertIsNone(name1, "split_name should return None split name for empty name") self.assertIsNone(val1, "split_name should return None split value for empty name") - name2, val2 = DefinitionManager.split_name('lumber') + name2, val2 = HedDefinitionManager.split_name('lumber') self.assertEqual(name2, 'lumber', 'split_name should return name if no 
split value') self.assertEqual(val2, '', 'split_name should return empty string if no split value') - name3, val3 = DefinitionManager.split_name('Lumber/5.23', lowercase=False) + name3, val3 = HedDefinitionManager.split_name('Lumber/5.23', lowercase=False) self.assertEqual(name3, 'Lumber', 'split_name should return name if split value') self.assertEqual(val3, '5.23', 'split_name should return value as string if split value') - name4, val4 = DefinitionManager.split_name('Lumber/5.23') + name4, val4 = HedDefinitionManager.split_name('Lumber/5.23') self.assertEqual(name4, 'lumber', 'split_name should return name if split value') self.assertEqual(val4, '5.23', 'split_name should return value as string if split value') diff --git a/tests/tools/analysis/test_hed_type_variable.py b/tests/tools/analysis/test_hed_type_variable.py index ebed090aa..2dd8f9764 100644 --- a/tests/tools/analysis/test_hed_type_variable.py +++ b/tests/tools/analysis/test_hed_type_variable.py @@ -52,10 +52,10 @@ def setUpClass(cls): def test_constructor(self): test_strings1 = [HedString(hed, hed_schema=self.hed_schema) for hed in self.test_strings1] - var_manager = HedTypeVariable(HedContextManager(test_strings1, self.hed_schema), self.hed_schema, self.defs) - self.assertIsInstance(var_manager, HedTypeVariable, - "Constructor should create a HedVariableManager from strings") - self.assertEqual(len(var_manager._variable_map), 8, + type_var = HedTypeVariable(HedContextManager(test_strings1, self.hed_schema), self.hed_schema, self.defs) + self.assertIsInstance(type_var, HedTypeVariable, + "Constructor should create a HedTypeManager from strings") + self.assertEqual(len(type_var._variable_map), 8, "Constructor ConditionVariables should have the right length") def test_constructor_from_tabular_input(self): diff --git a/tests/tools/analysis/test_hed_variable_counts.py b/tests/tools/analysis/test_hed_variable_counts.py new file mode 100644 index 000000000..0fc3279c3 --- /dev/null +++ b/tests/tools/analysis/test_hed_variable_counts.py @@ -0,0 +1,69 @@ +import os +import unittest +from hed import load_schema_version, Sidecar, TabularInput +from hed.tools import HedContextManager, HedTypeVariable, HedVariableCounts, HedVariableSummary, get_assembled_strings + + +class Test(unittest.TestCase): + + def setUp(self): + schema = load_schema_version(xml_version="8.1.0") + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../../data/bids/eeg_ds003654s_hed')) + events_path = os.path.realpath(os.path.join(bids_root_path, + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') + input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") + hed_strings1 = get_assembled_strings(input_data, hed_schema=schema, expand_defs=False) + definitions1 = input_data.get_definitions(as_strings=False).gathered_defs + self.var_type1 = HedTypeVariable(HedContextManager(hed_strings1, schema), schema, definitions1, + variable_type='condition-variable') + + def test_get_summary_one_level(self): + var_summary1 = HedVariableSummary(variable_type="condition-variable") + self.assertIsInstance(var_summary1, HedVariableSummary, + "Constructor should create a HedVariableSummary") + summary1 = var_summary1.get_summary(as_json=False) + self.assertIsInstance(summary1, dict, "get_summary should return a dictionary when empty") + 
self.assertFalse(summary1, "get_summary should create a empty dictionary before updates") + count1 = HedVariableCounts('key-assignment', 'condition-variable') + var1 = self.var_type1.get_variable('key-assignment') + count1.update(var1) + self.assertEqual(0, count1.direct_references, "get_summary") + self.assertIn('right-sym-cond', count1.level_counts) + self.assertEqual(200, count1.total_events) + self.assertEqual(1, count1.level_counts['right-sym-cond']['files']) + count1.update(var1) + self.assertEqual(0, count1.direct_references, "get_summary") + self.assertIn('right-sym-cond', count1.level_counts) + self.assertEqual(400, count1.total_events) + self.assertEqual(2, count1.level_counts['right-sym-cond']['files']) + + def test_get_summary_multiple_levels(self): + var_summary1 = HedVariableSummary(variable_type="condition-variable") + self.assertIsInstance(var_summary1, HedVariableSummary, + "Constructor should create a HedVariableSummary") + summary1 = var_summary1.get_summary(as_json=False) + self.assertIsInstance(summary1, dict, "get_summary should return a dictionary when empty") + self.assertFalse(summary1, "get_summary should create a empty dictionary before updates") + count1 = HedVariableCounts('face-type', 'condition-variable') + var1 = self.var_type1.get_variable('face-type') + count1.update(var1) + self.assertEqual(0, count1.direct_references, "get_summary") + self.assertEqual(3, len(count1.level_counts)) + self.assertIn('unfamiliar-face-cond', count1.level_counts) + self.assertEqual(200, count1.total_events) + self.assertEqual(52, count1.number_type_events) + self.assertEqual(1, count1.level_counts['unfamiliar-face-cond']['files']) + count1.update(var1) + self.assertEqual(0, count1.direct_references, "get_summary") + self.assertEqual(3, len(count1.level_counts)) + self.assertIn('unfamiliar-face-cond', count1.level_counts) + self.assertEqual(400, count1.total_events) + self.assertEqual(104, count1.number_type_events) + self.assertEqual(2, count1.level_counts['unfamiliar-face-cond']['files']) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/tools/analysis/test_hed_variable_manager.py b/tests/tools/analysis/test_hed_variable_manager.py index 9aac46da4..17af481da 100644 --- a/tests/tools/analysis/test_hed_variable_manager.py +++ b/tests/tools/analysis/test_hed_variable_manager.py @@ -6,22 +6,21 @@ class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): + def setUp(self): schema = load_schema_version(xml_version="8.1.0") bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/bids/eeg_ds003654s_hed')) events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') - cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") - cls.hed_schema = schema + self.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") + self.hed_strings = get_assembled_strings(self.input_data, hed_schema=schema, expand_defs=False) + self.hed_schema = schema + self.definitions = self.input_data.get_definitions() def test_constructor(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=False) - def_mapper = self.input_data._def_mapper - var_manager = 
HedVariableManager(hed_strings, self.hed_schema, def_mapper) + var_manager = HedVariableManager(self.hed_strings, self.hed_schema, self.definitions) self.assertIsInstance(var_manager, HedVariableManager, "Constructor should create a HedVariableManager from a tabular input") self.assertEqual(len(var_manager.context_manager.hed_strings), len(var_manager.context_manager.contexts), @@ -29,9 +28,7 @@ def test_constructor(self): self.assertFalse(var_manager._variable_type_map, "constructor has empty map") def test_add_type_variable(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=False) - def_mapper = self.input_data._def_mapper - var_manager = HedVariableManager(hed_strings, self.hed_schema, def_mapper) + var_manager = HedVariableManager(self.hed_strings, self.hed_schema, self.definitions) self.assertFalse(var_manager._variable_type_map, "constructor has empty map") var_manager.add_type_variable("Condition-variable") self.assertEqual(len(var_manager._variable_type_map), 1, @@ -66,10 +63,10 @@ def test_get_type_variable(self): def_mapper = self.input_data._def_mapper var_manager = HedVariableManager(hed_strings, self.hed_schema, def_mapper) var_manager.add_type_variable("Condition-variable") - type_var = var_manager.get_type_variable_map("condition-variable") + type_var = var_manager.get_type_variable("condition-variable") self.assertIsInstance(type_var, HedTypeVariable, "get_type_variable returns a HedTypeVariable if the key exists") - type_var = var_manager.get_type_variable_map("baloney") + type_var = var_manager.get_type_variable("baloney") self.assertIsNone(type_var, "get_type_variable returns None if the key does not exist") def test_get_type_variable_def_names(self): @@ -90,15 +87,15 @@ def test_get_variable_type_map(self): def_mapper = self.input_data._def_mapper var_manager = HedVariableManager(hed_strings, self.hed_schema, def_mapper) var_manager.add_type_variable("Condition-variable") - this_map1 = var_manager.get_type_variable_map("condition-variable") - self.assertIsInstance(this_map1, HedTypeVariable, + this_var = var_manager.get_type_variable("condition-variable") + self.assertIsInstance(this_var, HedTypeVariable, "get_type_variable_map returns a non-empty map when key lower case") - self.assertEqual(len(this_map1.type_variables), 3, + self.assertEqual(len(this_var.type_variables), 3, "get_type_variable_map map has right length when key lower case") - this_map2 = var_manager.get_type_variable_map("Condition-variable") - self.assertIsInstance(this_map2, HedTypeVariable, + this_var2 = var_manager.get_type_variable("Condition-variable") + self.assertIsInstance(this_var2, HedTypeVariable, "get_type_variable_map returns a non-empty map when key upper case") - self.assertEqual(len(this_map2.type_variables), 3, + self.assertEqual(len(this_var2.type_variables), 3, "get_type_variable_map map has right length when key upper case") def test_get_type_variable_factor(self): @@ -116,8 +113,8 @@ def test_get_type_variable_factor(self): def test_type_variables(self): hed_strings = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=False) - def_mapper = self.input_data._def_mapper - var_manager = HedVariableManager(hed_strings, self.hed_schema, def_mapper) + definitions = self.input_data.get_definitions + var_manager = HedVariableManager(hed_strings, self.hed_schema, definitions) vars1 = var_manager.type_variables self.assertFalse(vars1, "type_variables is empty if no types have been added") 
var_manager.add_type_variable("Condition-variable") diff --git a/tests/tools/analysis/test_hed_variable_summary.py b/tests/tools/analysis/test_hed_variable_summary.py index 63f4ae1a1..6fdb8c090 100644 --- a/tests/tools/analysis/test_hed_variable_summary.py +++ b/tests/tools/analysis/test_hed_variable_summary.py @@ -7,106 +7,42 @@ class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): + def setUp(self): + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../../data/bids/eeg_ds003654s_hed')) + events_path = os.path.realpath(os.path.join(bids_root_path, + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') + self.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") schema = load_schema_version(xml_version="8.1.0") - cls.test_strings1 = [f"Sensory-event,(Def/Cond1,(Red, Blue, Condition-variable/Trouble),Onset)," - f"(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", - '(Def/Cond1, Offset)', - 'White, Black, Condition-variable/Wonder, Condition-variable/Fast', - '', - '(Def/Cond2, Onset)', - '(Def/Cond3/4.3, Onset)', - 'Arm, Leg, Condition-variable/Fast'] - cls.test_strings2 = [f"Def/Cond2, (Def/Cond6/4, Onset), (Def/Cond6/7.8, Onset), Def/Cond6/Alpha", - "Yellow", - "Def/Cond2, (Def/Cond6/4, Onset)", - "Def/Cond2, Def/Cond6/5.2 (Def/Cond6/7.8, Offset)", - "Def/Cond2, Def/Cond6/4"] - cls.test_strings3 = ['(Def/Cond3, Offset)'] - - def1 = HedString('(Condition-variable/Var1, Circle, Square)', hed_schema=schema) - def2 = HedString('(condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere)', hed_schema=schema) - def3 = HedString('(Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross)', - hed_schema=schema) - def4 = HedString('(Condition-variable, Apple, Banana)', hed_schema=schema) - def5 = HedString('(Condition-variable/Lumber, Apple, Banana)', hed_schema=schema) - def6 = HedString('(Condition-variable/Lumber, Label/#, Apple, Banana)', hed_schema=schema) - cls.defs = {'Cond1': DefinitionEntry('Cond1', def1, False, None), - 'Cond2': DefinitionEntry('Cond2', def2, False, None), - 'Cond3': DefinitionEntry('Cond3', def3, True, None), - 'Cond4': DefinitionEntry('Cond4', def4, False, None), - 'Cond5': DefinitionEntry('Cond5', def5, False, None), - 'Cond6': DefinitionEntry('Cond6', def6, True, None) - } - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/bids/eeg_ds003654s_hed')) events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') - cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") - cls.schema = schema - - def test_variable_summary_get_summaries(self): - hed_strings1 = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False) - definitions1 = self.input_data.get_definitions(as_strings=False).gathered_defs - var_manager1 = HedTypeVariable(HedContextManager(hed_strings1, self.schema), self.schema, definitions1) - var_summary1 = HedVariableSummary(variable_type="condition-variable") - 
         self.assertIsInstance(var_summary1, HedVariableSummary,
-                              "Constructor should create a HedVariableSummary")
-        summary1 = var_summary1.get_summaries(as_json=False)
-        self.assertIsInstance(summary1, dict, "get_summaries should return a dictionary when empty")
-        self.assertFalse(summary1, "get_summaries should create a empty dictionary before updates")
-        for man_var in var_manager1.get_variable_names():
-            var_factor = var_manager1.get_variable(man_var)
-            var_summary1.update_summary(var_factor)
-
-        summary1 = var_summary1.get_summaries(as_json=False)
-        self.assertIsInstance(summary1, dict, "get_summaries should return a dictionary")
-        self.assertEqual(len(summary1), 3, "get_summaries should have correct length when updated with tabular input")
+        input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events")
+        hed_strings1 = get_assembled_strings(input_data, hed_schema=schema, expand_defs=False)
+        definitions = self.input_data.get_definitions(as_strings=False).gathered_defs
+        self.var_type1 = HedTypeVariable(HedContextManager(hed_strings1, schema), schema, definitions,
+                                         variable_type='condition-variable')
 
-    def test_empty(self):
-        hed_strings1 = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False)
-        definitions1 = self.input_data.get_definitions(as_strings=False).gathered_defs
-        var_manager1 = HedTypeVariable(HedContextManager(hed_strings1, self.schema), self.schema, definitions1)
+    def test_get_summary(self):
         var_summary1 = HedVariableSummary(variable_type="condition-variable")
         self.assertIsInstance(var_summary1, HedVariableSummary,
                               "Constructor should create a HedVariableSummary")
-        summary1 = var_summary1.get_summaries(as_json=False)
-        self.assertIsInstance(summary1, dict, "get_summaries should return a dictionary when empty")
-        self.assertFalse(summary1, "get_summaries should create a empty dictionary before updates")
-        for man_var in var_manager1.get_variable_names():
-            var_factor = var_manager1.get_variable(man_var)
-            var_summary1.update_summary(var_factor)
-
-        summary1 = var_summary1.get_summaries(as_json=False)
-        self.assertIsInstance(summary1, dict, "get_summaries should return a dictionary")
-        self.assertEqual(len(summary1), 3, "get_summaries should have correct length when updated with tabular input")
-
-        hed_strings2 = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False)
-        definitions2 = self.input_data.get_definitions(as_strings=False).gathered_defs
-        var_manager2 = HedTypeVariable(HedContextManager(hed_strings2, self.schema), self.schema, definitions2)
-        var_summary2 = HedVariableSummary(variable_type="condition-variable")
-        for man_var in var_manager2.get_variable_names():
-            var_factor = var_manager2.get_variable(man_var)
-            var_summary2.update_summary(var_factor)
-
-        hed_strings2a = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False)
-        definitions2a = self.input_data.get_definitions(as_strings=False).gathered_defs
-        var_manager2a = HedTypeVariable(HedContextManager(hed_strings2a, self.schema), self.schema, definitions2a)
-        for man_var in var_manager2.get_variable_names():
-            var_factor = var_manager2a.get_variable(man_var)
-            var_summary2.update_summary(var_factor)
-
-        summary2 = var_summary2.get_summaries(as_json=False)
-        self.assertIsInstance(summary2, dict, "get_summaries should return a dictionary")
-        self.assertEqual(len(summary2), 3, "get_summaries should have correct length when updated with tabular input")
-        face_type1 = summary1["face-type"]
-        face_type2 = summary2["face-type"]
-        self.assertEqual(2*face_type1["number_type_events"], face_type2["number_type_events"],
-                         "get_summaries should have twice as many type events if the data is summarized twice")
+        summary1 = var_summary1.get_summary(as_json=False)
+        self.assertIsInstance(summary1, dict, "get_summary should return a dictionary when empty")
+        self.assertFalse(summary1, "get_summary should return an empty dictionary before updates")
+        var_summary1.update_summary(self.var_type1)
+        summary2 = var_summary1.get_summary(as_json=False)
+        self.assertEqual(len(summary2), 3)
+        self.assertEqual(summary2['repetition-type']['number_type_events'], 52)
+        var_summary1.update_summary(self.var_type1)
+        summary3 = var_summary1.get_summary(as_json=False)
+        self.assertEqual(len(summary3), 3)
+        self.assertEqual(summary3['repetition-type']['number_type_events'], 104)
 
 
 if __name__ == '__main__':
diff --git a/tests/tools/analysis/test_sidecar_summary.py b/tests/tools/analysis/test_sidecar_summary.py
deleted file mode 100644
index 74669ecd4..000000000
--- a/tests/tools/analysis/test_sidecar_summary.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import os
-import unittest
-from hed.models.sidecar import Sidecar
-from hed.tools.bids.bids_sidecar_file import BidsSidecarFile
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        cls.sidecar_path1 = os.path.join(os.path.dirname(os.path.realpath(__file__)),
-                                         '../../data/bids/eeg_ds003654s_hed/task-FacePerception_events.json')
-
-    def test_constructor(self):
-        sidecar1 = BidsSidecarFile(self.sidecar_path1)
-        self.assertEqual(sidecar1.suffix, 'events', "BidsSidecarFile should have correct name_suffix")
-        self.assertEqual(sidecar1.ext, '.json', "BidsSidecarFile should have correct ext")
-        self.assertEqual(len(sidecar1.entity_dict), 1, "BidsSidecarFile should have right number of entity_dict")
-        self.assertFalse(sidecar1.contents)
-
-        sidecar2 = BidsSidecarFile(self.sidecar_path1)
-        self.assertEqual(sidecar2.suffix, 'events', "BidsSidecarFile should have correct name_suffix")
-        self.assertEqual(sidecar2.ext, '.json', "BidsSidecarFile should have correct ext")
-        self.assertEqual(len(sidecar2.entity_dict), 1, "BidsSidecarFile should have right number of entity_dict")
-        self.assertFalse(sidecar2.contents, "BidsSidecarFile has no contents unless set")
-        sidecar2.set_contents()
-        self.assertIsInstance(sidecar2.contents, Sidecar, "BidsSidecarFile should contain a Sidecar")
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/tools/bids/test_bids_tabular_summary.py b/tests/tools/analysis/test_tabular_summary.py
similarity index 77%
rename from tests/tools/bids/test_bids_tabular_summary.py
rename to tests/tools/analysis/test_tabular_summary.py
index f27e555ce..ce006e167 100644
--- a/tests/tools/bids/test_bids_tabular_summary.py
+++ b/tests/tools/analysis/test_tabular_summary.py
@@ -1,7 +1,7 @@
 import unittest
 import os
 from hed.errors.exceptions import HedFileError
-from hed.tools import BidsTabularSummary, BidsFileDictionary
+from hed.tools import FileDictionary, TabularSummary
 from hed.tools import get_file_list, get_new_dataframe
 
 
@@ -21,17 +21,17 @@ def setUpClass(cls):
                                                'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv'))
 
     def test_constructor(self):
-        dict1 = BidsTabularSummary()
-        self.assertIsInstance(dict1, BidsTabularSummary,
+        dict1 = TabularSummary()
+        self.assertIsInstance(dict1, TabularSummary,
                               "BidsTabularSummary constructor is allowed to have no arguments")
         self.assertFalse(dict1.value_info)
 
-        dict2 = BidsTabularSummary(value_cols=['a', 'b', 'c'], name='baloney')
-        self.assertIsInstance(dict2, BidsTabularSummary, "BidsTabularSummary: multiple values are okay in constructor")
+        dict2 = TabularSummary(value_cols=['a', 'b', 'c'], name='baloney')
+        self.assertIsInstance(dict2, TabularSummary, "BidsTabularSummary: multiple values are okay in constructor")
         self.assertEqual(len(dict2.value_info.keys()), 3,
                          "BidsTabularSummary should have keys for each value column")
 
     def test_get_number_unique_values(self):
-        dict1 = BidsTabularSummary()
+        dict1 = TabularSummary()
         wh_df = get_new_dataframe(self.wh_events_path)
         dict1.update(wh_df)
         self.assertEqual(len(dict1.value_info.keys()), 0,
@@ -42,8 +42,22 @@ def test_get_number_unique_values(self):
         self.assertEqual(len(count_dict), 10, "get_number_unique should have the correct number of entries")
         self.assertEqual(count_dict['onset'], 199, "get_number_unique should have the right number of unique")
 
+    def test_get_summary(self):
+        dict1 = TabularSummary(value_cols=['letter'], skip_cols=['event_type'], name="Sternberg1")
+        stern_df = get_new_dataframe(self.stern_map_path)
+        dict1.update(stern_df)
+        self.assertEqual(len(dict1.value_info.keys()), 1,
+                         "TabularSummary value_info should have an entry for each value column")
+        self.assertEqual(len(dict1.categorical_info.keys()), len(stern_df.columns)-2,
+                         "TabularSummary categorical_info should have the columns minus skip and value columns")
+        summary1 = dict1.get_summary(as_json=False)
+        self.assertIsInstance(summary1, dict)
+        self.assertEqual(len(summary1), 3)
+        summary2 = dict1.get_summary(as_json=True).replace('"', '')
+        self.assertIsInstance(summary2, str)
+
     def test__str__(self):
-        t_map = BidsTabularSummary(name="My output")
+        t_map = TabularSummary(name="My output")
         t_map.update(self.stern_map_path)
         df = get_new_dataframe(self.stern_map_path)
         t_map.update(self.stern_map_path)
@@ -53,7 +67,7 @@ def test__str__(self):
         self.assertTrue(t_map_str, "__str__ returns a non-empty string when the map has content.")
 
     def test_update(self):
-        dict1 = BidsTabularSummary()
+        dict1 = TabularSummary()
         stern_df = get_new_dataframe(self.stern_map_path)
         dict1.update(stern_df)
         self.assertEqual(len(dict1.value_info.keys()), 0,
@@ -61,7 +75,7 @@ def test_update(self):
         self.assertEqual(len(dict1.categorical_info.keys()), len(stern_df.columns),
                          "BidsTabularSummary categorical_info should have all the columns if no restrictions")
 
-        dict2 = BidsTabularSummary(value_cols=['letter'], skip_cols=['event_type'])
+        dict2 = TabularSummary(value_cols=['letter'], skip_cols=['event_type'])
         dict2.update(stern_df)
         self.assertEqual(len(dict2.value_info.keys()), 1,
                          "BidsTabularSummary value_info should have letter value column")
@@ -77,8 +91,8 @@ def test_update(self):
                          "BidsTabularSummary value counts should update by column length each time update is called")
 
     def test_update_dict(self):
-        dict1 = BidsTabularSummary()
-        dict2 = BidsTabularSummary()
+        dict1 = TabularSummary()
+        dict2 = TabularSummary()
         stern_df_map = get_new_dataframe(self.stern_map_path)
         dict1.update(stern_df_map)
         dict2.update_summary(dict1)
@@ -95,9 +109,9 @@ def test_update_dict(self):
     def test_update_dict_with_value_cols(self):
         stern_df_test1 = get_new_dataframe(self.stern_test1_path)
         stern_df_test3 = get_new_dataframe(self.stern_test3_path)
-        dict1 = BidsTabularSummary(value_cols=['latency'])
+        dict1 = TabularSummary(value_cols=['latency'])
         dict1.update(stern_df_test3)
-        dict2 = BidsTabularSummary(value_cols=['latency'])
+        dict2 = TabularSummary(value_cols=['latency'])
         dict2.update(stern_df_test1)
         dict2.update_summary(dict1)
         dict1.update(stern_df_test1)
@@ -109,9 +123,9 @@ def test_update_dict_with_value_cols(self):
     def test_update_dict_with_bad_value_cols(self):
         stern_df_test1 = get_new_dataframe(self.stern_test1_path)
         stern_df_test3 = get_new_dataframe(self.stern_test3_path)
-        dict1 = BidsTabularSummary(value_cols=['latency'])
+        dict1 = TabularSummary(value_cols=['latency'])
         dict1.update(stern_df_test3)
-        dict3 = BidsTabularSummary()
+        dict3 = TabularSummary()
         dict3.update(stern_df_test1)
         try:
             dict1.update_summary(dict3)
@@ -124,9 +138,9 @@ def test_update_dict_with_bad_value_cols(self):
 
     def test_update_dict_bad_skip_col(self):
         stern_test3 = get_new_dataframe(self.stern_test3_path)
-        dict1 = BidsTabularSummary(skip_cols=['latency'])
+        dict1 = TabularSummary(skip_cols=['latency'])
         dict1.update(stern_test3)
-        dict2 = BidsTabularSummary(value_cols=['latency'])
+        dict2 = TabularSummary(value_cols=['latency'])
         dict2.update(stern_test3)
         try:
             dict2.update_summary(dict1)
@@ -139,34 +153,34 @@ def test_update_dict_bad_skip_col(self):
 
     def test_get_columns_info(self):
         df = get_new_dataframe(self.stern_test2_path)
-        col_info = BidsTabularSummary.get_columns_info(df)
+        col_info = TabularSummary.get_columns_info(df)
         self.assertIsInstance(col_info, dict, "get_columns_info should return a dictionary")
         self.assertEqual(len(col_info.keys()), len(df.columns),
                          "get_columns_info should return a dictionary with a key for each column")
 
     def test_get_columns_info_skip_columns(self):
         df = get_new_dataframe(self.stern_test2_path)
-        col_info = BidsTabularSummary.get_columns_info(df, ['latency'])
+        col_info = TabularSummary.get_columns_info(df, ['latency'])
         self.assertIsInstance(col_info, dict, "get_columns_info should return a dictionary")
         self.assertEqual(len(col_info.keys()), len(df.columns) - 1,
                          "get_columns_info should return a dictionary with a key for each column included")
-        col_info = BidsTabularSummary.get_columns_info(df, list(df.columns.values))
+        col_info = TabularSummary.get_columns_info(df, list(df.columns.values))
        self.assertIsInstance(col_info, dict, "get_columns_info should return a dictionary")
         self.assertFalse(col_info,
                          "get_columns_info should return a dictionary with a key for each column included")
 
     def test_make_combined_dicts(self):
         files_bids = get_file_list(self.bids_base_dir, extensions=[".tsv"], name_suffix="_events")
-        file_dict = BidsFileDictionary("my name", files_bids)
-        dicts_all1, dicts1 = BidsTabularSummary.make_combined_dicts(file_dict)
-        self.assertTrue(isinstance(dicts_all1, BidsTabularSummary),
+        file_dict1 = FileDictionary("my name", files_bids)
+        dicts_all1, dicts1 = TabularSummary.make_combined_dicts(file_dict1.file_dict)
+        self.assertTrue(isinstance(dicts_all1, TabularSummary),
                         "make_combined_dicts should return a BidsTabularSummary")
         self.assertTrue(isinstance(dicts1, dict),
                         "make_combined_dicts should also return a dictionary of file names")
         self.assertEqual(6, len(dicts1), "make_combined_dicts should return correct number of file names")
         self.assertEqual(10, len(dicts_all1.categorical_info),
                          "make_combined_dicts should return right number of entries")
-        dicts_all2, dicts2 = BidsTabularSummary.make_combined_dicts(file_dict,
-                                                                    skip_cols=["onset", "duration", "sample"])
-        self.assertTrue(isinstance(dicts_all2, BidsTabularSummary),
+        dicts_all2, dicts2 = TabularSummary.make_combined_dicts(file_dict1.file_dict,
+                                                                skip_cols=["onset", "duration", "sample"])
+        self.assertTrue(isinstance(dicts_all2, TabularSummary),
                         "make_combined_dicts should return a BidsTabularSummary")
         self.assertTrue(isinstance(dicts2, dict),
                         "make_combined_dicts should also return a dictionary of file names")
         self.assertEqual(6, len(dicts2), "make_combined_dicts should return correct number of file names")
diff --git a/tests/tools/bids/test_bids_file_dictionary.py b/tests/tools/bids/test_bids_file_dictionary.py
index a72b48074..5c8e454e2 100644
--- a/tests/tools/bids/test_bids_file_dictionary.py
+++ b/tests/tools/bids/test_bids_file_dictionary.py
@@ -40,7 +40,7 @@ def test_make_dict(self):
     def test_make_query(self):
         dict1 = BidsFileDictionary("My name", self.file_list, entities=('sub', 'run'))
         results1 = dict1.make_query(query_dict={'sub': '*', 'run': '*'})
-        self.assertEqual(len(results1), len(dict1.file_dict), "make_query should return all of the entries when *.")
+        self.assertEqual(len(results1), len(dict1._file_dict), "make_query should return all of the entries when *.")
         results2 = dict1.make_query(query_dict={'sub': '*', 'run': ['1']})
         self.assertEqual(len(results2), 2, "make_query should return the right number of entries.")
         results3 = dict1.make_query(query_dict={'sub': '*', 'run': ['*']})
@@ -50,7 +50,7 @@ def test_make_query(self):
         results5 = dict1.make_query(query_dict={'sub': '*', 'run': []})
         self.assertFalse(len(results5), "make_query be empty if the list for one of the entities is empty.")
         results6 = dict1.make_query(query_dict={'sub': '*'})
-        self.assertEqual(len(results6), len(dict1.file_dict), "make_query should return all of the entries when *.")
+        self.assertEqual(len(results6), len(dict1._file_dict), "make_query should return all of the entries when *.")
 
     def test_match_query(self):
         entity_dict = {'sub': '01', 'task': 'tempTask', 'run': '2'}
@@ -80,7 +80,7 @@ def test_split_by_entity(self):
 
     def test_split_dict_by_entity(self):
         dict1 = BidsFileDictionary("My name", self.file_list, entities=('sub', 'run'))
-        dist1_split, leftovers = BidsFileDictionary._split_dict_by_entity(dict1.file_dict, 'run')
+        dist1_split, leftovers = BidsFileDictionary._split_dict_by_entity(dict1._file_dict, 'run')
         self.assertIsInstance(dist1_split, dict, "split_by_entity returns a dictionary")
         self.assertEqual(3, len(dist1_split), 'split_by_entity should return the correct number of items')
         for value in dist1_split.values():
diff --git a/tests/tools/bids/test_bids_file_group.py b/tests/tools/bids/test_bids_file_group.py
index a7dc8eeb5..37956ef22 100644
--- a/tests/tools/bids/test_bids_file_group.py
+++ b/tests/tools/bids/test_bids_file_group.py
@@ -1,6 +1,7 @@
 import os
 import unittest
 from hed.schema.hed_schema_io import load_schema
+from hed.tools.analysis.tabular_summary import TabularSummary
 from hed.tools.bids.bids_file_group import BidsFileGroup
 from hed.validator.hed_validator import HedValidator
 
@@ -38,6 +39,15 @@ def test_validator(self):
         self.assertEqual(len(validation_issues), 6,
                          "BidsFileGroup should have 2 validation warnings for missing columns")
 
+    def test_summarize(self):
+        events = BidsFileGroup(self.root_path)
+        info = events.summarize()
+        self.assertIsInstance(info, TabularSummary, "summarize returns a TabularSummary")
+        self.assertEqual(len(info.categorical_info), 10, "summarize info has entries for all columns if none are skipped")
+        info2 = events.summarize(skip_cols=['onset', 'sample'])
+        self.assertEqual(len(info2.categorical_info), len(info.categorical_info)-2,
+                         "summarize info has two fewer entries if two columns are skipped")
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/tools/test_hed_logger.py b/tests/tools/util/test_hed_logger.py
similarity index 100%
rename from tests/tools/test_hed_logger.py
rename to tests/tools/util/test_hed_logger.py
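
A minimal usage sketch of the renamed TabularSummary class (not part of the diff), assuming only the calls exercised by the tests above: the constructor with value_cols/skip_cols/name, update with a dataframe or tsv path, update_summary with another summary, and get_summary(as_json=...). The event file paths are placeholders.

    from hed.tools import TabularSummary

    # Summarize the columns of tab-separated event files (placeholder paths).
    summary = TabularSummary(value_cols=['latency'], skip_cols=['onset', 'sample'], name="example")
    summary.update('run1_events.tsv')            # accepts a dataframe or a .tsv path, as in the tests
    other = TabularSummary(value_cols=['latency'], skip_cols=['onset', 'sample'])
    other.update('run2_events.tsv')
    summary.update_summary(other)                # merging requires matching value/skip columns
    print(summary.get_summary(as_json=False))    # dictionary keyed by column name
    print(summary.get_summary(as_json=True))     # the same information as a JSON string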