Skip to content

Commit

Permalink
Moved utils into tools (#533)
Browse files Browse the repository at this point in the history
* Added get_definitions to BaseInput

* Wrote tests for onset_manager creation

* Started work on the condition manager

* Partial implementation of condition_manager

* Updated the condition variable manager -- still incomplete

* Updated the variable manager before changing levels

* Updated the variable summary function

* Updated requirements to use >=

* Updated the docs -- still failing to generate all docs files

* Restored also_gather_defs

* Updated the HED factorization

* Reorganized the variable manager and variable summary

* Updated the api documentation

* Updated the filter concept

* Cleaned up the unit tests for the variable manager

* Updated the variable manager

* Got the unittests to run again

* Minor updates to assembly

* Added exclude_dirs option to get_dir_dictionary

* Updated Bids File Summary to not require entities

* Updated the init for some utilities

* Moved the util directory under tools

* Updated the summary output for design matrices

* Revising the summary format

* Minor doc updates plus elimination of BidsTabularSummary
  • Loading branch information
VisLab authored Sep 8, 2022
1 parent 13dd1bc commit 5b1d6d5
Show file tree
Hide file tree
Showing 32 changed files with 828 additions and 922 deletions.
435 changes: 185 additions & 250 deletions hed/errors/error_reporter.py

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions hed/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
from .analysis.hed_type_variable import HedTypeVariable
from .analysis.hed_type_factors import HedTypeFactors
from .analysis.hed_variable_summary import HedVariableCounts, HedVariableSummary
from .analysis.definition_manager import DefinitionManager
from .analysis.hed_definition_manager import HedDefinitionManager
from .analysis.file_dictionary import FileDictionary
from .analysis.key_map import KeyMap
from .analysis.hed_context_manager import OnsetGroup, HedContextManager
from .analysis.tabular_summary import TabularSummary
from .analysis.tag_summary import TagSummary
from .analysis.annotation_util import \
check_df_columns, extract_tags, generate_sidecar_entry, hed_to_df, df_to_hed, merge_hed_dict
Expand All @@ -21,9 +22,7 @@
from .bids.bids_sidecar_file import BidsSidecarFile
from .bids.bids_tabular_dictionary import BidsTabularDictionary
from .bids.bids_tabular_file import BidsTabularFile
from .bids.bids_tabular_summary import BidsTabularSummary

from .analysis.tabular_reports import report_diffs
from .util.hed_logger import HedLogger
from .util.data_util import get_new_dataframe, get_value_dict, replace_values, reorder_columns
from .util.io_util import check_filename, generate_filename, extract_suffix_path, get_file_list, make_path
Expand Down
21 changes: 13 additions & 8 deletions hed/tools/analysis/file_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self, collection_name, file_list, key_indices=(0, 2), separator='_'
"""
self.collection_name = collection_name
self.file_dict = {}
self._file_dict = {}
self.create_file_dict(file_list, key_indices, separator)

@property
Expand All @@ -43,12 +43,17 @@ def name(self):
@property
def key_list(self):
""" Keys in this dictionary. """
return list(self.file_dict.keys())
return list(self._file_dict.keys())

@property
def file_dict(self):
""" Dictionary of path values in this dictionary. """
return self._file_dict

@property
def file_list(self):
""" List of path values in this dictionary. """
return list(self.file_dict.values())
return list(self._file_dict.values())

def create_file_dict(self, file_list, key_indices, separator):
""" Create new dict based on key indices.
Expand All @@ -60,7 +65,7 @@ def create_file_dict(self, file_list, key_indices, separator):
"""
if key_indices:
self.file_dict = self.make_file_dict(file_list, key_indices=key_indices, separator=separator)
self._file_dict = self.make_file_dict(file_list, key_indices=key_indices, separator=separator)

def get_file_path(self, key):
""" Return file path corresponding to key.
Expand All @@ -72,7 +77,7 @@ def get_file_path(self, key):
str: File path.
"""
return self.file_dict.get(key, None)
return self._file_dict.get(key, None)

def iter_files(self):
""" Iterator over the files in this dictionary.
Expand All @@ -82,7 +87,7 @@ def iter_files(self):
- file: File path.
"""
for key, file in self.file_dict.items():
for key, file in self._file_dict.items():
yield key, file

def key_diffs(self, other_dict):
Expand All @@ -95,7 +100,7 @@ def key_diffs(self, other_dict):
list: The symmetric difference of the keys in this dictionary and the other one.
"""
diffs = set(self.file_dict.keys()).symmetric_difference(set(other_dict.file_dict.keys()))
diffs = set(self._file_dict.keys()).symmetric_difference(set(other_dict._file_dict.keys()))
return list(diffs)

def output_files(self, title=None, logger=None):
Expand All @@ -115,7 +120,7 @@ def output_files(self, title=None, logger=None):
output_list = []
if title:
output_list.append(f"{title} ({len(self.key_list)} files)")
for key, value in self.file_dict.items():
for key, value in self._file_dict.items():
basename = os.path.basename(self.get_file_path(key))
output_list.append(f"{key}: {basename}")
if logger:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
from hed.schema import load_schema_version
from hed.models import HedString, HedTag, DefinitionEntry
from hed.models import HedString, HedTag, DefinitionEntry, DefMapper
from hed.tools.analysis.hed_context_manager import HedContextManager


class DefinitionManager:
class HedDefinitionManager:

def __init__(self, definitions, hed_schema, variable_type='condition-variable'):
""" Create a definition manager for a type of variable.
Args:
definitions (dict): A dictionary of DefinitionEntry objects.
definitions (dict or DefMapper): A dictionary of DefinitionEntry objects.
hed_schema (Hedschema or HedSchemaGroup): The schema used for parsing.
variable_type (str): Lower-case string giving the type of HED variable.
"""

self.variable_type = variable_type.lower()
self.hed_schema = hed_schema
self.definitions = definitions
if isinstance(definitions, DefMapper):
self.definitions = definitions.gathered_defs
elif isinstance(definitions, dict):
self.definitions = definitions
else:
self.definitions = {}
self.variable_map = {} # maps def names to conditions.
self._extract_variable_map()

Expand Down Expand Up @@ -87,7 +92,7 @@ def get_def_names(item, no_value=True):
names = [tag.extension_or_value_portion.lower() for tag in item.get_all_tags() if 'def' in tag.tag_terms]
if no_value:
for index, name in enumerate(names):
name, name_value = DefinitionManager.split_name(name)
name, name_value = HedDefinitionManager.split_name(name)
names[index] = name
return names

Expand Down Expand Up @@ -130,7 +135,7 @@ def remove_defs(hed_strings):
for i in range(len(hed_strings)):
def_groups[i] = []
for i, hed in enumerate(hed_strings):
def_groups[i] = DefinitionManager.extract_defs(hed)
def_groups[i] = HedDefinitionManager.extract_defs(hed)
return def_groups

@staticmethod
Expand Down Expand Up @@ -169,7 +174,7 @@ def extract_defs(hed):
'cond2': DefinitionEntry('Cond2', def2, False, None),
'cond3': DefinitionEntry('Cond3', def3, True, None),
'cond4': DefinitionEntry('Cond4', def4, False, None)}
def_man = DefinitionManager(definitions, schema)
def_man = HedDefinitionManager(definitions, schema)
a = def_man.get_def_names(HedTag('Def/Cond3/4', hed_schema=schema))
b = def_man.get_def_names(HedString('(Def/Cond3/5,(Red, Blue))', hed_schema=schema))
c = def_man.get_def_names(HedString('(Def/Cond3/6,(Red, Blue, Def/Cond1), Def/Cond2)', hed_schema=schema))
4 changes: 1 addition & 3 deletions hed/tools/analysis/hed_filters.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from hed.models import DefMapper
from hed import HedTag
from hed.models.definition_dict import DefTagNames
from hed.errors import get_printable_issue_string


class StringOp:
def __init__(self, filter_name):
self.filter_name = filter_name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pandas as pd


class Query:
class HedQuery:
def __init__(self, query):
self.name = query['name']
self.query_type = query['query_type']
Expand All @@ -14,7 +14,7 @@ def evaluate(self, hed_string_obj):
return self.expression.search_hed_string(hed_string_obj)


class QueryManager:
class HedQueryManager:

def __init__(self, query_list):
self.query_list = query_list
Expand All @@ -34,14 +34,14 @@ def parse(self, hed_string_obj):


if __name__ == '__main__':
qlist = [Query({'name': 'cond_1', 'query_type': 'condition', 'query_str': 'Condition-variable'}),
Query({'name': 'tag_1', 'query_type': 'get_tag', 'query_str': 'Sensory-presentation'})]
qlist = [HedQuery({'name': 'cond_1', 'query_type': 'condition', 'query_str': 'Condition-variable'}),
HedQuery({'name': 'tag_1', 'query_type': 'get_tag', 'query_str': 'Sensory-presentation'})]

schema = load_schema_version(xml_version="8.0.0")
test_strings = [HedString('Condition-variable/Test-cond', hed_schema=schema),
HedString('Visual-presentation', hed_schema=schema),
HedString('Agent-action, (Move, Hand)', hed_schema=schema)]
q_parser = QueryManager(qlist)
q_parser = HedQueryManager(qlist)
col_names = q_parser.get_column_names()
print(f"Column names:{str(col_names)}")

Expand All @@ -50,4 +50,3 @@ def parse(self, hed_string_obj):
result[index] = q_parser.parse(obj)

df = pd.DataFrame(result, columns=col_names)
print("toHere")
32 changes: 16 additions & 16 deletions hed/tools/analysis/hed_type_factors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,38 @@


class HedTypeFactors:
""" Holds index of positions for type_variables, defined and otherwise. """
""" Holds index of positions for a variable type for one tabular file. """

ALLOWED_ENCODINGS = ("categorical", "one-hot")

def __init__(self, name, number_elements, variable_type="condition-variable"):
def __init__(self, variable_value, number_elements, variable_type="condition-variable"):
""" Constructor for HedTypeFactors.
Args:
name (str): Name of the variable summarized by this class.
variable_value (str): The value of the type summarized by this class.
number_elements (int): Number of elements in the data column
variable_type (str): Lowercase string corresponding to a HED tag which has a takes value child.
"""

self.variable_name = name
self.variable_value = variable_value
self.number_elements = number_elements
self.variable_type = variable_type.lower()
self.levels = {}
self.direct_indices = {}

def __str__(self):
return f"{self.variable_name}[{self.variable_type}]: {self.number_elements} elements " + \
return f"{self.variable_value}[{self.variable_type}]: {self.number_elements} elements " + \
f"{str(self.levels)} levels {len(self.direct_indices)} references"

def get_factors(self, factor_encoding="one-hot"):
df = pd.DataFrame(0, index=range(self.number_elements), columns=[self.variable_name])
df.loc[list(self.direct_indices.keys()), [self.variable_name]] = 1
df = pd.DataFrame(0, index=range(self.number_elements), columns=[self.variable_value])
df.loc[list(self.direct_indices.keys()), [self.variable_value]] = 1
if not self.levels:
return df

levels = list(self.levels.keys())
levels_list = [f"{self.variable_name}.{level}" for level in levels]
levels_list = [f"{self.variable_value}.{level}" for level in levels]
df_levels = pd.DataFrame(0, index=range(self.number_elements), columns=levels_list)
for index, level in enumerate(levels):
index_keys = list(self.levels[level].keys())
Expand All @@ -45,22 +45,22 @@ def get_factors(self, factor_encoding="one-hot"):
sum_factors = factors.sum(axis=1)
if sum_factors.max() > 1:
raise HedFileError("MultipleFactorSameEvent",
f"{self.variable_name} has multiple occurrences at index{sum_factors.idxmax()}", "")
f"{self.variable_value} has multiple occurrences at index{sum_factors.idxmax()}", "")
if factor_encoding == "categorical":
return self.factors_to_vector(factors, levels)
else:
raise ValueError("BadFactorEncoding",
f"{factor_encoding} is not in the allowed encodings: {str(self.ALLOWED_ENDCODINGS)}")
f"{factor_encoding} is not in the allowed encodings: {str(self.ALLOWED_ENCODINGS)}")

def factors_to_vector(self, factors, levels):
df = pd.DataFrame('n/a', index=range(len(factors.index)), columns=[self.variable_name])
df = pd.DataFrame('n/a', index=range(len(factors.index)), columns=[self.variable_value])
for index, row in factors.iterrows():
if row[self.variable_name]:
df.at[index, self.variable_name] = self.variable_name
if row[self.variable_value]:
df.at[index, self.variable_value] = self.variable_value
continue
for level in levels:
if row[f"{self.variable_name}.{level}"]:
df.at[index, self.variable_name] = level
if row[f"{self.variable_value}.{level}"]:
df.at[index, self.variable_value] = level
break
return df

Expand All @@ -72,7 +72,7 @@ def get_summary(self, full=True):
for index, item in cond.items():
count_list[index] = count_list[index] + 1
number_events, number_multiple, max_multiple = self.count_events(count_list)
summary = {'name': self.variable_name, 'variable_type': self.variable_type, 'levels': len(self.levels.keys()),
summary = {'name': self.variable_value, 'variable_type': self.variable_type, 'levels': len(self.levels.keys()),
'direct_references': len(self.direct_indices.keys()),
'total_events': self.number_elements, 'number_type_events': number_events,
'number_multiple_events': number_multiple, 'multiple_event_maximum': max_multiple}
Expand Down
18 changes: 8 additions & 10 deletions hed/tools/analysis/hed_type_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@
from hed import HedTag
from hed.models import HedGroup
from hed.schema import load_schema_version
from hed.tools.analysis.definition_manager import DefinitionManager
from hed.tools.analysis.hed_definition_manager import HedDefinitionManager
from hed.tools.analysis.hed_context_manager import HedContextManager
from hed.tools.analysis.hed_type_factors import HedTypeFactors


class HedTypeVariable:

def __init__(self, onset_manager, hed_schema, hed_definitions, variable_type="condition-variable"):
""" Create a variable manager for an events file.
def __init__(self, context_manager, hed_schema, hed_definitions, variable_type="condition-variable"):
""" Create a variable manager for one type-variable for one tabular file.
Args:
onset_manager (HedContextManager): A list of HED strings.
context_manager (HedContextManager): A list of HED strings.
hed_schema (HedSchema or HedSchemaGroup): The HED schema to use for processing.
hed_definitions (dict): A dictionary of DefinitionEntry objects.
variable_type (str): Lowercase short form of the variable to be managed.
Expand All @@ -25,9 +25,9 @@ def __init__(self, onset_manager, hed_schema, hed_definitions, variable_type="co
"""

self.variable_type = variable_type.lower()
self.definitions = DefinitionManager(hed_definitions, hed_schema, variable_type=variable_type)
hed_strings = onset_manager.hed_strings
hed_contexts = onset_manager.contexts
self.definitions = HedDefinitionManager(hed_definitions, hed_schema, variable_type=variable_type)
hed_strings = context_manager.hed_strings
hed_contexts = context_manager.contexts
self.number_events = len(hed_strings)
self._variable_map = {}
self._extract_variables(hed_strings, hed_contexts)
Expand All @@ -38,7 +38,7 @@ def get_variable(self, var_name):

@property
def type_variables(self):
return list(self._variable_map.keys())
return set(self._variable_map.keys())

def get_variable_def_names(self):
tag_list = []
Expand All @@ -52,7 +52,6 @@ def get_variable_type_map(self, type_name):
def get_variable_names(self):
return list(self._variable_map.keys())


def summarize(self, as_json=False):
summary = self._variable_map.copy()
for var_name, var_sum in summary.items():
Expand Down Expand Up @@ -229,4 +228,3 @@ def _update_variables(self, tag_list, index):
df_no_hot.to_csv("D:/wh_conditions_no_hot.csv", sep='\t', index=False)
with open('d:/wh_summarylong.json', 'w') as f:
json.dump(summary, f, indent=4)
print("to here")
Loading

0 comments on commit 5b1d6d5

Please sign in to comment.