Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moved utils into tools #533

Merged
merged 35 commits into from
Sep 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
ea71477
Added get_definitions to BaseInput
VisLab Jul 3, 2022
d556aac
Wrote tests for onset_manager creation
VisLab Jul 4, 2022
2071039
Started work on the condition manager
VisLab Jul 4, 2022
db5e771
Merge branch 'develop' of http://github.com/hed-standard/hed-python i…
VisLab Jul 4, 2022
63fc2f3
Partial implementation of condition_manager
VisLab Jul 5, 2022
f6e96ec
Updated the condition variable manager -- still incomplete
VisLab Jul 9, 2022
205ae09
Updated the variable manager before changing levels
VisLab Jul 11, 2022
aba738e
Updated the variable summary function
VisLab Jul 12, 2022
72f06cb
Merge branch 'develop' of http://github.com/hed-standard/hed-python i…
VisLab Jul 12, 2022
f962d42
Updated requirements to use >=
VisLab Jul 12, 2022
7dc68b6
Updated the docs -- still failing to generate all docs files
VisLab Jul 12, 2022
7df6a2d
Merge branch 'develop' of http://github.com/hed-standard/hed-python i…
VisLab Jul 12, 2022
015fb78
Restored also_gather_defs
VisLab Jul 12, 2022
318e8cb
Updated the HED factorization
VisLab Jul 13, 2022
1f75f75
Reorganized the variable manager and variable summary
VisLab Aug 1, 2022
57eb38f
Updated tabular input for merging
VisLab Aug 1, 2022
d563243
Updated the api documentation
VisLab Aug 1, 2022
8cecf2b
Updated the filter concept
VisLab Aug 2, 2022
b82e755
Cleaned up the unit tests for the variable manager
VisLab Aug 4, 2022
d4b4ffd
Updated the variable manager
VisLab Aug 4, 2022
188098d
Trying to commit and update
VisLab Aug 4, 2022
541a943
Got the unittests to run again
VisLab Aug 4, 2022
6c6645f
Minor updates to assembly
VisLab Aug 6, 2022
c245dff
Added exclude_dirs option to get_dir_dictionary
VisLab Aug 16, 2022
1b333e4
Merge branch 'develop' of http://github.com/hed-standard/hed-python i…
VisLab Aug 16, 2022
83c224a
Updated Bids File Summary to not require entities
VisLab Aug 18, 2022
b2121d0
Updated the init for some utilities
VisLab Aug 24, 2022
ce144cc
Merging upstream
VisLab Aug 24, 2022
fae5f28
Moved the util directory under tools
VisLab Aug 27, 2022
354851b
Resolving conflicts for moving util directory
VisLab Aug 27, 2022
d6897e3
Merge branch 'develop' of http://github.com/hed-standard/hed-python i…
VisLab Aug 28, 2022
6f51c9e
Updated the summary output for design matrices
VisLab Aug 31, 2022
12763b4
Merge branch 'develop' of http://github.com/hed-standard/hed-python i…
VisLab Aug 31, 2022
6079c89
Revising the summary format
VisLab Sep 3, 2022
c535ea0
Minor doc updates plus elimination of BidsTabularSummary
VisLab Sep 8, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
435 changes: 185 additions & 250 deletions hed/errors/error_reporter.py

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions hed/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
from .analysis.hed_type_variable import HedTypeVariable
from .analysis.hed_type_factors import HedTypeFactors
from .analysis.hed_variable_summary import HedVariableCounts, HedVariableSummary
from .analysis.definition_manager import DefinitionManager
from .analysis.hed_definition_manager import HedDefinitionManager
from .analysis.file_dictionary import FileDictionary
from .analysis.key_map import KeyMap
from .analysis.hed_context_manager import OnsetGroup, HedContextManager
from .analysis.tabular_summary import TabularSummary
from .analysis.tag_summary import TagSummary
from .analysis.annotation_util import \
check_df_columns, extract_tags, generate_sidecar_entry, hed_to_df, df_to_hed, merge_hed_dict
Expand All @@ -21,9 +22,7 @@
from .bids.bids_sidecar_file import BidsSidecarFile
from .bids.bids_tabular_dictionary import BidsTabularDictionary
from .bids.bids_tabular_file import BidsTabularFile
from .bids.bids_tabular_summary import BidsTabularSummary

from .analysis.tabular_reports import report_diffs
from .util.hed_logger import HedLogger
from .util.data_util import get_new_dataframe, get_value_dict, replace_values, reorder_columns
from .util.io_util import check_filename, generate_filename, extract_suffix_path, get_file_list, make_path
Expand Down
21 changes: 13 additions & 8 deletions hed/tools/analysis/file_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self, collection_name, file_list, key_indices=(0, 2), separator='_'

"""
self.collection_name = collection_name
self.file_dict = {}
self._file_dict = {}
self.create_file_dict(file_list, key_indices, separator)

@property
Expand All @@ -43,12 +43,17 @@ def name(self):
@property
def key_list(self):
""" Keys in this dictionary. """
return list(self.file_dict.keys())
return list(self._file_dict.keys())

@property
def file_dict(self):
""" Dictionary of path values in this dictionary. """
return self._file_dict

@property
def file_list(self):
""" List of path values in this dictionary. """
return list(self.file_dict.values())
return list(self._file_dict.values())

def create_file_dict(self, file_list, key_indices, separator):
""" Create new dict based on key indices.
Expand All @@ -60,7 +65,7 @@ def create_file_dict(self, file_list, key_indices, separator):

"""
if key_indices:
self.file_dict = self.make_file_dict(file_list, key_indices=key_indices, separator=separator)
self._file_dict = self.make_file_dict(file_list, key_indices=key_indices, separator=separator)

def get_file_path(self, key):
""" Return file path corresponding to key.
Expand All @@ -72,7 +77,7 @@ def get_file_path(self, key):
str: File path.

"""
return self.file_dict.get(key, None)
return self._file_dict.get(key, None)

def iter_files(self):
""" Iterator over the files in this dictionary.
Expand All @@ -82,7 +87,7 @@ def iter_files(self):
- file: File path.

"""
for key, file in self.file_dict.items():
for key, file in self._file_dict.items():
yield key, file

def key_diffs(self, other_dict):
Expand All @@ -95,7 +100,7 @@ def key_diffs(self, other_dict):
list: The symmetric difference of the keys in this dictionary and the other one.

"""
diffs = set(self.file_dict.keys()).symmetric_difference(set(other_dict.file_dict.keys()))
diffs = set(self._file_dict.keys()).symmetric_difference(set(other_dict._file_dict.keys()))
return list(diffs)

def output_files(self, title=None, logger=None):
Expand All @@ -115,7 +120,7 @@ def output_files(self, title=None, logger=None):
output_list = []
if title:
output_list.append(f"{title} ({len(self.key_list)} files)")
for key, value in self.file_dict.items():
for key, value in self._file_dict.items():
basename = os.path.basename(self.get_file_path(key))
output_list.append(f"{key}: {basename}")
if logger:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
from hed.schema import load_schema_version
from hed.models import HedString, HedTag, DefinitionEntry
from hed.models import HedString, HedTag, DefinitionEntry, DefMapper
from hed.tools.analysis.hed_context_manager import HedContextManager


class DefinitionManager:
class HedDefinitionManager:

def __init__(self, definitions, hed_schema, variable_type='condition-variable'):
""" Create a definition manager for a type of variable.

Args:
definitions (dict): A dictionary of DefinitionEntry objects.
definitions (dict or DefMapper): A dictionary of DefinitionEntry objects.
hed_schema (HedSchema or HedSchemaGroup): The schema used for parsing.
variable_type (str): Lower-case string giving the type of HED variable.

"""

self.variable_type = variable_type.lower()
self.hed_schema = hed_schema
self.definitions = definitions
if isinstance(definitions, DefMapper):
self.definitions = definitions.gathered_defs
elif isinstance(definitions, dict):
self.definitions = definitions
else:
self.definitions = {}
self.variable_map = {} # maps def names to conditions.
self._extract_variable_map()

Expand Down Expand Up @@ -87,7 +92,7 @@ def get_def_names(item, no_value=True):
names = [tag.extension_or_value_portion.lower() for tag in item.get_all_tags() if 'def' in tag.tag_terms]
if no_value:
for index, name in enumerate(names):
name, name_value = DefinitionManager.split_name(name)
name, name_value = HedDefinitionManager.split_name(name)
names[index] = name
return names

Expand Down Expand Up @@ -130,7 +135,7 @@ def remove_defs(hed_strings):
for i in range(len(hed_strings)):
def_groups[i] = []
for i, hed in enumerate(hed_strings):
def_groups[i] = DefinitionManager.extract_defs(hed)
def_groups[i] = HedDefinitionManager.extract_defs(hed)
return def_groups

@staticmethod
Expand Down Expand Up @@ -169,7 +174,7 @@ def extract_defs(hed):
'cond2': DefinitionEntry('Cond2', def2, False, None),
'cond3': DefinitionEntry('Cond3', def3, True, None),
'cond4': DefinitionEntry('Cond4', def4, False, None)}
def_man = DefinitionManager(definitions, schema)
def_man = HedDefinitionManager(definitions, schema)
a = def_man.get_def_names(HedTag('Def/Cond3/4', hed_schema=schema))
b = def_man.get_def_names(HedString('(Def/Cond3/5,(Red, Blue))', hed_schema=schema))
c = def_man.get_def_names(HedString('(Def/Cond3/6,(Red, Blue, Def/Cond1), Def/Cond2)', hed_schema=schema))
4 changes: 1 addition & 3 deletions hed/tools/analysis/hed_filters.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from hed.models import DefMapper
from hed import HedTag
from hed.models.definition_dict import DefTagNames
from hed.errors import get_printable_issue_string


class StringOp:
def __init__(self, filter_name):
self.filter_name = filter_name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pandas as pd


class Query:
class HedQuery:
def __init__(self, query):
self.name = query['name']
self.query_type = query['query_type']
Expand All @@ -14,7 +14,7 @@ def evaluate(self, hed_string_obj):
return self.expression.search_hed_string(hed_string_obj)


class QueryManager:
class HedQueryManager:

def __init__(self, query_list):
self.query_list = query_list
Expand All @@ -34,14 +34,14 @@ def parse(self, hed_string_obj):


if __name__ == '__main__':
qlist = [Query({'name': 'cond_1', 'query_type': 'condition', 'query_str': 'Condition-variable'}),
Query({'name': 'tag_1', 'query_type': 'get_tag', 'query_str': 'Sensory-presentation'})]
qlist = [HedQuery({'name': 'cond_1', 'query_type': 'condition', 'query_str': 'Condition-variable'}),
HedQuery({'name': 'tag_1', 'query_type': 'get_tag', 'query_str': 'Sensory-presentation'})]

schema = load_schema_version(xml_version="8.0.0")
test_strings = [HedString('Condition-variable/Test-cond', hed_schema=schema),
HedString('Visual-presentation', hed_schema=schema),
HedString('Agent-action, (Move, Hand)', hed_schema=schema)]
q_parser = QueryManager(qlist)
q_parser = HedQueryManager(qlist)
col_names = q_parser.get_column_names()
print(f"Column names:{str(col_names)}")

Expand All @@ -50,4 +50,3 @@ def parse(self, hed_string_obj):
result[index] = q_parser.parse(obj)

df = pd.DataFrame(result, columns=col_names)
print("toHere")
32 changes: 16 additions & 16 deletions hed/tools/analysis/hed_type_factors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,38 @@


class HedTypeFactors:
""" Holds index of positions for type_variables, defined and otherwise. """
""" Holds index of positions for a variable type for one tabular file. """

ALLOWED_ENCODINGS = ("categorical", "one-hot")

def __init__(self, name, number_elements, variable_type="condition-variable"):
def __init__(self, variable_value, number_elements, variable_type="condition-variable"):
""" Constructor for HedTypeFactors.

Args:
name (str): Name of the variable summarized by this class.
variable_value (str): The value of the type summarized by this class.
number_elements (int): Number of elements in the data column
variable_type (str): Lowercase string corresponding to a HED tag which has a takes value child.

"""

self.variable_name = name
self.variable_value = variable_value
self.number_elements = number_elements
self.variable_type = variable_type.lower()
self.levels = {}
self.direct_indices = {}

def __str__(self):
return f"{self.variable_name}[{self.variable_type}]: {self.number_elements} elements " + \
return f"{self.variable_value}[{self.variable_type}]: {self.number_elements} elements " + \
f"{str(self.levels)} levels {len(self.direct_indices)} references"

def get_factors(self, factor_encoding="one-hot"):
df = pd.DataFrame(0, index=range(self.number_elements), columns=[self.variable_name])
df.loc[list(self.direct_indices.keys()), [self.variable_name]] = 1
df = pd.DataFrame(0, index=range(self.number_elements), columns=[self.variable_value])
df.loc[list(self.direct_indices.keys()), [self.variable_value]] = 1
if not self.levels:
return df

levels = list(self.levels.keys())
levels_list = [f"{self.variable_name}.{level}" for level in levels]
levels_list = [f"{self.variable_value}.{level}" for level in levels]
df_levels = pd.DataFrame(0, index=range(self.number_elements), columns=levels_list)
for index, level in enumerate(levels):
index_keys = list(self.levels[level].keys())
Expand All @@ -45,22 +45,22 @@ def get_factors(self, factor_encoding="one-hot"):
sum_factors = factors.sum(axis=1)
if sum_factors.max() > 1:
raise HedFileError("MultipleFactorSameEvent",
f"{self.variable_name} has multiple occurrences at index{sum_factors.idxmax()}", "")
f"{self.variable_value} has multiple occurrences at index{sum_factors.idxmax()}", "")
if factor_encoding == "categorical":
return self.factors_to_vector(factors, levels)
else:
raise ValueError("BadFactorEncoding",
f"{factor_encoding} is not in the allowed encodings: {str(self.ALLOWED_ENDCODINGS)}")
f"{factor_encoding} is not in the allowed encodings: {str(self.ALLOWED_ENCODINGS)}")

def factors_to_vector(self, factors, levels):
df = pd.DataFrame('n/a', index=range(len(factors.index)), columns=[self.variable_name])
df = pd.DataFrame('n/a', index=range(len(factors.index)), columns=[self.variable_value])
for index, row in factors.iterrows():
if row[self.variable_name]:
df.at[index, self.variable_name] = self.variable_name
if row[self.variable_value]:
df.at[index, self.variable_value] = self.variable_value
continue
for level in levels:
if row[f"{self.variable_name}.{level}"]:
df.at[index, self.variable_name] = level
if row[f"{self.variable_value}.{level}"]:
df.at[index, self.variable_value] = level
break
return df

Expand All @@ -72,7 +72,7 @@ def get_summary(self, full=True):
for index, item in cond.items():
count_list[index] = count_list[index] + 1
number_events, number_multiple, max_multiple = self.count_events(count_list)
summary = {'name': self.variable_name, 'variable_type': self.variable_type, 'levels': len(self.levels.keys()),
summary = {'name': self.variable_value, 'variable_type': self.variable_type, 'levels': len(self.levels.keys()),
'direct_references': len(self.direct_indices.keys()),
'total_events': self.number_elements, 'number_type_events': number_events,
'number_multiple_events': number_multiple, 'multiple_event_maximum': max_multiple}
Expand Down
18 changes: 8 additions & 10 deletions hed/tools/analysis/hed_type_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@
from hed import HedTag
from hed.models import HedGroup
from hed.schema import load_schema_version
from hed.tools.analysis.definition_manager import DefinitionManager
from hed.tools.analysis.hed_definition_manager import HedDefinitionManager
from hed.tools.analysis.hed_context_manager import HedContextManager
from hed.tools.analysis.hed_type_factors import HedTypeFactors


class HedTypeVariable:

def __init__(self, onset_manager, hed_schema, hed_definitions, variable_type="condition-variable"):
""" Create a variable manager for an events file.
def __init__(self, context_manager, hed_schema, hed_definitions, variable_type="condition-variable"):
""" Create a variable manager for one type-variable for one tabular file.

Args:
onset_manager (HedContextManager): A list of HED strings.
context_manager (HedContextManager): Manager providing the HED strings and their contexts.
hed_schema (HedSchema or HedSchemaGroup): The HED schema to use for processing.
hed_definitions (dict): A dictionary of DefinitionEntry objects.
variable_type (str): Lowercase short form of the variable to be managed.
Expand All @@ -25,9 +25,9 @@ def __init__(self, onset_manager, hed_schema, hed_definitions, variable_type="co
"""

self.variable_type = variable_type.lower()
self.definitions = DefinitionManager(hed_definitions, hed_schema, variable_type=variable_type)
hed_strings = onset_manager.hed_strings
hed_contexts = onset_manager.contexts
self.definitions = HedDefinitionManager(hed_definitions, hed_schema, variable_type=variable_type)
hed_strings = context_manager.hed_strings
hed_contexts = context_manager.contexts
self.number_events = len(hed_strings)
self._variable_map = {}
self._extract_variables(hed_strings, hed_contexts)
Expand All @@ -38,7 +38,7 @@ def get_variable(self, var_name):

@property
def type_variables(self):
return list(self._variable_map.keys())
return set(self._variable_map.keys())

def get_variable_def_names(self):
tag_list = []
Expand All @@ -52,7 +52,6 @@ def get_variable_type_map(self, type_name):
def get_variable_names(self):
return list(self._variable_map.keys())


def summarize(self, as_json=False):
summary = self._variable_map.copy()
for var_name, var_sum in summary.items():
Expand Down Expand Up @@ -229,4 +228,3 @@ def _update_variables(self, tag_list, index):
df_no_hot.to_csv("D:/wh_conditions_no_hot.csv", sep='\t', index=False)
with open('d:/wh_summarylong.json', 'w') as f:
json.dump(summary, f, indent=4)
print("to here")
Loading