Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated the variable summary mechanism to get condition variables #523

Merged
merged 8 commits into from
Jul 12, 2022
Merged
2 changes: 1 addition & 1 deletion docs/source/generated/hed.models.TabularInput.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
~TabularInput.to_excel
~TabularInput.update_definition_mapper
~TabularInput.validate_file
~TabularInput.validate_file_sidecars
~TabularInput.validate_sidecar



Expand Down
8 changes: 8 additions & 0 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T
if mapper is None:
mapper = ColumnMapper()
self._mapper = mapper
if def_mapper is None:
def_mapper = DefMapper(mapper.get_def_dicts())
self._def_mapper = def_mapper
self._has_column_names = has_column_names
self._name = name
Expand Down Expand Up @@ -135,6 +137,12 @@ def worksheet_name(self):
""" The worksheet name. """
return self._worksheet_name

def get_definitions(self, as_strings=True):
    """ Return the definitions gathered by this input's definition mapper.

    Args:
        as_strings (bool): If True, return the gathered definitions converted with
            DefinitionDict.get_as_strings. If False, return them unchanged.

    Returns:
        The gathered definitions, either as returned by DefinitionDict.get_as_strings
        or exactly as held by the definition mapper.

    """
    gathered = self._def_mapper.gathered_defs
    if not as_strings:
        return gathered
    return DefinitionDict.get_as_strings(gathered)

def _convert_to_form(self, hed_schema, tag_form, error_handler):
""" Convert all tags to the specified form.

Expand Down
21 changes: 2 additions & 19 deletions hed/models/hed_group_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,29 +361,12 @@ def find_tags_with_term(self, term, recursive=False, include_groups=2):
recursive (bool): If true, recursively check subgroups.
include_groups: 0, 1 or 2
If 0: Return only tags
If 1: return only groups
If 2 or any other value: return both

recursive (bool): If true, also check subgroups.
include_groups (int, 0, 1, 2, 3): options for how to expand or include groups
If 1: Return only groups
If 2 or any other value: Return both

Returns:
list:

def_tag: HedTag
The located def tag
def_expand_group: HedGroup or None
If this is a def-expand rather than def tag, this will be the entire def-expand group.
group: HedGroup
The group the def tag or def expand group is in.

Notes:
- The include_groups option controls the tag expansion as follows:
- If 0: Return only def and def expand tags/.
- If 1: Return only def tags and def-expand groups.
- If 2: Return only groups containing defs, or def-expand groups.
- If 3 or any other value: Return all 3 as a tuple.

"""

found_tags = []
Expand Down
2 changes: 2 additions & 0 deletions hed/models/model_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ class DefTagNames:

ONSET_KEY = ONSET_ORG_KEY.lower()
OFFSET_KEY = OFFSET_ORG_KEY.lower()

DEF_KEYS = (DEF_KEY, DEF_EXPAND_KEY)
30 changes: 12 additions & 18 deletions hed/models/tabular_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ class TabularInput(BaseInput):

HED_COLUMN_NAME = "HED"

def __init__(self, file=None, sidecar=None, extra_def_dicts=None,
also_gather_defs=True, name=None):
def __init__(self, file=None, sidecar=None, extra_def_dicts=None, name=None):
""" Constructor for the TabularInput class.

Args:
Expand All @@ -20,8 +19,6 @@ def __init__(self, file=None, sidecar=None, extra_def_dicts=None,
the definitions this file should use other than the ones coming from the file
itself and from the sidecar. These are added as the last entries, so names will override
earlier ones.
also_gather_defs (bool): If False, do NOT extract any definitions from column groups,
assume they are already in the def_dict list.
name (str): The name to display for this file for error purposes.

"""
Expand All @@ -32,9 +29,10 @@ def __init__(self, file=None, sidecar=None, extra_def_dicts=None,

definition_columns = [self.HED_COLUMN_NAME]
self._sidecar = sidecar
self._also_gather_defs = also_gather_defs
if extra_def_dicts and not isinstance(extra_def_dicts, list):
extra_def_dicts = [extra_def_dicts]
self._extra_def_dicts = extra_def_dicts
def_mapper = self.create_def_mapper(new_mapper, extra_def_dicts)
def_mapper = self.create_def_mapper(new_mapper)

super().__init__(file, file_type=".tsv", worksheet_name=None, has_column_names=True, mapper=new_mapper,
def_mapper=def_mapper, name=name, definition_columns=definition_columns,
Expand All @@ -44,12 +42,12 @@ def __init__(self, file=None, sidecar=None, extra_def_dicts=None,
raise ValueError("You are attempting to open a bids_old style file with no column headers provided.\n"
"This is probably not intended.")

def create_def_mapper(self, column_mapper, extra_def_dicts=None):
def create_def_mapper(self, column_mapper):
""" Create the definition mapper for this file.

Args:
column_mapper (ColumnMapper): The column mapper to gather definitions from.
extra_def_dicts (DefinitionDict or [DefinitionDict]): Additional definitions to add to mapper.


Returns:
def mapper (DefMapper): A class to validate or expand definitions with the given def dicts.
Expand All @@ -58,14 +56,10 @@ def mapper (DefMapper): A class to validate or expand definitions with the given
- The extra_def_dicts are definitions not included in the column mapper.

"""
def_dicts = []
if self._also_gather_defs:
def_dicts = column_mapper.get_def_dicts()

if extra_def_dicts and not isinstance(extra_def_dicts, list):
extra_def_dicts = [extra_def_dicts]
if extra_def_dicts:
def_dicts += extra_def_dicts
def_dicts = column_mapper.get_def_dicts()
if self._extra_def_dicts:
def_dicts += self._extra_def_dicts
def_mapper = DefMapper(def_dicts)

return def_mapper
Expand All @@ -79,14 +73,14 @@ def reset_column_mapper(self, sidecar=None):
"""
new_mapper = ColumnMapper(sidecar=sidecar, optional_tag_columns=[self.HED_COLUMN_NAME])

self._def_mapper = self.create_def_mapper(new_mapper, self._extra_def_dicts)
self._def_mapper = self.create_def_mapper(new_mapper)
self.reset_mapper(new_mapper)

def validate_file_sidecars(self, hed_ops=None, error_handler=None, **kwargs):
def validate_sidecar(self, hed_ops=None, error_handler=None, **kwargs):
""" Validate column definitions and hed strings.

Args:
hed_ops (list): A list of HedOps of funcs to apply to the hed strings in the sidecars.
hed_ops (list or HedOps): A list of HedOps or funcs to apply to the hed strings in the sidecars.
error_handler (ErrorHandler or None): Used to report errors. Uses a default one if none passed in.
kwargs: See models.hed_ops.translate_ops or the specific hed_ops for additional options.

Expand Down
5 changes: 4 additions & 1 deletion hed/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
""" HED tools for analysis and summarization. """


from .analysis.variable_manager import VariableManager
from .analysis.variable_summary import VariableSummary
from .analysis.definition_manager import DefinitionManager
from .analysis.file_dictionary import FileDictionary
from .analysis.key_map import KeyMap
from .analysis.onset_manager import OnsetGroup, OnsetManager
from .analysis.tag_summary import TagSummary
from .analysis.annotation_util import \
check_df_columns, extract_tags, generate_sidecar_entry, hed_to_df, df_to_hed, merge_hed_dict
Expand Down
11 changes: 5 additions & 6 deletions hed/tools/analysis/analysis_util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
""" Utilities for downstream analysis such as searching. """

import pandas as pd
from hed.models import DefinitionDict, TabularInput, TagExpressionParser
from hed.models import TabularInput, TagExpressionParser


def assemble_hed(data_input, columns_included=None, expand_defs=False):
Expand All @@ -24,13 +24,14 @@ def assemble_hed(data_input, columns_included=None, expand_defs=False):
else:
eligible_columns = None

hed_obj_list, definitions = get_assembled_strings(data_input, expand_defs=expand_defs)
hed_obj_list = get_assembled_strings(data_input, expand_defs=expand_defs)
hed_string_list = [str(hed) for hed in hed_obj_list]
if not eligible_columns:
df = pd.DataFrame({"HED_assembled": hed_string_list})
else:
df = data_input.dataframe[eligible_columns].copy(deep=True)
df['HED_assembled'] = hed_string_list
definitions = data_input.get_definitions()
return df, definitions


Expand All @@ -44,13 +45,11 @@ def get_assembled_strings(table, hed_schema=None, expand_defs=False):

Returns:
list: A list of HedString or HedStringGroup objects.
dict: A dictionary of definitions for this table.

"""
hed_list = list(table.iter_dataframe(hed_ops=[hed_schema], return_string_only=True,
expand_defs=expand_defs, remove_definitions=True))
definitions = DefinitionDict.get_as_strings(table._def_mapper.gathered_defs)
return hed_list, definitions
return hed_list


def search_tabular(data_input, hed_schema, query, columns_included=None):
Expand All @@ -72,7 +71,7 @@ def search_tabular(data_input, hed_schema, query, columns_included=None):
else:
eligible_columns = None

hed_list, dictionary = get_assembled_strings(data_input, hed_schema=hed_schema, expand_defs=True)
hed_list = get_assembled_strings(data_input, hed_schema=hed_schema, expand_defs=True)
expression = TagExpressionParser(query)
hed_tags = []
row_numbers = []
Expand Down
175 changes: 175 additions & 0 deletions hed/tools/analysis/definition_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
from hed.schema import load_schema_version
from hed.models import HedString, HedTag, DefinitionEntry
from hed.tools.analysis.onset_manager import OnsetManager


class DefinitionManager:
    """ Map definition names to the values of one type of HED variable tag they contain. """

    def __init__(self, definitions, hed_schema, variable_type='condition-variable'):
        """ Create a definition manager for a type of variable.

        Args:
            definitions (dict): A dictionary of DefinitionEntry objects.
            hed_schema (HedSchema or HedSchemaGroup): The schema used for parsing.
            variable_type (str): The type of HED variable (matched case-insensitively).

        """

        # Tags are compared by their lower-cased short base tag, so normalize once here;
        # otherwise a mixed-case variable_type would silently match nothing.
        self.variable_type = variable_type.lower()
        self.hed_schema = hed_schema
        self.definitions = definitions
        self.variable_map = {}  # maps lower-case def names to lists of variable names.
        self._extract_variable_map()

    def get_vars(self, item):
        """ Return a list of variables in item.

        Args:
            item (HedTag, HedGroup, or HedString): An item potentially containing def tags.

        Returns:
            list: The variables mapped from every known definition name found in item.

        """
        var_list = []
        for def_name in self.get_def_names(item, no_value=True):
            # split_name gives None for an empty name; there is nothing to look up then.
            if not def_name:
                continue
            var_list.extend(self.variable_map.get(def_name.lower(), []))
        return var_list

    def _extract_variable_map(self):
        """ Extract all of the variables associated with each definition and add them to the dictionary. """
        self.variable_map = {}
        for entry in self.definitions.values():
            self.variable_map[entry.name.lower()] = self._extract_from_entry(entry)

    def _extract_from_entry(self, entry):
        """ Extract a list of variables associated with a definition.

        Args:
            entry (DefinitionEntry): A definition entry to be processed.

        Returns:
            list: The variables associated with this definition.

        Notes:
            - A tag of the managed type with a value contributes that value in lower case.
            - A tag of the managed type without a value contributes entry.name as given.

        """
        tag_list = entry.contents.get_all_tags()
        hed_vars = []
        for hed_tag in tag_list:
            hed_tag.convert_to_canonical_forms(self.hed_schema)
            if hed_tag.short_base_tag.lower() != self.variable_type:
                continue
            value = hed_tag.extension_or_value_portion.lower()
            if value:
                hed_vars.append(value)
            else:
                hed_vars.append(entry.name)
        return hed_vars

    @staticmethod
    def get_def_names(item, no_value=True):
        """ Return a list of Def values in item.

        Args:
            item (HedTag, HedGroup, or HedString): An item containing a def tag.
            no_value (bool): If True, strip off extra values after the definition name.

        Returns:
            list: A list of definition names (as lower-case strings).

        """
        if isinstance(item, HedTag):
            # A single tag is handled entirely here rather than falling through to the
            # group branch, which calls get_all_tags().
            names = [item.extension_or_value_portion.lower()] if 'def' in item.tag_terms else []
        else:
            names = [tag.extension_or_value_portion.lower() for tag in item.get_all_tags() if 'def' in tag.tag_terms]
        if no_value:
            names = [DefinitionManager.split_name(name)[0] for name in names]
        return names

    @staticmethod
    def split_name(name, lowercase=True):
        """ Split a name/# or name/x into name, x.

        Args:
            name (str): The extension or value portion of a tag.
            lowercase (bool): If True, return both parts in lower case.

        Returns:
            tuple: (name, value). The value is '' when there is no '/', and the
                result is (None, None) when name is empty.

        """
        if not name:
            return None, None
        # Only the first '/' separates the name from its value.
        def_name, _, def_value = name.partition('/')
        if lowercase:
            return def_name.lower(), def_value.lower()
        return def_name, def_value

    @staticmethod
    def remove_defs(hed_strings):
        """ Remove any def or Def-expand from a list of HedStrings.

        Args:
            hed_strings (list): A list of HedStrings (modified in place by extract_defs).

        Returns:
            list: One list per input string, holding what extract_defs returned for it.

        """
        return [DefinitionManager.extract_defs(hed) for hed in hed_strings]

    @staticmethod
    def extract_defs(hed):
        """ Remove the groups holding def tags from hed and return what was extracted.

        Args:
            hed (HedString): The string to search; the located groups are removed from it.

        Returns:
            list: For each hit, the def tag when its group has a single child,
                otherwise the group itself.

        """
        to_remove = []
        to_append = []
        # NOTE(review): each hit is indexed as [0] = def tag and [2] = containing group;
        # confirm against find_def_tags(include_groups=3).
        for tup in hed.find_def_tags(recursive=True, include_groups=3):
            def_tag, group = tup[0], tup[2]
            if len(group.children) == 1:
                to_append.append(def_tag)
            else:
                to_append.append(group)
            to_remove.append(group)
        hed.remove(to_remove)
        return to_append


if __name__ == '__main__':
    # Manual demonstration: build a few HED strings and definitions, then look up def names.
    schema = load_schema_version(xml_version="8.1.0")

    # Plain literals: these strings have no placeholders, so no f-prefix is needed.
    test_strings1 = [HedString("Sensory-event,(Def/Cond1,(Red, Blue, Condition-variable/Trouble),Onset),"
                               "(Def/Cond2,Onset),Green,Yellow, Def/Cond5, Def/Cond6/4", hed_schema=schema),
                     HedString('(Def/Cond1, Offset)', hed_schema=schema),
                     HedString('White, Black, Condition-variable/Wonder, Condition-variable/Fast', hed_schema=schema),
                     HedString('', hed_schema=schema),
                     HedString('(Def/Cond2, Onset)', hed_schema=schema),
                     HedString('(Def/Cond3/4.3, Onset)', hed_schema=schema),
                     HedString('Arm, Leg, Condition-variable/Fast', hed_schema=schema)]

    onset_man = OnsetManager(test_strings1, schema)

    # Definition contents: with a value, in mixed case, in long form, and with no value.
    def1 = HedString('(Condition-variable/Var1, Circle, Square)', hed_schema=schema)
    def2 = HedString('(condition-variable/Var2, Condition-variable/Apple, Triangle, Sphere)', hed_schema=schema)
    def3 = HedString('(Organizational-property/Condition-variable/Var3, Physical-length/#, Ellipse, Cross)',
                     hed_schema=schema)
    def4 = HedString('(Condition-variable, Apple, Banana)', hed_schema=schema)
    definitions = {'cond1': DefinitionEntry('Cond1', def1, False, None),
                   'cond2': DefinitionEntry('Cond2', def2, False, None),
                   'cond3': DefinitionEntry('Cond3', def3, True, None),
                   'cond4': DefinitionEntry('Cond4', def4, False, None)}
    def_man = DefinitionManager(definitions, schema)

    # Look up def names in a single tag, a group, and a nested group.
    a = def_man.get_def_names(HedTag('Def/Cond3/4', hed_schema=schema))
    b = def_man.get_def_names(HedString('(Def/Cond3/5,(Red, Blue))', hed_schema=schema))
    c = def_man.get_def_names(HedString('(Def/Cond3/6,(Red, Blue, Def/Cond1), Def/Cond2)', hed_schema=schema))
Loading