Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add summary to top of schema comparisons #735

Merged
merged 2 commits into from
Aug 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 18 additions & 13 deletions hed/models/hed_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,27 +466,32 @@ def find_def_tags(self, recursive=False, include_groups=3):
Returns:
list: A list of tuples. The contents depend on the values of the include_group.
"""
from hed.models.definition_dict import DefTagNames
if recursive:
groups = self.get_all_groups()
def_tags = []
for group in groups:
def_tags += self._get_def_tags_from_group(group)
else:
groups = (self,)

def_tags = []
for group in groups:
for child in group.children:
if isinstance(child, HedTag):
if child.short_base_tag == DefTagNames.DEF_ORG_KEY:
def_tags.append((child, child, group))
else:
for tag in child.tags():
if tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY:
def_tags.append((tag, child, group))
def_tags = self._get_def_tags_from_group(self)

if include_groups == 0 or include_groups == 1 or include_groups == 2:
return [tag[include_groups] for tag in def_tags]
return def_tags

@staticmethod
def _get_def_tags_from_group(group):
    """ Collect the definition-related tags found directly under one group.

    Parameters:
        group: The group whose children are scanned.  Each child is either a HedTag
            or an object exposing tags() (presumably a nested group - confirm).

    Returns:
        list: A list of (tag, containing child, group) tuples, in child order.
            A child HedTag whose short_base_tag equals DefTagNames.DEF_ORG_KEY yields
            (child, child, group).  For any other child, every tag returned by
            child.tags() whose short_base_tag equals DefTagNames.DEF_EXPAND_ORG_KEY
            yields (tag, child, group).
    """
    # Kept as a function-level import, exactly as in the original block.
    from hed.models.definition_dict import DefTagNames

    found = []
    for child in group.children:
        if not isinstance(child, HedTag):
            # Non-tag child: look only at the tags that child reports itself.
            found.extend((tag, child, group) for tag in child.tags()
                         if tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY)
            continue
        if child.short_base_tag == DefTagNames.DEF_ORG_KEY:
            # A plain tag stands in for both the tag and its container.
            found.append((child, child, group))
    return found

def find_tags_with_term(self, term, recursive=False, include_groups=2):
""" Find any tags that contain the given term.

Expand Down
66 changes: 61 additions & 5 deletions hed/schema/schema_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,21 @@
HedSectionKey.UnitModifiers: "Unit Modifier",
HedSectionKey.Properties: "Property",
HedSectionKey.Attributes: "Attribute",
}

# Plural display name for each schema section, keyed by HedSectionKey.
# These are the plural counterparts of the SectionEntryNames labels and are
# used by _pretty_print_header as the line labels of the summary.
SectionEntryNamesPlural = {
    HedSectionKey.Tags: "Tags",
    HedSectionKey.Units: "Units",
    HedSectionKey.UnitClasses: "Unit Classes",
    HedSectionKey.ValueClasses: "Value Classes",
    HedSectionKey.UnitModifiers: "Unit Modifiers",
    HedSectionKey.Properties: "Properties",
    HedSectionKey.Attributes: "Attributes",
}


def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.Tags,)):
def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.Tags,),
include_summary=True):
"""
Compare the tags in two library schemas. This finds tags with the same term.

Expand All @@ -28,7 +38,8 @@ def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.T
'dict' returns a json style dictionary
sections(list): the list of sections to compare. By default, just the tags section.
If None, checks all sections including header, prologue, and epilogue.

include_summary(bool): If True, adds the 'summary' dict to the dict return option, and prepends it to the
string option. Lists the names of all the entries that are present in both schemas.
Returns:
dict, json style dict, or str: A dictionary containing matching entries in the Tags section of both schemas.
"""
Expand All @@ -37,8 +48,12 @@ def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.T
for section_key, section_dict in matches.items():
section_dict.update(unequal_entries[section_key])

header_summary = _get_tag_name_summary((matches, unequal_entries))

if output == 'string':
final_string = ""
if include_summary:
final_string += _pretty_print_header(header_summary)
if sections is None:
sections = HedSectionKey
for section_key in sections:
Expand All @@ -51,6 +66,9 @@ def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.T
return final_string
elif output == 'dict':
output_dict = {}
if include_summary:
output_dict["summary"] = {str(key): value for key, value in header_summary.items()}

for section_name, section_entries in matches.items():
output_dict[str(section_name)] = {}
for key, (entry1, entry2) in section_entries.items():
Expand All @@ -59,7 +77,8 @@ def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.T
return matches


def compare_differences(schema1, schema2, output='raw', attribute_filter=None, sections=(HedSectionKey.Tags,)):
def compare_differences(schema1, schema2, output='raw', attribute_filter=None, sections=(HedSectionKey.Tags,),
include_summary=True):
"""
Compare the tags in two schemas, this finds any differences

Expand All @@ -75,6 +94,8 @@ def compare_differences(schema1, schema2, output='raw', attribute_filter=None, s
If it evaluates to False, no filtering is performed.
sections(list or None): the list of sections to compare. By default, just the tags section.
If None, checks all sections including header, prologue, and epilogue.
include_summary(bool): If True, adds the 'summary' dict to the dict return option, and prints it with the
string option. Lists the names of all the nodes that are missing or different.

Returns:
tuple, str or dict:
Expand All @@ -94,26 +115,31 @@ def compare_differences(schema1, schema2, output='raw', attribute_filter=None, s
if sections is None:
sections = HedSectionKey

header_summary = _get_tag_name_summary((not_in_1, not_in_2, unequal_entries))
if output == 'string':
final_string = ""
if include_summary:
final_string += _pretty_print_header(header_summary)
for section_key in sections:
val1, val2, val3 = unequal_entries[section_key], not_in_1[section_key], not_in_2[section_key]
type_name = SectionEntryNames[section_key]
if val1 or val2 or val3:
if final_string:
final_string += "\n\n"
final_string += f"{type_name} differences:\n"
if val1:
final_string += _pretty_print_diff_all(val1, type_name=type_name) + "\n"
if val2:
final_string += _pretty_print_missing_all(val2, "Schema1", type_name) + "\n"
if val3:
final_string += _pretty_print_missing_all(val3, "Schema2", type_name) + "\n"
final_string += "\n\n"
return final_string
elif output == 'dict':
# todo: clean this part up
output_dict = {}
current_section = {}
if include_summary:
output_dict["summary"] = {str(key): value for key, value in header_summary.items()}

output_dict["unequal"] = current_section
for section_name, section_entries in unequal_entries.items():
current_section[str(section_name)] = {}
Expand Down Expand Up @@ -212,6 +238,36 @@ def compare_schemas(schema1, schema2, attribute_filter=HedKey.InLibrary, section
return matches, not_in_schema1, not_in_schema2, unequal_entries


def _get_tag_name_summary(tag_dicts):
    """ Gather the entry names found in several comparison results, grouped by section.

    Parameters:
        tag_dicts (iterable of dict): Comparison result dictionaries.  Each maps a section key
            to a dict keyed by entry name.  The MiscSection key, if present, is ignored.

    Returns:
        dict: One list of entry names for every key in HedSectionKey (empty if nothing was found).
            Names appear in first-seen order and each name is listed once per section.

    Notes:
        The same name can arrive from more than one input dict; for example,
        find_matching_tags merges the unequal entries into the matches and then passes both.
        Such names are reported once rather than repeated in the summary.
    """
    # Plain dicts serve as insertion-ordered sets, so duplicates collapse
    # while the order in which names were first encountered is preserved.
    names_by_section = {section_key: {} for section_key in HedSectionKey}
    for tag_dict in tag_dicts:
        for section_key, section in tag_dict.items():
            if section_key == MiscSection:
                continue
            names_by_section[section_key].update(dict.fromkeys(section.keys()))

    return {section_key: list(names) for section_key, names in names_by_section.items()}


def _pretty_print_header(summary_dict):
    """ Format a name summary as text, one labelled line per non-empty section.

    Parameters:
        summary_dict (dict): Maps a section key to a list of entry names.
            Sections with no names are left out.

    Returns:
        str: For each non-empty section, in the iteration order of summary_dict, a line
            "<plural section name>: <names sorted and comma separated>".  Sections are
            separated by a blank line and the text ends with a blank line.  When every
            section is empty the result is a single newline.
    """
    section_lines = []
    for section_key, tag_names in summary_dict.items():
        if tag_names:
            label = SectionEntryNamesPlural[section_key]
            joined_names = ", ".join(sorted(tag_names))
            section_lines.append(f"{label}: {joined_names}\n")

    # Each line already ends in a newline, so joining on "\n" leaves a blank
    # line between sections; the final "\n" closes the header.
    return "\n".join(section_lines) + "\n"


def _pretty_print_entry(entry):
""" Returns the contents of a HedSchemaEntry object as a list of strings.

Expand Down
51 changes: 38 additions & 13 deletions tests/schema/test_schema_compare.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
import unittest
import os
import io
import json


from hed.schema import HedKey, HedSectionKey, from_string
from hed.schema.schema_compare import compare_schemas, find_matching_tags, \
_pretty_print_diff_all, _pretty_print_missing_all, compare_differences
from hed import load_schema_version


class TestSchemaComparison(unittest.TestCase):

library_schema_start = """HED library="testcomparison" version="1.1.0" withStandard="8.2.0" unmerged="true"

'''Prologue'''
Expand All @@ -37,15 +33,15 @@ def load_schema1(self):
" *TestNode2",
" *TestNode3",
" *TestNode4"
]
]
return self._get_test_schema(test_nodes)

def load_schema2(self):
    """ Build the second schema used by the comparison tests.

    Returns:
        The object returned by self._get_test_schema for this fixed list of node lines
        (TestNode, TestNode2, TestNode3 written with two asterisks, and TestNode5).
    """
    # The list is closed exactly once; the scraped diff carried both the
    # removed and the added closing-bracket line.
    test_nodes = ["'''TestNode''' <nowiki> [This is a simple test node]</nowiki>\n",
                  " *TestNode2",
                  " **TestNode3",
                  " *TestNode5"
                  ]

    return self._get_test_schema(test_nodes)

Expand All @@ -63,16 +59,28 @@ def test_find_matching_tags(self):
self.assertNotIn("TestNode4", result[HedSectionKey.Tags])
self.assertNotIn("TestNode5", result[HedSectionKey.Tags])

match_string = find_matching_tags(schema1, schema2, output='string')
# Test with include_summary=True
match_string = find_matching_tags(schema1, schema2, output='string', include_summary=True)
self.assertIsInstance(match_string, str)
self.assertIn("Tags:", match_string)
# print(match_string)

json_style_dict = find_matching_tags(schema1, schema2, output='dict')
json_style_dict = find_matching_tags(schema1, schema2, output='dict', include_summary=True)
self.assertIsInstance(json_style_dict, dict)
self.assertIn("summary", json_style_dict)

result_string = json.dumps(json_style_dict, indent=4)
self.assertIsInstance(result_string, str)

# Optionally, you can also test the case without include_summary
match_string_no_summary = find_matching_tags(schema1, schema2, output='string', include_summary=False)
self.assertIsInstance(match_string_no_summary, str)
self.assertNotIn("Tags:", match_string_no_summary)

json_style_dict_no_summary = find_matching_tags(schema1, schema2, output='dict', include_summary=False)
self.assertIsInstance(json_style_dict_no_summary, dict)
self.assertNotIn("summary", json_style_dict_no_summary)

def test_compare_schemas(self):
schema1 = self.load_schema1()
schema2 = self.load_schema2()
Expand Down Expand Up @@ -109,9 +117,26 @@ def test_compare_differences(self):
self.assertEqual(len(unequal_entries[HedSectionKey.Tags]), 1) # Exactly one unequal entry (TestNode3) should be found
self.assertIn("TestNode3", unequal_entries[HedSectionKey.Tags])

diff_string = compare_differences(schema1, schema2, output='string')
self.assertIsInstance(diff_string, str)
# print(diff_string)
# Test with include_summary=True, string output
diff_string_with_summary = compare_differences(schema1, schema2, output='string', include_summary=True)
self.assertIsInstance(diff_string_with_summary, str)
self.assertIn("Tags:", diff_string_with_summary)
# print(diff_string_with_summary)

# Test with include_summary=True, dict output
diff_dict_with_summary = compare_differences(schema1, schema2, output='dict', include_summary=True)
self.assertIsInstance(diff_dict_with_summary, dict)
self.assertIn("summary", diff_dict_with_summary)

# Optionally, test without include_summary, string output
diff_string_no_summary = compare_differences(schema1, schema2, output='string', include_summary=False)
self.assertIsInstance(diff_string_no_summary, str)
self.assertNotIn("Tags:", diff_string_no_summary)

# Optionally, test without include_summary, dict output
diff_dict_no_summary = compare_differences(schema1, schema2, output='dict', include_summary=False)
self.assertIsInstance(diff_dict_no_summary, dict)
self.assertNotIn("summary", diff_dict_no_summary)

def test_compare_score_lib_versions(self):
schema1 = load_schema_version("score_1.0.0")
Expand All @@ -128,8 +153,8 @@ def test_compare_score_lib_versions(self):
# print(diff_string)

json_style_dict = compare_differences(schema1, schema2, attribute_filter=HedKey.InLibrary, output='dict',
sections=None)
sections=None)
self.assertIsInstance(json_style_dict, dict)

result_string = json.dumps(json_style_dict, indent=4)
self.assertIsInstance(result_string, str)
self.assertIsInstance(result_string, str)