From 297697511e3fb42a9b03b832f8bd055893fefdc0 Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 10 Aug 2023 17:46:51 -0500 Subject: [PATCH 1/2] Add summary to top of schema comparisons --- hed/schema/schema_compare.py | 66 ++++++++++++++++++++++++++--- tests/schema/test_schema_compare.py | 53 +++++++++++++++++------ 2 files changed, 100 insertions(+), 19 deletions(-) diff --git a/hed/schema/schema_compare.py b/hed/schema/schema_compare.py index 5a64f6b7..a229ee1a 100644 --- a/hed/schema/schema_compare.py +++ b/hed/schema/schema_compare.py @@ -12,11 +12,21 @@ HedSectionKey.UnitModifiers: "Unit Modifier", HedSectionKey.Properties: "Property", HedSectionKey.Attributes: "Attribute", +} +SectionEntryNamesPlural = { + HedSectionKey.Tags: "Tags", + HedSectionKey.Units: "Units", + HedSectionKey.UnitClasses: "Unit Classes", + HedSectionKey.ValueClasses: "Value Classes", + HedSectionKey.UnitModifiers: "Unit Modifiers", + HedSectionKey.Properties: "Properties", + HedSectionKey.Attributes: "Attributes", } -def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.Tags,)): +def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.Tags,), + include_summary=True): """ Compare the tags in two library schemas. This finds tags with the same term. @@ -28,7 +38,8 @@ def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.T 'dict' returns a json style dictionary sections(list): the list of sections to compare. By default, just the tags section. If None, checks all sections including header, prologue, and epilogue. - + include_summary(bool): If True, adds the 'summary' dict to the dict return option, and prints it with the + string option. Lists the names of all the nodes that are missing or different. Returns: dict, json style dict, or str: A dictionary containing matching entries in the Tags section of both schemas. """ @@ -37,8 +48,12 @@ def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.T for section_key, section_dict in matches.items(): section_dict.update(unequal_entries[section_key]) + header_summary = _get_tag_name_summary((matches, unequal_entries)) + if output == 'string': final_string = "" + if include_summary: + final_string += _pretty_print_header(header_summary) if sections is None: sections = HedSectionKey for section_key in sections: @@ -51,6 +66,9 @@ def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.T return final_string elif output == 'dict': output_dict = {} + if include_summary: + output_dict["summary"] = {str(key): value for key, value in header_summary.items()} + for section_name, section_entries in matches.items(): output_dict[str(section_name)] = {} for key, (entry1, entry2) in section_entries.items(): @@ -59,7 +77,8 @@ def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.T return matches -def compare_differences(schema1, schema2, output='raw', attribute_filter=None, sections=(HedSectionKey.Tags,)): +def compare_differences(schema1, schema2, output='raw', attribute_filter=None, sections=(HedSectionKey.Tags,), + include_summary=True): """ Compare the tags in two schemas, this finds any differences @@ -75,6 +94,8 @@ def compare_differences(schema1, schema2, output='raw', attribute_filter=None, s If it evaluates to False, no filtering is performed. sections(list or None): the list of sections to compare. By default, just the tags section. If None, checks all sections including header, prologue, and epilogue. + include_summary(bool): If True, adds the 'summary' dict to the dict return option, and prints it with the + string option. Lists the names of all the nodes that are missing or different. Returns: tuple, str or dict: @@ -94,14 +115,15 @@ def compare_differences(schema1, schema2, output='raw', attribute_filter=None, s if sections is None: sections = HedSectionKey + header_summary = _get_tag_name_summary((not_in_1, not_in_2, unequal_entries)) if output == 'string': final_string = "" + if include_summary: + final_string += _pretty_print_header(header_summary) for section_key in sections: val1, val2, val3 = unequal_entries[section_key], not_in_1[section_key], not_in_2[section_key] type_name = SectionEntryNames[section_key] if val1 or val2 or val3: - if final_string: - final_string += "\n\n" final_string += f"{type_name} differences:\n" if val1: final_string += _pretty_print_diff_all(val1, type_name=type_name) + "\n" @@ -109,11 +131,15 @@ def compare_differences(schema1, schema2, output='raw', attribute_filter=None, s final_string += _pretty_print_missing_all(val2, "Schema1", type_name) + "\n" if val3: final_string += _pretty_print_missing_all(val3, "Schema2", type_name) + "\n" + final_string += "\n\n" return final_string elif output == 'dict': # todo: clean this part up output_dict = {} current_section = {} + if include_summary: + output_dict["summary"] = {str(key): value for key, value in header_summary.items()} + output_dict["unequal"] = current_section for section_name, section_entries in unequal_entries.items(): current_section[str(section_name)] = {} @@ -212,6 +238,36 @@ def compare_schemas(schema1, schema2, attribute_filter=HedKey.InLibrary, section return matches, not_in_schema1, not_in_schema2, unequal_entries +def _get_tag_name_summary(tag_dicts): + out_dict = {section_key: [] for section_key in HedSectionKey} + for tag_dict in tag_dicts: + for section_key, section in tag_dict.items(): + if section_key == MiscSection: + continue + out_dict[section_key].extend(section.keys()) + + return out_dict + + +def _pretty_print_header(summary_dict): + output_string = "" + first_entry = True + for section_key, tag_names in summary_dict.items(): + if not tag_names: + continue + type_name = SectionEntryNamesPlural[section_key] + if not first_entry: + output_string += "\n" + output_string += f"{type_name}: " + + output_string += ", ".join(sorted(tag_names)) + + output_string += "\n" + first_entry = False + output_string += "\n" + return output_string + + def _pretty_print_entry(entry): """ Returns the contents of a HedSchemaEntry object as a list of strings. diff --git a/tests/schema/test_schema_compare.py b/tests/schema/test_schema_compare.py index 6543fa27..c10085fe 100644 --- a/tests/schema/test_schema_compare.py +++ b/tests/schema/test_schema_compare.py @@ -1,9 +1,6 @@ import unittest -import os -import io import json - from hed.schema import HedKey, HedSectionKey, from_string from hed.schema.schema_compare import compare_schemas, find_matching_tags, \ _pretty_print_diff_all, _pretty_print_missing_all, compare_differences @@ -11,7 +8,6 @@ class TestSchemaComparison(unittest.TestCase): - library_schema_start = """HED library="testcomparison" version="1.1.0" withStandard="8.2.0" unmerged="true" '''Prologue''' @@ -37,7 +33,7 @@ def load_schema1(self): " *TestNode2", " *TestNode3", " *TestNode4" - ] + ] return self._get_test_schema(test_nodes) def load_schema2(self): @@ -45,7 +41,7 @@ def load_schema2(self): " *TestNode2", " **TestNode3", " *TestNode5" - ] + ] return self._get_test_schema(test_nodes) @@ -63,16 +59,28 @@ def test_find_matching_tags(self): self.assertNotIn("TestNode4", result[HedSectionKey.Tags]) self.assertNotIn("TestNode5", result[HedSectionKey.Tags]) - match_string = find_matching_tags(schema1, schema2, output='string') + # Test with include_summary=True + match_string = find_matching_tags(schema1, schema2, output='string', include_summary=True) self.assertIsInstance(match_string, str) - # print(match_string) + self.assertIn("Tags:", match_string) + print(match_string) - json_style_dict = find_matching_tags(schema1, schema2, output='dict') + json_style_dict = find_matching_tags(schema1, schema2, output='dict', include_summary=True) self.assertIsInstance(json_style_dict, dict) + self.assertIn("summary", json_style_dict) result_string = json.dumps(json_style_dict, indent=4) self.assertIsInstance(result_string, str) + # Optionally, you can also test the case without include_summary + match_string_no_summary = find_matching_tags(schema1, schema2, output='string', include_summary=False) + self.assertIsInstance(match_string_no_summary, str) + self.assertNotIn("Tags:", match_string_no_summary) + + json_style_dict_no_summary = find_matching_tags(schema1, schema2, output='dict', include_summary=False) + self.assertIsInstance(json_style_dict_no_summary, dict) + self.assertNotIn("summary", json_style_dict_no_summary) + def test_compare_schemas(self): schema1 = self.load_schema1() schema2 = self.load_schema2() @@ -109,9 +117,26 @@ def test_compare_differences(self): self.assertEqual(len(unequal_entries[HedSectionKey.Tags]), 1) # No unequal entries should be found self.assertIn("TestNode3", unequal_entries[HedSectionKey.Tags]) - diff_string = compare_differences(schema1, schema2, output='string') - self.assertIsInstance(diff_string, str) - # print(diff_string) + # Test with include_summary=True, string output + diff_string_with_summary = compare_differences(schema1, schema2, output='string', include_summary=True) + self.assertIsInstance(diff_string_with_summary, str) + self.assertIn("Tags:", diff_string_with_summary) + # print(diff_string_with_summary) + + # Test with include_summary=True, dict output + diff_dict_with_summary = compare_differences(schema1, schema2, output='dict', include_summary=True) + self.assertIsInstance(diff_dict_with_summary, dict) + self.assertIn("summary", diff_dict_with_summary) + + # Optionally, test without include_summary, string output + diff_string_no_summary = compare_differences(schema1, schema2, output='string', include_summary=False) + self.assertIsInstance(diff_string_no_summary, str) + self.assertNotIn("Tags:", diff_string_no_summary) + + # Optionally, test without include_summary, dict output + diff_dict_no_summary = compare_differences(schema1, schema2, output='dict', include_summary=False) + self.assertIsInstance(diff_dict_no_summary, dict) + self.assertNotIn("summary", diff_dict_no_summary) def test_compare_score_lib_versions(self): schema1 = load_schema_version("score_1.0.0") @@ -128,8 +153,8 @@ def test_compare_score_lib_versions(self): # print(diff_string) json_style_dict = compare_differences(schema1, schema2, attribute_filter=HedKey.InLibrary, output='dict', - sections=None) + sections=None) self.assertIsInstance(json_style_dict, dict) result_string = json.dumps(json_style_dict, indent=4) - self.assertIsInstance(result_string, str) \ No newline at end of file + self.assertIsInstance(result_string, str) From b0c5521a6813fe08eff7d164a9568d92affbbc91 Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 10 Aug 2023 18:13:00 -0500 Subject: [PATCH 2/2] Simplify find_def_tags. Remove print from test --- hed/models/hed_group.py | 31 +++++++++++++++++------------ tests/schema/test_schema_compare.py | 2 +- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/hed/models/hed_group.py b/hed/models/hed_group.py index 1f1c02d4..aa90d227 100644 --- a/hed/models/hed_group.py +++ b/hed/models/hed_group.py @@ -466,27 +466,32 @@ def find_def_tags(self, recursive=False, include_groups=3): Returns: list: A list of tuples. The contents depend on the values of the include_group. """ - from hed.models.definition_dict import DefTagNames if recursive: groups = self.get_all_groups() + def_tags = [] + for group in groups: + def_tags += self._get_def_tags_from_group(group) else: - groups = (self,) - - def_tags = [] - for group in groups: - for child in group.children: - if isinstance(child, HedTag): - if child.short_base_tag == DefTagNames.DEF_ORG_KEY: - def_tags.append((child, child, group)) - else: - for tag in child.tags(): - if tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY: - def_tags.append((tag, child, group)) + def_tags = self._get_def_tags_from_group(self) if include_groups == 0 or include_groups == 1 or include_groups == 2: return [tag[include_groups] for tag in def_tags] return def_tags + @staticmethod + def _get_def_tags_from_group(group): + from hed.models.definition_dict import DefTagNames + def_tags = [] + for child in group.children: + if isinstance(child, HedTag): + if child.short_base_tag == DefTagNames.DEF_ORG_KEY: + def_tags.append((child, child, group)) + else: + for tag in child.tags(): + if tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY: + def_tags.append((tag, child, group)) + return def_tags + def find_tags_with_term(self, term, recursive=False, include_groups=2): """ Find any tags that contain the given term. diff --git a/tests/schema/test_schema_compare.py b/tests/schema/test_schema_compare.py index c10085fe..ccc08b79 100644 --- a/tests/schema/test_schema_compare.py +++ b/tests/schema/test_schema_compare.py @@ -63,7 +63,7 @@ def test_find_matching_tags(self): match_string = find_matching_tags(schema1, schema2, output='string', include_summary=True) self.assertIsInstance(match_string, str) self.assertIn("Tags:", match_string) - print(match_string) + # print(match_string) json_style_dict = find_matching_tags(schema1, schema2, output='dict', include_summary=True) self.assertIsInstance(json_style_dict, dict)