Merge pull request #944 from IanCa/develop

First pass writing out ontology structure file
hed-standard · Jun 6, 2024 · fbbdbcb · fbbdbcb
2 parents ac3d5f0 + 8068780
commit fbbdbcb
Show file tree

Hide file tree

Showing 4 changed files with 46 additions and 5 deletions.
diff --git a/hed/schema/hed_schema_df_constants.py b/hed/schema/hed_schema_df_constants.py
@@ -1,4 +1,5 @@
 from hed.schema.hed_schema_constants import HedSectionKey
+from hed.schema import hed_schema_constants
 
 # Known tsv format suffixes
 
@@ -43,7 +44,7 @@
 equivalent_to = "omn:EquivalentTo"
 has_unit_class = "hasUnitClass"
 
-struct_columns = [hed_id, name, attributes, subclass_of, description]
+struct_columns = [hed_id, name, attributes, subclass_of, description, equivalent_to]
 tag_columns = [hed_id, name, level, subclass_of, attributes, description, equivalent_to]
 unit_columns = [hed_id, name, subclass_of, has_unit_class, attributes, description, equivalent_to]
 
@@ -76,3 +77,10 @@
     "HedEpilogue": 12
 }
 
+# Maps header attribute constants from hed_schema_constants to the HED ID string
+# that stands in for that attribute when output is written using IDs.
+# TODO: this should be retrieved directly from the appropriate spreadsheet
+valid_omn_attributes = {
+    hed_schema_constants.VERSION_ATTRIBUTE: "HED_0000300",
+    hed_schema_constants.LIBRARY_ATTRIBUTE: "HED_0000301",
+    hed_schema_constants.WITH_STANDARD_ATTRIBUTE: "HED_0000302",
+    hed_schema_constants.UNMERGED_ATTRIBUTE: "HED_0000303"
+}
diff --git a/hed/schema/schema_io/ontology_util.py b/hed/schema/schema_io/ontology_util.py
@@ -7,7 +7,7 @@
 from hed.errors.exceptions import HedFileError
 from hed.schema import hed_schema_df_constants as constants
 from hed.schema.hed_schema_constants import HedKey
-from hed.schema.schema_io.text_util import parse_attribute_string
+from hed.schema.schema_io.text_util import parse_attribute_string, _parse_header_attributes_line
 
 library_index_ranges = {
     "": (10000, 40000),
@@ -274,8 +274,6 @@ def convert_df_to_omn(dataframes):
     full_text = ""
     omn_data = {}
     for suffix, dataframe in dataframes.items():
-        if suffix == constants.STRUCT_KEY:  # not handled here yet
-            continue
         output_text = _convert_df_to_omn(dataframes[suffix], annotation_properties=annotation_props)
         omn_data[suffix] = output_text
         full_text += output_text + "\n"
@@ -398,6 +396,10 @@ def get_attributes_from_row(row):
         attr_string = row[constants.attributes]
     else:
         attr_string = ""
+
+    if constants.subclass_of in row.index and row[constants.subclass_of] == "HedHeader":
+        header_attributes, _ =  _parse_header_attributes_line(attr_string)
+        return header_attributes
     return parse_attribute_string(attr_string)
 
 

diff --git a/hed/schema/schema_io/schema2df.py b/hed/schema/schema_io/schema2df.py
@@ -3,6 +3,7 @@
 from hed.schema.hed_schema_constants import HedSectionKey, HedKey
 from hed.schema.schema_io.ontology_util import get_library_name_and_id, remove_prefix, create_empty_dataframes
 from hed.schema.schema_io.schema2base import Schema2Base
+from hed.schema.schema_io import text_util
 import pandas as pd
 import hed.schema.hed_schema_df_constants as constants
 from hed.schema.hed_schema_entry import HedTagEntry
@@ -67,6 +68,7 @@ def _create_and_add_object_row(self, base_object, attributes="", description="")
             constants.attributes: attributes,
             constants.subclass_of: base_object,
             constants.description: description.replace("\n", "\\n"),
+            constants.equivalent_to: self._get_header_equivalent_to(attributes, base_object)
         }
         self.output[constants.STRUCT_KEY].loc[len(self.output[constants.STRUCT_KEY])] = new_row
 
@@ -215,6 +217,35 @@ def _attribute_disallowed(self, attribute):
         # strip out hedID in dataframe format
         return attribute in [HedKey.HedID, HedKey.AnnotationProperty]
 
+    def _get_header_equivalent_to(self, attributes_string, subclass_of):
+        """ Build the equivalent_to text for a header/structure row.
+
+        The result is the base object followed by one parenthesized restriction per
+        entry, all joined with " and ".  The first restriction always ties the row to
+        the schema object; the rest come from the header attributes that have an
+        entry in constants.valid_omn_attributes.
+
+        Parameters:
+            attributes_string (str): Header attribute text; parsed with
+                text_util._parse_header_attributes_line.
+            subclass_of (str): Name of the base object for this row.
+
+        Returns:
+            str: The joined expression, or "" if it is identical to subclass_of.
+
+        Notes:
+            When self._get_as_ids is set, the schema, the attributes and the base
+            object are all written as prefixed IDs rather than names.
+        """
+        attribute_strings = []
+
+        # Only the first element of the parser's return pair is used here.
+        attributes, _ = text_util._parse_header_attributes_line(attributes_string)
+        schema_name, schema_id = self._get_object_name_and_id("HedSchema", include_prefix=True)
+
+        # Unconditionally add the restriction linking this row to the schema object.
+        if self._get_as_ids:
+            attribute_strings.append(f"(hed:HED_0000102 some {schema_id})")
+        else:
+            attribute_strings.append(f"(inHedSchema some {schema_name})")
+
+        for attribute, value in attributes.items():
+            # Attributes without a known HED ID mapping are left out of the expression.
+            if attribute not in constants.valid_omn_attributes:
+                continue
+
+            if self._get_as_ids:
+                attribute = f"hed:{constants.valid_omn_attributes[attribute]}"
+            attribute_strings.append(f'({attribute} value "{value}")')
+
+        if self._get_as_ids:
+            # we just want the ID for normal hed objects, not schema specific
+            subclass_of = self._get_object_id(subclass_of, base_id=0, include_prefix=True)
+
+        # If they match, we want to leave equivalent_to blank
+        # NOTE(review): attribute_strings always holds at least the schema restriction
+        # added above, so final_out appears to always be longer than subclass_of and
+        # this branch looks unreachable as written -- confirm the intended condition.
+        final_out = " and ".join([subclass_of] + attribute_strings)
+        if final_out == subclass_of:
+            return ""
+        return final_out
+
     def _get_tag_equivalent_to(self, tag_entry):
         subclass = self._get_subclass_of(tag_entry)
 

diff --git a/hed/scripts/create_ontology.py b/hed/scripts/create_ontology.py
@@ -14,7 +14,7 @@ def create_ontology(repo_path, schema_name, schema_version, dest):
     _, omn_dict = convert_df_to_omn(dataframes)
 
     base = get_schema_filename(schema_name, schema_version)
-    output_dest = os.path.join(dest, base)
+    output_dest = os.path.join(dest, base, "generated_omn")
     os.makedirs(output_dest, exist_ok=True)
     for suffix, omn_text in omn_dict.items():
         filename = os.path.join(output_dest, f"{base}_{suffix}.omn")