Second pass dataframe/omn schema handling #919

Merged 3 commits on May 7, 2024
1 change: 1 addition & 0 deletions hed/models/tabular_input.py
@@ -52,6 +52,7 @@ def reset_column_mapper(self, sidecar=None):

"""
new_mapper = ColumnMapper(sidecar=sidecar, optional_tag_columns=[self.HED_COLUMN_NAME])
self._sidecar = sidecar

self.reset_mapper(new_mapper)
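The added line keeps the sidecar reference on the input object, so later passes can tell which sidecar produced the current column mapping. A minimal usage sketch, assuming a BIDS-style events file and JSON sidecar exist at the (hypothetical) paths shown:

from hed.models import Sidecar, TabularInput

# Hypothetical file names; any events .tsv with a matching JSON sidecar works.
sidecar = Sidecar("task-stop_events.json")
events = TabularInput("sub-01_task-stop_events.tsv", sidecar=sidecar)

# Re-running the mapper with an updated sidecar now also records that
# sidecar on the instance (self._sidecar) for the second pass.
events.reset_column_mapper(sidecar=sidecar)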

146 changes: 51 additions & 95 deletions hed/schema/hed_schema.py
@@ -1,15 +1,14 @@
import json
import os

from hed.schema.hed_schema_constants import HedKey, HedSectionKey, HedKey83
from hed.schema.hed_schema_constants import HedKey, HedSectionKey, HedKeyOld
from hed.schema import hed_schema_constants as constants
from hed.schema.schema_io import schema_util
from hed.schema.schema_io.schema2xml import Schema2XML
from hed.schema.schema_io.schema2wiki import Schema2Wiki
from hed.schema.schema_io.schema2df import Schema2DF
from hed.schema.schema_io import ontology_util


# from hed.schema.schema_io.schema2owl import Schema2Owl
# from hed.schema.schema_io.owl_constants import ext_to_format
from hed.schema.hed_schema_section import (HedSchemaSection, HedSchemaTagSection, HedSchemaUnitClassSection,
HedSchemaUnitSection)
from hed.errors import ErrorHandler
@@ -245,29 +244,9 @@ def get_as_mediawiki_string(self, save_merged=False):
str: The schema as a string in mediawiki format.

"""
output_strings = Schema2Wiki.process_schema(self, save_merged)
output_strings = Schema2Wiki().process_schema(self, save_merged)
return '\n'.join(output_strings)

# def get_as_owl_string(self, save_merged=False, file_format="owl"):
# """ Return the schema to a mediawiki string.
#
# Parameters:
# save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
# If it is not a "withStandard" schema, this setting has no effect.
# file_format(str or None): Override format from filename extension.
# Accepts any value rdflib accepts(We fully support "turtle", "xml"("owl" also accepted) and "json-ld").
# Other values should work, but aren't as fully supported.
# Returns:
# str: The schema as a string in mediawiki format.
#
# :raises rdflib.plugin.PluginException:
# - Invalid format of file_format. Make sure you use a supported RDF format.
# """
# if file_format == "owl":
# file_format = "xml"
# rdf_data = Schema2Owl.process_schema(self, save_merged)
# return rdf_data.serialize(format=file_format)

def get_as_xml_string(self, save_merged=True):
""" Return the schema to an XML string.

@@ -279,72 +258,39 @@ def get_as_xml_string(self, save_merged=True):
str: Return the schema as an XML string.

"""
xml_tree = Schema2XML.process_schema(self, save_merged)
xml_tree = Schema2XML().process_schema(self, save_merged)
return schema_util.xml_element_2_str(xml_tree)
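The converters are now instantiated (Schema2Wiki(), Schema2XML(), Schema2DF()) instead of being called as class-level utilities, but the public getters keep their existing signatures. A quick sketch of the unchanged call pattern, assuming schema version 8.2.0 can be fetched or is cached locally:

from hed.schema import load_schema_version

schema = load_schema_version("8.2.0")  # assumes this version is reachable

wiki_text = schema.get_as_mediawiki_string(save_merged=False)
xml_text = schema.get_as_xml_string(save_merged=True)

print(wiki_text.splitlines()[0])  # first mediawiki header line
print(xml_text[:60])              # start of the XML document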

def save_as_mediawiki(self, filename, save_merged=False):
""" Save as mediawiki to a file.
def get_as_dataframes(self, save_merged=False):
""" Get a dict of dataframes representing this file

filename: str
save location
save_merged: bool
If True, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.

:raises OSError:
- File cannot be saved for some reason.
Returns:
dataframes(dict): a dict of dataframes you can load as a schema
"""
output_strings = Schema2Wiki.process_schema(self, save_merged)
with open(filename, mode='w', encoding='utf-8') as opened_file:
for string in output_strings:
opened_file.write(string)
opened_file.write('\n')
output_dfs = Schema2DF().process_schema(self, save_merged)
return output_dfs
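A short usage sketch for the new get_as_dataframes, assuming the returned dict is keyed by the same suffix strings used for the saved .tsv files (see hed_schema_df_constants later in this diff) and that schema version 8.2.0 is available:

from hed.schema import load_schema_version
from hed.schema import hed_schema_df_constants as df_constants

schema = load_schema_version("8.2.0")
dataframes = schema.get_as_dataframes()

# Assumed keying: one dataframe per suffix, e.g. "Tag", "Unit", "UnitClass", ...
print(sorted(dataframes.keys()))
tag_df = dataframes[df_constants.TAG_KEY]
print(list(tag_df.columns))  # expected to match tag_columns (rdfs:label, omn:SubClassOf, ...)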

def save_as_dataframes(self, base_filename, save_merged=False):
def save_as_mediawiki(self, filename, save_merged=False):
""" Save as mediawiki to a file.

base_filename: str
save filename. A suffix will be added to most, e.g. _Tag
filename: str
save location
save_merged: bool
If True, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.

:raises OSError:
- File cannot be saved for some reason.
"""
output_dfs = Schema2DF.process_schema(self, save_merged)
base, base_ext = os.path.splitext(base_filename)
for suffix, dataframe in output_dfs.items():
filename = f"{base}_{suffix}.tsv"
with open(filename, mode='w', encoding='utf-8') as opened_file:
dataframe.to_csv(opened_file, sep='\t', index=False, header=True)

# def save_as_owl(self, filename, save_merged=False, file_format=None):
# """ Save as json to a file.
#
# filename: str
# Save the file here
# save_merged: bool
# If True, this will save the schema as a merged schema if it is a "withStandard" schema.
# If it is not a "withStandard" schema, this setting has no effect.
# file_format(str or None): Required for owl formatted files other than the following:
# .ttl: turtle
# .owl: xml
# .json-ld: json-ld
#
# :raises OSError:
# - File cannot be saved for some reason
#
# :raises rdflib.plugin.PluginException:
# - Invalid format of file_format. Make sure you use a supported RDF format.
# """
# ext = os.path.splitext(filename.lower())[1]
# if ext in ext_to_format and file_format is None:
# file_format = ext_to_format[ext]
# if file_format == "owl":
# file_format = "xml"
# rdf_data = Schema2Owl.process_schema(self, save_merged)
# rdf_data.serialize(filename, format=file_format)
output_strings = Schema2Wiki().process_schema(self, save_merged)
with open(filename, mode='w', encoding='utf-8') as opened_file:
for string in output_strings:
opened_file.write(string)
opened_file.write('\n')

def save_as_xml(self, filename, save_merged=True):
""" Save as XML to a file.
@@ -358,11 +304,26 @@ def save_as_xml(self, filename, save_merged=True):
:raises OSError:
- File cannot be saved for some reason
"""
xml_tree = Schema2XML.process_schema(self, save_merged)
xml_tree = Schema2XML().process_schema(self, save_merged)
with open(filename, mode='w', encoding='utf-8') as opened_file:
xml_string = schema_util.xml_element_2_str(xml_tree)
opened_file.write(xml_string)

def save_as_dataframes(self, base_filename, save_merged=False):
""" Save as mediawiki to a file.

base_filename: str
save filename. A suffix will be added to most, e.g. _Tag
save_merged: bool
If True, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.

:raises OSError:
- File cannot be saved for some reason.
"""
output_dfs = Schema2DF().process_schema(self, save_merged)
ontology_util.save_dataframes(base_filename, output_dfs)
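save_as_dataframes now delegates the actual writing to ontology_util.save_dataframes instead of the open()/to_csv loop it replaced. A hedged sketch of the expected result, assuming the suffix-per-file naming described in the docstring (base name plus _Tag, _Unit, etc.):

import os
from hed.schema import load_schema_version

schema = load_schema_version("8.2.0")

os.makedirs("schema_tsv", exist_ok=True)
schema.save_as_dataframes("schema_tsv/HED8.2.0.tsv", save_merged=False)

# Expecting one tsv per suffix, e.g. HED8.2.0_Tag.tsv, HED8.2.0_Unit.tsv, ...
print(sorted(os.listdir("schema_tsv")))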

def set_schema_prefix(self, schema_namespace):
""" Set library namespace associated for this schema.

@@ -664,10 +625,10 @@ def get_tag_attribute_names_old(self):

"""
return {tag_entry.name: tag_entry for tag_entry in self._sections[HedSectionKey.Attributes].values()
if not tag_entry.has_attribute(HedKey.UnitClassProperty)
and not tag_entry.has_attribute(HedKey.UnitProperty)
and not tag_entry.has_attribute(HedKey.UnitModifierProperty)
and not tag_entry.has_attribute(HedKey.ValueClassProperty)}
if not tag_entry.has_attribute(HedKeyOld.UnitClassProperty)
and not tag_entry.has_attribute(HedKeyOld.UnitProperty)
and not tag_entry.has_attribute(HedKeyOld.UnitModifierProperty)
and not tag_entry.has_attribute(HedKeyOld.ValueClassProperty)}

# ===============================================
# Private utility functions
@@ -693,19 +654,14 @@ def _get_modifiers_for_unit(self, unit):
unit (str): A known unit.

Returns:
list: List of HedSchemaEntry.
derived_unit_list(list of HedSchemaEntry): The derived units for this unit

Notes:
This is a lower level one that doesn't rely on the Unit entries being fully setup.

"""
# todo: could refactor this so this unit.casefold() part is in HedSchemaUnitSection.get
unit_entry = self.get_tag_entry(unit, HedSectionKey.Units)
if unit_entry is None:
unit_entry = self.get_tag_entry(unit.casefold(), HedSectionKey.Units)
# Unit symbols must match exactly
if unit_entry is None or unit_entry.has_attribute(HedKey.UnitSymbol):
return []
return []
is_si_unit = unit_entry.has_attribute(HedKey.SIUnit)
is_unit_symbol = unit_entry.has_attribute(HedKey.UnitSymbol)
if not is_si_unit:
Expand All @@ -732,7 +688,7 @@ def _get_attributes_for_section(self, key_class):
Returns:
dict: A dict of all the attributes for this section.
"""
element_prop_key = HedKey83.ElementDomain if self.schema_83_props else HedKey.ElementProperty
element_prop_key = HedKey.ElementDomain if self.schema_83_props else HedKeyOld.ElementProperty

# Common logic for Attributes and Properties
if key_class in [HedSectionKey.Attributes, HedSectionKey.Properties]:
@@ -744,18 +700,18 @@

if self.schema_83_props:
attrib_classes = {
HedSectionKey.UnitClasses: HedKey83.UnitClassDomain,
HedSectionKey.Units: HedKey83.UnitDomain,
HedSectionKey.UnitModifiers: HedKey83.UnitModifierDomain,
HedSectionKey.ValueClasses: HedKey83.ValueClassDomain,
HedSectionKey.Tags: HedKey83.TagDomain
HedSectionKey.UnitClasses: HedKey.UnitClassDomain,
HedSectionKey.Units: HedKey.UnitDomain,
HedSectionKey.UnitModifiers: HedKey.UnitModifierDomain,
HedSectionKey.ValueClasses: HedKey.ValueClassDomain,
HedSectionKey.Tags: HedKey.TagDomain
}
else:
attrib_classes = {
HedSectionKey.UnitClasses: HedKey.UnitClassProperty,
HedSectionKey.Units: HedKey.UnitProperty,
HedSectionKey.UnitModifiers: HedKey.UnitModifierProperty,
HedSectionKey.ValueClasses: HedKey.ValueClassProperty
HedSectionKey.UnitClasses: HedKeyOld.UnitClassProperty,
HedSectionKey.Units: HedKeyOld.UnitProperty,
HedSectionKey.UnitModifiers: HedKeyOld.UnitModifierProperty,
HedSectionKey.ValueClasses: HedKeyOld.ValueClassProperty
}
if key_class == HedSectionKey.Tags:
return self.get_tag_attribute_names_old()
26 changes: 12 additions & 14 deletions hed/schema/hed_schema_constants.py
@@ -61,18 +61,6 @@ class HedKey:
InLibrary = "inLibrary"
HedID = 'hedId'

# All known properties
BoolProperty = 'boolProperty'
UnitClassProperty = 'unitClassProperty'
UnitProperty = 'unitProperty'
UnitModifierProperty = 'unitModifierProperty'
ValueClassProperty = 'valueClassProperty'
ElementProperty = 'elementProperty'
NodeProperty = 'nodeProperty'
IsInheritedProperty = 'isInheritedProperty'


class HedKey83:
UnitClassDomain = "unitClassDomain"
UnitDomain = "unitDomain"
UnitModifierDomain = "unitModifierDomain"
@@ -82,8 +70,6 @@ class HedKey83:
AnnotationProperty = "annotationProperty"

BoolRange = "boolRange"

# Fully new below this
TagRange = "tagRange"
NumericRange = "numericRange"
StringRange = "stringRange"
@@ -92,6 +78,18 @@
ValueClassRange = "valueClassRange"


class HedKeyOld:
# Fully Deprecated properties
BoolProperty = 'boolProperty'
UnitClassProperty = 'unitClassProperty'
UnitProperty = 'unitProperty'
UnitModifierProperty = 'unitModifierProperty'
ValueClassProperty = 'valueClassProperty'
ElementProperty = 'elementProperty'
NodeProperty = 'nodeProperty'
IsInheritedProperty = 'isInheritedProperty'


VERSION_ATTRIBUTE = 'version'
LIBRARY_ATTRIBUTE = 'library'
WITH_STANDARD_ATTRIBUTE = "withStandard"
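The reorganization above folds the former HedKey83 names into HedKey and moves the fully deprecated property names to HedKeyOld, so call sites choose by schema generation (the schema_83_props flag in hed_schema.py) rather than by an "83" suffix. A small, illustrative-only mirror of the selection logic used in _get_attributes_for_section:

from hed.schema.hed_schema_constants import HedKey, HedKeyOld

def unit_class_attribute_key(schema_83_props):
    # Illustrative only: mirrors the attrib_classes mapping in _get_attributes_for_section.
    return HedKey.UnitClassDomain if schema_83_props else HedKeyOld.UnitClassProperty

print(unit_class_attribute_key(True))   # unitClassDomain
print(unit_class_attribute_key(False))  # unitClassProperty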
75 changes: 73 additions & 2 deletions hed/schema/hed_schema_df_constants.py
@@ -1,7 +1,78 @@
from hed.schema.hed_schema_constants import HedSectionKey

# Known tsv format suffixes

STRUCT_KEY = "Structure"
TAG_KEY = "Tag"
UNIT_KEY = "Unit"
UNIT_CLASS_KEY = "UnitClass"
UNIT_MODIFIER_KEY = "UnitModifier"
VALUE_CLASS_KEY = "ValueClass"

ANNOTATION_KEY = "AnnotationProperty"
DATA_KEY = "DataProperty"
OBJECT_KEY = "ObjectProperty"

ATTRIBUTE_PROPERTY_KEY = "AttributeProperty"

PROPERTY_KEYS = [ANNOTATION_KEY, DATA_KEY, OBJECT_KEY]
DF_SUFFIXES = {TAG_KEY, STRUCT_KEY, VALUE_CLASS_KEY,
UNIT_CLASS_KEY, UNIT_KEY, UNIT_MODIFIER_KEY,
*PROPERTY_KEYS, ATTRIBUTE_PROPERTY_KEY}

section_mapping = {
STRUCT_KEY: None,
TAG_KEY: HedSectionKey.Tags,
VALUE_CLASS_KEY: HedSectionKey.ValueClasses,
UNIT_CLASS_KEY: HedSectionKey.UnitClasses,
UNIT_KEY: HedSectionKey.Units,
UNIT_MODIFIER_KEY: HedSectionKey.UnitModifiers,
ANNOTATION_KEY: HedSectionKey.Attributes,
DATA_KEY: HedSectionKey.Attributes,
OBJECT_KEY: HedSectionKey.Attributes,
ATTRIBUTE_PROPERTY_KEY: HedSectionKey.Properties,
}
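These suffix constants and the section_mapping table let second-pass loading code route each spreadsheet back to its schema section. A hedged sketch of that routing, assuming a dict keyed by the suffixes above (as get_as_dataframes earlier in this PR produces); the loop is illustrative, not the library's actual loader:

from hed.schema import hed_schema_df_constants as df_constants

def route_dataframes(dataframes):
    """Illustrative only: pair each suffix-keyed dataframe with its HedSectionKey."""
    for suffix, dataframe in dataframes.items():
        section_key = df_constants.section_mapping.get(suffix)
        if section_key is None:
            continue  # STRUCT_KEY (and unknown suffixes) have no schema section
        yield section_key, dataframe

# Example: for section_key, df in route_dataframes(schema.get_as_dataframes()): ...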

# Spreadsheet column ids
hed_id = "hedId"
level = "Level"
name = "rdfs:label"
subclass_of = "omn:SubClassOf"
attributes = "Attributes"
description = "dc:description"
equivalent_to = "owm:EquivalentTo"
has_unit_class = "hasUnitClass"

struct_columns = [hed_id, name, attributes, subclass_of, description]
tag_columns = [hed_id, level, name, subclass_of, attributes, description, equivalent_to]
unit_columns = [hed_id, name, subclass_of, has_unit_class, attributes, description, equivalent_to]

# The columns for unit class, value class, and unit modifier
other_columns = [hed_id, name, subclass_of, attributes, description, equivalent_to]

# for schema attributes
property_type = "Type"
property_domain = "omn:Domain"
property_range = "omn:Range"
properties = "Properties"
property_columns = [hed_id, name, property_type, property_domain, property_range, properties, description]

# For the schema properties
property_columns_reduced = [hed_id, name, property_type, description]

# HED_00X__YY where X is the library starting index, and Y is the entity number below.
struct_base_ids = {
"HedEntity": 1,
"HedStructure": 2,
"HedElement": 3,
"HedSchema": 4,
"HedTag": 5,
"HedUnitClass": 6,
"HedUnit": 7,
"HedUnitModifier": 8,
"HedValueClass": 9,
"HedHeader": 10,
"HedPrologue": 11,
"HedEpilogue": 12
}

# todo: move more constants up here
hed_id_column = "hedId"
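struct_base_ids assigns each structural entity a stable entity number; the full HED_00X__YY identifier is assembled elsewhere (ontology_util), so only the lookup is shown here, as a hedged sketch:

from hed.schema import hed_schema_df_constants as df_constants

# Entity number for the HedTag structural entity; combining it with the
# library starting index into a full hedId happens in ontology_util.
print(df_constants.struct_base_ids["HedTag"])  # 5
print(df_constants.hed_id_column)              # "hedId" spreadsheet column name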