Second pass dataframe/omn schema handling #919

Merged 3 commits on May 7, 2024
1 change: 1 addition & 0 deletions hed/models/tabular_input.py
@@ -52,6 +52,7 @@ def reset_column_mapper(self, sidecar=None):

"""
new_mapper = ColumnMapper(sidecar=sidecar, optional_tag_columns=[self.HED_COLUMN_NAME])
self._sidecar = sidecar

self.reset_mapper(new_mapper)
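The added line keeps the sidecar reference on the input object, so later passes can tell which sidecar produced the current column mapping. A minimal usage sketch, assuming a BIDS-style events file and JSON sidecar exist at the (hypothetical) paths shown:

from hed.models import Sidecar, TabularInput

# Hypothetical file names; any events .tsv with a matching JSON sidecar works.
sidecar = Sidecar("task-stop_events.json")
events = TabularInput("sub-01_task-stop_events.tsv", sidecar=sidecar)

# Re-running the mapper with an updated sidecar now also records that
# sidecar on the instance (self._sidecar) for the second pass.
events.reset_column_mapper(sidecar=sidecar)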

146 changes: 51 additions & 95 deletions hed/schema/hed_schema.py
@@ -1,15 +1,14 @@
import json
import os

from hed.schema.hed_schema_constants import HedKey, HedSectionKey, HedKey83
from hed.schema.hed_schema_constants import HedKey, HedSectionKey, HedKeyOld
from hed.schema import hed_schema_constants as constants
from hed.schema.schema_io import schema_util
from hed.schema.schema_io.schema2xml import Schema2XML
from hed.schema.schema_io.schema2wiki import Schema2Wiki
from hed.schema.schema_io.schema2df import Schema2DF
from hed.schema.schema_io import ontology_util


# from hed.schema.schema_io.schema2owl import Schema2Owl
# from hed.schema.schema_io.owl_constants import ext_to_format
from hed.schema.hed_schema_section import (HedSchemaSection, HedSchemaTagSection, HedSchemaUnitClassSection,
HedSchemaUnitSection)
from hed.errors import ErrorHandler
@@ -245,29 +244,9 @@ def get_as_mediawiki_string(self, save_merged=False):
str: The schema as a string in mediawiki format.

"""
output_strings = Schema2Wiki.process_schema(self, save_merged)
output_strings = Schema2Wiki().process_schema(self, save_merged)
return '\n'.join(output_strings)

# def get_as_owl_string(self, save_merged=False, file_format="owl"):
# """ Return the schema to a mediawiki string.
#
# Parameters:
# save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
# If it is not a "withStandard" schema, this setting has no effect.
# file_format(str or None): Override format from filename extension.
# Accepts any value rdflib accepts(We fully support "turtle", "xml"("owl" also accepted) and "json-ld").
# Other values should work, but aren't as fully supported.
# Returns:
# str: The schema as a string in mediawiki format.
#
# :raises rdflib.plugin.PluginException:
# - Invalid format of file_format. Make sure you use a supported RDF format.
# """
# if file_format == "owl":
# file_format = "xml"
# rdf_data = Schema2Owl.process_schema(self, save_merged)
# return rdf_data.serialize(format=file_format)

def get_as_xml_string(self, save_merged=True):
""" Return the schema to an XML string.

@@ -279,72 +258,39 @@ def get_as_xml_string(self, save_merged=True):
str: Return the schema as an XML string.

"""
xml_tree = Schema2XML.process_schema(self, save_merged)
xml_tree = Schema2XML().process_schema(self, save_merged)
return schema_util.xml_element_2_str(xml_tree)
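The converters are now instantiated (Schema2Wiki(), Schema2XML(), Schema2DF()) instead of being called as class-level utilities, but the public getters keep their existing signatures. A quick sketch of the unchanged call pattern, assuming schema version 8.2.0 can be fetched or is cached locally:

from hed.schema import load_schema_version

schema = load_schema_version("8.2.0")  # assumes this version is reachable

wiki_text = schema.get_as_mediawiki_string(save_merged=False)
xml_text = schema.get_as_xml_string(save_merged=True)

print(wiki_text.splitlines()[0])  # first mediawiki header line
print(xml_text[:60])              # start of the XML document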

def save_as_mediawiki(self, filename, save_merged=False):
""" Save as mediawiki to a file.
def get_as_dataframes(self, save_merged=False):
""" Get a dict of dataframes representing this file

filename: str
save location
save_merged: bool
If True, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.

:raises OSError:
- File cannot be saved for some reason.
Returns:
dataframes(dict): a dict of dataframes you can load as a schema
"""
output_strings = Schema2Wiki.process_schema(self, save_merged)
with open(filename, mode='w', encoding='utf-8') as opened_file:
for string in output_strings:
opened_file.write(string)
opened_file.write('\n')
output_dfs = Schema2DF().process_schema(self, save_merged)
return output_dfs
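A short usage sketch for the new get_as_dataframes, assuming the returned dict is keyed by the same suffix strings used for the saved .tsv files (see hed_schema_df_constants later in this diff) and that schema version 8.2.0 is available:

from hed.schema import load_schema_version
from hed.schema import hed_schema_df_constants as df_constants

schema = load_schema_version("8.2.0")
dataframes = schema.get_as_dataframes()

# Assumed keying: one dataframe per suffix, e.g. "Tag", "Unit", "UnitClass", ...
print(sorted(dataframes.keys()))
tag_df = dataframes[df_constants.TAG_KEY]
print(list(tag_df.columns))  # expected to match tag_columns (rdfs:label, omn:SubClassOf, ...)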

def save_as_dataframes(self, base_filename, save_merged=False):
def save_as_mediawiki(self, filename, save_merged=False):
""" Save as mediawiki to a file.

base_filename: str
save filename. A suffix will be added to most, e.g. _Tag
filename: str
save location
save_merged: bool
If True, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.

:raises OSError:
- File cannot be saved for some reason.
"""
output_dfs = Schema2DF.process_schema(self, save_merged)
base, base_ext = os.path.splitext(base_filename)
for suffix, dataframe in output_dfs.items():
filename = f"{base}_{suffix}.tsv"
with open(filename, mode='w', encoding='utf-8') as opened_file:
dataframe.to_csv(opened_file, sep='\t', index=False, header=True)

# def save_as_owl(self, filename, save_merged=False, file_format=None):
# """ Save as json to a file.
#
# filename: str
# Save the file here
# save_merged: bool
# If True, this will save the schema as a merged schema if it is a "withStandard" schema.
# If it is not a "withStandard" schema, this setting has no effect.
# file_format(str or None): Required for owl formatted files other than the following:
# .ttl: turtle
# .owl: xml
# .json-ld: json-ld
#
# :raises OSError:
# - File cannot be saved for some reason
#
# :raises rdflib.plugin.PluginException:
# - Invalid format of file_format. Make sure you use a supported RDF format.
# """
# ext = os.path.splitext(filename.lower())[1]
# if ext in ext_to_format and file_format is None:
# file_format = ext_to_format[ext]
# if file_format == "owl":
# file_format = "xml"
# rdf_data = Schema2Owl.process_schema(self, save_merged)
# rdf_data.serialize(filename, format=file_format)
output_strings = Schema2Wiki().process_schema(self, save_merged)
with open(filename, mode='w', encoding='utf-8') as opened_file:
for string in output_strings:
opened_file.write(string)
opened_file.write('\n')

def save_as_xml(self, filename, save_merged=True):
""" Save as XML to a file.
@@ -358,11 +304,26 @@ def save_as_xml(self, filename, save_merged=True):
:raises OSError:
- File cannot be saved for some reason
"""
xml_tree = Schema2XML.process_schema(self, save_merged)
xml_tree = Schema2XML().process_schema(self, save_merged)
with open(filename, mode='w', encoding='utf-8') as opened_file:
xml_string = schema_util.xml_element_2_str(xml_tree)
opened_file.write(xml_string)

def save_as_dataframes(self, base_filename, save_merged=False):
""" Save as mediawiki to a file.

base_filename: str
save filename. A suffix will be added to most, e.g. _Tag
save_merged: bool
If True, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.

:raises OSError:
- File cannot be saved for some reason.
"""
output_dfs = Schema2DF().process_schema(self, save_merged)
ontology_util.save_dataframes(base_filename, output_dfs)
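save_as_dataframes now delegates the actual writing to ontology_util.save_dataframes instead of the open()/to_csv loop it replaced. A hedged sketch of the expected result, assuming the suffix-per-file naming described in the docstring (base name plus _Tag, _Unit, etc.):

import os
from hed.schema import load_schema_version

schema = load_schema_version("8.2.0")

os.makedirs("schema_tsv", exist_ok=True)
schema.save_as_dataframes("schema_tsv/HED8.2.0.tsv", save_merged=False)

# Expecting one tsv per suffix, e.g. HED8.2.0_Tag.tsv, HED8.2.0_Unit.tsv, ...
print(sorted(os.listdir("schema_tsv")))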

def set_schema_prefix(self, schema_namespace):
""" Set library namespace associated for this schema.

@@ -664,10 +625,10 @@ def get_tag_attribute_names_old(self):

"""
return {tag_entry.name: tag_entry for tag_entry in self._sections[HedSectionKey.Attributes].values()
if not tag_entry.has_attribute(HedKey.UnitClassProperty)
and not tag_entry.has_attribute(HedKey.UnitProperty)
and not tag_entry.has_attribute(HedKey.UnitModifierProperty)
and not tag_entry.has_attribute(HedKey.ValueClassProperty)}
if not tag_entry.has_attribute(HedKeyOld.UnitClassProperty)
and not tag_entry.has_attribute(HedKeyOld.UnitProperty)
and not tag_entry.has_attribute(HedKeyOld.UnitModifierProperty)
and not tag_entry.has_attribute(HedKeyOld.ValueClassProperty)}

# ===============================================
# Private utility functions
@@ -693,19 +654,14 @@ def _get_modifiers_for_unit(self, unit):
unit (str): A known unit.

Returns:
list: List of HedSchemaEntry.
derived_unit_list(list of HedSchemaEntry): The derived units for this unit

Notes:
This is a lower level one that doesn't rely on the Unit entries being fully setup.

"""
# todo: could refactor this so this unit.casefold() part is in HedSchemaUnitSection.get
unit_entry = self.get_tag_entry(unit, HedSectionKey.Units)
if unit_entry is None:
unit_entry = self.get_tag_entry(unit.casefold(), HedSectionKey.Units)
# Unit symbols must match exactly
if unit_entry is None or unit_entry.has_attribute(HedKey.UnitSymbol):
return []
return []
is_si_unit = unit_entry.has_attribute(HedKey.SIUnit)
is_unit_symbol = unit_entry.has_attribute(HedKey.UnitSymbol)
if not is_si_unit:
Expand All @@ -732,7 +688,7 @@ def _get_attributes_for_section(self, key_class):
Returns:
dict: A dict of all the attributes for this section.
"""
element_prop_key = HedKey83.ElementDomain if self.schema_83_props else HedKey.ElementProperty
element_prop_key = HedKey.ElementDomain if self.schema_83_props else HedKeyOld.ElementProperty

# Common logic for Attributes and Properties
if key_class in [HedSectionKey.Attributes, HedSectionKey.Properties]:
@@ -744,18 +700,18 @@

if self.schema_83_props:
attrib_classes = {
HedSectionKey.UnitClasses: HedKey83.UnitClassDomain,
HedSectionKey.Units: HedKey83.UnitDomain,
HedSectionKey.UnitModifiers: HedKey83.UnitModifierDomain,
HedSectionKey.ValueClasses: HedKey83.ValueClassDomain,
HedSectionKey.Tags: HedKey83.TagDomain
HedSectionKey.UnitClasses: HedKey.UnitClassDomain,
HedSectionKey.Units: HedKey.UnitDomain,
HedSectionKey.UnitModifiers: HedKey.UnitModifierDomain,
HedSectionKey.ValueClasses: HedKey.ValueClassDomain,
HedSectionKey.Tags: HedKey.TagDomain
}
else:
attrib_classes = {
HedSectionKey.UnitClasses: HedKey.UnitClassProperty,
HedSectionKey.Units: HedKey.UnitProperty,
HedSectionKey.UnitModifiers: HedKey.UnitModifierProperty,
HedSectionKey.ValueClasses: HedKey.ValueClassProperty
HedSectionKey.UnitClasses: HedKeyOld.UnitClassProperty,
HedSectionKey.Units: HedKeyOld.UnitProperty,
HedSectionKey.UnitModifiers: HedKeyOld.UnitModifierProperty,
HedSectionKey.ValueClasses: HedKeyOld.ValueClassProperty
}
if key_class == HedSectionKey.Tags:
return self.get_tag_attribute_names_old()
26 changes: 12 additions & 14 deletions hed/schema/hed_schema_constants.py
@@ -61,18 +61,6 @@ class HedKey:
InLibrary = "inLibrary"
HedID = 'hedId'

# All known properties
BoolProperty = 'boolProperty'
UnitClassProperty = 'unitClassProperty'
UnitProperty = 'unitProperty'
UnitModifierProperty = 'unitModifierProperty'
ValueClassProperty = 'valueClassProperty'
ElementProperty = 'elementProperty'
NodeProperty = 'nodeProperty'
IsInheritedProperty = 'isInheritedProperty'


class HedKey83:
UnitClassDomain = "unitClassDomain"
UnitDomain = "unitDomain"
UnitModifierDomain = "unitModifierDomain"
@@ -82,8 +70,6 @@ class HedKey83:
AnnotationProperty = "annotationProperty"

BoolRange = "boolRange"

# Fully new below this
TagRange = "tagRange"
NumericRange = "numericRange"
StringRange = "stringRange"
@@ -92,6 +78,18 @@
ValueClassRange = "valueClassRange"


class HedKeyOld:
# Fully Deprecated properties
BoolProperty = 'boolProperty'
UnitClassProperty = 'unitClassProperty'
UnitProperty = 'unitProperty'
UnitModifierProperty = 'unitModifierProperty'
ValueClassProperty = 'valueClassProperty'
ElementProperty = 'elementProperty'
NodeProperty = 'nodeProperty'
IsInheritedProperty = 'isInheritedProperty'


VERSION_ATTRIBUTE = 'version'
LIBRARY_ATTRIBUTE = 'library'
WITH_STANDARD_ATTRIBUTE = "withStandard"
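The reorganization above folds the former HedKey83 names into HedKey and moves the fully deprecated property names to HedKeyOld, so call sites choose by schema generation (the schema_83_props flag in hed_schema.py) rather than by an "83" suffix. A small, illustrative-only mirror of the selection logic used in _get_attributes_for_section:

from hed.schema.hed_schema_constants import HedKey, HedKeyOld

def unit_class_attribute_key(schema_83_props):
    # Illustrative only: mirrors the attrib_classes mapping in _get_attributes_for_section.
    return HedKey.UnitClassDomain if schema_83_props else HedKeyOld.UnitClassProperty

print(unit_class_attribute_key(True))   # unitClassDomain
print(unit_class_attribute_key(False))  # unitClassProperty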
75 changes: 73 additions & 2 deletions hed/schema/hed_schema_df_constants.py
@@ -1,7 +1,78 @@
from hed.schema.hed_schema_constants import HedSectionKey

# Known tsv format suffixes

STRUCT_KEY = "Structure"
TAG_KEY = "Tag"
UNIT_KEY = "Unit"
UNIT_CLASS_KEY = "UnitClass"
UNIT_MODIFIER_KEY = "UnitModifier"
VALUE_CLASS_KEY = "ValueClass"

ANNOTATION_KEY = "AnnotationProperty"
DATA_KEY = "DataProperty"
OBJECT_KEY = "ObjectProperty"

ATTRIBUTE_PROPERTY_KEY = "AttributeProperty"

PROPERTY_KEYS = [ANNOTATION_KEY, DATA_KEY, OBJECT_KEY]
DF_SUFFIXES = {TAG_KEY, STRUCT_KEY, VALUE_CLASS_KEY,
UNIT_CLASS_KEY, UNIT_KEY, UNIT_MODIFIER_KEY,
*PROPERTY_KEYS, ATTRIBUTE_PROPERTY_KEY}

section_mapping = {
STRUCT_KEY: None,
TAG_KEY: HedSectionKey.Tags,
VALUE_CLASS_KEY: HedSectionKey.ValueClasses,
UNIT_CLASS_KEY: HedSectionKey.UnitClasses,
UNIT_KEY: HedSectionKey.Units,
UNIT_MODIFIER_KEY: HedSectionKey.UnitModifiers,
ANNOTATION_KEY: HedSectionKey.Attributes,
DATA_KEY: HedSectionKey.Attributes,
OBJECT_KEY: HedSectionKey.Attributes,
ATTRIBUTE_PROPERTY_KEY: HedSectionKey.Properties,
}
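These suffix constants and the section_mapping table let second-pass loading code route each spreadsheet back to its schema section. A hedged sketch of that routing, assuming a dict keyed by the suffixes above (as get_as_dataframes earlier in this PR produces); the loop is illustrative, not the library's actual loader:

from hed.schema import hed_schema_df_constants as df_constants

def route_dataframes(dataframes):
    """Illustrative only: pair each suffix-keyed dataframe with its HedSectionKey."""
    for suffix, dataframe in dataframes.items():
        section_key = df_constants.section_mapping.get(suffix)
        if section_key is None:
            continue  # STRUCT_KEY (and unknown suffixes) have no schema section
        yield section_key, dataframe

# Example: for section_key, df in route_dataframes(schema.get_as_dataframes()): ...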

# Spreadsheet column ids
hed_id = "hedId"
level = "Level"
name = "rdfs:label"
subclass_of = "omn:SubClassOf"
attributes = "Attributes"
description = "dc:description"
equivalent_to = "owm:EquivalentTo"
has_unit_class = "hasUnitClass"

struct_columns = [hed_id, name, attributes, subclass_of, description]
tag_columns = [hed_id, level, name, subclass_of, attributes, description, equivalent_to]
unit_columns = [hed_id, name, subclass_of, has_unit_class, attributes, description, equivalent_to]

# The columns for unit class, value class, and unit modifier
other_columns = [hed_id, name, subclass_of, attributes, description, equivalent_to]

# for schema attributes
property_type = "Type"
property_domain = "omn:Domain"
property_range = "omn:Range"
properties = "Properties"
property_columns = [hed_id, name, property_type, property_domain, property_range, properties, description]

# For the schema properties
property_columns_reduced = [hed_id, name, property_type, description]

# HED_00X__YY where X is the library starting index, and Y is the entity number below.
struct_base_ids = {
"HedEntity": 1,
"HedStructure": 2,
"HedElement": 3,
"HedSchema": 4,
"HedTag": 5,
"HedUnitClass": 6,
"HedUnit": 7,
"HedUnitModifier": 8,
"HedValueClass": 9,
"HedHeader": 10,
"HedPrologue": 11,
"HedEpilogue": 12
}

# todo: move more constants up here
hed_id_column = "hedId"
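struct_base_ids assigns each structural entity a stable entity number; the full HED_00X__YY identifier is assembled elsewhere (ontology_util), so only the lookup is shown here, as a hedged sketch:

from hed.schema import hed_schema_df_constants as df_constants

# Entity number for the HedTag structural entity; combining it with the
# library starting index into a full hedId happens in ontology_util.
print(df_constants.struct_base_ids["HedTag"])  # 5
print(df_constants.hed_id_column)              # "hedId" spreadsheet column name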