Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add manifest settings for postimport actions #854

Merged
merged 1 commit into from
Dec 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 27 additions & 2 deletions cli/rack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,15 @@ def go() -> dict:
return semtk3.copy_graph(from_graph, to_graph, triple_store, triple_store_type, triple_store, triple_store_type)
go()

def ingest_manifest_driver(manifest_path: Path, base_url: Url, triple_store: Optional[Url], triple_store_type: Optional[str], clear: bool, default_graph: bool) -> None:
def ingest_manifest_driver(
manifest_path: Path,
base_url: Url,
triple_store: Optional[Url],
triple_store_type: Optional[str],
clear: bool,
default_graph: bool,
top_level: bool = True) -> None:

with open(manifest_path, mode='r', encoding='utf-8-sig') as manifest_file:
manifest = Manifest.fromYAML(manifest_file)

Expand Down Expand Up @@ -396,9 +404,26 @@ def ingest_manifest_driver(manifest_path: Path, base_url: Url, triple_store: Opt
store_nodegroups_driver(stepFile, base_url)
elif StepType.MANIFEST == step_type:
stepFile = base_path / step_data
ingest_manifest_driver(stepFile, base_url, triple_store, triple_store_type, False, default_graph)
ingest_manifest_driver(stepFile, base_url, triple_store, triple_store_type, False, default_graph, False)
elif StepType.COPYGRAPH == step_type:
utility_copygraph_driver(base_url, triple_store, triple_store_type, step_data[0], step_data[1])

if top_level:
if manifest.getCopyToDefaultGraph():
defaultGraph = Url("uri://DefaultGraph")
for graph in manifest.modelgraphsFootprint:
utility_copygraph_driver(base_url, triple_store, triple_store_type, graph, defaultGraph)
for graph in manifest.datagraphsFootprint:
utility_copygraph_driver(base_url, triple_store, triple_store_type, graph, defaultGraph)

if manifest.getPerformEntityResolution():
@with_status(f'Executing entity resolution')
def go() -> dict:
return semtk3.combine_entities_in_conn(conn=sparql_connection(base_url, [defaultGraph], defaultGraph, [], triple_store, triple_store_type))
go()

if manifest.getPerformOptimization():
logger.warning("Optimization requested but not yet implemented")

def ingest_data_driver(config_path: Path, base_url: Url, model_graphs: Optional[List[Url]], data_graphs: Optional[List[Url]], triple_store: Optional[Url], triple_store_type: Optional[str], clear: bool) -> None:
"""Use an import.yaml file to ingest multiple CSV files into the data graph."""
Expand Down
24 changes: 24 additions & 0 deletions cli/rack/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
'properties': {
'name': {'type': 'string'},
'description': {'type': 'string'},

'copy-to-default-graph': {'type': 'boolean'},
'perform-entity-resolution': {'type': 'boolean'},
'perform-triplestore-optimization': {'type': 'boolean'},

'footprint': {
'type': 'object',
'additionalProperties': False,
Expand Down Expand Up @@ -97,13 +102,28 @@ def __init__(self, name: str, description: Optional[str] = None) -> None:
self.datagraphsFootprint: List[Url] = []
self.nodegroupsFootprint: List[str] = []
self.steps: List[Tuple[StepType, Any]] = []
self.performOptimization: bool = False
self.performEntityResolution: bool = False
self.copyToDefaultGraph: bool = False

def getName(self) -> str:
    """Return this manifest's ``name`` attribute."""
    return self.name

def getDescription(self) -> Optional[str]:
    """Return this manifest's ``description`` attribute, which may be None."""
    return self.description

def getPerformOptimization(self) -> bool:
    """Return True when this manifest file prescribes running the triplestore optimizer.

    The flag starts out False in ``__init__``; ``fromYAML`` sets it from the
    ``perform-triplestore-optimization`` key, defaulting to False when absent.
    """
    return self.performOptimization

def getPerformEntityResolution(self) -> bool:
    """Return True when this manifest prescribes running entity resolution.

    The flag starts out False in ``__init__``; ``fromYAML`` sets it from the
    ``perform-entity-resolution`` key, defaulting to False when absent.
    """
    return self.performEntityResolution

def getCopyToDefaultGraph(self) -> bool:
    """Return True when this manifest prescribes copying the footprint to the default graph.

    The flag starts out False in ``__init__``; ``fromYAML`` sets it from the
    ``copy-to-default-graph`` key, defaulting to False when absent.
    """
    return self.copyToDefaultGraph

def addModelgraphFootprint(self, modelgraph: Url) -> None:
    """Append *modelgraph* to this manifest's ``modelgraphsFootprint`` list."""
    self.modelgraphsFootprint.append(modelgraph)

Expand Down Expand Up @@ -141,6 +161,10 @@ def fromYAML(src: Any) -> 'Manifest':

manifest = Manifest(obj.get('name'), obj.get('description'))

manifest.copyToDefaultGraph = obj.get('copy-to-default-graph', False)
manifest.performEntityResolution = obj.get('perform-entity-resolution', False)
manifest.performOptimization = obj.get('perform-triplestore-optimization', False)

footprint = obj.get('footprint', {})
for datagraph in footprint.get('data-graphs', []):
manifest.addDatagraphFootprint(Url(datagraph))
Expand Down