Skip to content

Commit

Permalink
input object document: remap location
Browse files Browse the repository at this point in the history
  • Loading branch information
simleo committed Apr 24, 2023
1 parent fd46a97 commit b181ff8
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 2 deletions.
33 changes: 32 additions & 1 deletion src/runcrate/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import hashlib
import json
import re
from io import StringIO
from pathlib import Path

import networkx as nx
Expand Down Expand Up @@ -187,6 +188,8 @@ def __init__(self, root, workflow_name=None, license=None, readme=None):
# index collections by their main entity's id
self.collections = {}
self.hashes = {}
# map source files to destination files
self.file_map = {}

@staticmethod
def _get_step_maps(cwl_defs):
Expand Down Expand Up @@ -531,6 +534,11 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
"sha1": hash_,
})
self._set_alternate_name(prov_param, action_p, parent=parent)
try:
source_k = str(source.resolve(strict=False))
except RuntimeError:
source_k = str(source)
self.file_map[source_k] = dest
return action_p
if "ro:Folder" in type_names:
hash_ = self.hashes[prov_param.id.localpart]
Expand Down Expand Up @@ -647,10 +655,33 @@ def patch_workflow_input_collection(self, crate, wf=None):
if "ComputationalWorkflow" in as_list(tool.type):
self.patch_workflow_input_collection(crate, wf=tool)

def _map_input_data(self, data):
    """Return a copy of ``data`` with mapped ``location`` values replaced.

    ``data`` is walked recursively: lists and dicts are rebuilt, any other
    value is returned unchanged. For every dict entry whose key is
    ``"location"`` and whose value is a string, the value is interpreted
    as a path relative to ``self.root / "workflow"``; if that path has an
    entry in ``self.file_map``, the entry (as a string) replaces the value,
    otherwise the original value is kept.

    A ``"location"`` entry whose value is not a string (e.g. a nested
    mapping, a number or ``None``) cannot be joined to a path, so it is
    processed like any other nested value instead of raising ``TypeError``.
    """
    if isinstance(data, list):
        return [self._map_input_data(item) for item in data]
    if not isinstance(data, dict):
        return data
    rval = {}
    for k, v in data.items():
        if k != "location" or not isinstance(v, str):
            # Ordinary entry, or a "location" that is not a path string:
            # just recurse so nested locations are still remapped.
            rval[k] = self._map_input_data(v)
            continue
        source = self.root / "workflow" / v
        try:
            # Non-strict: the path is only used as a lookup key.
            source_k = str(source.resolve(strict=False))
        except RuntimeError:
            # Fall back to the unresolved path if resolution fails.
            source_k = str(source)
        dest = self.file_map.get(source_k)
        rval[k] = str(dest) if dest else v
    return rval

# Add the input object document found at
# self.root / "workflow" / INPUTS_FILE_BASENAME to ``crate``, if that file
# exists. The JSON content is loaded, passed through self._map_input_data,
# re-serialized with a 4-space indent and added under the original file name.
def add_inputs_file(self, crate):
path = self.root / "workflow" / INPUTS_FILE_BASENAME
if path.is_file():
# NOTE(review): the next line appears to be the pre-change call that this
# commit deletes (diff +/- markers were lost in extraction) — confirm
# against the repository before treating this span as runnable code.
crate.add_file(path, properties={
# Read the original input object document from disk.
with open(path) as f:
data = json.load(f)
data = self._map_input_data(data)
# Hand the rewritten JSON to the crate as an in-memory text stream rather
# than as the on-disk file.
source = StringIO(json.dumps(data, indent=4))
crate.add_file(source, path.name, properties={
"name": "input object document",
"encodingFormat": "application/json",
})
13 changes: 12 additions & 1 deletion tests/test_cwlprov_crate_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_revsort(data_dir, tmpdir):
output = tmpdir / "revsort-run-1-crate"
license = "Apache-2.0"
readme = data_dir / "README.md"
inputs_file = data_dir / "workflow" / "primary-job.json"
inputs_file = root / "workflow" / "primary-job.json"
workflow_name = "RevSort"
builder = ProvCrateBuilder(root, workflow_name=workflow_name, license=license, readme=readme)
crate = builder.build()
Expand Down Expand Up @@ -145,6 +145,17 @@ def test_revsort(data_dir, tmpdir):
assert inputs_f
assert inputs_f.type == "File"
assert inputs_f["encodingFormat"] == "application/json"
with open(inputs_file) as f:
ro_json = json.load(f)
with open(output / inputs_file.name) as f:
crate_json = json.load(f)
assert set(crate_json) == {"input", "reverse_sort"}
assert set(crate_json["input"]) == set(ro_json["input"])
for k, v in crate_json["input"].items():
if k == "location":
assert v == ro_json["input"][k].rsplit("/", 1)[-1]
else:
assert v == ro_json["input"][k]

# file contents
in_text = (root / "data/32/327fc7aedf4f6b69a42a7c8b808dc5a7aff61376").read_text()
Expand Down

0 comments on commit b181ff8

Please sign in to comment.