Skip to content

Commit

Permalink
Merge pull request #25 from simleo/add_params_file
Browse files Browse the repository at this point in the history
convert: add input object document
  • Loading branch information
simleo authored Apr 24, 2023
2 parents 0c331a0 + b181ff8 commit a728698
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 0 deletions.
41 changes: 41 additions & 0 deletions src/runcrate/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import hashlib
import json
import re
from io import StringIO
from pathlib import Path

import networkx as nx
Expand All @@ -37,6 +38,7 @@


WORKFLOW_BASENAME = "packed.cwl"
INPUTS_FILE_BASENAME = "primary-job.json"

CWL_TYPE_MAP = {
"string": "Text",
Expand Down Expand Up @@ -186,6 +188,8 @@ def __init__(self, root, workflow_name=None, license=None, readme=None):
# index collections by their main entity's id
self.collections = {}
self.hashes = {}
# map source files to destination files
self.file_map = {}

@staticmethod
def _get_step_maps(cwl_defs):
Expand Down Expand Up @@ -256,6 +260,7 @@ def build(self):
self.add_engine_run(crate)
self.add_action(crate, self.workflow_run)
self.patch_workflow_input_collection(crate)
self.add_inputs_file(crate)
return crate

def add_root_metadata(self, crate):
Expand Down Expand Up @@ -529,6 +534,11 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
"sha1": hash_,
})
self._set_alternate_name(prov_param, action_p, parent=parent)
try:
source_k = str(source.resolve(strict=False))
except RuntimeError:
source_k = str(source)
self.file_map[source_k] = dest
return action_p
if "ro:Folder" in type_names:
hash_ = self.hashes[prov_param.id.localpart]
Expand Down Expand Up @@ -644,3 +654,34 @@ def patch_workflow_input_collection(self, crate, wf=None):
for tool in wf.get("hasPart", []):
if "ComputationalWorkflow" in as_list(tool.type):
self.patch_workflow_input_collection(crate, wf=tool)

def _map_input_data(self, data):
if isinstance(data, list):
return [self._map_input_data(_) for _ in data]
if isinstance(data, dict):
rval = {}
for k, v in data.items():
if k == "location":
source = self.root / "workflow" / v
try:
source_k = str(source.resolve(strict=False))
except RuntimeError:
source_k = str(source)
dest = self.file_map.get(source_k)
rval[k] = str(dest) if dest else v
else:
rval[k] = self._map_input_data(v)
return rval
return data

def add_inputs_file(self, crate):
path = self.root / "workflow" / INPUTS_FILE_BASENAME
if path.is_file():
with open(path) as f:
data = json.load(f)
data = self._map_input_data(data)
source = StringIO(json.dumps(data, indent=4))
crate.add_file(source, path.name, properties={
"name": "input object document",
"encodingFormat": "application/json",
})
17 changes: 17 additions & 0 deletions tests/test_cwlprov_crate_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def test_revsort(data_dir, tmpdir):
output = tmpdir / "revsort-run-1-crate"
license = "Apache-2.0"
readme = data_dir / "README.md"
inputs_file = root / "workflow" / "primary-job.json"
workflow_name = "RevSort"
builder = ProvCrateBuilder(root, workflow_name=workflow_name, license=license, readme=readme)
crate = builder.build()
Expand Down Expand Up @@ -140,6 +141,22 @@ def test_revsort(data_dir, tmpdir):
assert set(_connected(workflow)) == set([
("packed.cwl#sorttool.cwl/output", "packed.cwl#main/output"),
])
inputs_f = crate.get(inputs_file.name)
assert inputs_f
assert inputs_f.type == "File"
assert inputs_f["encodingFormat"] == "application/json"
with open(inputs_file) as f:
ro_json = json.load(f)
with open(output / inputs_file.name) as f:
crate_json = json.load(f)
assert set(crate_json) == {"input", "reverse_sort"}
assert set(crate_json["input"]) == set(ro_json["input"])
for k, v in crate_json["input"].items():
if k == "location":
assert v == ro_json["input"][k].rsplit("/", 1)[-1]
else:
assert v == ro_json["input"][k]

# file contents
in_text = (root / "data/32/327fc7aedf4f6b69a42a7c8b808dc5a7aff61376").read_text()
assert (output / wf_input_file.id).read_text() == in_text
Expand Down

0 comments on commit a728698

Please sign in to comment.