diff --git a/.github/workflows/fuzzy_compile_weekly.yml b/.github/workflows/fuzzy_compile_weekly.yml index 6fe24c56..14bea188 100644 --- a/.github/workflows/fuzzy_compile_weekly.yml +++ b/.github/workflows/fuzzy_compile_weekly.yml @@ -72,7 +72,7 @@ jobs: uses: conda-incubator/setup-miniconda@v3.0.1 with: miniforge-variant: Miniforge-pypy3 - miniforge-version: latest + miniforge-version: 24.7.1-0 environment-file: workflow-inference-compiler/install/system_deps.yml activate-environment: wic channels: conda-forge diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index ee1bdf54..7f5f8c58 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -138,7 +138,7 @@ jobs: uses: conda-incubator/setup-miniconda@v3.0.1 with: miniforge-variant: Miniforge-pypy3 - miniforge-version: latest + miniforge-version: 24.7.1-0 environment-file: workflow-inference-compiler/install/system_deps.yml activate-environment: wic channels: conda-forge @@ -149,7 +149,7 @@ jobs: uses: conda-incubator/setup-miniconda@v3.0.1 with: miniforge-variant: Miniforge-pypy3 - miniforge-version: latest + miniforge-version: 24.7.1-0 environment-file: workflow-inference-compiler/install/system_deps_windows.yml activate-environment: wic channels: conda-forge @@ -218,13 +218,13 @@ jobs: if: always() run: cd workflow-inference-compiler/ && pytest -k test_cwl_embedding_independence # --cov --cov-config=.coveragerc_serial # NOTE: This test MUST be run in serial! See is_isomorphic_with_timeout() - timeout-minutes: 5 # backup timeout for windows + timeout-minutes: 20 # backup timeout for windows - name: PyTest Inline Subworkflows if: always() run: cd workflow-inference-compiler/ && pytest -k test_inline_subworkflows # --cov --cov-config=.coveragerc_serial # NOTE: This test MUST be run in serial! See is_isomorphic_with_timeout() - timeout-minutes: 5 # backup timeout for windows + timeout-minutes: 20 # backup timeout for windows - name: PyTest Scattering Scaling if: runner.os == 'Linux' diff --git a/.github/workflows/lint_and_test_macos.yml b/.github/workflows/lint_and_test_macos.yml index 6c1d46c1..a6c64ccf 100644 --- a/.github/workflows/lint_and_test_macos.yml +++ b/.github/workflows/lint_and_test_macos.yml @@ -72,7 +72,7 @@ jobs: uses: conda-incubator/setup-miniconda@v2.2.0 with: miniforge-variant: Miniforge-pypy3 - miniforge-version: latest + miniforge-version: 24.7.1-0 environment-file: workflow-inference-compiler/install/system_deps.yml activate-environment: wic use-mamba: true @@ -142,10 +142,10 @@ jobs: if: always() run: cd workflow-inference-compiler/ && pytest -k test_cwl_embedding_independence # --cov --cov-config=.coveragerc_serial # NOTE: This test MUST be run in serial! See is_isomorphic_with_timeout() - timeout-minutes: 5 # backup timeout for windows + timeout-minutes: 20 # backup timeout for windows - name: PyTest Inline Subworkflows if: always() run: cd workflow-inference-compiler/ && pytest -k test_inline_subworkflows # --cov --cov-config=.coveragerc_serial # NOTE: This test MUST be run in serial! See is_isomorphic_with_timeout() - timeout-minutes: 5 # backup timeout for windows + timeout-minutes: 20 # backup timeout for windows diff --git a/.github/workflows/publish_rest_docker.yml b/.github/workflows/publish_rest_docker.yml new file mode 100644 index 00000000..7fb8bb9e --- /dev/null +++ b/.github/workflows/publish_rest_docker.yml @@ -0,0 +1,40 @@ +name: Publish REST API Docker + +on: + workflow_dispatch: + push: + tags: + - v[0-9]+.[0-9]+.[0-9]+ + - v[0-9]+.[0-9]+.[0-9]+-dev[0-9]+ + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout 🛎️ + uses: actions/checkout@v2 + + - name: Set up Docker Buildx 🐳 + uses: docker/setup-buildx-action@v1 + + - name: Get Tag + run: echo "tag=${{ github.ref_name }}" >> $GITHUB_ENV + + - name: Print Tag + run: echo "Publishing with tag ${{ env.tag }}" + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_TOKEN }} + + - name: Publish Sophios Container 🐳 + uses: docker/build-push-action@v5 + with: + context: . + file: ./docker/Dockerfile_ubuntu_REST + push: true + tags: polusai/sophios-rest-api:${{ env.tag }} + diff --git a/.github/workflows/run_workflows.yml b/.github/workflows/run_workflows.yml index 6ffc87f2..f2c51e62 100644 --- a/.github/workflows/run_workflows.yml +++ b/.github/workflows/run_workflows.yml @@ -127,7 +127,7 @@ jobs: uses: conda-incubator/setup-miniconda@v3.0.1 with: miniforge-variant: Miniforge-pypy3 - miniforge-version: latest + miniforge-version: 24.7.1-0 environment-file: workflow-inference-compiler/install/system_deps.yml activate-environment: wic_github_actions channels: conda-forge diff --git a/.github/workflows/run_workflows_weekly.yml b/.github/workflows/run_workflows_weekly.yml index a6eab21f..62d286eb 100644 --- a/.github/workflows/run_workflows_weekly.yml +++ b/.github/workflows/run_workflows_weekly.yml @@ -89,7 +89,7 @@ jobs: # installs pypy in the base environment (only). Although we are using # another environment, better to avoid the problem altogether by # not using Miniforge-pypy3 - miniforge-version: latest + miniforge-version: 24.7.1-0 environment-file: workflow-inference-compiler/install/system_deps.yml activate-environment: wic_github_actions use-mamba: true diff --git a/docker/Dockerfile_ubuntu_REST b/docker/Dockerfile_ubuntu_REST new file mode 100644 index 00000000..4b259dc6 --- /dev/null +++ b/docker/Dockerfile_ubuntu_REST @@ -0,0 +1,19 @@ +FROM ubuntu:jammy + +RUN apt update && \ + apt install software-properties-common -y && \ + apt install curl -y && \ + curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python3 get-pip.py && \ + apt autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +COPY . /sophios +WORKDIR /sophios + +RUN pip3 install /sophios --no-cache-dir + +# Then run the sophios REST API through port 3000 +EXPOSE 3000 + +CMD ["uvicorn", "sophios.api.http.restapi:app", "--host", "0.0.0.0", "--port", "3000"] \ No newline at end of file diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 00000000..3bce2461 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,9 @@ +version: '0.2.1' +services: + fastapi-app: + image: polusai/sophios-rest-api:0.2.1 + ports: + - "3000:3000" + environment: + - PATH=$PATH + command: ["uvicorn", "sophios.api.http.restapi:app", "--host", "0.0.0.0", "--port", "3000"] \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 07b41b91..9ea913ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ # This 'graphviz' is equivalent to `conda install python-graphviz` or # `sudo apt install python3-graphviz` ONLY. "graphviz", - "jsonschema<4.18", # temporarily downgrade due to severe performance regression + "jsonschema", "pyyaml", "requests", "mergedeep", @@ -42,7 +42,8 @@ dependencies = [ "toil[cwl]", "fastapi", "python-jose", - "uvicorn" + "uvicorn", + "referencing" ] [project.readme] diff --git a/src/sophios/api/http/restapi.py b/src/sophios/api/http/restapi.py index 9bdb6c8e..f68a5de9 100644 --- a/src/sophios/api/http/restapi.py +++ b/src/sophios/api/http/restapi.py @@ -1,6 +1,7 @@ from pathlib import Path import argparse import copy +import uuid import yaml @@ -13,7 +14,7 @@ from sophios.utils_graphs import get_graph_reps from sophios.utils_yaml import wic_loader from sophios import utils_cwl -from sophios.post_compile import cwl_inline_runtag +from sophios.post_compile import cwl_inline_runtag, remove_entrypoints from sophios.cli import get_args from sophios.wic_types import CompilerInfo, Json, Tool, Tools, StepId, YamlTree, Cwl, NodeData from sophios.api.utils import converter @@ -94,14 +95,15 @@ async def compile_wf(request: Request) -> Json: print('---------- Compile Workflow! ---------') # ========= PROCESS REQUEST OBJECT ========== req: Json = await request.json() + suppliedargs = ['--cwl_inline_runtag', '--generate_cwl_workflow'] # clean up and convert the incoming object # schema preserving req = converter.update_payload_missing_inputs_outputs(req) wfb_payload = converter.raw_wfb_to_lean_wfb(req) # schema non-preserving - workflow_temp = converter.wfb_to_wic(wfb_payload) - wkflw_name = "generic_workflow" - args = get_args(wkflw_name, ['--inline_cwl_runtag', '--generate_cwl_workflow']) + workflow_temp = converter.wfb_to_wic(wfb_payload, req["plugins"]) + wkflw_name = "workflow_" + args = get_args(wkflw_name, suppliedargs) # Build canonical workflow object workflow_can = utils_cwl.desugar_into_canonical_normal_form(workflow_temp) @@ -128,29 +130,48 @@ async def compile_wf(request: Request) -> Json: yaml_tree: YamlTree = YamlTree(StepId(wkflw_name, plugin_ns), workflow_can) # ========= COMPILE WORKFLOW ================ - args.ignore_dir_path = True + if req.get('run_local_env') == 'true': + args.ignore_dir_path = False + else: + args.ignore_dir_path = True compiler_info: CompilerInfo = compiler.compile_workflow(yaml_tree, args, [], [graph], {}, {}, {}, {}, tools_cwl, True, relative_run_path=True, testing=False) rose_tree = compiler_info.rose input_output.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file) - cwl_inline_runtag(args, rose_tree) + rose_tree = cwl_inline_runtag(args, rose_tree) + rose_tree = remove_entrypoints(args, rose_tree) # ======== OUTPUT PROCESSING ================ # ========= PROCESS COMPILED OBJECT ========= sub_node_data: NodeData = rose_tree.data yaml_stem = sub_node_data.name cwl_tree = sub_node_data.compiled_cwl yaml_inputs = sub_node_data.workflow_inputs_file - cwl_tree_no_dd = remove_dot_dollar(cwl_tree) - yaml_inputs_no_dd = remove_dot_dollar(yaml_inputs) # Convert the compiled yaml file to json for labshare Compute. - cwl_tree_run = copy.deepcopy(cwl_tree_no_dd) - + cwl_tree_run = copy.deepcopy(cwl_tree) + cwl_tree_run['steps_dict'] = {} + for step in cwl_tree_run['steps']: + node_name = step['id'] + step.pop('id', None) + step = {node_name: step} + step_copy = copy.deepcopy(step) + cwl_tree_run['steps_dict'].update(step_copy) + + cwl_tree_run.pop('steps', None) + cwl_tree_run['steps'] = cwl_tree_run.pop('steps_dict', None) + + # currently there is a compiler bug where the output variables are duplicated + # this is a workaround to remove the duplicates till the compiler is fixed + for step in cwl_tree_run['steps']: + + out_vars = cwl_tree_run['steps'][step]['out'] + out_vars_unique = list(set(out_vars)) + cwl_tree_run['steps'][step]['out'] = out_vars_unique compute_workflow: Json = {} compute_workflow = { "name": yaml_stem, - "cwlJobInputs": yaml_inputs_no_dd, + "cwlJobInputs": yaml_inputs, **cwl_tree_run } compute_workflow["retval"] = str(0) diff --git a/src/sophios/api/pythonapi.py b/src/sophios/api/pythonapi.py index a81a8bd9..43771079 100644 --- a/src/sophios/api/pythonapi.py +++ b/src/sophios/api/pythonapi.py @@ -2,7 +2,6 @@ """CLT utilities.""" import logging from pathlib import Path -import subprocess as sub from typing import Any, ClassVar, Optional, TypeVar, Union import cwl_utils.parser as cu_parser @@ -757,8 +756,8 @@ def run(self) -> None: rose_tree: RoseTree = compiler_info.rose post_compile.cwl_docker_extract(args, self.process_name) - post_compile.remove_entrypoints(args, rose_tree) - + rose_tree = post_compile.remove_entrypoints(args, rose_tree) + post_compile.find_and_create_output_dirs(rose_tree) # Do NOT capture stdout and/or stderr and pipe warnings and errors into a black hole. retval = run_local_module.run_local(args, rose_tree, args.cachedir, args.cwl_runner, True) diff --git a/src/sophios/api/utils/converter.py b/src/sophios/api/utils/converter.py index ba00ac99..9c64b5ca 100644 --- a/src/sophios/api/utils/converter.py +++ b/src/sophios/api/utils/converter.py @@ -10,6 +10,7 @@ from sophios.wic_types import Json, Cwl from sophios.api.utils.ict.ict_spec.model import ICT from sophios.api.utils.ict.ict_spec.cast import cast_to_ict +from sophios.api.utils.wfb_util import get_node_config SCHEMA_FILE = Path(__file__).parent / "input_object_schema.json" SCHEMA: Json = {} @@ -62,7 +63,9 @@ def extract_state(inp: Json) -> Json: plugin = next((ict for ict in plugins if ict['pid'] == node_pid), None) clt: Json = {} if plugin: - clt = ict_to_clt(plugin) + # by default have network access true + # so we don't get runtime error for docker/container pull + clt = ict_to_clt(plugin, True) # just have the clt payload in run node['run'] = clt inp_restrict = inp_inter['state'] @@ -98,10 +101,63 @@ def raw_wfb_to_lean_wfb(inp: Json) -> Json: return inp_restrict -def wfb_to_wic(inp: Json) -> Cwl: +def get_topological_order(links: list[dict[str, str]]) -> list[str]: + """Get topological order of the nodes from links""" + # Create adjacency list representation + graph: dict[str, list[str]] = {} + in_degree: dict[str, int] = {} + + # Initialize all nodes with 0 in-degree + for link in links: + source = link['sourceId'] + target = link['targetId'] + if source not in graph: + graph[source] = [] + if target not in graph: + graph[target] = [] + if source not in in_degree: + in_degree[source] = 0 + if target not in in_degree: + in_degree[target] = 0 + + # Build the graph and count in-degrees + for link in links: + source = link['sourceId'] + target = link['targetId'] + graph[source].append(target) + in_degree[target] += 1 + + # Initialize queue with nodes that have 0 in-degree + queue: list[str] = [] + for node in in_degree: + if in_degree[node] == 0: + queue.append(node) + + # Process the queue + result: list[str] = [] + while queue: + node = queue.pop(0) + result.append(node) + + # Reduce in-degree of neighbors + for neighbor in graph[node]: + in_degree[neighbor] -= 1 + if in_degree[neighbor] == 0: + queue.append(neighbor) + + return result + + +def wfb_to_wic(inp: Json, plugins: List[dict[str, Any]]) -> Cwl: """Convert lean wfb json to compliant wic""" # non-schema preserving changes inp_restrict = copy.deepcopy(inp) + plugin_config_map: dict[str, dict] = {} + for plugin in plugins: + pid: str = plugin.get("pid", "") + if pid == "": + continue + plugin_config_map[pid] = get_node_config(plugin) for node in inp_restrict['nodes']: if node.get('settings'): @@ -110,7 +166,6 @@ def wfb_to_wic(inp: Json) -> Cwl: node['out'] = list({k: yaml.load('!& ' + v, Loader=wic_loader())} for k, v in node['settings'] ['outputs'].items()) # outputs always have to be list # remove these (now) superfluous keys - node.pop('settings', None) node.pop('name', None) node.pop('internal', None) @@ -119,53 +174,122 @@ def wfb_to_wic(inp: Json) -> Cwl: target_node_ids = [] for edg in inp_restrict['links']: target_node_ids.append(edg['targetId']) + # keep track of all the args that processed + node_arg_map: dict[int, set] = {} # now set inputs on non-sink nodes as inline input '!ii ' # if inputs exist non_sink_nodes = [node for node in inp_restrict['nodes'] if node['id'] not in target_node_ids] for node in non_sink_nodes: + if node["id"] not in node_arg_map: + node_arg_map[node['id']] = set() if node.get('in'): for nkey in node['in']: - node['in'][nkey] = yaml.load('!ii ' + str(node['in'][nkey]), Loader=wic_loader()) + if str(node['in'][nkey]) != "": + node['in'][nkey] = yaml.load('!ii ' + str(node['in'][nkey]), Loader=wic_loader()) + node_arg_map[node['id']].add(nkey) + + if plugins != []: # use the look up logic similar to WFB + for edg in inp_restrict['links']: + # links = edge. nodes and edges is the correct terminology! + src_id = edg['sourceId'] + tgt_id = edg['targetId'] + src_node = next((node for node in inp_restrict['nodes'] if node['id'] == src_id), None) + tgt_node = next((node for node in inp_restrict['nodes'] if node['id'] == tgt_id), None) + assert src_node, f'output(s) of source node of edge{edg} must exist!' + assert tgt_node, f'input(s) of target node of edge{edg} must exist!' + if src_id not in node_arg_map: + node_arg_map[src_id] = set() + + if tgt_id not in node_arg_map: + node_arg_map[tgt_id] = set() + + src_node_ui_config = plugin_config_map.get(src_node['pluginId'], None) + tgt_node_ui_config = plugin_config_map.get(tgt_node['pluginId'], None) + if src_node_ui_config and tgt_node_ui_config: + inlet_index = edg['inletIndex'] + outlet_index = edg['outletIndex'] + + src_node_out_arg = src_node_ui_config['outputs'][outlet_index]["name"] + tgt_node_in_arg = tgt_node_ui_config['inputs'][inlet_index]["name"] + + if tgt_node.get('in'): + source_output = src_node['out'][0][src_node_out_arg] + if isinstance(source_output, dict) and 'wic_anchor' in source_output: + source_output = source_output["wic_anchor"] + tgt_node['in'][tgt_node_in_arg] = yaml.load('!* ' + str(source_output), Loader=wic_loader()) + node_arg_map[tgt_id].add(tgt_node_in_arg) + + for node in inp_restrict['nodes']: + output_dict = node['settings'].get('outputs', {}) + for key in output_dict: + if str(output_dict[key]) != "": + node['in'][key] = yaml.load('!ii ' + str(output_dict[key]), Loader=wic_loader()) + node_arg_map[node['id']].add(key) + node.pop('settings', None) - # After outs are set - for edg in inp_restrict['links']: - # links = edge. nodes and edges is the correct terminology! - src_id = edg['sourceId'] - tgt_id = edg['targetId'] - src_node = next((node for node in inp_restrict['nodes'] if node['id'] == src_id), None) - tgt_node = next((node for node in inp_restrict['nodes'] if node['id'] == tgt_id), None) - assert src_node, f'output(s) of source node of edge{edg} must exist!' - assert tgt_node, f'input(s) of target node of edge{edg} must exist!' - # flattened list of keys - if src_node.get('out') and tgt_node.get('in'): - src_out_keys = [sk for sout in src_node['out'] for sk in sout.keys()] - tgt_in_keys = tgt_node['in'].keys() - # we match the source output tag type to target input tag type - # and connect them through '!* ' for input, all outputs are '!& ' before this - for sk in src_out_keys: - # It maybe possible that (explicit) outputs of src nodes might not have corresponding - # (explicit) inputs in target node - if tgt_node['in'].get(sk): - tgt_node['in'][sk] = yaml.load('!* ' + tgt_node['in'][sk], Loader=wic_loader()) - # the inputs which aren't dependent on previous/other steps - # they are by default inline input - diff_keys = set(tgt_in_keys) - set(src_out_keys) - for dfk in diff_keys: - tgt_node['in'][dfk] = yaml.load('!ii ' + str(tgt_node['in'][dfk]), Loader=wic_loader()) + if "in" in node: + unprocessed_args = set(node['in'].keys()) + if node['id'] in node_arg_map: + unprocessed_args = unprocessed_args.difference(node_arg_map[node['id']]) + for arg in unprocessed_args: + node['in'][arg] = yaml.load('!ii ' + str(node['in'][arg]), Loader=wic_loader()) + else: # No plugins, use the old logic + # this logic is most likely not correct and need to be scrubbed + # along with updating the non_wfb dummy tests + for node in inp_restrict['nodes']: + node.pop('settings', None) - for node in inp_restrict['nodes']: - # just reuse name as node's pluginId, wic id is same as wfb name - node['id'] = node['pluginId'].split('@')[0].replace('/', '_') - node.pop('pluginId', None) + for edg in inp_restrict['links']: + # links = edge. nodes and edges is the correct terminology! + src_id = edg['sourceId'] + tgt_id = edg['targetId'] + src_node = next((node for node in inp_restrict['nodes'] if node['id'] == src_id), None) + tgt_node = next((node for node in inp_restrict['nodes'] if node['id'] == tgt_id), None) + assert src_node, f'output(s) of source node of edge{edg} must exist!' + assert tgt_node, f'input(s) of target node of edge{edg} must exist!' + # flattened list of keys + if src_node.get('out') and tgt_node.get('in'): + src_out_keys = [sk for sout in src_node['out'] for sk in sout.keys()] + tgt_in_keys = tgt_node['in'].keys() + # we match the source output tag type to target input tag type + # and connect them through '!* ' for input, all outputs are '!& ' before this + for sk in src_out_keys: + # It maybe possible that (explicit) outputs of src nodes might not have corresponding + # (explicit) inputs in target node + if tgt_node['in'].get(sk): + # if the output is a dict, it is a wic_anchor, so we need to get the anchor + # and use that as the input + src_node_out_arg = src_node['out'][0][sk] + source_output = src_node['out'][0][sk] + if isinstance(source_output, dict) and 'wic_anchor' in source_output: + source_output = source_output["wic_anchor"] + tgt_node['in'][sk] = yaml.load('!* ' + str(source_output), Loader=wic_loader()) + # the inputs which aren't dependent on previous/other steps + # they are by default inline input + diff_keys = set(tgt_in_keys) - set(src_out_keys) + for dfk in diff_keys: + tgt_node['in'][dfk] = yaml.load('!ii ' + str(tgt_node['in'][dfk]), Loader=wic_loader()) workflow_temp: Cwl = {} if inp_restrict["links"] != []: + node_order = get_topological_order(inp_restrict["links"]) workflow_temp["steps"] = [] - for node in inp_restrict["nodes"]: - workflow_temp["steps"].append(node) # node["cwlScript"] # Assume dict form + for id in node_order: + node = next((n for n in inp_restrict["nodes"] if n["id"] == id), None) + if node: + # just reuse name as node's pluginId, wic id is same as wfb name + node['id'] = node['pluginId'].split('@')[0].replace('/', '_') + node.pop('pluginId', None) + workflow_temp["steps"].append(node) else: # A single node workflow node = inp_restrict["nodes"][0] - workflow_temp = node["cwlScript"] if node.get("cwlScript") else node['run'] + node['id'] = node['pluginId'].split('@')[0].replace('/', '_') + node.pop('pluginId', None) + if node.get("cwlScript"): + workflow_temp = node["cwlScript"] + else: + workflow_temp["steps"] = [] + workflow_temp["steps"].append(node) return workflow_temp diff --git a/src/sophios/api/utils/ict/ict_spec/io/objects.py b/src/sophios/api/utils/ict/ict_spec/io/objects.py index 4f4f92d8..03a26006 100644 --- a/src/sophios/api/utils/ict/ict_spec/io/objects.py +++ b/src/sophios/api/utils/ict/ict_spec/io/objects.py @@ -4,6 +4,7 @@ from typing import Optional, Union, Any from pydantic import BaseModel, Field +from sophios.api.utils.wfb_util import is_directory CWL_IO_DICT: dict[str, str] = { @@ -116,21 +117,35 @@ def _output_to_cwl(self, inputs: Any) -> dict: """Convert outputs to CWL.""" if self.io_type == "path": if self.name in inputs: - if ( - not isinstance(self.io_format, list) - and self.io_format["term"].lower() - == "directory" # pylint: disable=unsubscriptable-object - ): + if is_directory(dict(self)): cwl_type = "Directory" - elif ( - not isinstance(self.io_format, list) - and self.io_format["term"].lower() - == "file" # pylint: disable=unsubscriptable-object - ): - cwl_type = "File" else: cwl_type = "File" + # the logic here is probably wrong + # let's not go here until we have a better idea of io_format in ICT Spec + + # if ( + # not isinstance(self.io_format, list) + # and self.io_format["term"].lower() + # == "directory" # pylint: disable=unsubscriptable-object + # ): + # cwl_type = "Directory" + # elif ( + # not isinstance(self.io_format, list) + # and self.io_format["term"].lower() + # == "file" # pylint: disable=unsubscriptable-object + # ): + # cwl_type = "File" + # elif ( + # isinstance(self.io_format, list) + # and len(self.io_format) == 1 + # and self.io_format[0].lower() == 'directory' + # ): + # cwl_type = "Directory" + # else: + # cwl_type = "File" + cwl_dict_ = { "outputBinding": {"glob": f"$(inputs.{self.name}.basename)"}, "type": cwl_type, diff --git a/src/sophios/api/utils/ict/ict_spec/tools/cwl_ict.py b/src/sophios/api/utils/ict/ict_spec/tools/cwl_ict.py index 577b1c9b..723b5585 100644 --- a/src/sophios/api/utils/ict/ict_spec/tools/cwl_ict.py +++ b/src/sophios/api/utils/ict/ict_spec/tools/cwl_ict.py @@ -20,6 +20,12 @@ def requirements(ict_: "ICT", network_access: bool) -> dict: return reqs +def split_entrypoint_string(enrtypoint: str) -> list[str]: + """Fix str to list of str for entrypoint/baseCommand""" + list_of_str_entry = enrtypoint.split(' ') + return list_of_str_entry + + def clt_dict(ict_: "ICT", network_access: bool) -> dict: """Return a dict of a CommandLineTool from an ICT object.""" @@ -37,7 +43,7 @@ def clt_dict(ict_: "ICT", network_access: bool) -> dict: for io in ict_.outputs }, "requirements": requirements(ict_, network_access), - "baseCommand": ict_.entrypoint, + "baseCommand": [], "label": ict_.title, "doc": str(ict_.documentation), } diff --git a/src/sophios/api/utils/input_object_schema.json b/src/sophios/api/utils/input_object_schema.json index 801f2fbf..54ba53dc 100644 --- a/src/sophios/api/utils/input_object_schema.json +++ b/src/sophios/api/utils/input_object_schema.json @@ -45,7 +45,9 @@ "required": [ "id", "sourceId", - "targetId" + "targetId", + "inletIndex", + "outletIndex" ], "type": "object" }, diff --git a/src/sophios/api/utils/wfb_util.py b/src/sophios/api/utils/wfb_util.py new file mode 100644 index 00000000..fcf5ede4 --- /dev/null +++ b/src/sophios/api/utils/wfb_util.py @@ -0,0 +1,84 @@ +def is_directory(input_dict: dict) -> bool: + """Check if the given input dictionary represents a directory. + + Args: + input_dict (dict): The input dictionary containing type and name. + + Returns: + bool: True if the input represents a directory, False otherwise. + """ + + is_dir: bool = input_dict.get("type", "") == "directory" \ + or input_dict.get("type", "") == "file" \ + or input_dict.get("type", "") == "path" \ + or input_dict.get("type", "") == "collection" \ + or input_dict.get("type", "") == "csvCollection" \ + or input_dict.get("name", "").lower() == "file" \ + or input_dict.get("name", "").lower().endswith("path") \ + or input_dict.get("name", "").lower().endswith("dir") + + return is_dir + + +def get_node_config(plugin: dict) -> dict: + """Get the UI configuration for a specific plugin. + + Args: + plugin (dict): The plugin dictionary containing UI and inputs. + + Returns: + dict: A dictionary containing UI inputs, non-UI inputs, and outputs. + """ + uis = plugin.get("ui", []) + plugin_inputs = plugin.get("inputs", []) + + # split inputs into UI (form) and non-UI (circle inlets) + non_ui_inputs = [] # circle inlets on the left side of the node + ui_inputs = [] # UI inputs such as text fields, checkboxes, etc. + + for i in range(len(plugin_inputs) - 1, -1, -1): + input = plugin_inputs[i] + + # find the UI element that corresponds to this input + ui_input = next( + (x for x in uis if "key" in x and x["key"] == "inputs." + input["name"]), + None, + ) + is_dir = is_directory(input) + + # if input is a directory - move it to the non-UI section + if is_dir: + non_ui_inputs.append(input) + + # in some cases UI is missing for the input, so we need to create it + # but only if it's not a directory + if not ui_input and not is_dir: + calculated_ui_input = { + "key": "inputs." + input["name"], + "type": input["type"], + "title": input["name"], + "required": input["required"], + "format": input["format"], + } + + ui_inputs.append(calculated_ui_input) + + if ui_input and not is_dir: + ui_input["required"] = input["required"] + ui_input["format"] = input["format"] + ui_inputs.append(ui_input) + + outputs = plugin.get("outputs", []) + + # if output has UI - move it to the UI section + # this is mostly for internal nodes such as Input Data Directory + for output in outputs: + ui_output = next( + (x for x in uis if "key" in x and x["key"] == "outputs." + output["name"]), + None, + ) + if ui_output: + ui_inputs.append(ui_output) + + result = {"ui": ui_inputs, "inputs": non_ui_inputs, "outputs": outputs} + return result diff --git a/src/sophios/cli.py b/src/sophios/cli.py index 1f9fa8fe..45a3bb81 100644 --- a/src/sophios/cli.py +++ b/src/sophios/cli.py @@ -39,7 +39,7 @@ (You should only disable provenance if absolutely necessary.)''') parser.add_argument('--copy_output_files', default=False, action="store_true", help='Copies output files from the cachedir to outdir/ (automatically enabled with --run_local)') -parser.add_argument('--inline_cwl_runtag', default=False, action="store_true", +parser.add_argument('--cwl_inline_runtag', default=False, action="store_true", help='Copies cwl adapter file contents inline into the final .cwl in autogenerated/') # This is a hidden flag parser.add_argument('--ignore_dir_path', type=bool, diff --git a/src/sophios/compiler.py b/src/sophios/compiler.py index e81e843b..9af62be2 100644 --- a/src/sophios/compiler.py +++ b/src/sophios/compiler.py @@ -4,7 +4,7 @@ import os from pathlib import Path import sys -from typing import Dict, List +from typing import Dict, List, Any import graphviz from mergedeep import merge, Strategy @@ -861,10 +861,8 @@ def compile_workflow_once(yaml_tree_ast: YamlTree, steps_list.append(step_i_copy) yaml_tree.update({'steps': steps_list}) # steps_list ? - # Dump the workflow inputs to a separate yml file. - yaml_inputs: WorkflowInputsFile = {} - for key, in_dict in inputs_file_workflow.items(): - new_keyval: WorkflowInputsFile = {} + def populate_scalar_val(in_dict: dict) -> Any: + newval: Any = () if 'File' == in_dict['type']: # path = Path(in_dict['value']).name # NOTE: Use .name ? newval = {'class': 'File', 'path': in_dict['value']} @@ -878,20 +876,28 @@ def compile_workflow_once(yaml_tree_ast: YamlTree, print(f'Choosing {in_format[0]}') in_format = in_format[0] newval['format'] = in_format - new_keyval = {key: newval} elif 'Directory' == in_dict['type']: - if not args.ignore_dir_path: - ldir = Path(in_dict['value']) - if not ldir.is_absolute(): - ldir = Path('autogenerated') / ldir - ldir.mkdir(parents=True, exist_ok=True) newval = {'class': 'Directory', 'location': in_dict['value']} - new_keyval = {key: newval} + elif 'string' == in_dict['type'] or 'string?' == in_dict['type']: + # We cannot store string values as a dict, so use type: ignore + newval = str(in_dict['value']) # TODO: Check for all valid types? else: - # We cannot store string values as a dict, so use type: ignore - arg_val = in_dict['value'] - new_keyval = {key: arg_val} + newval = in_dict['value'] + return newval + + # Dump the workflow inputs to a separate yml file. + yaml_inputs: WorkflowInputsFile = {} + for key, in_dict in inputs_file_workflow.items(): + new_keyval: WorkflowInputsFile = {} + if isinstance(in_dict['type'], dict) and 'array' == in_dict['type']['type']: + val_list = [] + for val in in_dict['value']: + val_list.append(populate_scalar_val( + {'type': in_dict['type']['items'], 'value': val, 'format': in_dict.get('format')})) + new_keyval = {key: val_list} + else: + new_keyval = {key: populate_scalar_val(in_dict)} # else: # raise Exception(f"Error! Unknown type: {in_dict['type']}") yaml_inputs.update(new_keyval) diff --git a/src/sophios/main.py b/src/sophios/main.py index 63a17b69..85622a38 100644 --- a/src/sophios/main.py +++ b/src/sophios/main.py @@ -171,7 +171,7 @@ def main() -> None: io.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file) - pc.cwl_inline_runtag(args, rose_tree) + rose_tree = pc.cwl_inline_runtag(args, rose_tree) io.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file) if args.graphviz: @@ -194,8 +194,9 @@ def main() -> None: if args.run_local or args.generate_run_script: pc.cwl_docker_extract(args, yaml_stem) - pc.remove_entrypoints(args, rose_tree) - + rose_tree = pc.remove_entrypoints(args, rose_tree) + io.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file) + pc.find_and_create_output_dirs(rose_tree) run_local.run_local(args, rose_tree, args.cachedir, args.cwl_runner, False) # Finally, since there is an output file copying bug in cwltool, diff --git a/src/sophios/plugins.py b/src/sophios/plugins.py index 91a33c46..a5015368 100644 --- a/src/sophios/plugins.py +++ b/src/sophios/plugins.py @@ -279,13 +279,13 @@ def cwl_update_inline_runtag_rosetree(rose_tree: RoseTree, path: Path, relative_ """ n_d: NodeData = rose_tree.data if n_d.compiled_cwl['class'] == 'Workflow': - outputs_inline_cwl_runtag = cwl_update_inline_runtag(n_d.compiled_cwl, path, relative_run_path) + outputs_cwl_inline_runtag = cwl_update_inline_runtag(n_d.compiled_cwl, path, relative_run_path) else: - outputs_inline_cwl_runtag = n_d.compiled_cwl + outputs_cwl_inline_runtag = n_d.compiled_cwl sub_trees_path = [cwl_update_inline_runtag_rosetree(sub_rose_tree, path, relative_run_path) for sub_rose_tree in rose_tree.sub_trees] - node_data_path = NodeData(n_d.namespaces, n_d.name, n_d.yml, outputs_inline_cwl_runtag, n_d.tool, + node_data_path = NodeData(n_d.namespaces, n_d.name, n_d.yml, outputs_cwl_inline_runtag, n_d.tool, n_d.workflow_inputs_file, n_d.explicit_edge_defs, n_d.explicit_edge_calls, n_d.graph, n_d.inputs_workflow, n_d.step_name_1) return RoseTree(node_data_path, sub_trees_path) diff --git a/src/sophios/post_compile.py b/src/sophios/post_compile.py index 40d61c44..5a335a85 100644 --- a/src/sophios/post_compile.py +++ b/src/sophios/post_compile.py @@ -1,12 +1,58 @@ import argparse from pathlib import Path import subprocess as sub - +from typing import Dict, Union from . import plugins -from . import input_output as io from .wic_types import RoseTree +def find_output_dirs(data: Union[RoseTree, Dict, list]) -> list: + """ + Recursively searches through a nested structure and finds all dictionaries + that contain the key 'location', and a key 'class' with a value of 'Directory'. + + Args: + data (any): The data to search through, which can be a dictionary, list, + or any other structure. + + Returns: + list: A list of location values. + """ + results = [] + if isinstance(data, Dict): + if "class" in data and data["class"] == "Directory" and "location" in data: + if isinstance(data["location"], dict) and "wic_inline_input" in data["location"]: + results.append(data["location"]["wic_inline_input"]) + else: + results.append(data["location"]) + for value in data.values(): + results.extend(find_output_dirs(value)) + elif isinstance(data, list): + for item in data: + results.extend(find_output_dirs(item)) + + return results + + +def create_output_dirs(output_dirs: list, basepath: str = 'autogenerated') -> None: + """ + Creates all the directories that are needed for the outputs of a workflow. + """ + for output_dir in output_dirs: + dir_path = Path(output_dir) + if not dir_path.is_absolute(): + dir_path = Path(basepath) / dir_path + dir_path.mkdir(parents=True, exist_ok=True) + + +def find_and_create_output_dirs(rose_tree: RoseTree, basepath: str = 'autogenerated') -> None: + """ + Finds all output directories in the workflow and creates them. + """ + output_dirs = find_output_dirs(rose_tree.data.workflow_inputs_file) + create_output_dirs(output_dirs, basepath) + + def cwl_docker_extract(args: argparse.Namespace, file_name: str) -> None: """Helper function to do the cwl_docker_extract""" # cwl-docker-extract recursively `docker pull`s all images in all subworkflows. @@ -22,15 +68,16 @@ def cwl_docker_extract(args: argparse.Namespace, file_name: str) -> None: sub.run(cmd, check=True) -def cwl_inline_runtag(args: argparse.Namespace, rose_tree: RoseTree) -> None: +def cwl_inline_runtag(args: argparse.Namespace, rose_tree: RoseTree) -> RoseTree: """Transform with cwl inline runtag""" # this has to happen after at least one write # so we can copy from local cwl_dapters in autogenerated/ - if args.inline_cwl_runtag: + if args.cwl_inline_runtag: rose_tree = plugins.cwl_update_inline_runtag_rosetree(rose_tree, Path('autogenerated/'), True) + return rose_tree -def remove_entrypoints(args: argparse.Namespace, rose_tree: RoseTree) -> None: +def remove_entrypoints(args: argparse.Namespace, rose_tree: RoseTree) -> RoseTree: """Remove entry points""" if args.docker_remove_entrypoints: # Requires root, so guard behind CLI option @@ -40,4 +87,4 @@ def remove_entrypoints(args: argparse.Namespace, rose_tree: RoseTree) -> None: plugins.remove_entrypoints_podman() rose_tree = plugins.dockerPull_append_noentrypoint_rosetree(rose_tree) - io.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file) + return rose_tree diff --git a/src/sophios/schemas/wic_schema.py b/src/sophios/schemas/wic_schema.py index d5d07db4..e4dcd551 100644 --- a/src/sophios/schemas/wic_schema.py +++ b/src/sophios/schemas/wic_schema.py @@ -5,7 +5,9 @@ import networkx as nx import graphviz -from jsonschema import RefResolver, Draft202012Validator +from jsonschema import Draft202012Validator +from referencing import Registry, Resource +from referencing.jsonschema import DRAFT202012 import yaml import sophios @@ -680,7 +682,6 @@ def get_validator(tools_cwl: Tools, yml_stems: List[str], schema_store: Dict[str schema = wic_main_schema(tools_cwl, yml_stems, schema_store, hypothesis) schema_store[schema['$id']] = schema - schema_store['wic_tag'] = wic_tag_schema(hypothesis) if write_to_disk: with open('autogenerated/schemas/wic.json', mode='w', encoding='utf-8') as f: f.write(json.dumps(schema, indent=2)) @@ -696,7 +697,11 @@ def get_validator(tools_cwl: Tools, yml_stems: List[str], schema_store: Dict[str # The $ref tag refers to URIs defined in $id tags, NOT relative paths on # the local filesystem! We need to create a global mapping between ids and schemas # i.e. schema_store. - resolver = RefResolver.from_schema(schema, store=schema_store) + schema_store_resource: Resource = Resource(contents=schema_store, specification=DRAFT202012) # type: ignore + registry: Registry = Registry().with_resource(uri="wic_schema_store", resource=schema_store_resource) + wic_tag_schema_resource: Resource = Resource(contents=wic_tag_schema( + hypothesis), specification=DRAFT202012) # type: ignore + registry = registry.with_resource(uri="wic_tag", resource=wic_tag_schema_resource) """ Use check_schema to 'first verify that the provided schema is itself valid, since not doing so can lead to less obvious error messages and fail in less obvious or consistent ways.' @@ -707,5 +712,5 @@ def get_validator(tools_cwl: Tools, yml_stems: List[str], schema_store: Dict[str # try temporarily commenting this line out to generate the schema anyway. # Then, in any yml file, the very first line should show a "schema stack trace" Draft202012Validator.check_schema(schema) - validator = Draft202012Validator(schema, resolver=resolver) + validator = Draft202012Validator(schema, registry=registry) return validator diff --git a/src/sophios/utils_yaml.py b/src/sophios/utils_yaml.py index 5f329678..a7bebf8e 100644 --- a/src/sophios/utils_yaml.py +++ b/src/sophios/utils_yaml.py @@ -25,7 +25,10 @@ def inlineinput_constructor(loader: yaml.SafeLoader, node: yaml.nodes.Node) -> D if isinstance(node, yaml.nodes.ScalarNode): try: # loader.construct_scalar always returns a string, whereas - val = yaml.safe_load(node.value) + if node.value == "": + val = "" + else: + val = yaml.safe_load(node.value) # yaml.safe_load returns the correct primitive types except Exception: # but fallback to a string if it is not actually a primitive type. diff --git a/tests/data/ict_data/czi_extract/czi_extract_clt.json b/tests/data/ict_data/czi_extract/czi_extract_clt.json index 9fcd1ae2..b9c3105f 100644 --- a/tests/data/ict_data/czi_extract/czi_extract_clt.json +++ b/tests/data/ict_data/czi_extract/czi_extract_clt.json @@ -1,5 +1,5 @@ { - "baseCommand": "[python3, main.py]", + "baseCommand": [], "class": "CommandLineTool", "cwlVersion": "v1.2", "doc": "None", @@ -23,7 +23,7 @@ "outputBinding": { "glob": "$(inputs.outDir.basename)" }, - "type": "File" + "type": "Directory" } }, "requirements": { diff --git a/tests/data/ict_data/label_to_vector/label_to_vector_clt.json b/tests/data/ict_data/label_to_vector/label_to_vector_clt.json index 4bf5370c..ba84ac0b 100644 --- a/tests/data/ict_data/label_to_vector/label_to_vector_clt.json +++ b/tests/data/ict_data/label_to_vector/label_to_vector_clt.json @@ -1,5 +1,5 @@ { - "baseCommand": "[python3, main.py]", + "baseCommand": [], "class": "CommandLineTool", "cwlVersion": "v1.2", "doc": "None", @@ -29,7 +29,7 @@ "outputBinding": { "glob": "$(inputs.outDir.basename)" }, - "type": "File" + "type": "Directory" } }, "requirements": { diff --git a/tests/data/ict_data/ome_conversion/ome_conversion_clt.json b/tests/data/ict_data/ome_conversion/ome_conversion_clt.json index 3656cf47..4ea6e534 100644 --- a/tests/data/ict_data/ome_conversion/ome_conversion_clt.json +++ b/tests/data/ict_data/ome_conversion/ome_conversion_clt.json @@ -1,5 +1,5 @@ { - "baseCommand": "python3 -m polus.images.formats.ome_converter", + "baseCommand": [], "class": "CommandLineTool", "cwlVersion": "v1.2", "doc": "None", @@ -35,7 +35,7 @@ "outputBinding": { "glob": "$(inputs.outDir.basename)" }, - "type": "File" + "type": "Directory" } }, "requirements": { diff --git a/tests/data/ict_data/ome_conversion/ome_conversion_ict.json b/tests/data/ict_data/ome_conversion/ome_conversion_ict.json index 7e81c3fa..7ac3ddda 100644 --- a/tests/data/ict_data/ome_conversion/ome_conversion_ict.json +++ b/tests/data/ict_data/ome_conversion/ome_conversion_ict.json @@ -34,15 +34,6 @@ "name": "fileExtension", "required": true, "type": "string" - }, - { - "description": "Output collection", - "format": [ - "genericData" - ], - "name": "outDir", - "required": true, - "type": "path" } ], "name": "polusai/OMEConverter", diff --git a/tests/rest_wfb_objects/bbbc_download_wfb.json b/tests/rest_wfb_objects/bbbc_download_wfb.json new file mode 100644 index 00000000..aa93dfa1 --- /dev/null +++ b/tests/rest_wfb_objects/bbbc_download_wfb.json @@ -0,0 +1,85 @@ +{ + "state": { + "nodes": [ + { + "id": 1, + "x": 259, + "y": 209, + "z": 1, + "name": "BBBC Download", + "expanded": true, + "pluginId": "polusai/bbbc-download-plugin@0.1.0-dev1", + "height": 50, + "width": 250, + "settings": { + "inputs": { + "name": "BBBC001", + "outDir": "bbbcdownload.outDir" + } + }, + "internal": false + } + ], + "links": [], + "selection": [] + }, + "plugins": [ + { + "name": "polusai/bbbc-download-plugin:0.1.0-dev1", + "version": "0.1.0-dev1", + "title": "BBBC Download", + "description": "Downloads the datasets on the Broad Bioimage Benchmark Collection website", + "createdBy": "Serebryakov, Artem (NIH/NCATS) [C]", + "updatedBy": "Serebryakov, Artem (NIH/NCATS) [C]", + "author": [ + "Hamdah Abbasi" + ], + "contact": "hamdahshafqat.abbasi@nih.gov", + "container": "polusai/bbbc-download-plugin:0.1.0-dev1", + "entrypoint": "", + "inputs": [ + { + "description": "The name of the dataset(s) to be downloaded (separate the datasets with a comma. eg BBBC001,BBBC002,BBBC003)", + "format": [ + "collection" + ], + "name": "name", + "required": true, + "type": "string" + } + ], + "outputs": [ + { + "description": "Output collection", + "name": "outDir", + "format": [ + "directory" + ], + "required": true, + "type": "path" + } + ], + "repository": "https://github.com/LabShare/polus-plugins", + "specVersion": "1.0.0", + "ui": [ + { + "description": "The name of the dataset(s) to be downloaded (separate the datasets with a comma. eg BBBC001,BBBC002,BBBC003)", + "format": [ + "collection" + ], + "name": "name", + "required": true, + "type": "string" + } + ], + "path": "visualization", + "tags": [ + "bbbc-download" + ], + "createdAt": "2024-10-29T18:59:55.843Z", + "updatedAt": "2024-10-29T20:01:42.463Z", + "id": "672130abad801ccee7f5eaad", + "pid": "polusai/bbbc-download-plugin@0.1.0-dev1" + } + ] +} \ No newline at end of file diff --git a/tests/rest_wfb_objects/multi_node.json b/tests/rest_wfb_objects/multi_node.json index 27b9894a..e3b79442 100644 --- a/tests/rest_wfb_objects/multi_node.json +++ b/tests/rest_wfb_objects/multi_node.json @@ -64,12 +64,16 @@ "links": [ { "sourceId": 7, + "outletIndex": 0, "targetId": 18, + "inletIndex": 1, "id": 1 }, { "sourceId": 18, + "outletIndex": 0, "targetId": 9, + "inletIndex": 1, "id": 5 } ], diff --git a/tests/rest_wfb_objects/multi_node_inline_cwl.json b/tests/rest_wfb_objects/multi_node_inline_cwl.json index 410a1d48..37628043 100644 --- a/tests/rest_wfb_objects/multi_node_inline_cwl.json +++ b/tests/rest_wfb_objects/multi_node_inline_cwl.json @@ -169,12 +169,16 @@ "links": [ { "sourceId": 7, + "outletIndex": 0, "targetId": 18, + "inletIndex": 1, "id": 1 }, { "sourceId": 18, + "outletIndex": 0, "targetId": 9, + "inletIndex": 1, "id": 5 } ], diff --git a/tests/rest_wfb_objects/single_node_bbbc_download.json b/tests/rest_wfb_objects/single_node_bbbc_download.json new file mode 100644 index 00000000..527e73e1 --- /dev/null +++ b/tests/rest_wfb_objects/single_node_bbbc_download.json @@ -0,0 +1,74 @@ +{ + "state": { + "nodes": [ + { + "id": 1, + "name": "bbbcdownload", + "pluginId": "", + "run": { + "baseCommand": [], + "class": "CommandLineTool", + "cwlVersion": "v1.2", + "inputs": { + "name": { + "label": "The name of the dataset(s) to be downloaded (separate the datasets with a comma. eg BBBC001,BBBC002,BBBC003)", + "doc": "The name of the dataset(s) to be downloaded (separate the datasets with a comma. eg BBBC001,BBBC002,BBBC003)", + "inputBinding": { + "prefix": "--name" + }, + "type": "string" + }, + "outDir": { + "label": "Output collection", + "doc": "Output collection", + "inputBinding": { + "prefix": "--outDir" + }, + "type": "Directory" + } + }, + "outputs": { + "outDir": { + "label": "Output collection", + "doc": "Output collection", + "type": "Directory", + "outputBinding": { + "glob": "$(inputs.outDir.basename)" + } + } + }, + "stdout": "output", + "requirements": { + "DockerRequirement": { + "dockerPull": "polusai/bbbc-download-plugin:0.1.0-dev1" + }, + "InitialWorkDirRequirement": { + "listing": [ + { + "entry": "$(inputs.outDir)", + "writable": true + } + ] + }, + "InlineJavascriptRequirement": {}, + "NetworkAccess": { + "networkAccess": true + } + } + }, + "settings": { + "inputs": { + "name": "BBBC001", + "outDir": "bbbcdownload.outDir" + }, + "outputs": { + "outDir": "bbbcdownload.outDir" + } + }, + "internal": false + } + ], + "links": [] + }, + "plugins": [] +} \ No newline at end of file diff --git a/tests/test_examples.py b/tests/test_examples.py index a4322ac8..91e5077d 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -22,7 +22,7 @@ from sophios import auto_gen_header from sophios.cli import get_args from sophios.utils_yaml import wic_loader -from sophios.post_compile import cwl_docker_extract +from sophios.post_compile import cwl_docker_extract, remove_entrypoints, find_and_create_output_dirs from sophios.wic_types import NodeData, StepId, Yaml, YamlTree, Json from sophios.utils_graphs import get_graph_reps @@ -216,20 +216,14 @@ def run_workflows(yml_path_str: str, yml_path: Path, cwl_runner: str, args: argp cwl_docker_extract(args, Path(yml_path).stem) return - if args.docker_remove_entrypoints: - # Requires root, so guard behind CLI option - if args.container_engine == 'docker': - sophios.plugins.remove_entrypoints_docker() - if args.container_engine == 'podman': - sophios.plugins.remove_entrypoints_podman() - - rose_tree = sophios.plugins.dockerPull_append_noentrypoint_rosetree(rose_tree) - sophios.input_output.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file) + rose_tree = remove_entrypoints(args, rose_tree) + sophios.input_output.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file) if args.partial_failure_enable: rose_tree = sophios.plugins.cwl_update_outputs_optional_rosetree(rose_tree) sophios.input_output.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file) # NOTE: Do not use --cachedir; we want to actually test everything. + find_and_create_output_dirs(rose_tree) retval = sophios.run_local.run_local(args, rose_tree, None, cwl_runner, True) assert retval == 0 diff --git a/tests/test_rest_core.py b/tests/test_rest_core.py index 59003d8e..f9ce8273 100644 --- a/tests/test_rest_core.py +++ b/tests/test_rest_core.py @@ -12,6 +12,7 @@ import pytest from sophios.wic_types import Json, List +import sophios.post_compile as pc from sophios.api.http import restapi @@ -103,9 +104,6 @@ def write_out_to_disk(res: Json, workflow_name: str) -> None: res_cwl.pop('retval', None) res_cwl.pop('cwlJobInputs', None) res_cwl.pop('name', None) - # Add back the dollar for tags like 'namespaces' and 'schemas' - res_cwl['$namespaces'] = res_cwl.pop('namespaces', None) - res_cwl['$schemas'] = res_cwl.pop('schemas', None) compiled_cwl = workflow_name + '.cwl' inputs_yml = workflow_name + '_inputs.yml' # write compiled .cwl file @@ -116,12 +114,8 @@ def write_out_to_disk(res: Json, workflow_name: str) -> None: yaml.dump(res['cwlJobInputs'], f) -@pytest.mark.fast -def test_rest_core_single_node() -> None: - """A simple single node sophios/restapi test""" - inp_file = "single_node.json" - inp: Json = {} - inp_path = Path(__file__).parent / 'rest_wfb_objects' / inp_file +def prepare_call_rest_api(inp_path: Path) -> Json: + """prepare payload and call rest api""" with open(inp_path, 'r', encoding='utf-8') as f: inp = json.load(f) print('----------- from rest api ----------- \n\n') @@ -136,8 +130,50 @@ async def receive() -> Json: req: Request = Request(scope) req._receive = receive res: Json = asyncio.run(restapi.compile_wf(req)) # call to rest api + return res + + +@pytest.mark.fast +def test_rest_core_single_node() -> None: + """A simple single node sophios/restapi test""" + basepath = 'autogenerated' + inp_file = "single_node.json" + inp_path = Path(__file__).parent / 'rest_wfb_objects' / inp_file + workflow_name = inp_file.split('.', maxsplit=1)[0] + # write compiled_cwl and inputs_yml + res = prepare_call_rest_api(inp_path) + output_dirs = pc.find_output_dirs(res) + pc.create_output_dirs(output_dirs, basepath) + write_out_to_disk(res, workflow_name) + retval = run_cwl_local(workflow_name, 'cwltool', 'docker', False) + assert retval == 0 + + +def test_rest_core_single_node_bbbc() -> None: + """A simple single node sophios/restapi test""" + basepath = 'autogenerated' + inp_file = "single_node_bbbc_download.json" + inp_path = Path(__file__).parent / 'rest_wfb_objects' / inp_file + workflow_name = inp_file.split('.', maxsplit=1)[0] + # write compiled_cwl and inputs_yml + res = prepare_call_rest_api(inp_path) + output_dirs = pc.find_output_dirs(res) + pc.create_output_dirs(output_dirs, basepath) + write_out_to_disk(res, workflow_name) + retval = run_cwl_local(workflow_name, 'cwltool', 'docker', False) + assert retval == 0 + + +def test_rest_core_bbbc_download_wfb() -> None: + """A simple multi node (inline cwl) sophios/restapi test""" + basepath = 'autogenerated' + inp_file = "bbbc_download_wfb.json" + inp_path = Path(__file__).parent / 'rest_wfb_objects' / inp_file workflow_name = inp_file.split('.', maxsplit=1)[0] # write compiled_cwl and inputs_yml + res = prepare_call_rest_api(inp_path) + output_dirs = pc.find_output_dirs(res) + pc.create_output_dirs(output_dirs, basepath) write_out_to_disk(res, workflow_name) retval = run_cwl_local(workflow_name, 'cwltool', 'docker', False) assert retval == 0 @@ -146,25 +182,14 @@ async def receive() -> Json: @pytest.mark.fast def test_rest_core_multi_node_file() -> None: """A simple multi node sophios/restapi test""" + basepath = 'autogenerated' inp_file = "multi_node.json" - inp: Json = {} inp_path = Path(__file__).parent / 'rest_wfb_objects' / inp_file - with open(inp_path, 'r', encoding='utf-8') as f: - inp = json.load(f) - print('----------- from rest api ----------- \n\n') - scope = {} - scope['type'] = 'http' - - async def receive() -> Json: - inp_byte = json.dumps(inp).encode('utf-8') - return {"type": "http.request", "body": inp_byte} - - # create a request object and pack it with our json payload - req: Request = Request(scope) - req._receive = receive - res: Json = asyncio.run(restapi.compile_wf(req)) # call to rest api workflow_name = inp_file.split('.', maxsplit=1)[0] # write compiled_cwl and inputs_yml + res = prepare_call_rest_api(inp_path) + output_dirs = pc.find_output_dirs(res) + pc.create_output_dirs(output_dirs, basepath) write_out_to_disk(res, workflow_name) retval = run_cwl_local(workflow_name, 'cwltool', 'docker', False) assert retval == 0 @@ -173,25 +198,14 @@ async def receive() -> Json: @pytest.mark.fast def test_rest_core_multi_node_inline_cwl() -> None: """A simple multi node (inline cwl) sophios/restapi test""" + basepath = 'autogenerated' inp_file = "multi_node_inline_cwl.json" - inp: Json = {} inp_path = Path(__file__).parent / 'rest_wfb_objects' / inp_file - with open(inp_path, 'r', encoding='utf-8') as f: - inp = json.load(f) - print('----------- from rest api ----------- \n\n') - scope = {} - scope['type'] = 'http' - - async def receive() -> Json: - inp_byte = json.dumps(inp).encode('utf-8') - return {"type": "http.request", "body": inp_byte} - - # create a request object and pack it with our json payload - req: Request = Request(scope) - req._receive = receive - res: Json = asyncio.run(restapi.compile_wf(req)) # call to rest api workflow_name = inp_file.split('.', maxsplit=1)[0] # write compiled_cwl and inputs_yml + res = prepare_call_rest_api(inp_path) + output_dirs = pc.find_output_dirs(res) + pc.create_output_dirs(output_dirs, basepath) write_out_to_disk(res, workflow_name) retval = run_cwl_local(workflow_name, 'cwltool', 'docker', False) assert retval == 0