diff --git a/.github/workflows/run_workflows.yml b/.github/workflows/run_workflows.yml index 1b6da8cd..54a7dfb5 100644 --- a/.github/workflows/run_workflows.yml +++ b/.github/workflows/run_workflows.yml @@ -168,13 +168,18 @@ jobs: - name: cwl-docker-extract (i.e. recursively docker pull) if: always() - run: cd workflow-inference-compiler/ && pytest -k test_cwl_docker_extract + run: cd workflow-inference-compiler/ && pytest tests/test_examples.py -k test_cwl_docker_extract # For self-hosted runners, make sure the docker cache is up-to-date. - name: PyTest Run Workflows if: always() # NOTE: Do NOT add coverage to PYPY CI runs https://github.com/tox-dev/tox/issues/2252 - run: cd workflow-inference-compiler/ && pytest -k test_run_workflows_on_push --workers 8 --cwl_runner cwltool # --cov + run: cd workflow-inference-compiler/ && pytest tests/test_examples.py -k test_run_workflows_on_push --workers 8 --cwl_runner cwltool # --cov + + - name: PyTest Run REST Core Tests + if: always() + # NOTE: Do NOT add coverage to PYPY CI runs https://github.com/tox-dev/tox/issues/2252 + run: cd workflow-inference-compiler/ && pytest tests/test_rest_core.py -k test_rest_core --cwl_runner cwltool # NOTE: The steps below are for repository_dispatch only. For all other steps, please insert above # this comment. diff --git a/README.md b/README.md index 3e302366..9a394a5b 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,35 @@ -# Workflow Inference Compiler +# Sophios [![doc-buid-status](https://readthedocs.org/projects/workflow-inference-compiler/badge/?version=latest)](https://workflow-inference-compiler.readthedocs.io/en/latest/) -Scientific computing can be difficult in practice due to various complex software issues. In particular, chaining together software packages into a computational pipeline can be very error prone. 
Using the [Common Workflow Language](https://www.commonwl.org) (CWL) greatly helps, but like many other workflow languages users still need to explicitly specify how to connect inputs & outputs. The Workflow Inference Compiler allows users to specify computational protocols at a very high level of abstraction, it automatically infers almost all connections between inputs & outputs, and it compiles to CWL for execution. +Scientific computing can be difficult in practice due to various complex software issues. In particular, chaining together software packages into a computational pipeline can be very error prone. Using the [Common Workflow Language](https://www.commonwl.org) (CWL) greatly helps, but like many other workflow languages users still need to explicitly specify how to connect inputs & outputs. Sophios allows users to specify computational protocols at a very high level of abstraction, it automatically infers almost all connections between inputs & outputs, and it compiles to CWL for execution. ## Documentation The documentation is available on [readthedocs](https://workflow-inference-compiler.readthedocs.io/en/latest/). -## Example Workflows -The following repositories contain example workflows: - -[Molecular Modeling Workflows](https://github.com/PolusAI/mm-workflows) - -[Image Workflows](https://github.com/PolusAI/image-workflows) - -Like CWL, the compiler is general purpose and is not limited to any specific domain. -You do not need to install these to use wic. They are completely optional. - -(But obviously if you're just getting started and you don't have any workflows of your own, you probably want to install at least one of them.) 
## Quick Start See the [installation guide](docs/installguide.md) for more details, but: For pip users: -`pip install wic` # Please read the next sentence +`pip install sophios` -Unlike conda, **pip cannot install the binary system dependencies needed to actually run most workflows!** +In order to execute the CWL workflows that are generated by `sophios`, `cwltool` and all of its underlying dependencies need to be present in the system. Unfortunately +`pip` has no capability to resolve and install these dependencies. Please refer to the `cwltool` [installation guide](https://cwltool.readthedocs.io/en/latest/#install) to prepare the system to run CWL workflows. -If you want to actually run workflows, you (or your sysadmin) will have to manually install and configure additional software! For conda users / developers: See the [installation guide for developers](docs/dev/installguide.md) ``` -wic --yaml ../workflow-inference-compiler/docs/tutorials/helloworld.wic --graphviz --run_local --quiet +sophios --yaml ../workflow-inference-compiler/docs/tutorials/helloworld.wic --graphviz --run_local --quiet ``` -The Workflow Inference Compiler is a [Domain Specific Language](https://en.wikipedia.org/wiki/Domain-specific_language) (DSL) based on the [Common Workflow Language](https://www.commonwl.org). CWL is fantastic, but explicitly constructing the Directed Acyclic Graph (DAG) associated with a non-trivial workflow is not so simple. Instead of writing raw CWL, you can write your workflows in a much simpler yml DSL. For technical reasons edge inference is far from unique, so ***`users should always check that edge inference actually produces the intended DAG`***. +Sophios is a [Domain Specific Language](https://en.wikipedia.org/wiki/Domain-specific_language) (DSL) based on the [Common Workflow Language](https://www.commonwl.org). CWL is fantastic, but explicitly constructing the Directed Acyclic Graph (DAG) associated with a non-trivial workflow is not so simple. 
Instead of writing raw CWL, users can write workflows in a much simpler yml DSL. For technical reasons edge inference is far from unique, so ***`users should always check that edge inference actually produces the intended DAG`***. ## Edge Inference -The key feature is that in most cases, you do not need to specify any of the edges! They will be automatically inferred for you based on types, file formats, and naming conventions. For more information, see the [user guide](docs/userguide.md#edge-inference-algorithm) If for some reason edge inference fails, there is a syntax for creating [explicit edges](docs/userguide.md#explicit-edges). +The key feature is that in most cases, users do not need to specify any of the edges! They will be automatically inferred for users based on types, file formats, and naming conventions. For more information, see the [user guide](docs/userguide.md#edge-inference-algorithm). If for some reason edge inference fails, there is a syntax for creating [explicit edges](docs/userguide.md#explicit-edges). ## Subworkflows @@ -48,7 +37,7 @@ Subworkflows are very useful for creating reusable, composable building blocks. ## Explicit CWL -Since the yml DSL files are automatically compiled to CWL, users should not have to know any CWL. However, the yml DSL is secretly CWL that is simply missing almost all of the tags! In other words, the compiler merely adds missing information to the files, and so if you know CWL you are free to explicitly add the information yourself. Thus, the yml DSL is intentionally a [leaky abstraction](https://en.wikipedia.org/wiki/Leaky_abstraction). +Since the yml DSL files are automatically compiled to CWL, users should not have to know any CWL. However, the yml DSL is secretly CWL that is simply missing almost all of the tags! In other words, the compiler merely adds missing information to the files, and so if the users know CWL, they are free to explicitly add the information themselves. 
Thus, the yml DSL is intentionally a [leaky abstraction](https://en.wikipedia.org/wiki/Leaky_abstraction). ## Python API In addition to the underlying declarative yaml syntax, there is an API for writing WIC workflows in python. The python API is philosophically the exact opposite: users should not have to know any CWL, and in fact all CWL features are hidden unless explicitly exposed. \ No newline at end of file diff --git a/docs/dev/devguide.md b/docs/dev/devguide.md index 73c51bb6..d2b88428 100644 --- a/docs/dev/devguide.md +++ b/docs/dev/devguide.md @@ -90,4 +90,73 @@ INFO [job test.cwl] completed success ] ``` -As can be seen from the output json blob the order returned is not the same as input order. \ No newline at end of file +As can be seen from the output json blob the order returned is not the same as input order. + + +## Partial Failures +When the partial failures feature is enabled although the subprocess for the workflow step itself will pass, the post-processing javascript can potentially crash as seen below. The Sophios compiler only semantically understands Sophios/CWL. It is theoretically impossible to correct mistakes in the embedded JS of any arbitrary workflow. The corresponding cwl snippet is also shown. +``` +outputs: + + topology_changed: + type: boolean + outputBinding: + glob: valid.txt + loadContents: true + outputEval: | + ${ + // Read the contents of the file + const lines = self[0].contents.split("\n"); + // Read boolean value from the first line + const valid = lines[0].trim() === "True"; + return valid; + } +``` +``` +stdout was: '' +stderr was: 'evalmachine.:45 + const lines = self[0].contents.split("\n"); + ^ +TypeError: Cannot read properties of undefined (reading 'contents') +``` +To fix this the developer needs to add a javascript snippet to check if the self object being globbed exists, shown below. 
+``` +outputs: + + topology_changed: + type: boolean + outputBinding: + glob: valid.txt + loadContents: true + outputEval: | + ${ + // check if self[0] exists + if (!self[0]) { + return null; + } + // Read the contents of the file + const lines = self[0].contents.split("\n"); + // Read boolean value from the first line + const valid = lines[0].trim() === "True"; + return valid; + } +``` + +## Workflow Development +When adding new .cwl or .wic files it's best to remove the .wic folder containing paths to .cwl and .yml files. +``` +rm -r ~/wic +``` + +## Singularity +When building images with Singularity it's best to clean the cache to avoid potential errors with cwltool or cwl-docker-extract. +``` +singularity cache clean +``` + +## Toil +When working with toil be sure to clean the working state as well as the configuration file, otherwise if you change input flags the configuration file will not be updated. +``` +toil clean +rm -r ~/.toil +``` \ No newline at end of file diff --git a/docs/dev/installguide.md b/docs/dev/installguide.md index 5eb4b58a..39572281 100644 --- a/docs/dev/installguide.md +++ b/docs/dev/installguide.md @@ -92,7 +92,7 @@ You should now have the `wic` executable available in your terminal. To test your installation, you can run the example in README.md: ``` -wic --yaml ../workflow-inference-compiler/docs/tutorials/helloworld.wic --run_local --quiet +sophios --yaml ../workflow-inference-compiler/docs/tutorials/helloworld.wic --run_local --quiet ``` You can also run the automated test suite. Note that the tests are based on the workflows; if you have more workflows, the tests will take longer. diff --git a/docs/index.rst b/docs/index.rst index 22ee0e9c..68363ec2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,4 +1,4 @@ -Workflow Inference Compiler documentation +Sophios documentation ======================================================= .. 
toctree:: diff --git a/docs/installguide.md b/docs/installguide.md index d8130612..554e728d 100644 --- a/docs/installguide.md +++ b/docs/installguide.md @@ -2,11 +2,10 @@ For pip users: -`pip install wic` # Please read the next sentence +`pip install sophios` -Unlike conda, **pip cannot install the binary system dependencies needed to actually run most workflows!** - -If you want to actually run workflows, you (or your sysadmin) will have to manually install and configure additional software! +In order to execute the CWL workflows that are generated by `sophios`, `cwltool` and all of its underlying dependencies need to be present in the system. Unfortunately +`pip` has no capability to resolve and install these dependencies. Please refer to the `cwltool` [installation guide](https://cwltool.readthedocs.io/en/latest/#install) to prepare the system to run CWL workflows. For conda users / developers: diff --git a/docs/tutorials/conditional_example.wic b/docs/tutorials/conditional_example.wic new file mode 100644 index 00000000..95f9f5e8 --- /dev/null +++ b/docs/tutorials/conditional_example.wic @@ -0,0 +1,10 @@ +steps: + toString: + in: + input: !ii 27 + out: + - output: !& string_int + echo: + when: '$(inputs.message < "27")' + in: + message: !* string_int \ No newline at end of file diff --git a/docs/userguide.md b/docs/userguide.md index 1cf5b702..8296bebd 100644 --- a/docs/userguide.md +++ b/docs/userguide.md @@ -76,4 +76,223 @@ steps: Note that this is one key difference between WIC and CWL. In CWL, all inputs must be given in a separate file. In WIC, inputs can be given inline with !ii and after compilation they will be automatically extracted into the separate file. -(NOTE: raw CWL is still supported with the --allow_raw_cwl flag.) \ No newline at end of file +(NOTE: raw CWL is still supported with the --allow_raw_cwl flag.) + +## Python API (experimental) +In addition to the YAML-based language for building workflows, Sophios also provides a Python API. 
The aspirational goal of this API is to be close to regular +usage of Python. This API leverages YAML based syntax by transforming the Python workflow internally into a regular Sophios YAML workflow. All the Python API examples discussed here can be found in directory [`examples/scripts`](https://github.com/PolusAI/workflow-inference-compiler/tree/master/examples/scripts) in the Sophios repository. + +### basics +Let us take the most basic workflow *`hello world`*. This is how we write it in YAML syntax. + +``` +steps: +- echo: + in: + message: !ii Hello World +``` + +The Python API closely follows the YAML syntax. We create steps and from steps we create workflows. The API exposes the means to create Step and Workflow objects. The steps and workflows are just plain objects in Python which can be passed around, manipulated, composed and reused. We can write the above workflow as follows using Python API. + +``` +from sophios.api.pythonapi import Step, Workflow + + +def workflow() -> Workflow: + # step echo + echo = Step(clt_path='../../cwl_adapters/echo.cwl') + echo.message = 'hello world' + # arrange steps + steps = [echo] + + # create workflow + filename = 'helloworld_pyapi_py' + wkflw = Workflow(steps, filename) + return wkflw + +# Do NOT .run() here + +if __name__ == '__main__': + helloworld = workflow() + helloworld.run() # .run() here inside main +``` + +Here `echo` is a step object created by specifying the path of cwl adapter (a basic cwl workflow) `echo.cwl`. The input to `echo` is `message`; the user can assign a value directly (i.e., inline) or create another cwl object compatible with the type of `message`. It is to be noted that we didn't have to specify if `message` is an input type; the specified attributes of the step object get mapped to the corresponding `input` or `output` of the cwl step if it exists. + +A workflow object is created using a list of steps in **`correct order`** and a unique filename. 
As there is only one step in this example hence the workflow object is created with a list containing only one step `echo`. + +### multistep +Here is an example of a multistep workflow in YAML syntax. It is to be noted that the output of step `touch` is inferred by the compiler as the input *`file`* of step `append` and the output *`file`* of `append` is inferred as input for the step `cat`. + +``` +steps: +- id: touch + in: + filename: !ii empty.txt +- id: append + in: + str: !ii Hello +- id: cat +``` +We can write the above workflow as follows using the Python API. + +``` +from sophios.api.pythonapi import Step, Workflow + + +def workflow() -> Workflow: + # step echo + touch = Step(clt_path='../../cwl_adapters/touch.cwl') + touch.filename = 'empty.txt' + append = Step(clt_path='../../cwl_adapters/append.cwl') + append.file = touch.file + append.str = 'Hello' + cat = Step(clt_path='../../cwl_adapters/cat.cwl') + cat.file = append.file + # arrange steps + steps = [touch,append,cat] + + # create workflow + filename = 'multistep1_pyapi_py' + wkflw = Workflow(steps, filename) + return wkflw + +# Do NOT .run() here + +if __name__ == '__main__': + multistep1 = workflow() + multistep1.run() # .run() here inside main +``` + +It is the same process of creating step objects and using the step objects to create a workflow object. The difference is there is no support for edge inference! All the outputs and inputs for each step must be specified by the user and the user is responsible to correctly match the edges of each step. It is important to note that the user must **know** the names of the output of a step to specify as input of another step. + +In the Python API step objects and workflow objects are purely syntactic constructions at the user level. No semantic transformation are done at this level. All the transformations are done by the Sophios compiler on the (internal) generated YAML workflow. 
+ +### scattering +An important feature of Sophios and CWL is scattering over an input in any given step. The following workflow is a simple example of `scatter` and the non-default `scatterMethod`. + +``` +# Demonstrates scattering on a subset of inputs and a non default scattering method +steps: +- id: array_indices + in: + input_array: !ii ["hello world", "not", "what world?"] + input_indices: !ii [0,2] + out: + - output_array: !& filt_message +- id: echo_3 + scatter: [message1,message2] + scatterMethod: flat_crossproduct + in: + message1: !* filt_message + message2: !* filt_message + message3: !ii scalar +``` +We can write the above workflow as follows using the Python API. + +``` +from sophios.api.pythonapi import Step, Workflow + +def workflow() -> Workflow: + # scatter on a subset of inputs + # step array_indices + array_ind = Step(clt_path='../../cwl_adapters/array_indices.cwl') + array_ind.input_array = ["hello world", "not", "what world?"] + array_ind.input_indices = [0, 2] + # step echo_3 + echo_3 = Step(clt_path='../../cwl_adapters/echo_3.cwl') + echo_3.message1 = array_ind.output_array + echo_3.message2 = array_ind.output_array + echo_3.message3 = 'scalar' + # set up inputs for scattering + msg1 = echo_3.inputs[0] + msg2 = echo_3.inputs[1] + # assign the scatter and scatterMethod fields + echo_3.scatter = [msg1, msg2] + echo_3.scatterMethod = 'flat_crossproduct' + + # arrange steps + steps = [array_ind, echo_3] + + # create workflow + filename = 'scatter_pyapi_py' # .yml + wkflw = Workflow(steps, filename) + return wkflw + + +# Do NOT .run() here + +if __name__ == '__main__': + scatter_wic = workflow() + scatter_wic.run() # .run() here inside main + +``` +Here again we see all the inputs and outputs are explicitly specified by the user and explicit edges are constructed from one output to another. Any attribute which is not an *input* or an *output* of a step object is a **special** attribute. 
`scatter` is just a **special (and optional)** attribute on the `echo_3` step object, just like in YAML the user must specify which inputs be scattered before applying the step. + +The scatter attribute needs the actual input objects of the step not *just* the names as a list, this is quite similar to `scatter` tag in the YAML syntax. Similarly here a non-default scatter method is specified on `echo_3` through the (optional) `scatterMethod` attribute on the step that needs to be scattered. + +The user must make sure that scatter operation described in the code is valid i.e, the arity of input data is compatible with scattering and scattering method. If there is any mismatch or mistake in the Python code, the API wouldn't be able to point to the exact issue in the Python code. The user will get a Sophios compiler error in that scenario and it might not be straightforward to pinpoint the error in the Python source code. Again it is to be noted that the objects in the Python API are purely syntactic at the user level. + +### conditional +The Python API also supports conditional workflows. It transparently exposes the syntax and semantics of `when` tag of CWL. Here is a simple example of using `when` in a workflow. + +``` +steps: + toString: + in: + input: !ii 27 + out: + - output: !& string_int + echo: + when: '$(inputs.message < "27")' + in: + message: !* string_int +``` + +We can write the above workflow as follows using the Python API. + +``` +from sophios.api.pythonapi import Step, Workflow + + +def workflow() -> Workflow: + # conditional on input + # step toString + toString = Step(clt_path='../../cwl_adapters/toString.cwl') + toString.input = 27 + # step echo + echo = Step(clt_path='../../cwl_adapters/echo.cwl') + echo.message = toString.output + # add a when clause + # alternate js syntax + # echo.when = '$(inputs["message"] < 27)' + echo.when = '$(inputs.message < "27")' + # since the condition is not met the echo step is skipped! 
+ + # arrange steps + steps = [toString, echo] + + # create workflow + filename = 'when_pyapi_py' # .yml + wkflw = Workflow(steps, filename) + return wkflw + + +# Do NOT .run() here + +if __name__ == '__main__': + when_wic = workflow() + when_wic.run() # .run() here inside main +``` +Similar to `scatter`, `when` is a **special (and optional)** attribute to any step object in the Python API. +The `when` attribute of a step object exposes the exact same js embedded syntax of `when` tag of the YAML/CWL syntax. One has to be careful about appropriate escaping in the string input of `when` in Python API. In the above case the comparison is between two strings so "" is around the literal 27 (i.e. value after `toString` step). +## Partial Failures + +In running workflows at scale, sometimes it is the case that one of the workflow steps may crash due to a bug causing the entire workflow to crash. In this case the user can use the `--partial_failure_enable` flag. For special cases when the exit status of a workflow step isn't 1, and a different error code is returned (for example 142), then the user can supply the error code to wic as a success code to prevent workflow from crashing with `--partial_failure_success_codes 0 1 142`. By default partial failure flag will consider only 0 and 1 as success codes. An example line snippet of the error code being printed is shown below. +``` +WARNING [job compare_extract_protein_pdbbind__step__4__topology_check] exited with status: 139 +``` + +## Parallelization + +In order to utilize scattering features in cwl, the user needs to provide the flag `--parallel`. Additionally cwltool has various issues regarding scattering features such as deadlocks and thus it is preferred to use toil in this case `--cwl_runner toil-cwl-runner`. 
diff --git a/examples/scripts/helloworld_pyapi.py b/examples/scripts/helloworld_pyapi.py new file mode 100644 index 00000000..7f5b024e --- /dev/null +++ b/examples/scripts/helloworld_pyapi.py @@ -0,0 +1,21 @@ +from sophios.api.pythonapi import Step, Workflow + + +def workflow() -> Workflow: + # step echo + echo = Step(clt_path='../../cwl_adapters/echo.cwl') + echo.message = 'hello world' + # arrange steps + steps = [echo] + + # create workflow + filename = 'helloworld_pyapi_py' + wkflw = Workflow(steps, filename) + return wkflw + +# Do NOT .run() here + + +if __name__ == '__main__': + scatter_wic = workflow() + scatter_wic.run() # .run() here inside main diff --git a/examples/scripts/multistep1_pyapi.py b/examples/scripts/multistep1_pyapi.py new file mode 100644 index 00000000..635d078c --- /dev/null +++ b/examples/scripts/multistep1_pyapi.py @@ -0,0 +1,26 @@ +from sophios.api.pythonapi import Step, Workflow + + +def workflow() -> Workflow: + # step echo + touch = Step(clt_path='../../cwl_adapters/touch.cwl') + touch.filename = 'empty.txt' + append = Step(clt_path='../../cwl_adapters/append.cwl') + append.file = touch.file + append.str = 'Hello' + cat = Step(clt_path='../../cwl_adapters/cat.cwl') + cat.file = append.file + # arrange steps + steps = [touch, append, cat] + + # create workflow + filename = 'multistep1_pyapi_py' + wkflw = Workflow(steps, filename) + return wkflw + +# Do NOT .run() here + + +if __name__ == '__main__': + multistep1 = workflow() + multistep1.run() # .run() here inside main diff --git a/examples/scripts/when_pyapi.py b/examples/scripts/when_pyapi.py new file mode 100644 index 00000000..754733be --- /dev/null +++ b/examples/scripts/when_pyapi.py @@ -0,0 +1,31 @@ +from sophios.api.pythonapi import Step, Workflow + + +def workflow() -> Workflow: + # conditional on input + # step toString + toString = Step(clt_path='../../cwl_adapters/toString.cwl') + toString.input = 27 + # step echo + echo = Step(clt_path='../../cwl_adapters/echo.cwl') + 
echo.message = toString.output + # add a when clause + # alternate js syntax + # echo.when = '$(inputs["message"] < "27")' + echo.when = '$(inputs.message < "27")' + # since the condition is not met the echo step is skipped! + + # arrange steps + steps = [toString, echo] + + # create workflow + filename = 'when_pyapi_py' # .yml + wkflw = Workflow(steps, filename) + return wkflw + + +# Do NOT .run() here + +if __name__ == '__main__': + when_wic = workflow() + when_wic.run() # .run() here inside main diff --git a/install/system_deps.yml b/install/system_deps.yml index 022f345f..dda22365 100644 --- a/install/system_deps.yml +++ b/install/system_deps.yml @@ -35,8 +35,10 @@ dependencies: # Similarly, toil[cwl] depends on ruamel.yaml.clib for performance. # Install it with conda/mamba here. - ruamel.yaml.clib -# Simiarly, cryptography needs to build binary wheels +# Similarly, cryptography needs to build binary wheels - cryptography # Needs binary PyQt5 dependencies. - kubernetes-helm - zstandard +# Needed for orjson wheels + - orjson diff --git a/pyproject.toml b/pyproject.toml index 490ae2ac..87d8d150 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,6 @@ # Based on https://packaging.python.org/en/latest/tutorials/packaging-projects/ [build-system] requires = ["setuptools>=42", "wheel", "versioneer[toml]==0.29", "tomli"] -# NOTE: Use upper bound of <68 for setuptools due to 2to3 error. 
-# See https://stackoverflow.com/questions/72414481/error-in-anyjson-setup-command-use-2to3-is-invalid build-backend = "setuptools.build_meta" @@ -35,11 +33,15 @@ dependencies = [ # See https://github.com/common-workflow-language/cwl-utils/releases/ "typeguard", "pydantic>=2.6", + "pydantic-settings", "docker", # FYI also need uidmap to run podman rootless "podman", # We are using the official release for these packages for now "toil[cwl]", + "fastapi", + "python-jose", + "uvicorn" ] [project.readme] diff --git a/src/sophios/api/http/__init__.py b/src/sophios/api/http/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/sophios/api/http/restapi.py b/src/sophios/api/http/restapi.py new file mode 100644 index 00000000..0c108749 --- /dev/null +++ b/src/sophios/api/http/restapi.py @@ -0,0 +1,179 @@ +from pathlib import Path +import argparse +import yaml + +import uvicorn +from fastapi import FastAPI, Request, status +from fastapi.middleware.cors import CORSMiddleware + +from sophios import __version__, compiler +from sophios import run_local, input_output +from sophios.utils_graphs import get_graph_reps +from sophios.utils_yaml import wic_loader +from sophios import utils_cwl +from sophios.cli import get_args +from sophios.wic_types import CompilerInfo, Json, Tool, Tools, StepId, YamlTree, Cwl +# from sophios.api.utils import converter +# from .auth.auth import authenticate + + +# helper functions + + +def remove_dot_dollar(tree: Cwl) -> Cwl: + """Removes . and $ from dictionary keys, e.g. $namespaces and $schemas. Otherwise, you will get + {'error': {'statusCode': 500, 'message': 'Internal Server Error'}} + This is due to MongoDB: + See https://www.mongodb.com/docs/manual/reference/limits/#Restrictions-on-Field-Names + Args: + tree (Cwl): A Cwl document + Returns: + Cwl: A Cwl document with . 
and $ removed from $namespaces and $schemas + """ + tree_str = str(yaml.dump(tree, sort_keys=False, line_break='\n', indent=2)) + tree_str_no_dd = tree_str.replace('$namespaces', 'namespaces').replace( + '$schemas', 'schemas').replace('.wic', '_wic') + tree_no_dd: Cwl = yaml.load(tree_str_no_dd, Loader=wic_loader()) # This effectively copies tree + return tree_no_dd + + +def get_yaml_tree(req: Json) -> Json: + """ + Get the Sophios yaml tree from incoming JSON + Args: + req (JSON): A raw JSON content of incoming JSON object + Returns: + Json: The Sophios yaml tree (currently always an empty placeholder dict) + """ + wkflw_name = "generic_workflow" + # args = converter.get_args(wkflw_name) + # yaml_tree_json: Json = converter.wfb_to_wic(req) + yaml_tree_json: Json = {} + return yaml_tree_json + + +def run_workflow(compiler_info: CompilerInfo, args: argparse.Namespace) -> int: + """ + Write the compiled workflow to autogenerated/ and run it locally with cwltool + Args: + compiler_info, args: compiler output (its rose tree is written and run) and the arguments used for the run + Returns: + int: The value returned by run_local.run_local
+ """ + # ========= WRITE OUT ======================= + input_output.write_to_disk(compiler_info.rose, Path('autogenerated/'), relative_run_path=True) + # ======== TEST RUN ========================= + retval = run_local.run_local(args, compiler_info.rose, args.cachedir, 'cwltool', False) + return retval + + +app = FastAPI() + +origins = ["*"] + +app.add_middleware( + CORSMiddleware, + allow_origins=origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +@app.get("/", status_code=status.HTTP_200_OK) +# @authenticate +async def root(request: Request) -> Json: + """The api has 1 route: compile + + Returns: + Dict[str, str]: {"message": "The api has 1 route: compile"} + """ + return {"message": "The api has 1 route: compile"} + + +@app.post("/compile") +# @authenticate +async def compile_wf(request: Request) -> Json: + """The compile route compiles the workflow in the json body of the http request and runs it locally + + Args: + request (Request): request object built elsewhere + + Returns: + compute_workflow (JSON): json object holding the return value of the local run under the "retval" key + """ + print('---------- Compile Workflow! 
---------') + # ========= PROCESS REQUEST OBJECT ========== + req: Json = await request.json() + wkflw_name = "generic_workflow" + args = get_args(wkflw_name) + + workflow_temp = {} + if req["links"] != []: + for node in req["nodes"]: + workflow_temp["id"] = node["id"] + workflow_temp["step"] = node["cwlScript"] # Assume dict form + else: # A single node workflow + node = req["nodes"][0] + workflow_temp = node["cwlScript"] + + workflow_can = utils_cwl.desugar_into_canonical_normal_form(workflow_temp) + + # ========= BUILD WIC COMPILE INPUT ========= + tools_cwl: Tools = {StepId(content["id"], "global"): + Tool(".", content["run"]) for content in workflow_can["steps"]} + # run tag will have the actual CommandLineTool + wic_obj = {'wic': workflow_can.get('wic', {})} + plugin_ns = wic_obj['wic'].get('namespace', 'global') + + graph = get_graph_reps(wkflw_name) + yaml_tree: YamlTree = YamlTree(StepId(wkflw_name, plugin_ns), workflow_can) + + # ========= COMPILE WORKFLOW ================ + compiler_info: CompilerInfo = compiler.compile_workflow(yaml_tree, args, [], [graph], {}, {}, {}, {}, + tools_cwl, True, relative_run_path=True, testing=False) + + # =========== OPTIONAL RUN ============== + print('---------- Run Workflow locally! ---------') + retval = run_workflow(compiler_info, args) + + compute_workflow: Json = {} + compute_workflow["retval"] = str(retval) + return compute_workflow + + +if __name__ == '__main__': + uvicorn.run(app, host="0.0.0.0", port=3000) + + +# # ========= PROCESS COMPILED OBJECT ========= + # sub_node_data: NodeData = compiler_info.rose.data + # yaml_stem = sub_node_data.name + # cwl_tree = sub_node_data.compiled_cwl + # yaml_inputs = sub_node_data.workflow_inputs_file + + # # ======== OUTPUT PROCESSING ================ + # cwl_tree_no_dd = remove_dot_dollar(cwl_tree) + # yaml_inputs_no_dd = remove_dot_dollar(yaml_inputs) + + # # Convert the compiled yaml file to json for labshare Compute. 
+ # cwl_tree_run = copy.deepcopy(cwl_tree_no_dd) + # for step_key in cwl_tree['steps']: + # step_name_i = step_key + # step_name_i = step_name_i.replace('.yml', '_yml') # Due to calling remove_dot_dollar above + # step_name = '__'.join(step_key.split('__')[3:]) # Remove prefix + + # # Get step CWL from templates + # run_val = next((tval['cwlScript'] + # for _, tval in ict_plugins.items() if step_name == tval['name']), None) + # cwl_tree_run['steps'][step_name_i]['run'] = run_val + + # TODO: set name and driver in workflow builder ui + # compute_workflow: Json = {} + # compute_workflow = { + # "name": yaml_stem, + # "driver": "argo", + # # "driver": "cwltool", + # "cwlJobInputs": yaml_inputs_no_dd, + # **cwl_tree_run + # } diff --git a/src/sophios/api/pythonapi.py b/src/sophios/api/pythonapi.py index 47ed38d8..f370356d 100644 --- a/src/sophios/api/pythonapi.py +++ b/src/sophios/api/pythonapi.py @@ -2,6 +2,7 @@ """CLT utilities.""" import logging from pathlib import Path +import subprocess as sub from typing import Any, ClassVar, Optional, TypeVar, Union import cwl_utils.parser as cu_parser @@ -260,6 +261,8 @@ class Step(BaseModel): # pylint: disable=too-few-public-methods # these are not part of 'clt data' scatter: list[ProcessInput] = [] scatterMethod: str = '' + # use when tag to enable conditional steps + when: str = '' _input_names: list[str] = PrivateAttr(default_factory=list) _output_names: list[str] = PrivateAttr(default_factory=list) @@ -367,6 +370,11 @@ def __setattr__(self, __name: str, __value: Any) -> Any: if not all([isinstance(x, ProcessInput) for x in __value]): raise TypeError("all scatter inputs must be ProcessInput type") return super().__setattr__(__name, __value) + if __name == "when": + if (isinstance(__value, str)) and __value.startswith('$(') and __value.endswith(')'): + return super().__setattr__(__name, __value) + else: + raise ValueError("Invalid input to when.The js string must start with '$(' and end with ')'") if hasattr(self, 
"_input_names") and __name in self._input_names: set_input_Step_Workflow(self, __name, __value) @@ -443,6 +451,9 @@ def _yml(self) -> dict: if '' == self.scatterMethod: self.scatterMethod = ScatterMethod.dotproduct.value d["scatterMethod"] = self.scatterMethod + # when operates on step + if self.when != '': + d["when"] = self.when return d def _save_cwl(self, path: Path) -> None: @@ -743,6 +754,14 @@ def run(self) -> None: args = get_args(self.process_name) # Use mock CLI args rose_tree: RoseTree = compiler_info.rose + # cwl-docker-extract recursively `docker pull`s all images in all subworkflows. + # This is important because cwltool only uses `docker run` when executing + # workflows, and if there is a local image available, + # `docker run` will NOT query the remote repository for the latest image! + # cwltool has a --force-docker-pull option, but this may cause multiple pulls in parallel. + cmd = ['cwl-docker-extract', '--force-download', f'autogenerated/{self.process_name}.cwl'] + sub.run(cmd, check=True) + # If you don't like it, you can programmatically overwrite anything in args # args.docker_remove_entrypoints = True if args.docker_remove_entrypoints: diff --git a/src/sophios/compiler.py b/src/sophios/compiler.py index 5ba74d3a..90f81ce8 100644 --- a/src/sophios/compiler.py +++ b/src/sophios/compiler.py @@ -833,7 +833,8 @@ def compile_workflow_once(yaml_tree_ast: YamlTree, vars_workflow_output_internal = list(set(vars_workflow_output_internal)) # Get uniques # (Why are we getting uniques?) 
workflow_outputs = utils_cwl.get_workflow_outputs(args, namespaces, is_root, yaml_stem, - steps, outputs_workflow, vars_workflow_output_internal, graph, tools_lst, step_node_name) + steps, outputs_workflow, vars_workflow_output_internal, + graph, tools_lst, step_node_name, tools) # Add the provided workflow outputs to the workflow outputs from each step outputs_combined = {**yaml_tree.get('outputs', {}), **workflow_outputs} yaml_tree.update({'outputs': outputs_combined}) diff --git a/src/sophios/input_output.py b/src/sophios/input_output.py index a13f5927..45137ba1 100644 --- a/src/sophios/input_output.py +++ b/src/sophios/input_output.py @@ -203,7 +203,7 @@ def get_default_config() -> Json: def get_absolute_paths(sub_config: Json) -> Json: - """Makes the paths within the dirs_file file absolute and write them into sub_config object. + """Update the paths within the sub_config json object as absolute paths Args: sub_config (dict): The json (sub)object where filepaths are stored diff --git a/src/sophios/plugins.py b/src/sophios/plugins.py index e117f218..fd6f34e4 100644 --- a/src/sophios/plugins.py +++ b/src/sophios/plugins.py @@ -413,7 +413,7 @@ def get_py_paths(config: Json) -> Dict[str, Dict[str, Path]]: def blindly_execute_python_workflows() -> None: """This function imports (read: blindly executes) all python files in 'search_paths_wic' The python files are assumed to have a top-level workflow() function - which returns a wic.api.pythonapi.Workflow object. + which returns a sophios.api.pythonapi.Workflow object. The python files should NOT call the .run() method! (from any code path that is automatically executed on import) """ diff --git a/src/sophios/utils_cwl.py b/src/sophios/utils_cwl.py index 38bcf62c..4bfba23c 100644 --- a/src/sophios/utils_cwl.py +++ b/src/sophios/utils_cwl.py @@ -6,7 +6,7 @@ from . 
import utils from .wic_types import (GraphReps, InternalOutputs, Namespaces, Tool, Tools, - WorkflowOutputs, Yaml) + WorkflowOutputs, Yaml, StepId) def maybe_add_requirements(yaml_tree: Yaml, steps_keys: List[str], @@ -110,7 +110,8 @@ def get_workflow_outputs(args: argparse.Namespace, vars_workflow_output_internal: InternalOutputs, graph: GraphReps, tools_lst: List[Tool], - step_node_name: str) -> Dict[str, Dict[str, str]]: + step_node_name: str, + tools: Tools) -> Dict[str, Dict[str, str]]: """Chooses a subset of the CWL outputs: to actually output Args: @@ -125,6 +126,7 @@ def get_workflow_outputs(args: argparse.Namespace, graph (GraphReps): A tuple of a GraphViz DiGraph and a networkx DiGraph tools_lst (List[Tool]): A list of the CWL CommandLineTools or compiled subworkflows for the current workflow. step_node_name (str): The namespaced name of the current step + tools (Tools): The CWL CommandLineTool definitions found using get_tools_cwl() Returns: Dict[str, Dict[str, str]]: The actual outputs to be specified in the generated CWL file @@ -135,7 +137,9 @@ def get_workflow_outputs(args: argparse.Namespace, for i, step_key in enumerate(steps_keys): tool_i = tools_lst[i].cwl step_name_i = utils.step_name_str(yaml_stem, i, step_key) + step_id = StepId(Path(step_key).stem, 'global') step_name_or_key = step_name_i if step_key.endswith('.wic') \ + or step_id in tools \ or Path(step_key).stem == Path(tools_lst[i].run_path).stem else step_key # step_name_or_key = step_name_i if stepid in tools else step_key out_keys = steps[i]['out'] diff --git a/tests/single_node_helloworld.json b/tests/single_node_helloworld.json new file mode 100644 index 00000000..099bf9ce --- /dev/null +++ b/tests/single_node_helloworld.json @@ -0,0 +1,40 @@ +{ + "nodes": [ + { + "id": 1, + "name": "PythonHelloWorld", + "pluginId": "", + "cwlScript": { + "steps": { + "one": { + "run": { + "baseCommand": [ + "python", + "-c", + "print('hello world'); print('sqr of 7 : %.2f' % 7**2)" + ], + "class": 
"CommandLineTool", + "cwlVersion": "v1.2", + "inputs": {}, + "outputs": { + "pyout": { + "outputBinding": { + "glob": "output" + }, + "type": "File" + } + }, + "stdout": "output", + "requirements": { + "InlineJavascriptRequirement": {} + } + } + } + } + }, + "settings": {}, + "internal": false + } + ], + "links": [] +} \ No newline at end of file diff --git a/tests/test_rest_core.py b/tests/test_rest_core.py new file mode 100644 index 00000000..f85bf2e9 --- /dev/null +++ b/tests/test_rest_core.py @@ -0,0 +1,166 @@ +import json +# import subprocess as sub +from pathlib import Path +# import signal +# import sys +# from typing import List +# import argparse +import asyncio +from jsonschema import Draft202012Validator + +from fastapi import Request + +import pytest +from sophios.wic_types import Json + + +from sophios.api.http import restapi + +SCHEMA = { + "$schema": "http://json-schema.org/draft-07/schema#", + "definitions": { + "Link": { + "properties": { + "id": { + "type": "number" + }, + "inletIndex": { + "type": "number" + }, + "outletIndex": { + "type": "number" + }, + "sourceId": { + "type": "number" + }, + "targetId": { + "type": "number" + }, + "x1": { + "type": "number" + }, + "x2": { + "type": "number" + }, + "y1": { + "type": "number" + }, + "y2": { + "type": "number" + } + }, + "type": "object", + "required": ["id", "inletIndex", "outletIndex", "sourceId", "targetId"] + }, + "NodeSettings": { + "properties": { + "inputs": { + "additionalProperties": { + "$ref": "#/definitions/T" + }, + "type": "object" + }, + "outputs": { + "additionalProperties": { + "$ref": "#/definitions/T" + }, + "type": "object" + } + }, + "type": "object" + }, + "NodeX": { + "properties": { + "expanded": { + "type": "boolean" + }, + "height": { + "type": "number" + }, + "id": { + "type": "number" + }, + "internal": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "pluginId": { + "type": "string" + }, + "settings": { + "$ref": "#/definitions/NodeSettings" + }, + 
"width": { + "type": "number" + }, + "x": { + "type": "number" + }, + "y": { + "type": "number" + }, + "z": { + "type": "number" + }, + }, + "type": "object", + "required": ["id", "name", "pluginId", "settings", "internal"] + }, + "T": { + "type": "object" + } + }, + "properties": { + "links": { + "items": { + "$ref": "#/definitions/Link" + }, + "type": "array" + }, + "nodes": { + "items": { + "$ref": "#/definitions/NodeX" + }, + "type": "array" + }, + "selection": { + "items": { + "type": "number" + }, + "type": "array" + } + }, + "type": "object", + "required": ["links", "nodes"] +} + + +@pytest.mark.fast +def test_rest_core_single_node() -> None: + """A simple single node 'hello world' test""" + # validate schema + Draft202012Validator.check_schema(SCHEMA) + df2012 = Draft202012Validator(SCHEMA) + inp_file = "single_node_helloworld.json" + inp: Json = {} + yaml_path = "workflow.json" + inp_path = Path(__file__).with_name(inp_file) + with open(inp_path, 'r', encoding='utf-8') as f: + inp = json.load(f) + # check if object is conformant with our schema + df2012.is_valid(inp) + print('----------- from rest api ----------- \n\n') + scope = {} + scope['type'] = 'http' + + async def receive() -> Json: + inp_byte = json.dumps(inp).encode('utf-8') + return {"type": "http.request", "body": inp_byte} + + # create a request object and pack it with our json payload + req: Request = Request(scope) + req._receive = receive + res: Json = asyncio.run(restapi.compile_wf(req)) # call to rest api + assert int(res['retval']) == 0