Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding best practices and assertion checks to workflow_lint #1213

Merged
merged 4 commits into from
Feb 4, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions docs/best_practices_workflows.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,36 @@ ability of other Galaxy's to use the workflow.
Syntax
~~~~~~

Planemo ``workflow_lint`` also checks if workflows have the correct JSON or YAML syntax.
This may be less of a problem for workflows exported from a Galaxy instance but can assist
Planemo's ``workflow_lint`` also checks if workflows have the correct JSON or YAML syntax,
and ensures workflows follow certain 'best practices'. Best practices can also be checked
in the 'Workflow Best Practices' panel of Galaxy's workflow editor and to some extent
automatically fixed.

.. image:: images/workflow_best_practices.png
:alt: Screenshot of Galaxy's best practices panel

The ``workflow_lint`` subcommand allows the same checks to be made via the command line;
this may be less of a problem for workflows exported from a Galaxy instance but can assist
with workflows hand-edited or implemented using the newer YAML gxformat2 syntax.

::

$ planemo workflow_lint path/to/workflow.ga

Running this command makes the following checks:

* The workflow is annotated
* The workflow specifies a creator
* The workflow specifies a license
* All workflow steps are connected to formal input parameters
* All workflow steps are annotated and labelled
* No legacy 'untyped' parameters are used, e.g. a variable such as `${report_name}` in an input field
* All outputs are labelled.

In addition to checking the structure of the JSON or YAML file and workflow best practices,
``workflow_lint`` also checks the workflow test file is formatted correctly and at least one
valid test is specified.

Tests
-----

Expand Down
Binary file added docs/images/workflow_best_practices.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
93 changes: 92 additions & 1 deletion planemo/workflow_lint.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import json
import os
import re
from collections import OrderedDict

import yaml
from galaxy.tool_util.lint import LintContext
from galaxy.tool_util.loader_directory import EXCLUDE_WALK_DIRS
from galaxy.tool_util.verify import asserts
from gxformat2._yaml import ordered_load
from gxformat2.lint import lint_format2, lint_ga

Expand Down Expand Up @@ -72,7 +75,7 @@ def structure(path, lint_context):
lint_func(lint_context, workflow_dict, path=path)

lint_context.lint("lint_structure", structure, potential_workflow_artifact_path)

lint_context.lint("lint_best_practices", _lint_best_practices, potential_workflow_artifact_path)
lint_context.lint("lint_tests", _lint_tsts, potential_workflow_artifact_path)
else:
# Allow linting ro crates and such also
Expand All @@ -99,6 +102,70 @@ def _lint_tsts(path, lint_context):
lint_context.valid(f"Tests appear structurally correct for {runnable.path}")


def _lint_best_practices(path, lint_context): # noqa: C901
"""
This function duplicates the checks made by Galaxy's best practices panel:
https://github.com/galaxyproject/galaxy/blob/5396bb15fe8cfcf2e89d46c1d061c49b60e2f0b1/client/src/components/Workflow/Editor/Lint.vue
"""
def check_json_for_untyped_params(j):
values = j if type(j) == list else j.values()
for value in values:
if type(value) in [list, dict, OrderedDict]:
if check_json_for_untyped_params(value):
return True
elif type(value) == str:
if re.match(r'\$\{.+?\}', value):
return True
return False

with open(path, "r") as f:
workflow_dict = ordered_load(f)

steps = workflow_dict.get("steps", {})

# annotation
if not workflow_dict.get("annotation"):
lint_context.warn("Workflow is not annotated.")

# creator
if not len(workflow_dict.get("creator", [])) > 0:
lint_context.warn("Workflow does not specify a creator.")

# license
if not workflow_dict.get("license"):
lint_context.warn("Workflow does not specify a license.")

# checks on individual steps
for step in steps.values():
print(step)
# disconnected inputs
for input in step.get("inputs", []):
if input.get("name") not in step.get("input_connections"): # TODO: check optional
lint_context.warn(f"Input {input.get('name')} of workflow step {step.get('annotation') or step.get('id')} is disconnected.")

# missing metadata
if not step.get("annotation"):
lint_context.warn(f"Workflow step with ID {step.get('id')} has no annotation.")
if not step.get("label"):
lint_context.warn(f"Workflow step with ID {step.get('id')} has no label.")

# untyped parameters
if workflow_dict.get("class") == "GalaxyWorkflow":
tool_state = step.get('tool_state', {})
pjas = step.get('out', {})
else:
tool_state = json.loads(step.get('tool_state', '{}'))
pjas = step.get('post_job_actions', {})

if check_json_for_untyped_params(tool_state):
lint_context.warn(f"Workflow step with ID {step.get('id')} specifies an untyped parameter as an input.")

if check_json_for_untyped_params(pjas):
lint_context.warn(f"Workflow step with ID {step.get('id')} specifies an untyped parameter in the post-job actions.")

# unlabeled outputs are checked by gxformat2, no need to check here


def _lint_case(path, test_case, lint_context):
test_valid = True

Expand Down Expand Up @@ -133,13 +200,37 @@ def _lint_case(path, test_case, lint_context):
found_valid_expectation = True
# TODO: validate structure of test expectations

assertion_definitions = test_case.output_expectations[test_output_id].get("asserts")
if not _check_test_assertions(lint_context, assertion_definitions):
test_valid = False

if not found_valid_expectation:
lint_context.warn("Found no valid test expectations for workflow test")
test_valid = False

return test_valid


def _check_test_assertions(lint_context, assertion_definitions):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe this can be improved, if someone knows more about importing Python functions than me?

# we are already in Python, not XML, so it is simpler to lint assertions by checking against the
# Python functions directly, rather than checking against galaxy.xsd as for tool linting
assertions_valid = True
if assertion_definitions:
for module in asserts.assertion_modules:
for function_name in dir(module):
if function_name.split('assert_')[-1] in assertion_definitions:
f = module.__dict__[function_name]
Copy link
Member

@mvdbeek mvdbeek Jan 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could check that this is a function with https://docs.python.org/3/library/inspect.html#inspect.isfunction
and inspect the signature with https://docs.python.org/3/library/inspect.html#inspect.signature instead of relying on TypeError. OTOH that's some manual parsing that is maybe not necessary. A healthy middleground could be to run https://docs.python.org/3/library/inspect.html#inspect.Signature.bind, so we don't have to execute the test function.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

run https://docs.python.org/3/library/inspect.html#inspect.Signature.bind, so we don't have to execute the test function.

That is cool, thanks!

try:
# try running the function with the attributes supplied and check for TypeError
f('', **assertion_definitions[function_name.split('assert_')[-1]])
except AssertionError:
pass
except TypeError as e:
lint_context.error(f'Invalid assertion in tests: {str(e)}')
assertions_valid = False
return assertions_valid


def _tst_input_valid(test_case, input_id, input_def, lint_context):
if type(input_def) == dict: # else assume it is a parameter
clazz = input_def.get("class")
Expand Down
96 changes: 96 additions & 0 deletions tests/data/wf14-unlinted-best-practices.ga
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
{
"a_galaxy_workflow": "true",
"annotation": "",
"format-version": "0.1",
"name": "bp (imported from uploaded file)",
"steps": {
"0": {
"annotation": "",
"content_id": null,
"errors": null,
"id": 0,
"input_connections": {},
"inputs": [],
"label": null,
"name": "Input dataset",
"outputs": [],
"position": {
"bottom": 730.3166656494141,
"height": 82.55000305175781,
"left": 1155,
"right": 1355,
"top": 647.7666625976562,
"width": 200,
"x": 1155,
"y": 647.7666625976562
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
"tool_version": null,
"type": "data_input",
"uuid": "4219d43a-e182-49c0-bee3-8422e6344911",
"workflow_outputs": []
},
"1": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.3",
"errors": null,
"id": 1,
"input_connections": {},
"inputs": [
{
"description": "runtime parameter for tool Replace Text",
"name": "infile"
}
],
"label": null,
"name": "Replace Text",
"outputs": [
{
"name": "outfile",
"type": "input"
}
],
"position": {
"bottom": 744.8333282470703,
"height": 114.06666564941406,
"left": 1465,
"right": 1665,
"top": 630.7666625976562,
"width": 200,
"x": 1465,
"y": 630.7666625976562
},
"post_job_actions": {
"TagDatasetActionoutfile": {
"action_arguments": {
"tags": "${report_name}"
},
"action_type": "TagDatasetAction",
"output_name": "outfile"
}
},
"tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.3",
"tool_shed_repository": {
"changeset_revision": "ddf54b12c295",
"name": "text_processing",
"owner": "bgruening",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"infile\": {\"__class__\": \"RuntimeValue\"}, \"replacements\": [{\"__index__\": 0, \"column\": \"1\", \"find_pattern\": \"${report_name}\", \"replace_pattern\": \"\"}], \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "1.1.3",
"type": "tool",
"uuid": "e429e21f-f01e-4efb-b665-56f454cbe38b",
"workflow_outputs": [
{
"label": null,
"output_name": "outfile",
"uuid": "e4066fe7-c9a2-43af-a9f5-30a75a5b2107"
}
]
}
},
"tags": [],
"uuid": "66708ffe-c08c-4647-a7e9-fc7f731206a1",
"version": 2
}
45 changes: 45 additions & 0 deletions tests/data/wf14-unlinted-best-practices.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
class: GalaxyWorkflow
label: bp (imported from uploaded file)
uuid: 66708ffe-c08c-4647-a7e9-fc7f731206a1
inputs:
null:
optional: false
position:
bottom: 730.3166656494141
height: 82.55000305175781
left: 1155
right: 1355
top: 647.7666625976562
width: 200
x: 1155
y: 647.7666625976562
type: data
outputs:
_anonymous_output_1:
outputSource: 1/outfile
steps:
input:
tool_id: toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.3
tool_version: 1.1.3
position:
bottom: 744.8333282470703
height: 114.06666564941406
left: 1465
right: 1665
top: 630.7666625976562
width: 200
x: 1465
y: 630.7666625976562
tool_state:
infile:
__class__: RuntimeValue
replacements:
- __index__: 0
column: '1'
find_pattern: ${report_name}
replace_pattern: ''
in: {}
out:
outfile:
add_tags:
- ${report_name}
10 changes: 10 additions & 0 deletions tests/data/wf15-test-assertions-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
- doc: Test outline for Galaxy-Workflow-test_output_assertions(1).ga
job:
input:
class: File
path: todo_test_data_path.ext
outputs:
output:
asserts:
has_text:
non_existent_attribute: 0
21 changes: 21 additions & 0 deletions tests/data/wf15-test-assertions.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
class: GalaxyWorkflow
label: test output assertions
uuid: 1819187f-a84d-4224-aec2-283bb6cd3cfc
inputs:
input:
doc: input
optional: false
position:
bottom: 379.28334045410156
height: 62.15000915527344
left: 847
right: 1047
top: 317.1333312988281
width: 200
x: 847
y: 317.1333312988281
type: data
outputs:
_anonymous_output_1:
outputSource: input
steps: {}
Loading