galaxyproject · mvdbeek · Feb 4, 2022 · Jan 13, 2022 · Jan 13, 2022 · Jan 13, 2022
diff --git a/docs/best_practices_workflows.rst b/docs/best_practices_workflows.rst
@@ -72,10 +72,36 @@ ability of other Galaxy's to use the workflow.
 Syntax
 ~~~~~~
 
-Planemo ``workflow_lint`` also checks if workflows have the correct JSON or YAML syntax.
-This may be less of a problem for workflows exported from a Galaxy instance but can assist
+Planemo's ``workflow_lint`` also checks if workflows have the correct JSON or YAML syntax,
+and ensures workflows follow certain 'best practices'. Best practices can also be checked
+in the 'Workflow Best Practices' panel of Galaxy's workflow editor and to some extent
+automatically fixed.
+
+.. image:: images/workflow_best_practices.png
+   :alt: Screenshot of Galaxy's best practices panel
+
+The ``workflow_lint`` subcommand allows the same checks to be made via the command line;
+this may be less of a problem for workflows exported from a Galaxy instance but can assist
 with workflows hand-edited or implemented using the newer YAML gxformat2 syntax.
 
+::
+
+    $ planemo workflow_lint path/to/workflow.ga
+
+Running this command makes the following checks:
+
+* The workflow is annotated
+* The workflow specifies a creator
+* The workflow specifies a license
+* All workflow steps are connected to formal input parameters
+* All workflow steps are annotated and labelled
+* No legacy 'untyped' parameters are used, e.g. a variable such as `${report_name}` in an input field
+* All outputs are labelled.
+
+In addition to checking the structure of the JSON or YAML file and workflow best practices,
+``workflow_lint`` also checks the workflow test file is formatted correctly and at least one
+valid test is specified.
+
 Tests
 -----
 

diff --git a/docs/images/workflow_best_practices.png b/docs/images/workflow_best_practices.png
diff --git a/planemo/workflow_lint.py b/planemo/workflow_lint.py
@@ -1,9 +1,12 @@
+import json
 import os
 import re
+from collections import OrderedDict
 
 import yaml
 from galaxy.tool_util.lint import LintContext
 from galaxy.tool_util.loader_directory import EXCLUDE_WALK_DIRS
+from galaxy.tool_util.verify import asserts
 from gxformat2._yaml import ordered_load
 from gxformat2.lint import lint_format2, lint_ga
 
@@ -72,7 +75,7 @@ def structure(path, lint_context):
                 lint_func(lint_context, workflow_dict, path=path)
 
             lint_context.lint("lint_structure", structure, potential_workflow_artifact_path)
-
+            lint_context.lint("lint_best_practices", _lint_best_practices, potential_workflow_artifact_path)
             lint_context.lint("lint_tests", _lint_tsts, potential_workflow_artifact_path)
         else:
             # Allow linting ro crates and such also
@@ -99,6 +102,70 @@ def _lint_tsts(path, lint_context):
             lint_context.valid(f"Tests appear structurally correct for {runnable.path}")
 
 
+def _lint_best_practices(path, lint_context):  # noqa: C901
+    """
+    This function duplicates the checks made by Galaxy's best practices panel:
+    https://github.com/galaxyproject/galaxy/blob/5396bb15fe8cfcf2e89d46c1d061c49b60e2f0b1/client/src/components/Workflow/Editor/Lint.vue
+    """
+    def check_json_for_untyped_params(j):
+        values = j if type(j) == list else j.values()
+        for value in values:
+            if type(value) in [list, dict, OrderedDict]:
+                if check_json_for_untyped_params(value):
+                    return True
+            elif type(value) == str:
+                if re.match(r'\$\{.+?\}', value):
+                    return True
+        return False
+
+    with open(path, "r") as f:
+        workflow_dict = ordered_load(f)
+
+    steps = workflow_dict.get("steps", {})
+
+    # annotation
+    if not workflow_dict.get("annotation"):
+        lint_context.warn("Workflow is not annotated.")
+
+    # creator
+    if not len(workflow_dict.get("creator", [])) > 0:
+        lint_context.warn("Workflow does not specify a creator.")
+
+    # license
+    if not workflow_dict.get("license"):
+        lint_context.warn("Workflow does not specify a license.")
+
+    # checks on individual steps
+    for step in steps.values():
+        print(step)
+        # disconnected inputs
+        for input in step.get("inputs", []):
+            if input.get("name") not in step.get("input_connections"):  # TODO: check optional
+                lint_context.warn(f"Input {input.get('name')} of workflow step {step.get('annotation') or step.get('id')} is disconnected.")
+
+        # missing metadata
+        if not step.get("annotation"):
+            lint_context.warn(f"Workflow step with ID {step.get('id')} has no annotation.")
+        if not step.get("label"):
+            lint_context.warn(f"Workflow step with ID {step.get('id')} has no label.")
+
+        # untyped parameters
+        if workflow_dict.get("class") == "GalaxyWorkflow":
+            tool_state = step.get('tool_state', {})
+            pjas = step.get('out', {})
+        else:
+            tool_state = json.loads(step.get('tool_state', '{}'))
+            pjas = step.get('post_job_actions', {})
+
+        if check_json_for_untyped_params(tool_state):
+            lint_context.warn(f"Workflow step with ID {step.get('id')} specifies an untyped parameter as an input.")
+
+        if check_json_for_untyped_params(pjas):
+            lint_context.warn(f"Workflow step with ID {step.get('id')} specifies an untyped parameter in the post-job actions.")
+
+        # unlabeled outputs are checked by gxformat2, no need to check here
+
+
 def _lint_case(path, test_case, lint_context):
     test_valid = True
 
@@ -133,13 +200,37 @@ def _lint_case(path, test_case, lint_context):
             found_valid_expectation = True
         # TODO: validate structure of test expectations
 
+        assertion_definitions = test_case.output_expectations[test_output_id].get("asserts")
+        if not _check_test_assertions(lint_context, assertion_definitions):
+            test_valid = False
+
     if not found_valid_expectation:
         lint_context.warn("Found no valid test expectations for workflow test")
         test_valid = False
 
     return test_valid
 
 
+def _check_test_assertions(lint_context, assertion_definitions):
+    # we are already in Python, not XML, so it is simpler to lint assertions by checking against the
+    # Python functions directly, rather than checking against galaxy.xsd as for tool linting
+    assertions_valid = True
+    if assertion_definitions:
+        for module in asserts.assertion_modules:
+            for function_name in dir(module):
+                if function_name.split('assert_')[-1] in assertion_definitions:
+                    f = module.__dict__[function_name]
+                    try:
+                        # try running the function with the attributes supplied and check for TypeError
+                        f('', **assertion_definitions[function_name.split('assert_')[-1]])
+                    except AssertionError:
+                        pass
+                    except TypeError as e:
+                        lint_context.error(f'Invalid assertion in tests: {str(e)}')
+                        assertions_valid = False
+    return assertions_valid
+
+
 def _tst_input_valid(test_case, input_id, input_def, lint_context):
     if type(input_def) == dict:  # else assume it is a parameter
         clazz = input_def.get("class")

diff --git a/tests/data/wf14-unlinted-best-practices.ga b/tests/data/wf14-unlinted-best-practices.ga
@@ -0,0 +1,96 @@
+{
+    "a_galaxy_workflow": "true",
+    "annotation": "",
+    "format-version": "0.1",
+    "name": "bp (imported from uploaded file)",
+    "steps": {
+        "0": {
+            "annotation": "",
+            "content_id": null,
+            "errors": null,
+            "id": 0,
+            "input_connections": {},
+            "inputs": [],
+            "label": null,
+            "name": "Input dataset",
+            "outputs": [],
+            "position": {
+                "bottom": 730.3166656494141,
+                "height": 82.55000305175781,
+                "left": 1155,
+                "right": 1355,
+                "top": 647.7666625976562,
+                "width": 200,
+                "x": 1155,
+                "y": 647.7666625976562
+            },
+            "tool_id": null,
+            "tool_state": "{\"optional\": false}",
+            "tool_version": null,
+            "type": "data_input",
+            "uuid": "4219d43a-e182-49c0-bee3-8422e6344911",
+            "workflow_outputs": []
+        },
+        "1": {
+            "annotation": "",
+            "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.3",
+            "errors": null,
+            "id": 1,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "runtime parameter for tool Replace Text",
+                    "name": "infile"
+                }
+            ],
+            "label": null,
+            "name": "Replace Text",
+            "outputs": [
+                {
+                    "name": "outfile",
+                    "type": "input"
+                }
+            ],
+            "position": {
+                "bottom": 744.8333282470703,
+                "height": 114.06666564941406,
+                "left": 1465,
+                "right": 1665,
+                "top": 630.7666625976562,
+                "width": 200,
+                "x": 1465,
+                "y": 630.7666625976562
+            },
+            "post_job_actions": {
+                "TagDatasetActionoutfile": {
+                    "action_arguments": {
+                        "tags": "${report_name}"
+                    },
+                    "action_type": "TagDatasetAction",
+                    "output_name": "outfile"
+                }
+            },
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.3",
+            "tool_shed_repository": {
+                "changeset_revision": "ddf54b12c295",
+                "name": "text_processing",
+                "owner": "bgruening",
+                "tool_shed": "toolshed.g2.bx.psu.edu"
+            },
+            "tool_state": "{\"infile\": {\"__class__\": \"RuntimeValue\"}, \"replacements\": [{\"__index__\": 0, \"column\": \"1\", \"find_pattern\": \"${report_name}\", \"replace_pattern\": \"\"}], \"__page__\": null, \"__rerun_remap_job_id__\": null}",
+            "tool_version": "1.1.3",
+            "type": "tool",
+            "uuid": "e429e21f-f01e-4efb-b665-56f454cbe38b",
+            "workflow_outputs": [
+                {
+                    "label": null,
+                    "output_name": "outfile",
+                    "uuid": "e4066fe7-c9a2-43af-a9f5-30a75a5b2107"
+                }
+            ]
+        }
+    },
+    "tags": [],
+    "uuid": "66708ffe-c08c-4647-a7e9-fc7f731206a1",
+    "version": 2
+}
diff --git a/tests/data/wf14-unlinted-best-practices.yml b/tests/data/wf14-unlinted-best-practices.yml
@@ -0,0 +1,45 @@
+class: GalaxyWorkflow
+label: bp (imported from uploaded file)
+uuid: 66708ffe-c08c-4647-a7e9-fc7f731206a1
+inputs:
+  null:
+    optional: false
+    position:
+      bottom: 730.3166656494141
+      height: 82.55000305175781
+      left: 1155
+      right: 1355
+      top: 647.7666625976562
+      width: 200
+      x: 1155
+      y: 647.7666625976562
+    type: data
+outputs:
+  _anonymous_output_1:
+    outputSource: 1/outfile
+steps:
+  input:
+    tool_id: toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.3
+    tool_version: 1.1.3
+    position:
+      bottom: 744.8333282470703
+      height: 114.06666564941406
+      left: 1465
+      right: 1665
+      top: 630.7666625976562
+      width: 200
+      x: 1465
+      y: 630.7666625976562
+    tool_state:
+      infile:
+        __class__: RuntimeValue
+      replacements:
+      - __index__: 0
+        column: '1'
+        find_pattern: ${report_name}
+        replace_pattern: ''
+    in: {}
+    out:
+      outfile:
+        add_tags:
+        - ${report_name}
diff --git a/tests/data/wf15-test-assertions-test.yml b/tests/data/wf15-test-assertions-test.yml
@@ -0,0 +1,10 @@
+- doc: Test outline for Galaxy-Workflow-test_output_assertions(1).ga
+  job:
+    input:
+      class: File
+      path: todo_test_data_path.ext
+  outputs:
+    output:
+      asserts:
+        has_text:
+          non_existent_attribute: 0 
diff --git a/tests/data/wf15-test-assertions.yml b/tests/data/wf15-test-assertions.yml
@@ -0,0 +1,21 @@
+class: GalaxyWorkflow
+label: test output assertions
+uuid: 1819187f-a84d-4224-aec2-283bb6cd3cfc
+inputs:
+  input:
+    doc: input
+    optional: false
+    position:
+      bottom: 379.28334045410156
+      height: 62.15000915527344
+      left: 847
+      right: 1047
+      top: 317.1333312988281
+      width: 200
+      x: 847
+      y: 317.1333312988281
+    type: data
+outputs:
+  _anonymous_output_1:
+    outputSource: input
+steps: {}