openpipelines-bio · DriesSchaumont · Aug 6, 2024 · Apr 18, 2024 · Apr 25, 2024 · Apr 29, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -41,6 +41,8 @@
 * `transform/clr` component: Added the option to set the `axis` along which to apply CLR. Possible to override
   on workflow level as well (PR #767).
 
+* `workflows/test_workflows/ingestion` components & `workflows/ingestion`: Added standalone components for integration testing of ingestion workflows (PR #801). 
+
 ## MINOR CHANGES
 
 * Bump scvelo to `0.3.2` (PR #828).

diff --git a/src/base/openpipelinetestutils/conftest.py b/src/base/openpipelinetestutils/conftest.py
@@ -0,0 +1,13 @@
+import importlib
+import pytest
+from pathlib import Path
+
+def pytest_collect_file(file_path: Path, parent):
+    if (file_path.name == ".viash_script.sh"):
+        # Allow file ending in .sh to be imported
+        importlib.machinery.SOURCE_SUFFIXES.append('.viash_script.sh')
+        return pytest.Module.from_parent(parent, path=file_path)
+
+
+def pytest_collection_finish(session):
+   importlib.machinery.SOURCE_SUFFIXES.remove('.viash_script.sh')
diff --git a/src/workflows/ingestion/bd_rhapsody/test.nf b/src/workflows/ingestion/bd_rhapsody/test.nf
@@ -1,6 +1,7 @@
 nextflow.enable.dsl=2
 
 include { bd_rhapsody } from params.rootDir + "/target/nextflow/workflows/ingestion/bd_rhapsody/main.nf"
+include { bd_rhapsody_test } from params.rootDir + "/target/nextflow/test_workflows/ingestion/bd_rhapsody_test/main.nf"
 
 workflow test_wf {
   // allow changing the resources_test dir
@@ -32,9 +33,16 @@ workflow test_wf {
       assert data.output_h5mu.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output[1]}"
       "Output: $output"
     }
+
+    | bd_rhapsody_test.run(
+      fromState: ["input": "output_h5mu"]
+    )
+
     | toList()
     | view { output_list ->
       assert output_list.size() == 1 : "output channel should contain one event"
     }
+
+    // | view { output -> output[1]}
     // | check_format(args: {""}) // todo: check whether output h5mu has the right slots defined
 }
diff --git a/src/workflows/ingestion/cellranger_mapping/integration_test.sh b/src/workflows/ingestion/cellranger_mapping/integration_test.sh
@@ -8,6 +8,8 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
 # ensure that the command below is run from the root of the repository
 cd "$REPO_ROOT"
 
+viash ns build -q ingestion/cellranger_mapping --setup cb --platform nextflow
+
 export NXF_VER=21.10.6
 
 nextflow \

diff --git a/src/workflows/ingestion/cellranger_mapping/test.nf b/src/workflows/ingestion/cellranger_mapping/test.nf
@@ -1,6 +1,7 @@
 nextflow.enable.dsl=2
 
 include { cellranger_mapping } from params.rootDir + "/target/nextflow/workflows/ingestion/cellranger_mapping/main.nf"
+include { cellranger_mapping_test } from params.rootDir + "/target/nextflow/test_workflows/ingestion/cellranger_mapping_test/main.nf"
 
 workflow test_wf {
   // allow changing the resources_test dir
@@ -22,6 +23,11 @@ workflow test_wf {
       // todo: check whether output dir contains fastq files
       "Output: $output"
     }
+
+    | cellranger_mapping_test.run(
+      fromState: ["input": "output_h5mu"]
+    )
+
     | toSortedList()
     | map { output_list ->
       assert output_list.size() == 1 : "output channel should contain one event"

diff --git a/src/workflows/ingestion/cellranger_multi/test.nf b/src/workflows/ingestion/cellranger_multi/test.nf
@@ -1,6 +1,7 @@
 nextflow.enable.dsl=2
 
 include { cellranger_multi } from params.rootDir + "/target/nextflow/workflows/ingestion/cellranger_multi/main.nf"
+include { cellranger_multi_test } from params.rootDir + "/target/nextflow/test_workflows/ingestion/cellranger_multi_test/main.nf"
 
 workflow test_wf {
   resources_test = file("${params.rootDir}/resources_test")
@@ -29,6 +30,11 @@ workflow test_wf {
       // todo: check whether output dir contains fastq files
       "Output: $output"
     }
+
+    | cellranger_multi_test.run(
+      fromState: ["input": "output_h5mu"]
+    )
+
     | toSortedList()
     | map { output_list ->
       assert output_list.size() == 1 : "output channel should contain one event"

diff --git a/src/workflows/ingestion/cellranger_postprocessing/test.nf b/src/workflows/ingestion/cellranger_postprocessing/test.nf
@@ -2,6 +2,7 @@ nextflow.enable.dsl=2
 
 include { cellranger_postprocessing } from params.rootDir + "/target/nextflow/workflows/ingestion/cellranger_postprocessing/main.nf"
 include { from_10xh5_to_h5mu } from params.rootDir + "/target/nextflow/convert/from_10xh5_to_h5mu/main.nf"
+include { cellranger_postprocessing_test } from params.rootDir + "/target/nextflow/test_workflows/ingestion/cellranger_postprocessing_test/main.nf"
 
 workflow test_wf {
   // allow changing the resources_test dir
@@ -11,6 +12,7 @@ workflow test_wf {
       [
         id: "foo",
         input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5"),
+        input_og: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5mu"),
         perform_correction: true,
         min_genes: 100,
         min_counts: 1000,
@@ -24,13 +26,32 @@ workflow test_wf {
       toState: ["input": "output"]
     )
 
-    | cellranger_postprocessing
+    | cellranger_postprocessing.run(
+      toState: {id, output, state ->
+        output + [
+          input_og: state.input_og,
+          perform_correction: state.perform_correction
+        ]
+      }
+    )
+
     | view { output ->
       assert output.size() == 2 : "outputs should contain two elements; [id, out]"
       assert output[1] instanceof Map : "Output should be a Map."
       // todo: check whether output dir contains fastq files
       "Output: $output"
     }
+
+    | cellranger_postprocessing_test.run(
+      fromState: {id, state ->
+        [
+          input: state.output,
+          input_og: state.input_og,
+          is_corrected: state.perform_correction
+        ]
+      }
+    )
+
     | toSortedList()
     | map { output_list ->
       assert output_list.size() == 1 : "output channel should contain one event"
@@ -47,6 +68,7 @@ workflow test_wf2 {
       [
         id: "zing",
         input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5"),
+        input_og: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5mu"),
         perform_correction: false,
         min_genes: 100,
         min_counts: 1000,
@@ -59,13 +81,33 @@ workflow test_wf2 {
       fromState: ["input"],
       toState: ["input": "output"]
     )
-    | cellranger_postprocessing
+
+    | cellranger_postprocessing.run(
+      toState: {id, output, state ->
+        output + [
+          input_og: state.input_og,
+          perform_correction: state.perform_correction
+        ]
+      }
+    )
+
     | view { output ->
       assert output.size() == 2 : "outputs should contain two elements; [id, out]"
       assert output[1] instanceof Map : "Output should be a Map."
       // todo: check whether output dir contains fastq files
       "Output: $output"
     }
+
+    | cellranger_postprocessing_test.run(
+      fromState: {id, state ->
+        [
+          input: state.output,
+          input_og: state.input_og,
+          is_corrected: state.perform_correction
+        ]
+      }
+    )
+
     | toSortedList()
     | map { output_list ->
       assert output_list.size() == 1 : "output channel should contain one event"

diff --git a/src/workflows/ingestion/conversion/test.nf b/src/workflows/ingestion/conversion/test.nf
@@ -1,6 +1,7 @@
 nextflow.enable.dsl=2
 
 include { conversion } from params.rootDir + "/target/nextflow/workflows/ingestion/conversion/main.nf"
+include { conversion_test } from params.rootDir + "/target/nextflow/test_workflows/ingestion/conversion_test/main.nf"
 
 workflow test_wf {
   // allow changing the resources_test dir
@@ -41,9 +42,14 @@ workflow test_wf {
         assert output.size() == 2 : "outputs should contain two elements; [id, file]"
         assert output[1].output.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output[1]}"
         "Output: $output"
-      }
-      | toSortedList()
-      | map { output_list ->
-        assert output_list.size() == 4 : "output channel should contain four events"
-      }
+    }
+
+    | conversion_test.run(
+      fromState: ["input": "output"]
+    )
+
+    | toSortedList()
+    | map { output_list ->
+      assert output_list.size() == 4 : "output channel should contain four events"
+    }
 }
diff --git a/src/workflows/test_workflows/ingestion/bd_rhapsody/config.vsh.yaml b/src/workflows/test_workflows/ingestion/bd_rhapsody/config.vsh.yaml
@@ -0,0 +1,35 @@
+functionality:
+  name: "bd_rhapsody_test"
+  namespace: "test_workflows/ingestion"
+  description: "This component tests the output of the integration test of the bd_rhapsody workflow."
+  authors:
+    - __merge__: /src/authors/jakub_majercik.yaml
+  argument_groups:
+    - name: Inputs
+      arguments:
+        - name: "--input"
+          type: file
+          required: true
+          description: Path to h5mu output.
+          example: foo.final.h5mu
+  resources:
+    - type: python_script
+      path: script.py
+    - path: /src/utils/setup_logger.py
+    - path: /src/base/openpipelinetestutils
+      dest: openpipelinetestutils
+platforms:
+  - type: docker
+    image: python:3.12-slim
+    setup:
+      - type: docker
+        copy: ["openpipelinetestutils /opt/openpipelinetestutils"]
+      - type: apt
+        packages: 
+          - procps
+      - type: python
+        packages: /opt/openpipelinetestutils
+        pypi:
+          - mudata
+        __merge__: /src/base/requirements/viashpy.yaml
+  - type: nextflow
diff --git a/src/workflows/test_workflows/ingestion/bd_rhapsody/script.py b/src/workflows/test_workflows/ingestion/bd_rhapsody/script.py
@@ -0,0 +1,35 @@
+from mudata import read_h5mu
+import numpy as np
+import shutil
+import os
+import sys
+from pathlib import Path
+import pytest
+
+##VIASH START
+# Placeholder values for `par` and `meta` used by the rest of this script.
+# NOTE(review): presumably viash replaces everything between the
+# VIASH START/END markers when the component is built — confirm.
+par = {
+    "input": "input.h5mu"
+}
+
+meta = {
+    "resources_dir": "resources_test"
+}
+##VIASH END
+
+def test_run():
+    input_mudata = read_h5mu(par["input"])
+    expected_var = ['gene_name', 'feature_types', 'reference_file']
+    expected_obs = ['run_id', 'library_id', 'sample_id']
+
+    assert list(input_mudata.mod.keys()) == ["rna"], "Input should contain rna modality."
+    assert list(input_mudata.var.columns) == expected_var, f"Input var columns should be: {expected_var}."
+    assert list(input_mudata.mod["rna"].var.columns) == expected_var, f"Input mod['rna'] var columns should be: {expected_var}."
+    assert list(input_mudata.mod["rna"].obs.columns) == expected_obs, f"Input obs columns should be: {expected_obs}."
+
+    assert np.array_equal(input_mudata.var["feature_types"].unique(), ["Gene Expression"]), "Output X should only contain Gene Expression vars."
+
+if __name__ == "__main__":
+    HERE_DIR = Path(__file__).resolve().parent
+    shutil.copyfile(os.path.join(meta['resources_dir'], "openpipelinetestutils", "conftest.py"),
+                    os.path.join(HERE_DIR, "conftest.py"))
+    sys.exit(pytest.main(["--import-mode=importlib"]))
diff --git a/src/workflows/test_workflows/ingestion/cellranger_mapping/config.vsh.yaml b/src/workflows/test_workflows/ingestion/cellranger_mapping/config.vsh.yaml
@@ -0,0 +1,36 @@
+functionality:
+  name: "cellranger_mapping_test"
+  namespace: "test_workflows/ingestion"
+  description: "This component tests the output of the integration test of the cellranger mapping workflow."
+  authors:
+    - __merge__: /src/authors/jakub_majercik.yaml
+  argument_groups:
+    - name: Inputs
+      arguments:
+        - name: "--input"
+          type: file
+          required: true
+          description: Path to h5mu output.
+          example: foo.final.h5mu
+  resources:
+    - type: python_script
+      path: script.py
+    - path: /src/utils/setup_logger.py
+    - path: /src/base/openpipelinetestutils
+      dest: openpipelinetestutils
+platforms:
+  - type: docker
+    image: python:3.12-slim
+    setup:
+      - type: docker
+        copy: ["openpipelinetestutils /opt/openpipelinetestutils"]
+      - type: apt
+        packages: 
+          - procps
+      - type: python
+        packages: /opt/openpipelinetestutils
+      - type: python
+        pypi:
+          - mudata
+        __merge__: /src/base/requirements/viashpy.yaml
+  - type: nextflow
diff --git a/src/workflows/test_workflows/ingestion/cellranger_mapping/script.py b/src/workflows/test_workflows/ingestion/cellranger_mapping/script.py
@@ -0,0 +1,30 @@
+from mudata import read_h5mu
+from pathlib import Path
+import shutil
+import os
+import sys
+import pytest
+
+##VIASH START
+# Placeholder values for `par` and `meta` used by the rest of this script.
+# NOTE(review): presumably viash replaces everything between the
+# VIASH START/END markers when the component is built — confirm.
+par = {
+    "input": "input.h5mu"
+}
+
+meta = {
+    "resources_dir": "resources_test"
+}
+##VIASH END
+
+def test_run():
+    input_mudata = read_h5mu(par["input"])
+    expected_colnames = ['gene_symbol', 'feature_types', 'genome']
+
+    assert list(input_mudata.mod.keys()) == ["rna"], "Input should contain rna modality."
+    assert list(input_mudata.var.columns) == expected_colnames, f"Input var columns should be: {expected_colnames}."
+    assert list(input_mudata.mod["rna"].var.columns) == expected_colnames, f"Input mod['rna'] var columns should be: {expected_colnames}."
+
+if __name__ == "__main__":
+    HERE_DIR = Path(__file__).resolve().parent
+    shutil.copyfile(os.path.join(meta['resources_dir'], "openpipelinetestutils", "conftest.py"),
+                    os.path.join(HERE_DIR, "conftest.py"))
+    sys.exit(pytest.main(["--import-mode=importlib"]))