Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ingestion integration tests #801

Merged
merged 12 commits into from
Aug 6, 2024
Merged
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
* `transform/clr` component: Added the option to set the `axis` along which to apply CLR. Possible to override
on workflow level as well (PR #767).

* `workflows/test_workflows/ingestion` components & `workflows/ingestion`: Added standalone components for integration testing of ingestion workflows (PR #801).

## MINOR CHANGES

* Bump scvelo to `0.3.2` (PR #828).
Expand Down
13 changes: 13 additions & 0 deletions src/base/openpipelinetestutils/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import importlib
import pytest
from pathlib import Path

def pytest_collect_file(file_path: Path, parent):
    """Collect files named ``.viash_script.sh`` as pytest modules.

    Args:
        file_path: Path of the file pytest is considering for collection.
        parent: Collector node that the new module node is attached to.

    Returns:
        A ``pytest.Module`` for files named ``.viash_script.sh``; ``None`` for
        every other file, which leaves them to the other collectors.
    """
    if file_path.name != ".viash_script.sh":
        return None
    # Allow file ending in .sh to be imported. Register the suffix only once:
    # this hook runs for every matching file, and repeated appends would leave
    # stale entries behind in the process-wide suffix list.
    source_suffixes = importlib.machinery.SOURCE_SUFFIXES
    if '.viash_script.sh' not in source_suffixes:
        source_suffixes.append('.viash_script.sh')
    return pytest.Module.from_parent(parent, path=file_path)


def pytest_collection_finish(session):
    """Remove the ``.viash_script.sh`` suffix from the importable source suffixes.

    Args:
        session: The pytest session whose collection has finished (unused).

    The suffix is only registered when a ``.viash_script.sh`` file was
    collected, so it may be absent here, or present more than once. A plain
    ``list.remove`` raises ``ValueError`` in the first case and leaves
    duplicates in the second, so every occurrence is removed instead.
    """
    source_suffixes = importlib.machinery.SOURCE_SUFFIXES
    while '.viash_script.sh' in source_suffixes:
        source_suffixes.remove('.viash_script.sh')
8 changes: 8 additions & 0 deletions src/workflows/ingestion/bd_rhapsody/test.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
nextflow.enable.dsl=2

include { bd_rhapsody } from params.rootDir + "/target/nextflow/workflows/ingestion/bd_rhapsody/main.nf"
include { bd_rhapsody_test } from params.rootDir + "/target/nextflow/test_workflows/ingestion/bd_rhapsody_test/main.nf"

workflow test_wf {
// allow changing the resources_test dir
Expand Down Expand Up @@ -32,9 +33,16 @@ workflow test_wf {
assert data.output_h5mu.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output[1]}"
"Output: $output"
}

| bd_rhapsody_test.run(
fromState: ["input": "output_h5mu"]
)

| toList()
| view { output_list ->
assert output_list.size() == 1 : "output channel should contain one event"
}

// | view { output -> output[1]}
// | check_format(args: {""}) // todo: check whether output h5mu has the right slots defined
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

viash ns build -q ingestion/cellranger_mapping --setup cb --platform nextflow

export NXF_VER=21.10.6

nextflow \
Expand Down
6 changes: 6 additions & 0 deletions src/workflows/ingestion/cellranger_mapping/test.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
nextflow.enable.dsl=2

include { cellranger_mapping } from params.rootDir + "/target/nextflow/workflows/ingestion/cellranger_mapping/main.nf"
include { cellranger_mapping_test } from params.rootDir + "/target/nextflow/test_workflows/ingestion/cellranger_mapping_test/main.nf"

workflow test_wf {
// allow changing the resources_test dir
Expand All @@ -22,6 +23,11 @@ workflow test_wf {
// todo: check whether output dir contains fastq files
"Output: $output"
}

| cellranger_mapping_test.run(
fromState: ["input": "output_h5mu"]
)

| toSortedList()
| map { output_list ->
assert output_list.size() == 1 : "output channel should contain one event"
Expand Down
6 changes: 6 additions & 0 deletions src/workflows/ingestion/cellranger_multi/test.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
nextflow.enable.dsl=2

include { cellranger_multi } from params.rootDir + "/target/nextflow/workflows/ingestion/cellranger_multi/main.nf"
include { cellranger_multi_test } from params.rootDir + "/target/nextflow/test_workflows/ingestion/cellranger_multi_test/main.nf"

workflow test_wf {
resources_test = file("${params.rootDir}/resources_test")
Expand Down Expand Up @@ -29,6 +30,11 @@ workflow test_wf {
// todo: check whether output dir contains fastq files
"Output: $output"
}

| cellranger_multi_test.run(
fromState: ["input": "output_h5mu"]
)

| toSortedList()
| map { output_list ->
assert output_list.size() == 1 : "output channel should contain one event"
Expand Down
46 changes: 44 additions & 2 deletions src/workflows/ingestion/cellranger_postprocessing/test.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ nextflow.enable.dsl=2

include { cellranger_postprocessing } from params.rootDir + "/target/nextflow/workflows/ingestion/cellranger_postprocessing/main.nf"
include { from_10xh5_to_h5mu } from params.rootDir + "/target/nextflow/convert/from_10xh5_to_h5mu/main.nf"
include { cellranger_postprocessing_test } from params.rootDir + "/target/nextflow/test_workflows/ingestion/cellranger_postprocessing_test/main.nf"

workflow test_wf {
// allow changing the resources_test dir
Expand All @@ -11,6 +12,7 @@ workflow test_wf {
[
id: "foo",
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5"),
input_og: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5mu"),
perform_correction: true,
min_genes: 100,
min_counts: 1000,
Expand All @@ -24,13 +26,32 @@ workflow test_wf {
toState: ["input": "output"]
)

| cellranger_postprocessing
| cellranger_postprocessing.run(
toState: {id, output, state ->
output + [
input_og: state.input_og,
perform_correction: state.perform_correction
]
}
)

| view { output ->
assert output.size() == 2 : "outputs should contain two elements; [id, out]"
assert output[1] instanceof Map : "Output should be a Map."
// todo: check whether output dir contains fastq files
"Output: $output"
}

| cellranger_postprocessing_test.run(
fromState: {id, state ->
[
input: state.output,
input_og: state.input_og,
is_corrected: state.perform_correction
]
}
)

| toSortedList()
| map { output_list ->
assert output_list.size() == 1 : "output channel should contain one event"
Expand All @@ -47,6 +68,7 @@ workflow test_wf2 {
[
id: "zing",
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5"),
input_og: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5mu"),
perform_correction: false,
min_genes: 100,
min_counts: 1000,
Expand All @@ -59,13 +81,33 @@ workflow test_wf2 {
fromState: ["input"],
toState: ["input": "output"]
)
| cellranger_postprocessing

| cellranger_postprocessing.run(
toState: {id, output, state ->
output + [
input_og: state.input_og,
perform_correction: state.perform_correction
]
}
)

| view { output ->
assert output.size() == 2 : "outputs should contain two elements; [id, out]"
assert output[1] instanceof Map : "Output should be a Map."
// todo: check whether output dir contains fastq files
"Output: $output"
}

| cellranger_postprocessing_test.run(
fromState: {id, state ->
[
input: state.output,
input_og: state.input_og,
is_corrected: state.perform_correction
]
}
)

| toSortedList()
| map { output_list ->
assert output_list.size() == 1 : "output channel should contain one event"
Expand Down
16 changes: 11 additions & 5 deletions src/workflows/ingestion/conversion/test.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
nextflow.enable.dsl=2

include { conversion } from params.rootDir + "/target/nextflow/workflows/ingestion/conversion/main.nf"
include { conversion_test } from params.rootDir + "/target/nextflow/test_workflows/ingestion/conversion_test/main.nf"

workflow test_wf {
// allow changing the resources_test dir
Expand Down Expand Up @@ -41,9 +42,14 @@ workflow test_wf {
assert output.size() == 2 : "outputs should contain two elements; [id, file]"
assert output[1].output.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output[1]}"
"Output: $output"
}
| toSortedList()
| map { output_list ->
assert output_list.size() == 4 : "output channel should contain four events"
}
}

| conversion_test.run(
fromState: ["input": "output"]
)

| toSortedList()
| map { output_list ->
assert output_list.size() == 4 : "output channel should contain four events"
}
}
35 changes: 35 additions & 0 deletions src/workflows/test_workflows/ingestion/bd_rhapsody/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
functionality:
name: "bd_rhapsody_test"
namespace: "test_workflows/ingestion"
description: "This component tests the output of the integration test of the bd_rhapsody workflow."
authors:
- __merge__: /src/authors/jakub_majercik.yaml
argument_groups:
- name: Inputs
arguments:
- name: "--input"
type: file
required: true
description: Path to h5mu output.
example: foo.final.h5mu
resources:
- type: python_script
path: script.py
- path: /src/utils/setup_logger.py
- path: /src/base/openpipelinetestutils
dest: openpipelinetestutils
platforms:
- type: docker
image: python:3.12-slim
setup:
- type: docker
copy: ["openpipelinetestutils /opt/openpipelinetestutils"]
- type: apt
packages:
- procps
- type: python
packages: /opt/openpipelinetestutils
pypi:
- mudata
__merge__: /src/base/requirements/viashpy.yaml
- type: nextflow
35 changes: 35 additions & 0 deletions src/workflows/test_workflows/ingestion/bd_rhapsody/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from mudata import read_h5mu
import numpy as np
import shutil
import os
import sys
from pathlib import Path
import pytest

##VIASH START
# Default `par` and `meta` values, kept between the VIASH START/END markers.
# NOTE(review): presumably viash replaces this section with the real component
# arguments when the component is built — confirm against the viash docs.
par = {
# Path of the h5mu file that test_run() reads.
"input": "input.h5mu"
}

meta = {
# Directory from which the __main__ block copies openpipelinetestutils/conftest.py.
"resources_dir": "resources_test"
}
##VIASH END

def test_run():
    """Check the modalities and column layout of the h5mu file at ``par["input"]``.

    Asserts that ``rna`` is the only modality, that the global and ``rna``
    ``.var`` columns and the ``rna`` ``.obs`` columns match the expected
    names in order, and that ``feature_types`` only holds "Gene Expression".
    """
    expected_var = ['gene_name', 'feature_types', 'reference_file']
    expected_obs = ['run_id', 'library_id', 'sample_id']
    mdata = read_h5mu(par["input"])

    modality_names = list(mdata.mod.keys())
    assert modality_names == ["rna"], "Input should contain rna modality."

    global_var_columns = list(mdata.var.columns)
    assert global_var_columns == expected_var, f"Input var columns should be: {expected_var}."

    # Look up the rna modality only after the modality check above has passed.
    rna = mdata.mod["rna"]
    rna_var_columns = list(rna.var.columns)
    assert rna_var_columns == expected_var, f"Input mod['rna'] var columns should be: {expected_var}."
    rna_obs_columns = list(rna.obs.columns)
    assert rna_obs_columns == expected_obs, f"Input obs columns should be: {expected_obs}."

    observed_feature_types = mdata.var["feature_types"].unique()
    assert np.array_equal(observed_feature_types, ["Gene Expression"]), "Output X should only contain Gene Expression vars."

if __name__ == "__main__":
    # Copy the shared conftest.py from the component resources to the folder
    # holding this script, then run pytest and exit with its return code.
    # NOTE(review): presumably the conftest is what lets pytest collect this
    # script under its viash file name — confirm.
    HERE_DIR = Path(__file__).resolve().parent
    conftest_source = Path(meta['resources_dir']) / "openpipelinetestutils" / "conftest.py"
    conftest_target = HERE_DIR / "conftest.py"
    shutil.copyfile(conftest_source, conftest_target)
    exit_code = pytest.main(["--import-mode=importlib"])
    sys.exit(exit_code)
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
functionality:
name: "cellranger_mapping_test"
namespace: "test_workflows/ingestion"
description: "This component tests the output of the integration test of the cellranger mapping workflow."
authors:
- __merge__: /src/authors/jakub_majercik.yaml
argument_groups:
- name: Inputs
arguments:
- name: "--input"
type: file
required: true
description: Path to h5mu output.
example: foo.final.h5mu
resources:
- type: python_script
path: script.py
- path: /src/utils/setup_logger.py
- path: /src/base/openpipelinetestutils
dest: openpipelinetestutils
platforms:
- type: docker
image: python:3.12-slim
setup:
- type: docker
copy: ["openpipelinetestutils /opt/openpipelinetestutils"]
- type: apt
packages:
- procps
- type: python
packages: /opt/openpipelinetestutils
- type: python
pypi:
- mudata
__merge__: /src/base/requirements/viashpy.yaml
- type: nextflow
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from mudata import read_h5mu
from pathlib import Path
import shutil
import os
import sys
import pytest

##VIASH START
# Default `par` and `meta` values, kept between the VIASH START/END markers.
# NOTE(review): presumably viash replaces this section with the real component
# arguments when the component is built — confirm against the viash docs.
par = {
# Path of the h5mu file that test_run() reads.
"input": "input.h5mu"
}

meta = {
# Directory from which the __main__ block copies openpipelinetestutils/conftest.py.
"resources_dir": "resources_test"
}
##VIASH END

def test_run():
    """Check the modalities and ``.var`` columns of the h5mu file at ``par["input"]``.

    Asserts that ``rna`` is the only modality and that both the global
    ``.var`` and the ``rna`` modality's ``.var`` have exactly the expected
    columns, in order.
    """
    expected_colnames = ['gene_symbol', 'feature_types', 'genome']
    mdata = read_h5mu(par["input"])

    modality_names = list(mdata.mod.keys())
    assert modality_names == ["rna"], "Input should contain rna modality."

    global_var_columns = list(mdata.var.columns)
    assert global_var_columns == expected_colnames, f"Input var columns should be: {expected_colnames}."

    # Look up the rna modality only after the modality check above has passed.
    rna_var_columns = list(mdata.mod["rna"].var.columns)
    assert rna_var_columns == expected_colnames, f"Input mod['rna'] var columns should be: {expected_colnames}."

if __name__ == "__main__":
    # Copy the shared conftest.py from the component resources to the folder
    # holding this script, then run pytest and exit with its return code.
    # NOTE(review): presumably the conftest is what lets pytest collect this
    # script under its viash file name — confirm.
    HERE_DIR = Path(__file__).resolve().parent
    conftest_source = Path(meta['resources_dir']) / "openpipelinetestutils" / "conftest.py"
    conftest_target = HERE_DIR / "conftest.py"
    shutil.copyfile(conftest_source, conftest_target)
    exit_code = pytest.main(["--import-mode=importlib"])
    sys.exit(exit_code)
Loading