diff --git a/modules.json b/modules.json index 1a0ff7b6..1a3c6102 100644 --- a/modules.json +++ b/modules.json @@ -162,20 +162,45 @@ "git_sha": "2c6b1144ed58b6184ad58fc4e6b6a90219b4bf4f", "installed_by": ["modules"] }, + "rseqc/bamstat": { + "branch": "master", + "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", + "installed_by": ["bam_rseqc"] + }, "rseqc/inferexperiment": { "branch": "master", "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["modules"] + "installed_by": ["bam_rseqc", "modules"] + }, + "rseqc/innerdistance": { + "branch": "master", + "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", + "installed_by": ["bam_rseqc"] + }, + "rseqc/junctionannotation": { + "branch": "master", + "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", + "installed_by": ["bam_rseqc"] + }, + "rseqc/junctionsaturation": { + "branch": "master", + "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", + "installed_by": ["bam_rseqc"] }, "rseqc/readdistribution": { "branch": "master", "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["modules"] + "installed_by": ["bam_rseqc", "modules"] }, "rseqc/readduplication": { "branch": "master", "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["modules"] + "installed_by": ["bam_rseqc", "modules"] + }, + "rseqc/tin": { + "branch": "master", + "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", + "installed_by": ["bam_rseqc"] }, "samtools/flagstat": { "branch": "master", @@ -236,6 +261,11 @@ "git_sha": "46eca555142d6e597729fcb682adcc791796f514", "installed_by": ["subworkflows"] }, + "bam_rseqc": { + "branch": "master", + "git_sha": "0eacd714effe5aac1c1de26593873960b3346cab", + "installed_by": ["subworkflows"] + }, "bam_sort_stats_samtools": { "branch": "master", "git_sha": "46eca555142d6e597729fcb682adcc791796f514", diff --git a/modules/nf-core/rseqc/bamstat/environment.yml b/modules/nf-core/rseqc/bamstat/environment.yml new file mode 100644 index 00000000..e960c994 --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/environment.yml @@ -0,0 +1,8 @@ +name: rseqc_bamstat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/bamstat/main.nf b/modules/nf-core/rseqc/bamstat/main.nf new file mode 100644 index 00000000..167240c4 --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/main.nf @@ -0,0 +1,45 @@ +process RSEQC_BAMSTAT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam_stat.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bam_stat.py \\ + -i $bam \\ + $args \\ + > ${prefix}.bam_stat.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(bam_stat.py --version | sed -e "s/bam_stat.py //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam_stat.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(bam_stat.py --version | sed -e "s/bam_stat.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/bamstat/meta.yml b/modules/nf-core/rseqc/bamstat/meta.yml new file mode 100644 index 00000000..72745310 --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/meta.yml @@ -0,0 +1,40 @@ +name: rseqc_bamstat +description: Generate statistics from a bam file +keywords: + - bam + - qc + - bamstat +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: the bam file to calculate statistics of + pattern: "*.{bam}" +output: + - txt: + type: file + description: bam statistics report + pattern: "*.bam_stat.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/bamstat/tests/main.nf.test b/modules/nf-core/rseqc/bamstat/tests/main.nf.test new file mode 100644 index 00000000..86f4301e --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/tests/main.nf.test @@ -0,0 +1,53 @@ +nextflow_process { + + name "Test Process RSEQC_BAMSTAT" + script "../main.nf" + process "RSEQC_BAMSTAT" + + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/bamstat" + + config "./nextflow.config" + + test("sarscov2 - [meta] - bam") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam') + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } + + test("sarscov2 - [meta] - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam') + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/modules/nf-core/rseqc/bamstat/tests/main.nf.test.snap b/modules/nf-core/rseqc/bamstat/tests/main.nf.test.snap new file mode 100644 index 00000000..deae0e80 --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - [meta] - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,58f74a7ff9d2966142c81a4a4735dbf3" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,58f74a7ff9d2966142c81a4a4735dbf3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:16:13.970299" + }, + "sarscov2 - [meta] - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,2675857864c1d1139b2a19d25dc36b09" + ] + ], + "1": [ + "versions.yml:md5,58f74a7ff9d2966142c81a4a4735dbf3" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,2675857864c1d1139b2a19d25dc36b09" + ] + ], + "versions": [ + "versions.yml:md5,58f74a7ff9d2966142c81a4a4735dbf3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2023-12-14T17:00:23.830276754" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/bamstat/tests/nextflow.config b/modules/nf-core/rseqc/bamstat/tests/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/modules/nf-core/rseqc/bamstat/tests/tags.yml b/modules/nf-core/rseqc/bamstat/tests/tags.yml new file mode 100644 index 00000000..ed9a41fa --- /dev/null +++ b/modules/nf-core/rseqc/bamstat/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/bamstat: + - modules/nf-core/rseqc/bamstat/** diff --git a/modules/nf-core/rseqc/innerdistance/environment.yml b/modules/nf-core/rseqc/innerdistance/environment.yml new file mode 100644 index 00000000..fb6e9e92 --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/environment.yml @@ -0,0 +1,8 @@ +name: rseqc_innerdistance +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/innerdistance/main.nf b/modules/nf-core/rseqc/innerdistance/main.nf new file mode 100644 index 00000000..207f5fb4 --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/main.nf @@ -0,0 +1,66 @@ +process RSEQC_INNERDISTANCE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam) + path bed + + output: + tuple val(meta), path("*distance.txt"), optional:true, emit: distance + tuple val(meta), path("*freq.txt") , optional:true, emit: freq + tuple val(meta), path("*mean.txt") , optional:true, emit: mean + tuple val(meta), path("*.pdf") , optional:true, emit: pdf + tuple val(meta), path("*.r") , optional:true, emit: rscript + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (!meta.single_end) { + """ + inner_distance.py \\ + -i $bam \\ + -r $bed \\ + -o $prefix \\ + $args \\ + > stdout.txt + head -n 2 stdout.txt > ${prefix}.inner_distance_mean.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(inner_distance.py --version | sed -e "s/inner_distance.py //g") + END_VERSIONS + """ + } else { + """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(inner_distance.py --version | sed -e "s/inner_distance.py //g") + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.inner_distance.txt + touch ${prefix}.inner_distance_freq.txt + touch ${prefix}.inner_distance_mean.txt + touch ${prefix}.inner_distance_plot.pdf + touch ${prefix}.inner_distance_plot.r + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(inner_distance.py --version | sed -e "s/inner_distance.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/innerdistance/meta.yml b/modules/nf-core/rseqc/innerdistance/meta.yml new file mode 100644 index 00000000..d0a5bf18 --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/meta.yml @@ -0,0 +1,60 @@ +name: rseqc_innerdistance +description: Calculate inner distance between read pairs. +keywords: + - read_pairs + - fragment_size + - inner_distance +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: the alignment in bam format + pattern: "*.{bam}" + - bed: + type: file + description: a bed file for the reference gene model + pattern: "*.{bed}" +output: + - distance: + type: file + description: the inner distances + pattern: "*.inner_distance.txt" + - freq: + type: file + description: frequencies of different insert sizes + pattern: "*.inner_distance_freq.txt" + - mean: + type: file + description: mean/median values of inner distances + pattern: "*.inner_distance_mean.txt" + - pdf: + type: file + description: distribution plot of inner distances + pattern: "*.inner_distance_plot.pdf" + - rscript: + type: file + description: script to reproduce the plot + pattern: "*.inner_distance_plot.R" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/innerdistance/tests/main.nf.test b/modules/nf-core/rseqc/innerdistance/tests/main.nf.test new file mode 100644 index 00000000..cb19fe14 --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/tests/main.nf.test @@ -0,0 +1,63 @@ +nextflow_process { + + name "Test Process RSEQC_INNERDISTANCE" + script "../main.nf" + process "RSEQC_INNERDISTANCE" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/innerdistance" + + test("sarscov2 - [[meta] - bam] - bed") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed12", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.pdf[0][1]).name).match("pdf") }, + { assert snapshot(process.out.distance).match("distance") }, + { assert snapshot(process.out.freq).match("freq") }, + { assert snapshot(process.out.freq).match("rscript") }, + { assert snapshot(process.out.mean).match("mean") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 - [[meta] - bam] - bed - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed12", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rseqc/innerdistance/tests/main.nf.test.snap b/modules/nf-core/rseqc/innerdistance/tests/main.nf.test.snap new file mode 100644 index 00000000..83f33363 --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/tests/main.nf.test.snap @@ -0,0 +1,189 @@ +{ + "rscript": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,3fc037501f5899b5da009c8ce02fc25e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:42.467401" + }, + "pdf": { + "content": [ + "test.inner_distance_plot.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:42.461959" + }, + "distance": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,a1acc9def0f64a5500d4c4cb47cbe32b" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:42.463083" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,3ca19644c7f02a53db3ffe50c7706797" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:42.474576" + }, + "mean": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,58398b7d5a29a5e564f9e3c50b55996c" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:42.470638" + }, + "sarscov2 - [[meta] - bam] - bed - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + "versions.yml:md5,3ca19644c7f02a53db3ffe50c7706797" + ], + "distance": [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "freq": [ + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "mean": [ + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pdf": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rscript": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3ca19644c7f02a53db3ffe50c7706797" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:22:20.844642" + }, + "freq": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,3fc037501f5899b5da009c8ce02fc25e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:33:42.464983" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/innerdistance/tests/nextflow.config b/modules/nf-core/rseqc/innerdistance/tests/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/modules/nf-core/rseqc/innerdistance/tests/tags.yml b/modules/nf-core/rseqc/innerdistance/tests/tags.yml new file mode 100644 index 00000000..4a9d0acf --- /dev/null +++ b/modules/nf-core/rseqc/innerdistance/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/innerdistance: + - modules/nf-core/rseqc/innerdistance/** diff --git a/modules/nf-core/rseqc/junctionannotation/environment.yml b/modules/nf-core/rseqc/junctionannotation/environment.yml new file mode 100644 index 00000000..35276b71 --- /dev/null +++ b/modules/nf-core/rseqc/junctionannotation/environment.yml @@ -0,0 +1,8 @@ +name: rseqc_junctionannotation +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/junctionannotation/main.nf b/modules/nf-core/rseqc/junctionannotation/main.nf new file mode 100644 index 00000000..9a8a4653 --- /dev/null +++ b/modules/nf-core/rseqc/junctionannotation/main.nf @@ -0,0 +1,60 @@ +process RSEQC_JUNCTIONANNOTATION { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam) + path bed + + output: + tuple val(meta), path("*.xls") , emit: xls + tuple val(meta), path("*.r") , emit: rscript + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*.junction.bed"), optional:true, emit: bed + tuple val(meta), path("*.Interact.bed"), optional:true, emit: interact_bed + tuple val(meta), path("*junction.pdf") , optional:true, emit: pdf + tuple val(meta), path("*events.pdf") , optional:true, emit: events_pdf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + junction_annotation.py \\ + -i $bam \\ + -r $bed \\ + -o $prefix \\ + $args \\ + 2> >(grep -v 'E::idx_find_and_load' | tee ${prefix}.junction_annotation.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(junction_annotation.py --version | sed -e "s/junction_annotation.py //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.junction.xls + touch ${prefix}.junction_plot.r + touch ${prefix}.junction_annotation.log + touch ${prefix}.junction.bed + touch ${prefix}.Interact.bed + touch ${prefix}.junction.pdf + touch ${prefix}.events.pdf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(junction_annotation.py --version | sed -e "s/junction_annotation.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/junctionannotation/meta.yml b/modules/nf-core/rseqc/junctionannotation/meta.yml new file mode 100644 index 00000000..a88aa2db --- /dev/null +++ b/modules/nf-core/rseqc/junctionannotation/meta.yml @@ -0,0 +1,68 @@ +name: rseqc_junctionannotation +description: compare detected splice junctions to reference gene model +keywords: + - junctions + - splicing + - rnaseq +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: the alignment in bam format + pattern: "*.{bam}" + - bed: + type: file + description: a bed file for the reference gene model + pattern: "*.{bed}" +output: + - bed: + type: file + description: bed file of annotated junctions + pattern: "*.junction.bed" + - interact_bed: + type: file + description: Interact bed file + pattern: "*.Interact.bed" + - xls: + type: file + description: xls file with junction information + pattern: "*.xls" + - pdf: + type: file + description: junction plot + pattern: "*.junction.pdf" + - events_pdf: + type: file + description: events plot + pattern: "*.events.pdf" + - rscript: + type: file + description: Rscript to reproduce the plots + pattern: "*.r" + - log: + type: file + description: Log file of execution + pattern: "*.junction_annotation.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/junctionannotation/tests/main.nf.test b/modules/nf-core/rseqc/junctionannotation/tests/main.nf.test new file mode 100644 index 00000000..36d5f6e2 --- /dev/null +++ b/modules/nf-core/rseqc/junctionannotation/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process RSEQC_JUNCTIONANNOTATION" + script "../main.nf" + process "RSEQC_JUNCTIONANNOTATION" + + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/junctionannotation" + + test("sarscov2 - paired end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.rscript.get(0).get(1) ==~ ".*/test.junction_plot.r" }, + { assert process.out.xls.get(0).get(1) ==~ ".*/test.junction.xls" }, + { assert snapshot(process.out.log).match("log") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 - paired end [bam] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rseqc/junctionannotation/tests/main.nf.test.snap b/modules/nf-core/rseqc/junctionannotation/tests/main.nf.test.snap new file mode 100644 index 00000000..e87594f0 --- /dev/null +++ b/modules/nf-core/rseqc/junctionannotation/tests/main.nf.test.snap @@ -0,0 +1,175 @@ +{ + "log": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction_annotation.log:md5,ef37d06d169a1adbeec23fddf82aee2b" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:32:52.732937" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,091865c0ebd6de262c9b0968b5771091" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:32:52.734515" + }, + "sarscov2 - paired end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction_annotation.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Interact.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.events.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + "versions.yml:md5,091865c0ebd6de262c9b0968b5771091" + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "events_pdf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.events.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "interact_bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Interact.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction_annotation.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pdf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rscript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,091865c0ebd6de262c9b0968b5771091" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junction.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:39:10.213511" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/junctionannotation/tests/tags.yml b/modules/nf-core/rseqc/junctionannotation/tests/tags.yml new file mode 100644 index 00000000..5f719fb8 --- /dev/null +++ b/modules/nf-core/rseqc/junctionannotation/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/junctionannotation: + - modules/nf-core/rseqc/junctionannotation/** diff --git a/modules/nf-core/rseqc/junctionsaturation/environment.yml b/modules/nf-core/rseqc/junctionsaturation/environment.yml new file mode 100644 index 00000000..7b9cc4fa --- /dev/null +++ b/modules/nf-core/rseqc/junctionsaturation/environment.yml @@ -0,0 +1,8 @@ +name: rseqc_junctionsaturation +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/junctionsaturation/main.nf b/modules/nf-core/rseqc/junctionsaturation/main.nf new file mode 100644 index 00000000..18f063f3 --- /dev/null +++ b/modules/nf-core/rseqc/junctionsaturation/main.nf @@ -0,0 +1,49 @@ +process RSEQC_JUNCTIONSATURATION { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam) + path bed + + output: + tuple val(meta), path("*.pdf"), emit: pdf + tuple val(meta), path("*.r") , emit: rscript + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + junction_saturation.py \\ + -i $bam \\ + -r $bed \\ + -o $prefix \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(junction_saturation.py --version | sed -e "s/junction_saturation.py //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.junctionSaturation_plot.pdf + touch ${prefix}.junctionSaturation_plot.r + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(junction_saturation.py --version | sed -e "s/junction_saturation.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/junctionsaturation/meta.yml b/modules/nf-core/rseqc/junctionsaturation/meta.yml new file mode 100644 index 00000000..19ae3f52 --- /dev/null +++ b/modules/nf-core/rseqc/junctionsaturation/meta.yml @@ -0,0 +1,48 @@ +name: rseqc_junctionsaturation +description: compare detected splice junctions to reference gene model +keywords: + - junctions + - splicing + - rnaseq +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: the alignment in bam format + pattern: "*.{bam}" + - bed: + type: file + description: a bed file for the reference gene model + pattern: "*.{bed}" +output: + - pdf: + type: file + description: Junction saturation report + pattern: "*.pdf" + - rscript: + type: file + description: Junction saturation R-script + pattern: "*.r" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/junctionsaturation/tests/main.nf.test b/modules/nf-core/rseqc/junctionsaturation/tests/main.nf.test new file mode 100644 index 00000000..7bd3cfc2 --- /dev/null +++ b/modules/nf-core/rseqc/junctionsaturation/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process RSEQC_JUNCTIONSATURATION" + script "../main.nf" + process "RSEQC_JUNCTIONSATURATION" + + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/junctionsaturation" + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.pdf[0][1]).name).match("pdf") }, + { assert snapshot(process.out.rscript).match("rscript") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rseqc/junctionsaturation/tests/main.nf.test.snap b/modules/nf-core/rseqc/junctionsaturation/tests/main.nf.test.snap new file mode 100644 index 00000000..8d702a4e --- /dev/null +++ b/modules/nf-core/rseqc/junctionsaturation/tests/main.nf.test.snap @@ -0,0 +1,95 @@ +{ + "rscript": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.junctionSaturation_plot.r:md5,caa6e63dcb477aabb169882b2f30dadd" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:32:24.018924" + }, + "pdf": { + "content": [ + "test.junctionSaturation_plot.pdf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:32:24.011942" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,cd4638541a825dbd44e3cb65d2980aa1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:32:24.021984" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junctionSaturation_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junctionSaturation_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,cd4638541a825dbd44e3cb65d2980aa1" + ], + "pdf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junctionSaturation_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rscript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.junctionSaturation_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,cd4638541a825dbd44e3cb65d2980aa1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:43:55.853893" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/junctionsaturation/tests/tags.yml b/modules/nf-core/rseqc/junctionsaturation/tests/tags.yml new file mode 100644 index 00000000..7022b59a --- /dev/null +++ b/modules/nf-core/rseqc/junctionsaturation/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/junctionsaturation: + - modules/nf-core/rseqc/junctionsaturation/** diff --git a/modules/nf-core/rseqc/tin/environment.yml b/modules/nf-core/rseqc/tin/environment.yml new file mode 100644 index 00000000..288da430 --- /dev/null +++ b/modules/nf-core/rseqc/tin/environment.yml @@ -0,0 +1,8 @@ +name: rseqc_tin +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::rseqc=5.0.3 + - conda-forge::r-base>=3.5 diff --git a/modules/nf-core/rseqc/tin/main.nf b/modules/nf-core/rseqc/tin/main.nf new file mode 100644 index 00000000..397442f0 --- /dev/null +++ b/modules/nf-core/rseqc/tin/main.nf @@ -0,0 +1,48 @@ +process RSEQC_TIN { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:5.0.3--py39hf95cd2a_0' : + 'biocontainers/rseqc:5.0.3--py39hf95cd2a_0' }" + + input: + tuple val(meta), path(bam), path(bai) + path bed + + output: + tuple val(meta), path("*.txt"), emit: txt + tuple val(meta), path("*.xls"), emit: xls + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + tin.py \\ + -i $bam \\ + -r $bed \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(tin.py --version | sed -e "s/tin.py //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${bam.fileName}.summary.txt + touch ${bam.fileName}.tin.xls + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(tin.py --version | sed -e "s/tin.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/tin/meta.yml b/modules/nf-core/rseqc/tin/meta.yml new file mode 100644 index 00000000..f760bb2f --- /dev/null +++ b/modules/nf-core/rseqc/tin/meta.yml @@ -0,0 +1,50 @@ +name: rseqc_tin +description: Calculte TIN (transcript integrity number) from RNA-seq reads +keywords: + - rnaseq + - transcript + - integrity +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM file + pattern: "*.{bam}" + - bai: + type: file + description: Index for input BAM file + pattern: "*.{bai}" + - bed: + type: file + description: BED file containing the reference gene model + pattern: "*.{bed}" +output: + - txt: + type: file + description: TXT file containing tin.py results summary + pattern: "*.txt" + - xls: + type: file + description: XLS file containing tin.py results + pattern: "*.xls" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/rseqc/tin/tests/main.nf.test b/modules/nf-core/rseqc/tin/tests/main.nf.test new file mode 100644 index 00000000..b11eba80 --- /dev/null +++ b/modules/nf-core/rseqc/tin/tests/main.nf.test @@ -0,0 +1,58 @@ +nextflow_process { + + name "Test Process RSEQC_TIN" + script "../main.nf" + process "RSEQC_TIN" + tag "modules" + tag "modules_nfcore" + tag "rseqc" + tag "rseqc/tin" + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed12", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + ]) + input[1] = Channel.of(file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/test.bed12", checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/rseqc/tin/tests/main.nf.test.snap b/modules/nf-core/rseqc/tin/tests/main.nf.test.snap new file mode 100644 index 00000000..caab3a09 --- /dev/null +++ b/modules/nf-core/rseqc/tin/tests/main.nf.test.snap @@ -0,0 +1,108 @@ +{ + "sarscov2 paired-end [bam]": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.summary.txt:md5,9d98447e178b89a89f6f5aba7a772fe6" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.tin.xls:md5,abdbb6d51a5cd7038cd862ce241ecef1" + ] + ], + "2": [ + "versions.yml:md5,79670dedaa11a978951c16ead7f49c24" + ], + "txt": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.summary.txt:md5,9d98447e178b89a89f6f5aba7a772fe6" + ] + ], + "versions": [ + "versions.yml:md5,79670dedaa11a978951c16ead7f49c24" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.tin.xls:md5,abdbb6d51a5cd7038cd862ce241ecef1" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-26T14:32:42.823857" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.tin.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,79670dedaa11a978951c16ead7f49c24" + ], + "txt": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,79670dedaa11a978951c16ead7f49c24" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.tin.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T09:55:57.905927" + } +} \ No newline at end of file diff --git a/modules/nf-core/rseqc/tin/tests/tags.yml b/modules/nf-core/rseqc/tin/tests/tags.yml new file mode 100644 index 00000000..741bbd0c --- /dev/null +++ b/modules/nf-core/rseqc/tin/tests/tags.yml @@ -0,0 +1,2 @@ +rseqc/tin: + - modules/nf-core/rseqc/tin/** diff --git a/subworkflows/local/coverage_graphs.nf b/subworkflows/local/coverage_graphs.nf index 8df49aa4..5add10a7 100644 --- a/subworkflows/local/coverage_graphs.nf +++ b/subworkflows/local/coverage_graphs.nf @@ -14,14 +14,15 @@ include { DREG_PREP } from '../../modules/local/dreg_prep/main' workflow COVERAGE_GRAPHS { take: - bam - bai + bam_bai sizes fasta fai main: + bam = bam_bai.map{ [ it[0], it[1] ] } + ch_versions = Channel.empty() ch_genomecov_bam = bam.combine(Channel.from(1)) @@ -42,18 +43,15 @@ workflow COVERAGE_GRAPHS { ) ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV_MINUS.out.versions.first()) - - bam.join(bai, by: [0], remainder: true).set { ch_bam_bai } - DEEPTOOLS_BAMCOVERAGE_PLUS ( - ch_bam_bai, + bam_bai, fasta, fai ) ch_versions = ch_versions.mix(DEEPTOOLS_BAMCOVERAGE_PLUS.out.versions.first()) DEEPTOOLS_BAMCOVERAGE_MINUS ( - ch_bam_bai, + bam_bai, fasta, fai ) @@ -62,7 +60,7 @@ workflow COVERAGE_GRAPHS { ch_plus_minus = DEEPTOOLS_BAMCOVERAGE_PLUS.out.bigwig.join(DEEPTOOLS_BAMCOVERAGE_MINUS.out.bigwig) DREG_PREP ( - ch_bam_bai, + bam_bai, sizes ) diff --git a/subworkflows/local/quality_control.nf b/subworkflows/local/quality_control.nf index dd438bcc..ad24c7d9 100644 --- a/subworkflows/local/quality_control.nf +++ b/subworkflows/local/quality_control.nf @@ -1,17 +1,18 @@ include { PRESEQ_CCURVE } from '../../modules/nf-core/preseq/ccurve/main' include { PRESEQ_LCEXTRAP } from '../../modules/nf-core/preseq/lcextrap/main' -include { RSEQC_READDISTRIBUTION } from '../../modules/nf-core/rseqc/readdistribution/main' -include { RSEQC_READDUPLICATION } from '../../modules/nf-core/rseqc/readduplication/main' -include { RSEQC_INFEREXPERIMENT } from '../../modules/nf-core/rseqc/inferexperiment/main' include { BBMAP_PILEUP } from '../../modules/nf-core/bbmap/pileup/main' +include { BAM_RSEQC } from '../../subworkflows/nf-core/bam_rseqc' + workflow QUALITY_CONTROL { take: - bam + bam_bai bed main: + bam = bam_bai.map{ [ it[0], it[1] ] } + ch_versions = Channel.empty() PRESEQ_CCURVE ( bam ) @@ -20,20 +21,10 @@ workflow QUALITY_CONTROL { PRESEQ_LCEXTRAP ( bam ) ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first()) - RSEQC_READDISTRIBUTION ( - bam, - bed - ) - ch_versions = ch_versions.mix(RSEQC_READDISTRIBUTION.out.versions.first()) - - RSEQC_READDUPLICATION ( bam ) - ch_versions = ch_versions.mix(RSEQC_READDUPLICATION.out.versions.first()) - - RSEQC_INFEREXPERIMENT ( - bam, - bed - ) - ch_versions = ch_versions.mix(RSEQC_INFEREXPERIMENT.out.versions.first()) + // TODO Set this in a param? + rseqc_modules = ['read_duplication', 'read_distribution', 'infer_experiment'] + BAM_RSEQC ( bam_bai, bed, rseqc_modules ) + ch_versions = ch_versions.mix(BAM_RSEQC.out.versions) BBMAP_PILEUP ( bam ) ch_versions = ch_versions.mix(BBMAP_PILEUP.out.versions.first()) @@ -42,12 +33,12 @@ workflow QUALITY_CONTROL { preseq_ccurve = PRESEQ_CCURVE.out.c_curve preseq_lcextrap = PRESEQ_LCEXTRAP.out.lc_extrap - readdistribution_txt = RSEQC_READDISTRIBUTION.out.txt - readduplication_seq_xls = RSEQC_READDUPLICATION.out.seq_xls - readduplication_pos_xls = RSEQC_READDUPLICATION.out.pos_xls - readduplication_pdf = RSEQC_READDUPLICATION.out.pdf - readduplication_rscript = RSEQC_READDUPLICATION.out.rscript - inferexperiment_txt = RSEQC_INFEREXPERIMENT.out.txt + inferexperiment_txt = BAM_RSEQC.out.inferexperiment_txt + readdistribution_txt = BAM_RSEQC.out.readdistribution_txt + readduplication_seq_xls = BAM_RSEQC.out.readduplication_seq_xls + readduplication_pos_xls = BAM_RSEQC.out.readduplication_pos_xls + readduplication_pdf = BAM_RSEQC.out.readduplication_pdf + readduplication_rscript = BAM_RSEQC.out.readduplication_rscript pileup_stats = BBMAP_PILEUP.out.covstats pileup_hist = BBMAP_PILEUP.out.hist diff --git a/subworkflows/nf-core/bam_rseqc/main.nf b/subworkflows/nf-core/bam_rseqc/main.nf new file mode 100644 index 00000000..043321a1 --- /dev/null +++ b/subworkflows/nf-core/bam_rseqc/main.nf @@ -0,0 +1,185 @@ +// +// Run RSeQC modules +// + +include { RSEQC_BAMSTAT } from '../../../modules/nf-core/rseqc/bamstat/main' +include { RSEQC_INNERDISTANCE } from '../../../modules/nf-core/rseqc/innerdistance/main' +include { RSEQC_INFEREXPERIMENT } from '../../../modules/nf-core/rseqc/inferexperiment/main' +include { RSEQC_JUNCTIONANNOTATION } from '../../../modules/nf-core/rseqc/junctionannotation/main' +include { RSEQC_JUNCTIONSATURATION } from '../../../modules/nf-core/rseqc/junctionsaturation/main' +include { RSEQC_READDISTRIBUTION } from '../../../modules/nf-core/rseqc/readdistribution/main' +include { RSEQC_READDUPLICATION } from '../../../modules/nf-core/rseqc/readduplication/main' +include { RSEQC_TIN } from '../../../modules/nf-core/rseqc/tin/main' + +workflow BAM_RSEQC { + take: + bam_bai // channel: [ val(meta), [ bam, bai ] ] + bed // channel: [ genome.bed ] + rseqc_modules // list: rseqc modules to run + + main: + + bam = bam_bai.map{ [ it[0], it[1] ] } + + versions = Channel.empty() + + // + // Run RSeQC bam_stat.py + // + bamstat_txt = Channel.empty() + + if ('bam_stat' in rseqc_modules) { + RSEQC_BAMSTAT(bam) + bamstat_txt = RSEQC_BAMSTAT.out.txt + versions = versions.mix(RSEQC_BAMSTAT.out.versions.first()) + } + + // + // Run RSeQC inner_distance.py + // + innerdistance_all = Channel.empty() + innerdistance_distance = Channel.empty() + innerdistance_freq = Channel.empty() + innerdistance_mean = Channel.empty() + innerdistance_pdf = Channel.empty() + innerdistance_rscript = Channel.empty() + + if ('inner_distance' in rseqc_modules) { + RSEQC_INNERDISTANCE(bam, bed) + innerdistance_distance = RSEQC_INNERDISTANCE.out.distance + innerdistance_freq = RSEQC_INNERDISTANCE.out.freq + innerdistance_mean = RSEQC_INNERDISTANCE.out.mean + innerdistance_pdf = RSEQC_INNERDISTANCE.out.pdf + innerdistance_rscript = RSEQC_INNERDISTANCE.out.rscript + innerdistance_all = innerdistance_distance.mix(innerdistance_freq, innerdistance_mean, innerdistance_pdf, innerdistance_rscript) + versions = versions.mix(RSEQC_INNERDISTANCE.out.versions.first()) + } + + // + // Run RSeQC infer_experiment.py + // + inferexperiment_txt = Channel.empty() + if ('infer_experiment' in rseqc_modules) { + RSEQC_INFEREXPERIMENT(bam, bed) + inferexperiment_txt = RSEQC_INFEREXPERIMENT.out.txt + versions = versions.mix(RSEQC_INFEREXPERIMENT.out.versions.first()) + } + + // + // Run RSeQC junction_annotation.py + // + junctionannotation_all = Channel.empty() + junctionannotation_bed = Channel.empty() + junctionannotation_interact_bed = Channel.empty() + junctionannotation_xls = Channel.empty() + junctionannotation_pdf = Channel.empty() + junctionannotation_events_pdf = Channel.empty() + junctionannotation_rscript = Channel.empty() + junctionannotation_log = Channel.empty() + + if ('junction_annotation' in rseqc_modules) { + RSEQC_JUNCTIONANNOTATION(bam, bed) + junctionannotation_bed = RSEQC_JUNCTIONANNOTATION.out.bed + junctionannotation_interact_bed = RSEQC_JUNCTIONANNOTATION.out.interact_bed + junctionannotation_xls = RSEQC_JUNCTIONANNOTATION.out.xls + junctionannotation_pdf = RSEQC_JUNCTIONANNOTATION.out.pdf + junctionannotation_events_pdf = RSEQC_JUNCTIONANNOTATION.out.events_pdf + junctionannotation_rscript = RSEQC_JUNCTIONANNOTATION.out.rscript + junctionannotation_log = RSEQC_JUNCTIONANNOTATION.out.log + junctionannotation_all = junctionannotation_bed.mix(junctionannotation_interact_bed, junctionannotation_xls, junctionannotation_pdf, junctionannotation_events_pdf, junctionannotation_rscript, junctionannotation_log) + versions = versions.mix(RSEQC_JUNCTIONANNOTATION.out.versions.first()) + } + + // + // Run RSeQC junction_saturation.py + // + junctionsaturation_all = Channel.empty() + junctionsaturation_pdf = Channel.empty() + junctionsaturation_rscript = Channel.empty() + + if ('junction_saturation' in rseqc_modules) { + RSEQC_JUNCTIONSATURATION(bam, bed) + junctionsaturation_pdf = RSEQC_JUNCTIONSATURATION.out.pdf + junctionsaturation_rscript = RSEQC_JUNCTIONSATURATION.out.rscript + junctionsaturation_all = junctionsaturation_pdf.mix(junctionsaturation_rscript) + versions = versions.mix(RSEQC_JUNCTIONSATURATION.out.versions.first()) + } + + // + // Run RSeQC read_distribution.py + // + readdistribution_txt = Channel.empty() + + if ('read_distribution' in rseqc_modules) { + RSEQC_READDISTRIBUTION(bam, bed) + readdistribution_txt = RSEQC_READDISTRIBUTION.out.txt + versions = versions.mix(RSEQC_READDISTRIBUTION.out.versions.first()) + } + + // + // Run RSeQC read_duplication.py + // + readduplication_all = Channel.empty() + readduplication_seq_xls = Channel.empty() + readduplication_pos_xls = Channel.empty() + readduplication_pdf = Channel.empty() + readduplication_rscript = Channel.empty() + + if ('read_duplication' in rseqc_modules) { + RSEQC_READDUPLICATION(bam ) + readduplication_seq_xls = RSEQC_READDUPLICATION.out.seq_xls + readduplication_pos_xls = RSEQC_READDUPLICATION.out.pos_xls + readduplication_pdf = RSEQC_READDUPLICATION.out.pdf + readduplication_rscript = RSEQC_READDUPLICATION.out.rscript + readduplication_all = readduplication_seq_xls.mix(readduplication_pos_xls, readduplication_pdf, readduplication_rscript) + versions = versions.mix(RSEQC_READDUPLICATION.out.versions.first()) + } + + // + // Run RSeQC tin.py + // + tin_txt = Channel.empty() + + if ('tin' in rseqc_modules) { + RSEQC_TIN(bam_bai, bed) + tin_txt = RSEQC_TIN.out.txt + versions = versions.mix(RSEQC_TIN.out.versions.first()) + } + + emit: + bamstat_txt // channel: [ val(meta), txt ] + + innerdistance_all // channel: [ val(meta), {txt, pdf, r} ] + innerdistance_distance // channel: [ val(meta), txt ] + innerdistance_freq // channel: [ val(meta), txt ] + innerdistance_mean // channel: [ val(meta), txt ] + innerdistance_pdf // channel: [ val(meta), pdf ] + innerdistance_rscript // channel: [ val(meta), r ] + + inferexperiment_txt // channel: [ val(meta), txt ] + + junctionannotation_all // channel: [ val(meta), {bed, xls, pdf, r, log} ] + junctionannotation_bed // channel: [ val(meta), bed ] + junctionannotation_interact_bed // channel: [ val(meta), bed ] + junctionannotation_xls // channel: [ val(meta), xls ] + junctionannotation_pdf // channel: [ val(meta), pdf ] + junctionannotation_events_pdf // channel: [ val(meta), pdf ] + junctionannotation_rscript // channel: [ val(meta), r ] + junctionannotation_log // channel: [ val(meta), log ] + + junctionsaturation_all // channel: [ val(meta), {pdf, r} ] + junctionsaturation_pdf // channel: [ val(meta), pdf ] + junctionsaturation_rscript // channel: [ val(meta), r ] + + readdistribution_txt // channel: [ val(meta), txt ] + + readduplication_all // channel: [ val(meta), {xls, pdf, r} ] + readduplication_seq_xls // channel: [ val(meta), xls ] + readduplication_pos_xls // channel: [ val(meta), xls ] + readduplication_pdf // channel: [ val(meta), pdf ] + readduplication_rscript // channel: [ val(meta), r ] + + tin_txt // channel: [ val(meta), txt ] + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_rseqc/meta.yml b/subworkflows/nf-core/bam_rseqc/meta.yml new file mode 100644 index 00000000..6e76ff56 --- /dev/null +++ b/subworkflows/nf-core/bam_rseqc/meta.yml @@ -0,0 +1,162 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_rseqc +description: Subworkflow to run multiple commands in the RSeqC package +keywords: + - rnaseq + - experiment + - inferexperiment + - bamstat + - innerdistance + - junctionannotation + - junctionsaturation + - readdistribution + - readduplication + - tin +components: + - rseqc + - rseqc/tin + - rseqc/readduplication + - rseqc/readdistribution + - rseqc/junctionsaturation + - rseqc/junctionannotation + - rseqc/innerdistance + - rseqc/inferexperiment + - rseqc/bamstat +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file to calculate statistics + pattern: "*.{bam}" + - bai: + type: file + description: Index for input BAM file + pattern: "*.{bai}" + - bed: + type: file + description: BED file for the reference gene model + pattern: "*.{bed}" + - rseqc_modules: + type: list + description: | + List of rseqc modules to run + e.g. [ 'bam_stat', 'infer_experiment' ] +output: + - bamstat_txt: + type: file + description: bam statistics report + pattern: "*.bam_stat.txt" + - innerdistance_all: + type: file + description: All the output files from RSEQC_INNERDISTANCE + pattern: "*.{txt,pdf,R}" + - innerdistance_distance: + type: file + description: the inner distances + pattern: "*.inner_distance.txt" + - innerdistance_freq: + type: file + description: frequencies of different insert sizes + pattern: "*.inner_distance_freq.txt" + - innerdistance_mean: + type: file + description: mean/median values of inner distances + pattern: "*.inner_distance_mean.txt" + - innerdistance_pdf: + type: file + description: distribution plot of inner distances + pattern: "*.inner_distance_plot.pdf" + - innerdistance_rscript: + type: file + description: script to reproduce the plot + pattern: "*.inner_distance_plot.R" + - inferexperiment_txt: + type: file + description: infer_experiment results report + pattern: "*.infer_experiment.txt" + - junctionannotation_all: + type: file + description: All the output files from RSEQC_JUNCTIONANNOTATION + pattern: "*.{bed,xls,pdf,R,log}" + - junctionannotation_bed: + type: file + description: bed file of annotated junctions + pattern: "*.junction.bed" + - junctionannotation_interact_bed: + type: file + description: Interact bed file + pattern: "*.Interact.bed" + - junctionannotation_xls: + type: file + description: xls file with junction information + pattern: "*.xls" + - junctionannotation_pdf: + type: file + description: junction plot + pattern: "*.junction.pdf" + - junctionannotation_events_pdf: + type: file + description: events plot + pattern: "*.events.pdf" + - junctionannotation_rscript: + type: file + description: Rscript to reproduce the plots + pattern: "*.r" + - junctionannotation_log: + type: file + description: Log file generated by tool + pattern: "*.log" + - junctionsaturation_all: + type: file + description: All the output files from RSEQC_JUNCTIONSATURATION + pattern: "*.{pdf,R}" + - junctionsaturation_pdf: + type: file + description: Junction saturation report + pattern: "*.pdf" + - junctionsaturation_rscript: + type: file + description: Junction saturation R-script + pattern: "*.r" + - readdistribution_txt: + type: file + description: the read distribution report + pattern: "*.read_distribution.txt" + - readduplication_all: + type: file + description: All the output files from RSEQC_READDUPLICATION + pattern: "*.{xls,pdf,R}" + - readduplication_seq_xls: + type: file + description: Read duplication rate determined from mapping position of read + pattern: "*seq.DupRate.xls" + - readduplication_pos_xls: + type: file + description: Read duplication rate determined from sequence of read + pattern: "*pos.DupRate.xls" + - readduplication_pdf: + type: file + description: plot of duplication rate + pattern: "*.pdf" + - readduplication_rscript: + type: file + description: script to reproduce the plot + pattern: "*.R" + - tin_txt: + type: file + description: TXT file containing tin.py results summary + pattern: "*.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/subworkflows/nf-core/bam_rseqc/tests/main.nf.test b/subworkflows/nf-core/bam_rseqc/tests/main.nf.test new file mode 100644 index 00000000..d228c6b4 --- /dev/null +++ b/subworkflows/nf-core/bam_rseqc/tests/main.nf.test @@ -0,0 +1,147 @@ +nextflow_workflow { + + name "Test Workflow BAM_RSEQC" + script "../main.nf" + workflow "BAM_RSEQC" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_rseqc" + tag "bam_rseqc" + tag "rseqc" + tag "rseqc/bamstat" + tag "rseqc/inferexperiment" + tag "rseqc/innerdistance" + tag "rseqc/junctionannotation" + tag "rseqc/junctionsaturation" + tag "rseqc/readdistribution" + tag "rseqc/readduplication" + tag "rseqc/tin" + + test("sarscov2 paired-end [bam]") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed12', checkIfExists: true) + ]) + input[2] = ['bam_stat', 'inner_distance', 'infer_experiment', 'junction_annotation', 'junction_saturation', 'read_distribution', 'read_duplication', 'tin'] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.innerdistance_all.any { it[1].endsWith('.pdf') && file(it[1]).exists() } }, + { assert workflow.out.readduplication_all.any { it[1].endsWith('.pdf') && file(it[1]).exists() } }, + { assert workflow.out.junctionsaturation_all.any { it[1].endsWith('.pdf') && file(it[1]).exists() } }, + { assert snapshot( + workflow.out.bamstat_txt, + workflow.out.innerdistance_all.findAll { it[1].endsWith('.pdf') == false }, + workflow.out.inferexperiment_txt, + workflow.out.junctionannotation_all.findAll { it[1].endsWith('.xls') == false && it[1].endsWith('.r') == false }, + workflow.out.junctionsaturation_all.findAll { it[1].endsWith('.pdf') == false }, + workflow.out.readdistribution_txt, + workflow.out.readduplication_all.findAll { it[1].endsWith('.pdf') == false }, + workflow.out.tin_txt, + workflow.out.versions).match()} + ) + } + } + + test("sarscov2 paired-end [bam] no modules") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed12', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.bamstat_txt.size() == 0 }, + { assert workflow.out.innerdistance_all.size() == 0 }, + { assert workflow.out.inferexperiment_txt.size() == 0 }, + { assert workflow.out.junctionannotation_all.size() == 0 }, + { assert workflow.out.junctionsaturation_all.size() == 0 }, + { assert workflow.out.readdistribution_txt.size() == 0 }, + { assert workflow.out.readduplication_all.size() == 0 }, + { assert workflow.out.tin_txt.size() == 0 }, + { assert workflow.out.versions.size() == 0 } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed12', checkIfExists: true) + ]) + input[2] = ['bam_stat', 'inner_distance', 'infer_experiment', 'junction_annotation', 'junction_saturation', 'read_distribution', 'read_duplication', 'tin'] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("sarscov2 paired-end [bam] no modules - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed12', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } +} diff --git a/subworkflows/nf-core/bam_rseqc/tests/main.nf.test.snap b/subworkflows/nf-core/bam_rseqc/tests/main.nf.test.snap new file mode 100644 index 00000000..0040810c --- /dev/null +++ b/subworkflows/nf-core/bam_rseqc/tests/main.nf.test.snap @@ -0,0 +1,903 @@ +{ + "sarscov2 paired-end [bam]": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,2675857864c1d1139b2a19d25dc36b09" + ] + ], + [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,a1acc9def0f64a5500d4c4cb47cbe32b" + ], + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,3fc037501f5899b5da009c8ce02fc25e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,58398b7d5a29a5e564f9e3c50b55996c" + ], + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,5859fbd5b42046d47e8b9aa85077f4ea" + ] + ], + [ + [ + { + "id": "test" + }, + "test.infer_experiment.txt:md5,f9d0bfc239df637cd8aeda40ade3c59a" + ] + ], + [ + [ + { + "id": "test" + }, + "test.junction_annotation.log:md5,d75e0f5d62fada8aa9449991b209554c" + ] + ], + [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.r:md5,caa6e63dcb477aabb169882b2f30dadd" + ] + ], + [ + [ + { + "id": "test" + }, + "test.read_distribution.txt:md5,56893fdc0809d968629a363551a1655f" + ] + ], + [ + [ + { + "id": "test" + }, + "test.DupRate_plot.r:md5,3c0325095cee4835b921e57d61c23dca" + ], + [ + { + "id": "test" + }, + "test.pos.DupRate.xls:md5,a859bc2031d46bf1cc4336205847caa3" + ], + [ + { + "id": "test" + }, + "test.seq.DupRate.xls:md5,ee8783399eec5a18522a6f08bece338b" + ] + ], + [ + [ + { + "id": "test" + }, + "test.paired_end.sorted.summary.txt:md5,9d98447e178b89a89f6f5aba7a772fe6" + ] + ], + [ + "versions.yml:md5,60dc1afce7fecb7270e998a00ee393e2", + "versions.yml:md5,9bc3caee6aeb54f23f5296b499546515", + "versions.yml:md5,a25161998ca60d5ce4e9a86abbf1bcb8", + "versions.yml:md5,c175b92f50b5be38a8808cbf7ac7452e", + "versions.yml:md5,ca7e95cf7ff7cff5d052b890729f7ead", + "versions.yml:md5,d03beea3c68934c3f3623a005c1424d2", + "versions.yml:md5,f2f3ecd549045f7595245f904f5d9414", + "versions.yml:md5,fd16f1098b9c285f3ea7bd3daf4e8f10" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T12:07:54.452949" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test" + }, + "test.Interact.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test" + }, + "test.junction.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test" + }, + "test.junction.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test" + }, + "test.events.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test" + }, + "test.junction_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test" + }, + "test.junction_annotation.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "17": [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "18": [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "19": [ + [ + { + "id": "test" + }, + "test.read_distribution.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "20": [ + [ + { + "id": "test" + }, + "test.DupRate_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.DupRate_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.pos.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.seq.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "21": [ + [ + { + "id": "test" + }, + "test.seq.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "22": [ + [ + { + "id": "test" + }, + "test.pos.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "23": [ + [ + { + "id": "test" + }, + "test.DupRate_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "24": [ + [ + { + "id": "test" + }, + "test.DupRate_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "25": [ + [ + { + "id": "test" + }, + "test.paired_end.sorted.bam.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "26": [ + "versions.yml:md5,60dc1afce7fecb7270e998a00ee393e2", + "versions.yml:md5,9bc3caee6aeb54f23f5296b499546515", + "versions.yml:md5,a25161998ca60d5ce4e9a86abbf1bcb8", + "versions.yml:md5,c175b92f50b5be38a8808cbf7ac7452e", + "versions.yml:md5,ca7e95cf7ff7cff5d052b890729f7ead", + "versions.yml:md5,d03beea3c68934c3f3623a005c1424d2", + "versions.yml:md5,f2f3ecd549045f7595245f904f5d9414", + "versions.yml:md5,fd16f1098b9c285f3ea7bd3daf4e8f10" + ], + "3": [ + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test" + }, + "test.infer_experiment.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test" + }, + "test.Interact.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.events.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction_annotation.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test" + }, + "test.junction.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bamstat_txt": [ + [ + { + "id": "test" + }, + "test.bam_stat.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "inferexperiment_txt": [ + [ + { + "id": "test" + }, + "test.infer_experiment.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "innerdistance_all": [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "innerdistance_distance": [ + [ + { + "id": "test" + }, + "test.inner_distance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "innerdistance_freq": [ + [ + { + "id": "test" + }, + "test.inner_distance_freq.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "innerdistance_mean": [ + [ + { + "id": "test" + }, + "test.inner_distance_mean.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "innerdistance_pdf": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "innerdistance_rscript": [ + [ + { + "id": "test" + }, + "test.inner_distance_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_all": [ + [ + { + "id": "test" + }, + "test.Interact.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.events.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction_annotation.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junction_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_bed": [ + [ + { + "id": "test" + }, + "test.junction.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_events_pdf": [ + [ + { + "id": "test" + }, + "test.events.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_interact_bed": [ + [ + { + "id": "test" + }, + "test.Interact.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_log": [ + [ + { + "id": "test" + }, + "test.junction_annotation.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_pdf": [ + [ + { + "id": "test" + }, + "test.junction.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_rscript": [ + [ + { + "id": "test" + }, + "test.junction_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionannotation_xls": [ + [ + { + "id": "test" + }, + "test.junction.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionsaturation_all": [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionsaturation_pdf": [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "junctionsaturation_rscript": [ + [ + { + "id": "test" + }, + "test.junctionSaturation_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readdistribution_txt": [ + [ + { + "id": "test" + }, + "test.read_distribution.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readduplication_all": [ + [ + { + "id": "test" + }, + "test.DupRate_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.DupRate_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.pos.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test" + }, + "test.seq.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readduplication_pdf": [ + [ + { + "id": "test" + }, + "test.DupRate_plot.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readduplication_pos_xls": [ + [ + { + "id": "test" + }, + "test.pos.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readduplication_rscript": [ + [ + { + "id": "test" + }, + "test.DupRate_plot.r:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "readduplication_seq_xls": [ + [ + { + "id": "test" + }, + "test.seq.DupRate.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tin_txt": [ + [ + { + "id": "test" + }, + "test.paired_end.sorted.bam.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,60dc1afce7fecb7270e998a00ee393e2", + "versions.yml:md5,9bc3caee6aeb54f23f5296b499546515", + "versions.yml:md5,a25161998ca60d5ce4e9a86abbf1bcb8", + "versions.yml:md5,c175b92f50b5be38a8808cbf7ac7452e", + "versions.yml:md5,ca7e95cf7ff7cff5d052b890729f7ead", + "versions.yml:md5,d03beea3c68934c3f3623a005c1424d2", + "versions.yml:md5,f2f3ecd549045f7595245f904f5d9414", + "versions.yml:md5,fd16f1098b9c285f3ea7bd3daf4e8f10" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T12:08:17.304769" + }, + "sarscov2 paired-end [bam] no modules - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + + ], + "14": [ + + ], + "15": [ + + ], + "16": [ + + ], + "17": [ + + ], + "18": [ + + ], + "19": [ + + ], + "2": [ + + ], + "20": [ + + ], + "21": [ + + ], + "22": [ + + ], + "23": [ + + ], + "24": [ + + ], + "25": [ + + ], + "26": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "bamstat_txt": [ + + ], + "inferexperiment_txt": [ + + ], + "innerdistance_all": [ + + ], + "innerdistance_distance": [ + + ], + "innerdistance_freq": [ + + ], + "innerdistance_mean": [ + + ], + "innerdistance_pdf": [ + + ], + "innerdistance_rscript": [ + + ], + "junctionannotation_all": [ + + ], + "junctionannotation_bed": [ + + ], + "junctionannotation_events_pdf": [ + + ], + "junctionannotation_interact_bed": [ + + ], + "junctionannotation_log": [ + + ], + "junctionannotation_pdf": [ + + ], + "junctionannotation_rscript": [ + + ], + "junctionannotation_xls": [ + + ], + "junctionsaturation_all": [ + + ], + "junctionsaturation_pdf": [ + + ], + "junctionsaturation_rscript": [ + + ], + "readdistribution_txt": [ + + ], + "readduplication_all": [ + + ], + "readduplication_pdf": [ + + ], + "readduplication_pos_xls": [ + + ], + "readduplication_rscript": [ + + ], + "readduplication_seq_xls": [ + + ], + "tin_txt": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T12:08:24.443731" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_rseqc/tests/tags.yml b/subworkflows/nf-core/bam_rseqc/tests/tags.yml new file mode 100644 index 00000000..c8dfce3a --- /dev/null +++ b/subworkflows/nf-core/bam_rseqc/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_rseqc: + - subworkflows/nf-core/bam_rseqc/** diff --git a/workflows/nascent.nf b/workflows/nascent.nf index e893c9c8..c74dbead 100644 --- a/workflows/nascent.nf +++ b/workflows/nascent.nf @@ -246,15 +246,16 @@ workflow NASCENT { ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.versions) } + ch_genome_bam_bai = ch_genome_bam.join(ch_genome_bai, by: [0], remainder: true) + QUALITY_CONTROL ( - ch_genome_bam, + ch_genome_bam_bai, PREPARE_GENOME.out.gene_bed ) ch_versions = ch_versions.mix(QUALITY_CONTROL.out.versions) COVERAGE_GRAPHS ( - ch_genome_bam, - ch_genome_bai, + ch_genome_bam_bai, PREPARE_GENOME.out.chrom_sizes, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.fai diff --git a/workflows/tests/aligner/bowtie2.nf.test.snap b/workflows/tests/aligner/bowtie2.nf.test.snap index 3af0ee7e..9fa959db 100644 --- a/workflows/tests/aligner/bowtie2.nf.test.snap +++ b/workflows/tests/aligner/bowtie2.nf.test.snap @@ -30,6 +30,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-08-03T21:20:15.261777672" + "timestamp": "2024-08-04T20:09:31.105618713" } } \ No newline at end of file diff --git a/workflows/tests/aligner/bwa.nf.test.snap b/workflows/tests/aligner/bwa.nf.test.snap index 482f3ec8..084b00e4 100644 --- a/workflows/tests/aligner/bwa.nf.test.snap +++ b/workflows/tests/aligner/bwa.nf.test.snap @@ -49,7 +49,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-08-03T07:37:00.256122533" + "timestamp": "2024-08-04T20:12:41.277553487" }, "Should work with BWA Index": { "content": [ diff --git a/workflows/tests/aligner/bwamem2.nf.test.snap b/workflows/tests/aligner/bwamem2.nf.test.snap index 73979232..ec1151b7 100644 --- a/workflows/tests/aligner/bwamem2.nf.test.snap +++ b/workflows/tests/aligner/bwamem2.nf.test.snap @@ -30,6 +30,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-08-01T19:59:55.332896924" + "timestamp": "2024-08-04T20:16:56.413251993" } } \ No newline at end of file diff --git a/workflows/tests/aligner/dragmap.nf.test.snap b/workflows/tests/aligner/dragmap.nf.test.snap index dcaf9f23..c77e5711 100644 --- a/workflows/tests/aligner/dragmap.nf.test.snap +++ b/workflows/tests/aligner/dragmap.nf.test.snap @@ -30,6 +30,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-08-01T20:04:34.724457691" + "timestamp": "2024-08-04T20:19:06.859222166" } } \ No newline at end of file diff --git a/workflows/tests/aligner/hisat2.nf.test.snap b/workflows/tests/aligner/hisat2.nf.test.snap index 8d83c296..00a6968a 100644 --- a/workflows/tests/aligner/hisat2.nf.test.snap +++ b/workflows/tests/aligner/hisat2.nf.test.snap @@ -30,6 +30,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-08-03T09:05:42.254786523" + "timestamp": "2024-08-04T20:20:46.356464865" } } \ No newline at end of file diff --git a/workflows/tests/aligner/star.nf.test.snap b/workflows/tests/aligner/star.nf.test.snap index 0f8d9e62..d34a85fa 100644 --- a/workflows/tests/aligner/star.nf.test.snap +++ b/workflows/tests/aligner/star.nf.test.snap @@ -51,6 +51,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-08-01T20:15:17.868733535" + "timestamp": "2024-08-04T20:24:33.931557374" } } \ No newline at end of file