From a7ec1e7bfb83f7608f71c7dd65dfaa90747ab064 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Thu, 30 May 2024 15:55:34 +0100 Subject: [PATCH 01/11] Fix the coverage report, we were using the samtools flags for decont instead of mapping --- conf/modules.config | 21 +++++++++++++-------- subworkflows/local/assembly_coverage.nf | 12 ++++++------ workflows/miassembler.nf | 3 ++- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 67b430b..c6352b9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -72,19 +72,24 @@ process { ] } + // This BWAMEM2_MEM belongs to the decontamination module withName: 'BWAMEM2_MEM' { cpus = { check_max( 12 * task.attempt, 'cpus' ) } memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + time = { check_max( 1.h * task.attempt, 'time' ) } ext.args = "-M" - ext.args2 = [ - '-f', - '12', - '-F', - '256', - '-uS', - ].join(' ').trim() + ext.args2 = "-f 12 -F 256 -uS" + } + + // This BWAMEM2_MEM belongs to the coverage module + withName: 'BWAMEM2_MEM_COVERAGE' { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 1.h * task.attempt, 'time' ) } + + ext.args = "-M" + ext.args2 = "-F 12 -F 256 -uS" } withName: 'SAMTOOLS_BAM2FQ' { diff --git a/subworkflows/local/assembly_coverage.nf b/subworkflows/local/assembly_coverage.nf index 962401c..e80c2a3 100644 --- a/subworkflows/local/assembly_coverage.nf +++ b/subworkflows/local/assembly_coverage.nf @@ -1,5 +1,5 @@ include { BWAMEM2_INDEX } from '../../modules/nf-core/bwamem2/index/main' -include { BWAMEM2_MEM } from '../../modules/ebi-metagenomics/bwamem2/mem/main' +include { BWAMEM2_MEM as BWAMEM2_MEM_COVERAGE } from '../../modules/ebi-metagenomics/bwamem2/mem/main' include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/samtools/idxstats/main' include { 
METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS } from '../../modules/nf-core/metabat2/jgisummarizebamcontigdepths/main' @@ -19,21 +19,21 @@ workflow ASSEMBLY_COVERAGE { ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) - BWAMEM2_MEM( + BWAMEM2_MEM_COVERAGE( reads, BWAMEM2_INDEX.out.index ) - - ch_versions = ch_versions.mix(BWAMEM2_MEM.out.versions) + + ch_versions = ch_versions.mix(BWAMEM2_MEM_COVERAGE.out.versions) METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS( - BWAMEM2_MEM.out.bam + BWAMEM2_MEM_COVERAGE.out.bam ) ch_versions = ch_versions.mix(METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS.out.versions) SAMTOOLS_IDXSTATS( - BWAMEM2_MEM.out.bam + BWAMEM2_MEM_COVERAGE.out.bam ) ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) diff --git a/workflows/miassembler.nf b/workflows/miassembler.nf index e75ff43..37b35c6 100644 --- a/workflows/miassembler.nf +++ b/workflows/miassembler.nf @@ -120,7 +120,7 @@ workflow MIASSEMBLER { - Paired-end reads are assembled with MetaSPAdes, unless specified otherwise - An error is raised if the assembler and read layout are incompatible (shouldn't happen...) 
*/ - qc_reads_extended = READS_QC.out.qc_reads.map{ meta, reads -> + qc_reads_extended = READS_QC.out.qc_reads.map { meta, reads -> if ( params.assembler == "megahit" || meta.single_end ) { return [ meta + [assembler: "megahit", assembler_version: params.megahit_version], reads] } else if ( ["metaspades", "spades"].contains(params.assembler) || !meta.single_end ) { @@ -134,6 +134,7 @@ workflow MIASSEMBLER { megahit: meta.assembler == "megahit" xspades: ["metaspades", "spades"].contains(meta.assembler) }.set { qc_reads } + ch_versions = ch_versions.mix(READS_QC.out.versions) /******************/ From 6ed34e12d83090d8f86674efc059829016338ed2 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Thu, 30 May 2024 15:59:09 +0100 Subject: [PATCH 02/11] Use an alias for BWAMEM2_MEM_DECONT --- conf/modules.config | 2 +- .../ebi-metagenomics/reads_bwamem2_decontamination/main.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c6352b9..80baae6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -73,7 +73,7 @@ process { } // This BWAMEM2_MEM belongs to the decontamination module - withName: 'BWAMEM2_MEM' { + withName: 'BWAMEM2_MEM_DECONT' { cpus = { check_max( 12 * task.attempt, 'cpus' ) } memory = { check_max( 72.GB * task.attempt, 'memory' ) } time = { check_max( 1.h * task.attempt, 'time' ) } diff --git a/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/main.nf b/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/main.nf index 02bae4d..1d69358 100644 --- a/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/main.nf +++ b/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/main.nf @@ -1,5 +1,5 @@ -include { BWAMEM2_MEM } from '../../../modules/ebi-metagenomics/bwamem2/mem/main' -include { SAMTOOLS_BAM2FQ } from '../../../modules/ebi-metagenomics/samtools/bam2fq/main' +include { BWAMEM2_MEM as BWAMEM2_MEM_DECONT } from 
'../../../modules/ebi-metagenomics/bwamem2/mem/main' +include { SAMTOOLS_BAM2FQ } from '../../../modules/ebi-metagenomics/samtools/bam2fq/main' workflow READS_BWAMEM2_DECONTAMINATION { From 622773a6594ccbe2263dc4d460948adade0b3bf2 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Thu, 30 May 2024 16:13:39 +0100 Subject: [PATCH 03/11] Forgot to use the renamed thingy --- .../ebi-metagenomics/reads_bwamem2_decontamination/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/main.nf b/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/main.nf index 1d69358..e197946 100644 --- a/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/main.nf +++ b/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/main.nf @@ -13,10 +13,10 @@ workflow READS_BWAMEM2_DECONTAMINATION { ch_versions = Channel.empty() - BWAMEM2_MEM(ch_reads, ch_reference) - ch_versions = ch_versions.mix(BWAMEM2_MEM.out.versions.first()) + BWAMEM2_MEM_DECONT(ch_reads, ch_reference) + ch_versions = ch_versions.mix(BWAMEM2_MEM_DECONT.out.versions.first()) - SAMTOOLS_BAM2FQ( BWAMEM2_MEM.out.bam.map { meta, bam, _ -> [ meta, bam, meta.single_end == false ] } ) + SAMTOOLS_BAM2FQ( BWAMEM2_MEM_DECONT.out.bam.map { meta, bam, _ -> [ meta, bam, meta.single_end == false ] } ) ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions.first()) emit: From d6119ea5f28b0e43bd6d00271bac52b293cf0702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Beracochea?= Date: Thu, 30 May 2024 16:27:15 +0100 Subject: [PATCH 04/11] Update samtools flag Co-authored-by: Varsha Kale <34109092+Vkale1@users.noreply.github.com> --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 80baae6..9409c04 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -89,7 +89,7 @@ process { time = { check_max( 1.h * task.attempt, 'time' ) } ext.args 
= "-M" - ext.args2 = "-F 12 -F 256 -uS" + ext.args2 = "-F 268 -uS" } withName: 'SAMTOOLS_BAM2FQ' { From 598d183d2f1b48390ce16fbfce00adf137706e17 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Thu, 30 May 2024 16:27:42 +0100 Subject: [PATCH 05/11] Update module diff --- modules.json | 3 ++- .../bwamem2/mem/bwamem2-mem.diff | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff diff --git a/modules.json b/modules.json index 2deaa91..76bd354 100644 --- a/modules.json +++ b/modules.json @@ -8,7 +8,8 @@ "bwamem2/mem": { "branch": "main", "git_sha": "75707538d91ddd27fb6007b4ac3710cb05154780", - "installed_by": ["reads_bwamem2_decontamination"] + "installed_by": ["reads_bwamem2_decontamination"], + "patch": "modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff" }, "samtools/bam2fq": { "branch": "main", diff --git a/modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff b/modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff new file mode 100644 index 0000000..9bc8e23 --- /dev/null +++ b/modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff @@ -0,0 +1,19 @@ +Changes in module 'ebi-metagenomics/bwamem2/mem' +--- modules/ebi-metagenomics/bwamem2/mem/environment.yml ++++ modules/ebi-metagenomics/bwamem2/mem/environment.yml +@@ -1,10 +1,11 @@ + name: bwamem2_mem ++ + channels: + - conda-forge + - bioconda + - defaults ++ + dependencies: + - bwa-mem2=2.2.1 +- # renovate: datasource=conda depName=bioconda/samtools ++ - htslib=1.19.1 + - samtools=1.19.2 +- - htslib=1.19.1 + +************************************************************ From 3d294ef021b3fe43e4cc31c540ec45def4c3a4b8 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Mon, 3 Jun 2024 11:29:54 +0100 Subject: [PATCH 06/11] Increase runtime limit to 8 hours for BWAMEM2_MEM_DECONT --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 
9409c04..9705bb8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -76,7 +76,7 @@ process { withName: 'BWAMEM2_MEM_DECONT' { cpus = { check_max( 12 * task.attempt, 'cpus' ) } memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } ext.args = "-M" ext.args2 = "-f 12 -F 256 -uS" From 42d8a8a93c98e325bd3de98b72aa6ff27edd0262 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Mon, 3 Jun 2024 13:38:38 +0100 Subject: [PATCH 07/11] Replaced reads_bwamem2_decontamination swf with the bwamem2decontnobams module. This will save storage, as the decontamination .bam files are not created --- assets/multiqc_config.yml | 3 + modules.json | 19 ++--- .../bwamem2/mem/bwamem2-mem.diff | 19 ----- .../bwamem2/mem/environment.yml | 5 +- .../bwamem2decontnobams/environment.yml | 11 +++ .../bwamem2decontnobams/main.nf | 56 +++++++++++++++ .../bwamem2decontnobams/meta.yml | 56 +++++++++++++++ .../bwamem2decontnobams}/tests/main.nf.test | 18 +++-- .../bwamem2decontnobams/tests/tags.yml | 2 + .../samtools/bam2fq/environment.yml | 7 -- .../ebi-metagenomics/samtools/bam2fq/main.nf | 57 --------------- .../ebi-metagenomics/samtools/bam2fq/meta.yml | 56 --------------- .../samtools/bam2fq/samtools-bam2fq.diff | 14 ---- .../samtools/bam2fq/tests/main.nf.test | 71 ------------------- .../samtools/bam2fq/tests/main.nf.test.snap | 49 ------------- .../samtools/bam2fq/tests/nextflow.config | 3 - .../samtools/bam2fq/tests/tags.yml | 2 - .../reads_bwamem2_decontamination/main.nf | 26 ------- .../reads_bwamem2_decontamination/meta.yml | 59 --------------- .../tests/nextflow.config | 6 -- .../tests/tags.yml | 2 - subworkflows/local/reads_qc.nf | 12 ++-- tests/main.nf.test | 2 +- workflows/miassembler.nf | 6 +- 24 files changed, 152 insertions(+), 409 deletions(-) delete mode 100644 modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff create mode 100644 
modules/ebi-metagenomics/bwamem2decontnobams/environment.yml create mode 100644 modules/ebi-metagenomics/bwamem2decontnobams/main.nf create mode 100644 modules/ebi-metagenomics/bwamem2decontnobams/meta.yml rename {subworkflows/ebi-metagenomics/reads_bwamem2_decontamination => modules/ebi-metagenomics/bwamem2decontnobams}/tests/main.nf.test (86%) create mode 100644 modules/ebi-metagenomics/bwamem2decontnobams/tests/tags.yml delete mode 100644 modules/ebi-metagenomics/samtools/bam2fq/environment.yml delete mode 100644 modules/ebi-metagenomics/samtools/bam2fq/main.nf delete mode 100644 modules/ebi-metagenomics/samtools/bam2fq/meta.yml delete mode 100644 modules/ebi-metagenomics/samtools/bam2fq/samtools-bam2fq.diff delete mode 100644 modules/ebi-metagenomics/samtools/bam2fq/tests/main.nf.test delete mode 100644 modules/ebi-metagenomics/samtools/bam2fq/tests/main.nf.test.snap delete mode 100644 modules/ebi-metagenomics/samtools/bam2fq/tests/nextflow.config delete mode 100644 modules/ebi-metagenomics/samtools/bam2fq/tests/tags.yml delete mode 100644 subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/main.nf delete mode 100644 subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/meta.yml delete mode 100644 subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/nextflow.config delete mode 100644 subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/tags.yml diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 337607f..ebc3f78 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -20,3 +20,6 @@ top_modules: sp: quast_config: fn: "*.tsv" + +section_comments: + Samtools: Assembly coverage diff --git a/modules.json b/modules.json index 76bd354..002b1f6 100644 --- a/modules.json +++ b/modules.json @@ -8,23 +8,12 @@ "bwamem2/mem": { "branch": "main", "git_sha": "75707538d91ddd27fb6007b4ac3710cb05154780", - "installed_by": ["reads_bwamem2_decontamination"], - "patch": 
"modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff" + "installed_by": ["modules"] }, - "samtools/bam2fq": { - "branch": "main", - "git_sha": "88f2bfbe6f0ba858d0833db590e647c4678656a7", - "installed_by": ["reads_bwamem2_decontamination"], - "patch": "modules/ebi-metagenomics/samtools/bam2fq/samtools-bam2fq.diff" - } - } - }, - "subworkflows": { - "ebi-metagenomics": { - "reads_bwamem2_decontamination": { + "bwamem2decontnobams": { "branch": "main", - "git_sha": "0b40060df67681e0172aab145460618c08d99516", - "installed_by": ["subworkflows"] + "git_sha": "32049180387cf2406254acf57882fc55915cb52e", + "installed_by": ["modules"] } } } diff --git a/modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff b/modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff deleted file mode 100644 index 9bc8e23..0000000 --- a/modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff +++ /dev/null @@ -1,19 +0,0 @@ -Changes in module 'ebi-metagenomics/bwamem2/mem' ---- modules/ebi-metagenomics/bwamem2/mem/environment.yml -+++ modules/ebi-metagenomics/bwamem2/mem/environment.yml -@@ -1,10 +1,11 @@ - name: bwamem2_mem -+ - channels: - - conda-forge - - bioconda - - defaults -+ - dependencies: - - bwa-mem2=2.2.1 -- # renovate: datasource=conda depName=bioconda/samtools -+ - htslib=1.19.1 - - samtools=1.19.2 -- - htslib=1.19.1 - -************************************************************ diff --git a/modules/ebi-metagenomics/bwamem2/mem/environment.yml b/modules/ebi-metagenomics/bwamem2/mem/environment.yml index 31db06a..cbf06d3 100644 --- a/modules/ebi-metagenomics/bwamem2/mem/environment.yml +++ b/modules/ebi-metagenomics/bwamem2/mem/environment.yml @@ -1,11 +1,10 @@ name: bwamem2_mem - channels: - conda-forge - bioconda - defaults - dependencies: - bwa-mem2=2.2.1 - - htslib=1.19.1 + # renovate: datasource=conda depName=bioconda/samtools - samtools=1.19.2 + - htslib=1.19.1 diff --git a/modules/ebi-metagenomics/bwamem2decontnobams/environment.yml 
b/modules/ebi-metagenomics/bwamem2decontnobams/environment.yml new file mode 100644 index 0000000..5e236a7 --- /dev/null +++ b/modules/ebi-metagenomics/bwamem2decontnobams/environment.yml @@ -0,0 +1,11 @@ +name: bwamem2decontnobams + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bwa-mem2=2.2.1 + - htslib=1.19.1 + - samtools=1.19.2 diff --git a/modules/ebi-metagenomics/bwamem2decontnobams/main.nf b/modules/ebi-metagenomics/bwamem2decontnobams/main.nf new file mode 100644 index 0000000..d9e6f36 --- /dev/null +++ b/modules/ebi-metagenomics/bwamem2decontnobams/main.nf @@ -0,0 +1,56 @@ +process BWAMEM2DECONTNOBAMS { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' : + 'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' }" + + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + + output: + tuple val(meta), path("*{_1,_2,_interleaved}.fq.gz"), emit: decont_reads + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def ref_prefix = task.ext.ref_prefix ?: "${meta2.id}" + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + if [[ "${meta.single_end}" == "true" ]]; then + bwa-mem2 \\ + mem \\ + -M \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools view -@ ${task.cpus} -f 4 -F 256 -uS - \\ + | samtools sort -@ ${task.cpus} -n -O bam - \\ + | samtools bam2fq -@ $task.cpus - | gzip --no-name > ${ref_prefix}_${prefix}_interleaved.fq.gz + else + bwa-mem2 \\ + mem \\ + -M \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools view -@ ${task.cpus} -f 
4 -F 256 -uS - \\ + | samtools sort -@ ${task.cpus} -n -O bam - \\ + | samtools bam2fq -@ ${task.cpus} -1 ${ref_prefix}_${prefix}_1.fq.gz -2 ${ref_prefix}_${prefix}_2.fq.gz -0 /dev/null -s /dev/null + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa-mem2: \$(bwa-mem2 version 2> /dev/null) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/ebi-metagenomics/bwamem2decontnobams/meta.yml b/modules/ebi-metagenomics/bwamem2decontnobams/meta.yml new file mode 100644 index 0000000..45cf782 --- /dev/null +++ b/modules/ebi-metagenomics/bwamem2decontnobams/meta.yml @@ -0,0 +1,56 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "bwamem2decontnobams" +description: Decontamination module using bwamem2 and samtools that generates fastq files on the fly +keywords: + - alignment + - decontamination + - fastq +tools: + - bwamem2: + description: "Mapping DNA sequences against a large reference genome" + tool_dev_url: "https://github.com/bwa-mem2/bwa-mem2" + - samtools: + description: "Tools for dealing with SAM, BAM and CRAM files" + documentation: "http://www.htslib.org/doc/1.1/samtools.html" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 + for single-end and paired-end data, respectively + - meta2: + type: map + description: | + Groovy Map containing reference genome information + e.g. [ id:'ref_name' ] + - index: + type: file + description: | + A list of BWA index files + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - decont_reads: + type: file + description: | + List of fastq files. Two files for paired-end reads and one file for single-end reads + +authors: + - "@EBI-metagenomics" +maintainers: + - "@EBI-metagenomics" diff --git a/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/main.nf.test b/modules/ebi-metagenomics/bwamem2decontnobams/tests/main.nf.test similarity index 86% rename from subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/main.nf.test rename to modules/ebi-metagenomics/bwamem2decontnobams/tests/main.nf.test index 056fb39..03114b1 100644 --- a/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/main.nf.test +++ b/modules/ebi-metagenomics/bwamem2decontnobams/tests/main.nf.test @@ -1,14 +1,12 @@ nextflow_workflow { - name "Test Subworkflow READS_BWAMEM2_DECONTAMINATION" + name "Test module bwamem2decontnobams" script "../main.nf" - workflow "READS_BWAMEM2_DECONTAMINATION" + workflow "BWAMEM2DECONTNOBAMS" - tag "subworkflows" - tag "subworkflows_nfcore" - tag "subworkflows/reads_bwamem2_decontamination" - tag "bwamem2/mem" - tag "samtools/bam2fq" + tag "modules" + tag "modules_nfcore" + tag "bwamem2decontnobams" test("Illumina paired_end decontamination with MGYG000317500") { when { @@ -42,8 +40,8 @@ nextflow_workflow { { assert workflow.success }, // gzip stores extra information in the header, which makes comparing checksums impossible between operating systems. 
// that is why we use the sizes of files, and that sort of thing - { assert path(workflow.out.decontaminated_reads.get(0).get(1).get(0)).linesGzip.size() == 374028 }, - { assert path(workflow.out.decontaminated_reads.get(0).get(1).get(1)).linesGzip.size() == 374028 } + { assert path(workflow.out.decont_reads.get(0).get(1).get(0)).linesGzip.size() == 374028 }, + { assert path(workflow.out.decont_reads.get(0).get(1).get(1)).linesGzip.size() == 374028 } ) } } @@ -79,7 +77,7 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert path(workflow.out.decontaminated_reads.get(0).get(1)).linesGzip.size() == 378312 } + { assert path(workflow.out.decont_reads.get(0).get(1)).linesGzip.size() == 378312 } ) } } diff --git a/modules/ebi-metagenomics/bwamem2decontnobams/tests/tags.yml b/modules/ebi-metagenomics/bwamem2decontnobams/tests/tags.yml new file mode 100644 index 0000000..7c794e0 --- /dev/null +++ b/modules/ebi-metagenomics/bwamem2decontnobams/tests/tags.yml @@ -0,0 +1,2 @@ +bwamem2decontnobams: + - modules/ebi-metagenomics/bwamem2decontnobams/** diff --git a/modules/ebi-metagenomics/samtools/bam2fq/environment.yml b/modules/ebi-metagenomics/samtools/bam2fq/environment.yml deleted file mode 100644 index 5297496..0000000 --- a/modules/ebi-metagenomics/samtools/bam2fq/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: samtools_bam2fq -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::samtools=1.18 diff --git a/modules/ebi-metagenomics/samtools/bam2fq/main.nf b/modules/ebi-metagenomics/samtools/bam2fq/main.nf deleted file mode 100644 index 15b5a65..0000000 --- a/modules/ebi-metagenomics/samtools/bam2fq/main.nf +++ /dev/null @@ -1,57 +0,0 @@ -process SAMTOOLS_BAM2FQ { - tag "$meta.id" - label 'process_low' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : - 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" - - input: - tuple val(meta), path(inputbam), val(split) - - output: - tuple val(meta), path("*{_1,_2,_interleaved}.fq.gz"), emit: reads - tuple val(meta), path("*_singleton.fq.gz"), optional: true, emit: singleton_reads - tuple val(meta), path("*_other.fq.gz"), optional: true, emit: other_reads - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ? "${task.ext.prefix}_${meta.id}": "${meta.id}" - - if (split) { - """ - samtools \\ - bam2fq \\ - $args \\ - -@ $task.cpus \\ - -1 ${prefix}_1.fq.gz \\ - -2 ${prefix}_2.fq.gz \\ - -0 ${prefix}_other.fq.gz \\ - -s ${prefix}_singleton.fq.gz \\ - $inputbam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - } else { - """ - samtools \\ - bam2fq \\ - $args \\ - -@ $task.cpus \\ - $inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - } -} diff --git a/modules/ebi-metagenomics/samtools/bam2fq/meta.yml b/modules/ebi-metagenomics/samtools/bam2fq/meta.yml deleted file mode 100644 index 1dfc1a0..0000000 --- a/modules/ebi-metagenomics/samtools/bam2fq/meta.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: samtools_bam2fq -description: | - The module uses bam2fq method from samtools to - convert a SAM, BAM or CRAM file to FASTQ format -keywords: - - bam2fq - - samtools - - fastq -tools: - - samtools: - description: Tools for dealing with SAM, BAM and CRAM files - documentation: http://www.htslib.org/doc/1.1/samtools.html - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample 
information - e.g. [ id:'test', single_end:false ] - - bam_file: - type: file - description: | - Sorted BAM file - - split: - type: boolean - description: | - True or false indicating whether the output - is for single or paired-end reads - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of fastq files. Two files for paired-end reads and one file for single-end reads - - singleton_reads: - type: file - description: | - Fastq files generated for unpaired paired-end reads - - other_reads: - type: file - description: | - Other fastq files generated for paired-end reads - - versions: - type: file - description: | - File containing software versions - -authors: - - "@EBI-Metagenomics" -maintainers: - - "@EBI-Metagenomics" diff --git a/modules/ebi-metagenomics/samtools/bam2fq/samtools-bam2fq.diff b/modules/ebi-metagenomics/samtools/bam2fq/samtools-bam2fq.diff deleted file mode 100644 index 98dfd92..0000000 --- a/modules/ebi-metagenomics/samtools/bam2fq/samtools-bam2fq.diff +++ /dev/null @@ -1,14 +0,0 @@ -Changes in module 'ebi-metagenomics/samtools/bam2fq' ---- modules/ebi-metagenomics/samtools/bam2fq/main.nf -+++ modules/ebi-metagenomics/samtools/bam2fq/main.nf -@@ -21,7 +21,7 @@ - - script: - def args = task.ext.args ?: '' -- def prefix = task.ext.prefix ?: "${meta.id}" -+ def prefix = task.ext.prefix ? 
"${task.ext.prefix}_${meta.id}": "${meta.id}" - - if (split) { - """ - -************************************************************ diff --git a/modules/ebi-metagenomics/samtools/bam2fq/tests/main.nf.test b/modules/ebi-metagenomics/samtools/bam2fq/tests/main.nf.test deleted file mode 100644 index cd65abb..0000000 --- a/modules/ebi-metagenomics/samtools/bam2fq/tests/main.nf.test +++ /dev/null @@ -1,71 +0,0 @@ -nextflow_process { - - name "Test Process SAMTOOLS_BAM2FQ" - script "../main.nf" - process "SAMTOOLS_BAM2FQ" - - tag "modules" - tag "modules_nfcore" - tag "samtools" - tag "samtools/bam2fq" - - config "./nextflow.config" - - test("homo_sapiens - bam, false") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_converted_bam'], checkIfExists: true) - ] - input[1] = false - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - path(process.out.reads[0][1]).linesGzip[0..6], - process.out.versions - ).match() } - ) - } - - } - - test("homo_sapiens - bam, true") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_converted_bam'], checkIfExists: true) - ] - input[1] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.reads[0][1].collect{ - if(it ==~ /.*(other|singleton)\.fq\.gz$/) { - return file(it).name - } - return path(it).linesGzip[0..6] - }, - process.out.versions - ).match() } - ) - } - - } - -} diff --git a/modules/ebi-metagenomics/samtools/bam2fq/tests/main.nf.test.snap b/modules/ebi-metagenomics/samtools/bam2fq/tests/main.nf.test.snap deleted file mode 100644 index 1f82450..0000000 --- a/modules/ebi-metagenomics/samtools/bam2fq/tests/main.nf.test.snap +++ /dev/null @@ -1,49 +0,0 @@ -{ - "homo_sapiens - bam, false": { - "content": [ - [ - 
"@922332/1\tRX:Z:ATTTCAG-TATTATT", - "GAGAGGATCTCGTGTAGAAATTGCTTTGAGCTGTTCTTTGTCATTTTCCCTTAATTCATTGTCTCTAGCTAGTCTGTTACTCTGTAAAATAAAATAATAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTTAAGGTCAGTG", - "+", - "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE [ meta, bam, meta.single_end == false ] } ) - ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions.first()) - - emit: - decontaminated_reads = SAMTOOLS_BAM2FQ.out.reads // channel: [ val(meta), [ path(decont_reads) ]] - versions = ch_versions // channel: [ versions.yml ] - -} diff --git a/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/meta.yml b/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/meta.yml deleted file mode 100644 index 8e73676..0000000 --- a/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/meta.yml +++ /dev/null @@ -1,59 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "reads_bwamem2_decontamination" - -description: Short-reads mapping to a reference genome and remove matching reads -keywords: - - decontamination - - short-reads - - mapping - -components: - - bwamem2/mem - - samtools/bam2fq - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - A list of input FastQ files of size 1 or 2 - for single-end and paired-end data, respectively - pattern: "*.{fastq/fq}.gz" - - meta2: - type: map - description: | - Groovy Map containing reference genome information - e.g. [ id:'ref_name' ] - - ref_index: - type: file - description: | - A list of BWA index reference files - pattern: "*.{amb,ann,bwt,pac,sa}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - decont_reads: - type: file - description: | - A list of decontaminated FastQ files of size 1 or 2 - for single-end and paired-end data, respectively - - versions: - type: file - description: | - File containing software versions - Structure: [path(versions.yml)] - pattern: "versions.yml" - -authors: - - "@Ales-ibt" -maintainers: - - "@Ales-ibt" - - "@mberacochea" diff --git a/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/nextflow.config b/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/nextflow.config deleted file mode 100644 index a57a5cb..0000000 --- a/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/nextflow.config +++ /dev/null @@ -1,6 +0,0 @@ -process { - withName: BWAMEM2_MEM { - ext.args = "-M" - ext.args2 = "-f 4 -F 256 -uS" - } -} diff --git a/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/tags.yml b/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/tags.yml deleted file mode 100644 index 6614dbd..0000000 --- a/subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/reads_bwamem2_decontamination: - - subworkflows/ebi-metagenomics/reads_bwamem2_decontamination/** diff --git a/subworkflows/local/reads_qc.nf b/subworkflows/local/reads_qc.nf index 0346739..68e597b 100644 --- a/subworkflows/local/reads_qc.nf +++ b/subworkflows/local/reads_qc.nf @@ -1,6 +1,6 @@ -include { FASTP } from '../../modules/nf-core/fastp/main' -include { READS_BWAMEM2_DECONTAMINATION as HUMAN_PHIX_DECONTAMINATION } from '../ebi-metagenomics/reads_bwamem2_decontamination/main' -include { READS_BWAMEM2_DECONTAMINATION as HOST_DECONTAMINATION } from '../ebi-metagenomics/reads_bwamem2_decontamination/main' +include { FASTP } from '../../modules/nf-core/fastp/main' +include { BWAMEM2DECONTNOBAMS as HUMAN_PHIX_DECONTAMINATION } from '../../modules/ebi-metagenomics/bwamem2decontnobams/main' +include { 
BWAMEM2DECONTNOBAMS as HOST_DECONTAMINATION } from '../../modules/ebi-metagenomics/bwamem2decontnobams/main' workflow READS_QC { @@ -53,7 +53,7 @@ workflow READS_QC { ch_versions = ch_versions.mix(HUMAN_PHIX_DECONTAMINATION.out.versions) - decontaminated_reads = HUMAN_PHIX_DECONTAMINATION.out.decontaminated_reads + decontaminated_reads = HUMAN_PHIX_DECONTAMINATION.out.decont_reads } else { decontaminated_reads = FASTP.out.reads @@ -67,13 +67,13 @@ workflow READS_QC { } HOST_DECONTAMINATION( - HUMAN_PHIX_DECONTAMINATION.out.decontaminated_reads, + HUMAN_PHIX_DECONTAMINATION.out.decont_reads, ch_bwamem2_host_refs ) ch_versions = ch_versions.mix(HOST_DECONTAMINATION.out.versions) - decontaminated_reads = HOST_DECONTAMINATION.out.decontaminated_reads + decontaminated_reads = HOST_DECONTAMINATION.out.decont_reads } emit: diff --git a/tests/main.nf.test b/tests/main.nf.test index 32a50cf..2d1a3d9 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -16,7 +16,7 @@ nextflow_pipeline { then { with(workflow) { assert success - assert trace.tasks().size() == 19 + assert trace.tasks().size() == 18 } } diff --git a/workflows/miassembler.nf b/workflows/miassembler.nf index 37b35c6..a7145a4 100644 --- a/workflows/miassembler.nf +++ b/workflows/miassembler.nf @@ -137,9 +137,9 @@ workflow MIASSEMBLER { ch_versions = ch_versions.mix(READS_QC.out.versions) - /******************/ - /* Assembly */ - /******************/ + /*********************/ + /* Assembly */ + /********************/ /* -- Clarification -- */ /* At the moment, the pipeline only processes one set of reads at a time. 
From 04afe3819afd062b27dd8f0d92d7ec17437d650f Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Mon, 3 Jun 2024 13:55:46 +0100 Subject: [PATCH 08/11] Adjust the modules.config --- conf/modules.config | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9705bb8..9baa21b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -72,16 +72,6 @@ process { ] } - // This BWAMEM2_MEM belongs to the decontamination module - withName: 'BWAMEM2_MEM_DECONT' { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } - - ext.args = "-M" - ext.args2 = "-f 12 -F 256 -uS" - } - // This BWAMEM2_MEM belongs to the coverage module withName: 'BWAMEM2_MEM_COVERAGE' { cpus = { check_max( 12 * task.attempt, 'cpus' ) } @@ -92,7 +82,7 @@ process { ext.args2 = "-F 268 -uS" } - withName: 'SAMTOOLS_BAM2FQ' { + withName: 'BWAMEM2DECONTNOBAMS' { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } From bcfa37d89008a37547e72c70332b38c25e4a0095 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Mon, 3 Jun 2024 14:52:42 +0100 Subject: [PATCH 09/11] Adjust the memory reqs for the decontamination --- conf/modules.config | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9baa21b..cd45b97 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -82,13 +82,23 @@ process { ext.args2 = "-F 268 -uS" } + /* Decontamination */ withName: 'BWAMEM2DECONTNOBAMS' { cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - ext.prefix = "decontaminated" + ext.prefix = "${meta2.id}_decontaminated" + } + + withName: 
'HUMAN_PHIX_DECONTAMINATION' { + memory = { check_max( 64.GB * task.attempt, 'memory' ) } + } + + withName: 'HOST_DECONTAMINATION' { + memory = { check_max( 24.GB * task.attempt, 'memory' ) } } + /* --------- */ + /* Assembly */ withName: 'SPADES' { memory = { check_max(params.assembly_memory.GB * task.attempt, 'memory') } cpus = { check_max( 32 * task.attempt, 'cpus') } From 303026eddd2fbae66d9acd299773d329ea99a798 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Mon, 3 Jun 2024 14:55:14 +0100 Subject: [PATCH 10/11] Fix config (meta2 replacement) --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index cd45b97..45bae91 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -86,7 +86,7 @@ process { withName: 'BWAMEM2DECONTNOBAMS' { cpus = { check_max( 2 * task.attempt, 'cpus' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - ext.prefix = "${meta2.id}_decontaminated" + ext.prefix = { "${meta2.id}_decontaminated" } } withName: 'HUMAN_PHIX_DECONTAMINATION' { From 5e9ba5329c08fcd134d17492083be4a2d9528a03 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Mon, 3 Jun 2024 16:17:07 +0100 Subject: [PATCH 11/11] Change the decontaminated prefix --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 45bae91..4d0cfe2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -86,7 +86,7 @@ process { withName: 'BWAMEM2DECONTNOBAMS' { cpus = { check_max( 2 * task.attempt, 'cpus' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - ext.prefix = { "${meta2.id}_decontaminated" } + ext.prefix = "decontaminated" } withName: 'HUMAN_PHIX_DECONTAMINATION' {