From 4552dc7119878a0c2ae1209b59ba8ae4df090ed0 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 1 Nov 2024 15:03:00 +0100 Subject: [PATCH 01/11] move short read preprocessing into a subworkflow --- subworkflows/local/shortread_preprocessing.nf | 182 ++++++++++++++++++ workflows/mag.nf | 151 +-------------- 2 files changed, 189 insertions(+), 144 deletions(-) create mode 100644 subworkflows/local/shortread_preprocessing.nf diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf new file mode 100644 index 00000000..c8de5bcf --- /dev/null +++ b/subworkflows/local/shortread_preprocessing.nf @@ -0,0 +1,182 @@ +/* + * SHORTREAD_PREPROCESSING: Preprocessing and QC for short reads + */ + +include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIMMED } from '../../modules/nf-core/fastqc/main' +include { FASTP } from '../../modules/nf-core/fastp/main' +include { ADAPTERREMOVAL_PE, ADAPTERREMOVAL_SE } from '../../modules/nf-core/adapterremoval/main' +include { BOWTIE2_HOST_REMOVAL_BUILD, BOWTIE2_HOST_REMOVAL_ALIGN } from '../../modules/nf-core/bowtie2/main' +include { BOWTIE2_PHIX_REMOVAL_BUILD, BOWTIE2_PHIX_REMOVAL_ALIGN } from '../../modules/nf-core/bowtie2/main' +include { CAT_FASTQ } from '../../modules/nf-core/cat/main' +include { SEQTK_MERGEPE } from '../../modules/nf-core/seqtk/main' +include { BBMAP_BBNORM } from '../../modules/nf-core/bbmap/main' + +workflow SHORTREAD_PREPROCESSING { + take: + ch_raw_short_reads // [ [meta] , fastq1, fastq2] (mandatory) + ch_host_fasta // [fasta] (optional) + ch_phix_db_file // [fasta] (optional) + ch_metaeuk_db // [fasta] (optional) + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + FASTQC_RAW( + ch_raw_short_reads + ) + ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_RAW.out.zip.collect { it[1] }.ifEmpty([])) + + 
ch_bowtie2_removal_host_multiqc = Channel.empty() + if (!params.assembly_input) { + if (!params.skip_clipping) { + if (params.clip_tool == 'fastp') { + ch_clipmerge_out = FASTP( + ch_raw_short_reads, + [], + params.fastp_save_trimmed_fail, + [] + ) + ch_short_reads_prepped = FASTP.out.reads + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json.collect { it[1] }.ifEmpty([])) + } + else if (params.clip_tool == 'adapterremoval') { + + // due to strange output file scheme in AR2, have to manually separate + // SE/PE to allow correct pulling of reads after. + ch_adapterremoval_in = ch_raw_short_reads.branch { + single: it[0]['single_end'] + paired: !it[0]['single_end'] + } + + ADAPTERREMOVAL_PE(ch_adapterremoval_in.paired, []) + ADAPTERREMOVAL_SE(ch_adapterremoval_in.single, []) + + ch_short_reads_prepped = Channel.empty() + ch_short_reads_prepped = ch_short_reads_prepped.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) + + ch_versions = ch_versions.mix(ADAPTERREMOVAL_PE.out.versions.first(), ADAPTERREMOVAL_SE.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_SE.out.settings.collect { it[1] }.ifEmpty([])) + } + } + else { + ch_short_reads_prepped = ch_raw_short_reads + } + + if (params.host_fasta) { + if (params.host_fasta_bowtie2index) { + ch_host_bowtie2index = file(params.host_fasta_bowtie2index, checkIfExists: true) + } + else { + BOWTIE2_HOST_REMOVAL_BUILD( + ch_host_fasta + ) + ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index + } + } + + if (params.host_fasta || params.host_genome) { + BOWTIE2_HOST_REMOVAL_ALIGN( + ch_short_reads_prepped, + ch_host_bowtie2index + ) + ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads + ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) + 
ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log.collect { it[1] }.ifEmpty([])) + } + else { + ch_short_reads_hostremoved = ch_short_reads_prepped + } + + if (!params.keep_phix) { + BOWTIE2_PHIX_REMOVAL_BUILD( + ch_phix_db_file + ) + BOWTIE2_PHIX_REMOVAL_ALIGN( + ch_short_reads_hostremoved, + BOWTIE2_PHIX_REMOVAL_BUILD.out.index + ) + ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads + ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log.collect { it[1] }.ifEmpty([])) + } + else { + ch_short_reads_phixremoved = ch_short_reads_hostremoved + } + + if (!(params.keep_phix && params.skip_clipping && !(params.host_genome || params.host_fasta))) { + FASTQC_TRIMMED( + ch_short_reads_phixremoved + ) + ch_versions = ch_versions.mix(FASTQC_TRIMMED.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMMED.out.zip.collect { it[1] }.ifEmpty([])) + } + + // Run/Lane merging + + ch_short_reads_forcat = ch_short_reads_phixremoved + .map { meta, reads -> + def meta_new = meta - meta.subMap('run') + [meta_new, reads] + } + .groupTuple() + .branch { meta, reads -> + cat: reads.size() >= 2 + skip_cat: true + } + + CAT_FASTQ(ch_short_reads_forcat.cat.map { meta, reads -> [meta, reads.flatten()] }) + + // Ensure we don't have nests of nests so that structure is in form expected for assembly + ch_short_reads_catskipped = ch_short_reads_forcat.skip_cat.map { meta, reads -> + def new_reads = meta.single_end ? reads[0] : reads.flatten() + [meta, new_reads] + } + + // Combine single run and multi-run-merged data + ch_short_reads = Channel.empty() + ch_short_reads = CAT_FASTQ.out.reads.mix(ch_short_reads_catskipped) + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) + + if (params.bbnorm) { + if (params.coassemble_group) { + // Interleave pairs, to be able to treat them as single ends when calling bbnorm. 
This prepares + // for dropping the single_end parameter, but keeps assembly modules as they are, i.e. not + // accepting a mix of single end and pairs. + SEQTK_MERGEPE( + ch_short_reads.filter { !it[0].single_end } + ) + ch_versions = ch_versions.mix(SEQTK_MERGEPE.out.versions.first()) + // Combine the interleaved pairs with any single end libraries. Set the meta.single_end to true (used by the bbnorm module). + ch_bbnorm = SEQTK_MERGEPE.out.reads + .mix(ch_short_reads.filter { it[0].single_end }) + .map { [[id: sprintf("group%s", it[0].group), group: it[0].group, single_end: true], it[1]] } + .groupTuple() + } + else { + ch_bbnorm = ch_short_reads + } + BBMAP_BBNORM(ch_bbnorm) + ch_versions = ch_versions.mix(BBMAP_BBNORM.out.versions) + ch_short_reads_assembly = BBMAP_BBNORM.out.fastq + } + else { + ch_short_reads_assembly = ch_short_reads + } + } + else { + ch_short_reads = ch_raw_short_reads.map { meta, reads -> + def meta_new = meta - meta.subMap('run') + [meta_new, reads] + } + } + + emit: + short_reads = ch_short_reads + versions = ch_versions + multiqc_files = ch_multiqc_files +} diff --git a/workflows/mag.nf b/workflows/mag.nf index 7afb4316..e542accb 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -197,152 +197,15 @@ workflow MAG { ================================================================================ */ - FASTQC_RAW( - ch_raw_short_reads + SHORTREAD_PREPROCESSING( + ch_raw_short_reads, + ch_host_fasta, + ch_phix_db_file, + ch_metaeuk_db ) - ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) - ch_bowtie2_removal_host_multiqc = Channel.empty() - if (!params.assembly_input) { - if (!params.skip_clipping) { - if (params.clip_tool == 'fastp') { - ch_clipmerge_out = FASTP( - ch_raw_short_reads, - [], - params.fastp_save_trimmed_fail, - [] - ) - ch_short_reads_prepped = FASTP.out.reads - ch_versions = ch_versions.mix(FASTP.out.versions.first()) - } - else if (params.clip_tool == 'adapterremoval') { - - // due to strange output file 
scheme in AR2, have to manually separate - // SE/PE to allow correct pulling of reads after. - ch_adapterremoval_in = ch_raw_short_reads.branch { - single: it[0]['single_end'] - paired: !it[0]['single_end'] - } - - ADAPTERREMOVAL_PE(ch_adapterremoval_in.paired, []) - ADAPTERREMOVAL_SE(ch_adapterremoval_in.single, []) - - ch_short_reads_prepped = Channel.empty() - ch_short_reads_prepped = ch_short_reads_prepped.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) - - ch_versions = ch_versions.mix(ADAPTERREMOVAL_PE.out.versions.first(), ADAPTERREMOVAL_SE.out.versions.first()) - } - } - else { - ch_short_reads_prepped = ch_raw_short_reads - } - - if (params.host_fasta) { - if (params.host_fasta_bowtie2index) { - ch_host_bowtie2index = file(params.host_fasta_bowtie2index, checkIfExists: true) - } - else { - BOWTIE2_HOST_REMOVAL_BUILD( - ch_host_fasta - ) - ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index - } - } - - ch_bowtie2_removal_host_multiqc = Channel.empty() - if (params.host_fasta || params.host_genome) { - BOWTIE2_HOST_REMOVAL_ALIGN( - ch_short_reads_prepped, - ch_host_bowtie2index - ) - ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads - ch_bowtie2_removal_host_multiqc = BOWTIE2_HOST_REMOVAL_ALIGN.out.log - ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) - } - else { - ch_short_reads_hostremoved = ch_short_reads_prepped - } - - if (!params.keep_phix) { - BOWTIE2_PHIX_REMOVAL_BUILD( - ch_phix_db_file - ) - BOWTIE2_PHIX_REMOVAL_ALIGN( - ch_short_reads_hostremoved, - BOWTIE2_PHIX_REMOVAL_BUILD.out.index - ) - ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads - ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) - } - else { - ch_short_reads_phixremoved = ch_short_reads_hostremoved - } - - if (!(params.keep_phix && params.skip_clipping && !(params.host_genome || params.host_fasta))) { - FASTQC_TRIMMED( - ch_short_reads_phixremoved - 
) - ch_versions = ch_versions.mix(FASTQC_TRIMMED.out.versions) - } - - // Run/Lane merging - - ch_short_reads_forcat = ch_short_reads_phixremoved - .map { meta, reads -> - def meta_new = meta - meta.subMap('run') - [meta_new, reads] - } - .groupTuple() - .branch { meta, reads -> - cat: reads.size() >= 2 - skip_cat: true - } - - CAT_FASTQ(ch_short_reads_forcat.cat.map { meta, reads -> [meta, reads.flatten()] }) - - // Ensure we don't have nests of nests so that structure is in form expected for assembly - ch_short_reads_catskipped = ch_short_reads_forcat.skip_cat.map { meta, reads -> - def new_reads = meta.single_end ? reads[0] : reads.flatten() - [meta, new_reads] - } - - // Combine single run and multi-run-merged data - ch_short_reads = Channel.empty() - ch_short_reads = CAT_FASTQ.out.reads.mix(ch_short_reads_catskipped) - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) - - if (params.bbnorm) { - if (params.coassemble_group) { - // Interleave pairs, to be able to treat them as single ends when calling bbnorm. This prepares - // for dropping the single_end parameter, but keeps assembly modules as they are, i.e. not - // accepting a mix of single end and pairs. - SEQTK_MERGEPE( - ch_short_reads.filter { !it[0].single_end } - ) - ch_versions = ch_versions.mix(SEQTK_MERGEPE.out.versions.first()) - // Combine the interleaved pairs with any single end libraries. Set the meta.single_end to true (used by the bbnorm module). 
- ch_bbnorm = SEQTK_MERGEPE.out.reads - .mix(ch_short_reads.filter { it[0].single_end }) - .map { [[id: sprintf("group%s", it[0].group), group: it[0].group, single_end: true], it[1]] } - .groupTuple() - } - else { - ch_bbnorm = ch_short_reads - } - BBMAP_BBNORM(ch_bbnorm) - ch_versions = ch_versions.mix(BBMAP_BBNORM.out.versions) - ch_short_reads_assembly = BBMAP_BBNORM.out.fastq - } - else { - ch_short_reads_assembly = ch_short_reads - } - } - else { - ch_short_reads = ch_raw_short_reads.map { meta, reads -> - def meta_new = meta - meta.subMap('run') - [meta_new, reads] - } - } + ch_versions = ch_versions.mix(SHORTREAD_PREPROCESSING.out.versions) + ch_short_reads = SHORTREAD_PREPROCESSING.out.short_reads /* ================================================================================ From 00ac9250bc8c81c4312142634cf32804621d4fe3 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Sat, 23 Nov 2024 07:33:59 +0100 Subject: [PATCH 02/11] add include statements --- subworkflows/local/shortread_preprocessing.nf | 16 ++++++++++------ workflows/mag.nf | 2 ++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index c8de5bcf..2ae35237 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -5,12 +5,15 @@ include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' include { FASTQC as FASTQC_TRIMMED } from '../../modules/nf-core/fastqc/main' include { FASTP } from '../../modules/nf-core/fastp/main' -include { ADAPTERREMOVAL_PE, ADAPTERREMOVAL_SE } from '../../modules/nf-core/adapterremoval/main' -include { BOWTIE2_HOST_REMOVAL_BUILD, BOWTIE2_HOST_REMOVAL_ALIGN } from '../../modules/nf-core/bowtie2/main' -include { BOWTIE2_PHIX_REMOVAL_BUILD, BOWTIE2_PHIX_REMOVAL_ALIGN } from '../../modules/nf-core/bowtie2/main' -include { CAT_FASTQ } from '../../modules/nf-core/cat/main' -include { SEQTK_MERGEPE } 
from '../../modules/nf-core/seqtk/main' -include { BBMAP_BBNORM } from '../../modules/nf-core/bbmap/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../modules/nf-core/adapterremoval/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../../modules/nf-core/adapterremoval/main' +include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' +include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' +include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' +include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' +include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' +include { SEQTK_MERGEPE } from '../../modules/nf-core/seqtk/mergepe/main' +include { BBMAP_BBNORM } from '../../modules/nf-core/bbmap/bbnorm/main' workflow SHORTREAD_PREPROCESSING { take: @@ -177,6 +180,7 @@ workflow SHORTREAD_PREPROCESSING { emit: short_reads = ch_short_reads + short_reads_assembly = ch_short_reads_assembly versions = ch_versions multiqc_files = ch_multiqc_files } diff --git a/workflows/mag.nf b/workflows/mag.nf index e542accb..269d3fda 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -24,6 +24,7 @@ include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subwo include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' include { DEPTHS } from '../subworkflows/local/depths' include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' +include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' // // MODULE: Installed directly from nf-core/modules @@ -206,6 +207,7 @@ workflow MAG { ch_versions = ch_versions.mix(SHORTREAD_PREPROCESSING.out.versions) ch_short_reads = SHORTREAD_PREPROCESSING.out.short_reads + ch_short_reads_assembly =
SHORTREAD_PREPROCESSING.out.short_reads_assembly /* ================================================================================ From 2768f5eb706d344e744e962df348aed04549783d Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Sat, 23 Nov 2024 10:09:08 +0100 Subject: [PATCH 03/11] Fix multiqc input channel, channel mixing for short read processing is now handled in subworkflow --- workflows/mag.nf | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/workflows/mag.nf b/workflows/mag.nf index 352643b8..87397d97 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -880,32 +880,10 @@ workflow MAG { ) ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_RAW.out.zip.collect { it[1] }.ifEmpty([])) + // Add all files from preprocessing to the MultiQC input channel + ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.multiqc_files.collect { it[1] }.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.multiqc_files.collect { it[1] }.ifEmpty([])) - if (!params.assembly_input) { - - if (!params.skip_clipping && params.clip_tool == 'adapterremoval') { - ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings.collect { it[1] }.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_SE.out.settings.collect { it[1] }.ifEmpty([])) - } - else if (!params.skip_clipping && params.clip_tool == 'fastp') { - ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json.collect { it[1] }.ifEmpty([])) - } - - if (!(params.keep_phix && params.skip_clipping && !(params.host_genome || params.host_fasta))) { - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMMED.out.zip.collect { it[1] }.ifEmpty([])) - } - - if (params.host_fasta || params.host_genome) { - ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log.collect { it[1] }.ifEmpty([])) - } - - if (!params.keep_phix) { - ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log.collect { 
it[1] }.ifEmpty([])) - } - } - ch_multiqc_files = ch_multiqc_files.mix(CENTRIFUGE_KREPORT.out.kreport.collect { it[1] }.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2.out.report.collect { it[1] }.ifEmpty([])) From 9a039da6a98a680e3351a842391f6da8dac7178f Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Sun, 24 Nov 2024 20:32:47 +0100 Subject: [PATCH 04/11] Fix multiqc file mix --- subworkflows/local/shortread_preprocessing.nf | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index 2ae35237..2fbb3e95 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -30,13 +30,13 @@ workflow SHORTREAD_PREPROCESSING { ch_raw_short_reads ) ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_RAW.out.zip.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_RAW.out.zip) ch_bowtie2_removal_host_multiqc = Channel.empty() if (!params.assembly_input) { if (!params.skip_clipping) { if (params.clip_tool == 'fastp') { - ch_clipmerge_out = FASTP( + FASTP( ch_raw_short_reads, [], params.fastp_save_trimmed_fail, @@ -44,7 +44,8 @@ workflow SHORTREAD_PREPROCESSING { ) ch_short_reads_prepped = FASTP.out.reads ch_versions = ch_versions.mix(FASTP.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json) + } else if (params.clip_tool == 'adapterremoval') { @@ -62,8 +63,8 @@ workflow SHORTREAD_PREPROCESSING { ch_short_reads_prepped = ch_short_reads_prepped.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) ch_versions = ch_versions.mix(ADAPTERREMOVAL_PE.out.versions.first(), ADAPTERREMOVAL_SE.out.versions.first()) - ch_multiqc_files = 
ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings.collect { it[1] }.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_SE.out.settings.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings) + ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_SE.out.settings) } } else { @@ -89,7 +90,7 @@ workflow SHORTREAD_PREPROCESSING { ) ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log) } else { ch_short_reads_hostremoved = ch_short_reads_prepped @@ -105,7 +106,7 @@ workflow SHORTREAD_PREPROCESSING { ) ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log) } else { ch_short_reads_phixremoved = ch_short_reads_hostremoved @@ -116,7 +117,7 @@ workflow SHORTREAD_PREPROCESSING { ch_short_reads_phixremoved ) ch_versions = ch_versions.mix(FASTQC_TRIMMED.out.versions) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMMED.out.zip.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMMED.out.zip) } // Run/Lane merging From 78615271d3979208f5fa793d01cc0528d768afdb Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Sun, 24 Nov 2024 20:52:02 +0100 Subject: [PATCH 05/11] Move assembly-input logic to main workflow --- subworkflows/local/shortread_preprocessing.nf | 234 +++++++++--------- workflows/mag.nf | 26 +- 2 files changed, 130 insertions(+), 130 deletions(-) diff --git a/subworkflows/local/shortread_preprocessing.nf 
b/subworkflows/local/shortread_preprocessing.nf index 2fbb3e95..2181265d 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -33,150 +33,142 @@ workflow SHORTREAD_PREPROCESSING { ch_multiqc_files = ch_multiqc_files.mix(FASTQC_RAW.out.zip) ch_bowtie2_removal_host_multiqc = Channel.empty() - if (!params.assembly_input) { - if (!params.skip_clipping) { - if (params.clip_tool == 'fastp') { - FASTP( - ch_raw_short_reads, - [], - params.fastp_save_trimmed_fail, - [] - ) - ch_short_reads_prepped = FASTP.out.reads - ch_versions = ch_versions.mix(FASTP.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json) + if (!params.skip_clipping) { + if (params.clip_tool == 'fastp') { + FASTP( + ch_raw_short_reads, + [], + params.fastp_save_trimmed_fail, + [] + ) + ch_short_reads_prepped = FASTP.out.reads + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json) - } - else if (params.clip_tool == 'adapterremoval') { + } + else if (params.clip_tool == 'adapterremoval') { - // due to strange output file scheme in AR2, have to manually separate - // SE/PE to allow correct pulling of reads after. - ch_adapterremoval_in = ch_raw_short_reads.branch { - single: it[0]['single_end'] - paired: !it[0]['single_end'] - } + // due to strange output file scheme in AR2, have to manually separate + // SE/PE to allow correct pulling of reads after. 
+ ch_adapterremoval_in = ch_raw_short_reads.branch { + single: it[0]['single_end'] + paired: !it[0]['single_end'] + } - ADAPTERREMOVAL_PE(ch_adapterremoval_in.paired, []) - ADAPTERREMOVAL_SE(ch_adapterremoval_in.single, []) + ADAPTERREMOVAL_PE(ch_adapterremoval_in.paired, []) + ADAPTERREMOVAL_SE(ch_adapterremoval_in.single, []) - ch_short_reads_prepped = Channel.empty() - ch_short_reads_prepped = ch_short_reads_prepped.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) + ch_short_reads_prepped = Channel.empty() + ch_short_reads_prepped = ch_short_reads_prepped.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) - ch_versions = ch_versions.mix(ADAPTERREMOVAL_PE.out.versions.first(), ADAPTERREMOVAL_SE.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings) - ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_SE.out.settings) - } - } - else { - ch_short_reads_prepped = ch_raw_short_reads - } - - if (params.host_fasta) { - if (params.host_fasta_bowtie2index) { - ch_host_bowtie2index = file(params.host_fasta_bowtie2index, checkIfExists: true) - } - else { - BOWTIE2_HOST_REMOVAL_BUILD( - ch_host_fasta - ) - ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index - } + ch_versions = ch_versions.mix(ADAPTERREMOVAL_PE.out.versions.first(), ADAPTERREMOVAL_SE.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings) + ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_SE.out.settings) } + } + else { + ch_short_reads_prepped = ch_raw_short_reads + } - if (params.host_fasta || params.host_genome) { - BOWTIE2_HOST_REMOVAL_ALIGN( - ch_short_reads_prepped, - ch_host_bowtie2index - ) - ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads - ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log) + if 
(params.host_fasta) { + if (params.host_fasta_bowtie2index) { + ch_host_bowtie2index = file(params.host_fasta_bowtie2index, checkIfExists: true) } else { - ch_short_reads_hostremoved = ch_short_reads_prepped - } - - if (!params.keep_phix) { - BOWTIE2_PHIX_REMOVAL_BUILD( - ch_phix_db_file + BOWTIE2_HOST_REMOVAL_BUILD( + ch_host_fasta ) - BOWTIE2_PHIX_REMOVAL_ALIGN( - ch_short_reads_hostremoved, - BOWTIE2_PHIX_REMOVAL_BUILD.out.index - ) - ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads - ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log) - } - else { - ch_short_reads_phixremoved = ch_short_reads_hostremoved + ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index } + } - if (!(params.keep_phix && params.skip_clipping && !(params.host_genome || params.host_fasta))) { - FASTQC_TRIMMED( - ch_short_reads_phixremoved - ) - ch_versions = ch_versions.mix(FASTQC_TRIMMED.out.versions) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMMED.out.zip) - } + if (params.host_fasta || params.host_genome) { + BOWTIE2_HOST_REMOVAL_ALIGN( + ch_short_reads_prepped, + ch_host_bowtie2index + ) + ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads + ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log) + } + else { + ch_short_reads_hostremoved = ch_short_reads_prepped + } - // Run/Lane merging + if (!params.keep_phix) { + BOWTIE2_PHIX_REMOVAL_BUILD( + ch_phix_db_file + ) + BOWTIE2_PHIX_REMOVAL_ALIGN( + ch_short_reads_hostremoved, + BOWTIE2_PHIX_REMOVAL_BUILD.out.index + ) + ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads + ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log) + } + else { + ch_short_reads_phixremoved = 
ch_short_reads_hostremoved + } - ch_short_reads_forcat = ch_short_reads_phixremoved - .map { meta, reads -> - def meta_new = meta - meta.subMap('run') - [meta_new, reads] - } - .groupTuple() - .branch { meta, reads -> - cat: reads.size() >= 2 - skip_cat: true - } + if (!(params.keep_phix && params.skip_clipping && !(params.host_genome || params.host_fasta))) { + FASTQC_TRIMMED( + ch_short_reads_phixremoved + ) + ch_versions = ch_versions.mix(FASTQC_TRIMMED.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMMED.out.zip) + } - CAT_FASTQ(ch_short_reads_forcat.cat.map { meta, reads -> [meta, reads.flatten()] }) + // Run/Lane merging - // Ensure we don't have nests of nests so that structure is in form expected for assembly - ch_short_reads_catskipped = ch_short_reads_forcat.skip_cat.map { meta, reads -> - def new_reads = meta.single_end ? reads[0] : reads.flatten() - [meta, new_reads] + ch_short_reads_forcat = ch_short_reads_phixremoved + .map { meta, reads -> + def meta_new = meta - meta.subMap('run') + [meta_new, reads] + } + .groupTuple() + .branch { meta, reads -> + cat: reads.size() >= 2 + skip_cat: true } - // Combine single run and multi-run-merged data - ch_short_reads = Channel.empty() - ch_short_reads = CAT_FASTQ.out.reads.mix(ch_short_reads_catskipped) - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) - - if (params.bbnorm) { - if (params.coassemble_group) { - // Interleave pairs, to be able to treat them as single ends when calling bbnorm. This prepares - // for dropping the single_end parameter, but keeps assembly modules as they are, i.e. not - // accepting a mix of single end and pairs. - SEQTK_MERGEPE( - ch_short_reads.filter { !it[0].single_end } - ) - ch_versions = ch_versions.mix(SEQTK_MERGEPE.out.versions.first()) - // Combine the interleaved pairs with any single end libraries. Set the meta.single_end to true (used by the bbnorm module). 
- ch_bbnorm = SEQTK_MERGEPE.out.reads - .mix(ch_short_reads.filter { it[0].single_end }) - .map { [[id: sprintf("group%s", it[0].group), group: it[0].group, single_end: true], it[1]] } - .groupTuple() - } - else { - ch_bbnorm = ch_short_reads - } - BBMAP_BBNORM(ch_bbnorm) - ch_versions = ch_versions.mix(BBMAP_BBNORM.out.versions) - ch_short_reads_assembly = BBMAP_BBNORM.out.fastq + CAT_FASTQ(ch_short_reads_forcat.cat.map { meta, reads -> [meta, reads.flatten()] }) + + // Ensure we don't have nests of nests so that structure is in form expected for assembly + ch_short_reads_catskipped = ch_short_reads_forcat.skip_cat.map { meta, reads -> + def new_reads = meta.single_end ? reads[0] : reads.flatten() + [meta, new_reads] + } + + // Combine single run and multi-run-merged data + ch_short_reads = Channel.empty() + ch_short_reads = CAT_FASTQ.out.reads.mix(ch_short_reads_catskipped) + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) + + if (params.bbnorm) { + if (params.coassemble_group) { + // Interleave pairs, to be able to treat them as single ends when calling bbnorm. This prepares + // for dropping the single_end parameter, but keeps assembly modules as they are, i.e. not + // accepting a mix of single end and pairs. + SEQTK_MERGEPE( + ch_short_reads.filter { !it[0].single_end } + ) + ch_versions = ch_versions.mix(SEQTK_MERGEPE.out.versions.first()) + // Combine the interleaved pairs with any single end libraries. Set the meta.single_end to true (used by the bbnorm module). 
+ ch_bbnorm = SEQTK_MERGEPE.out.reads + .mix(ch_short_reads.filter { it[0].single_end }) + .map { [[id: sprintf("group%s", it[0].group), group: it[0].group, single_end: true], it[1]] } + .groupTuple() } else { - ch_short_reads_assembly = ch_short_reads + ch_bbnorm = ch_short_reads } + BBMAP_BBNORM(ch_bbnorm) + ch_versions = ch_versions.mix(BBMAP_BBNORM.out.versions) + ch_short_reads_assembly = BBMAP_BBNORM.out.fastq } else { - ch_short_reads = ch_raw_short_reads.map { meta, reads -> - def meta_new = meta - meta.subMap('run') - [meta_new, reads] - } + ch_short_reads_assembly = ch_short_reads } emit: diff --git a/workflows/mag.nf b/workflows/mag.nf index 87397d97..d86aed57 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -200,16 +200,24 @@ workflow MAG { ================================================================================ */ - SHORTREAD_PREPROCESSING( - ch_raw_short_reads, - ch_host_fasta, - ch_phix_db_file, - ch_metaeuk_db - ) + if (!params.assembly_input) { + SHORTREAD_PREPROCESSING( + ch_raw_short_reads, + ch_host_fasta, + ch_phix_db_file, + ch_metaeuk_db + ) - ch_versions = ch_versions.mix(SHORTREAD_PREPROCESSING.out.versions) - ch_short_reads = SHORTREAD_PREPROCESSING.out.short_reads - ch_short_reads_assembly = SHORTREAD_PREPROCESSING.out.short_reads_assembly + ch_versions = ch_versions.mix(SHORTREAD_PREPROCESSING.out.versions) + ch_short_reads = SHORTREAD_PREPROCESSING.out.short_reads + ch_short_reads_assembly = SHORTREAD_PREPROCESSING.out.short_reads_assembly + } + else { + ch_short_reads = ch_raw_short_reads.map { meta, reads -> + def meta_new = meta - meta.subMap('run') + [meta_new, reads] + } + } /* ================================================================================ From 745a6c1a6aede887b93465d29a56fa3bd9746af0 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Sun, 24 Nov 2024 20:57:33 +0100 Subject: [PATCH 06/11] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md 
index 98c1efbe..24ab3868 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [#716](https://github.com/nf-core/mag/pull/716) - Make short read processing a subworkflow (added by @muabnezor) - [#708](https://github.com/nf-core/mag/pull/708) - Fixed channel passed as GUNC input (added by @dialvarezs) ### `Dependencies` From 927371717e711af69625819409feca50fb079263 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Sun, 24 Nov 2024 21:04:46 +0100 Subject: [PATCH 07/11] remove include statements from main workflow --- workflows/mag.nf | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/workflows/mag.nf b/workflows/mag.nf index d86aed57..7a9ae1f6 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -30,20 +30,12 @@ include { SHORTREAD_PREPROCESSING } from '../subwo // MODULE: Installed directly from nf-core/modules // include { ARIA2 as ARIA2_UNTAR } from '../modules/nf-core/aria2/main' -include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' -include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' -include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' -include { FASTP } from '../modules/nf-core/fastp/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../modules/nf-core/adapterremoval/main' include { UNTAR as CENTRIFUGEDB_UNTAR } from '../modules/nf-core/untar/main' include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/centrifuge/centrifuge/main' include { CENTRIFUGE_KREPORT } from '../modules/nf-core/centrifuge/kreport/main' include { KRONA_KRONADB } from '../modules/nf-core/krona/kronadb/main' include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main' include { KRAKENTOOLS_KREPORT2KRONA as
KREPORT2KRONA_CENTRIFUGE } from '../modules/nf-core/krakentools/kreport2krona/main' -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' include { MEGAHIT } from '../modules/nf-core/megahit/main' include { SPADES as METASPADES } from '../modules/nf-core/spades/main' include { SPADES as METASPADESHYBRID } from '../modules/nf-core/spades/main' @@ -57,10 +49,6 @@ include { METAEUK_EASYPREDICT } from '../modul // // MODULE: Local to the pipeline // -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' include { KRAKEN2_DB_PREPARATION } from '../modules/local/kraken2_db_preparation' include { KRAKEN2 } from '../modules/local/kraken2' include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../modules/local/pool_single_reads' From 16ba80864b80c500061766a81c204ddd8a33a258 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Mon, 25 Nov 2024 08:11:01 +0100 Subject: [PATCH 08/11] Move assembly-input logic back into shortread subworkflow. 
Make sure to always declare the ch_short_reads_assembly even if assembly-input --- subworkflows/local/shortread_preprocessing.nf | 235 +++++++++--------- workflows/mag.nf | 27 +- 2 files changed, 132 insertions(+), 130 deletions(-) diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index 2181265d..f961584b 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -25,6 +25,7 @@ workflow SHORTREAD_PREPROCESSING { main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() + ch_short_reads_assembly = Channel.empty() FASTQC_RAW( ch_raw_short_reads @@ -33,142 +34,150 @@ workflow SHORTREAD_PREPROCESSING { ch_multiqc_files = ch_multiqc_files.mix(FASTQC_RAW.out.zip) ch_bowtie2_removal_host_multiqc = Channel.empty() - if (!params.skip_clipping) { - if (params.clip_tool == 'fastp') { - FASTP( - ch_raw_short_reads, - [], - params.fastp_save_trimmed_fail, - [] - ) - ch_short_reads_prepped = FASTP.out.reads - ch_versions = ch_versions.mix(FASTP.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json) - - } - else if (params.clip_tool == 'adapterremoval') { + if (!params.assembly_input) { + if (!params.skip_clipping) { + if (params.clip_tool == 'fastp') { + FASTP( + ch_raw_short_reads, + [], + params.fastp_save_trimmed_fail, + [] + ) + ch_short_reads_prepped = FASTP.out.reads + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json) - // due to strange output file scheme in AR2, have to manually separate - // SE/PE to allow correct pulling of reads after. 
- ch_adapterremoval_in = ch_raw_short_reads.branch { - single: it[0]['single_end'] - paired: !it[0]['single_end'] } + else if (params.clip_tool == 'adapterremoval') { - ADAPTERREMOVAL_PE(ch_adapterremoval_in.paired, []) - ADAPTERREMOVAL_SE(ch_adapterremoval_in.single, []) + // due to strange output file scheme in AR2, have to manually separate + // SE/PE to allow correct pulling of reads after. + ch_adapterremoval_in = ch_raw_short_reads.branch { + single: it[0]['single_end'] + paired: !it[0]['single_end'] + } - ch_short_reads_prepped = Channel.empty() - ch_short_reads_prepped = ch_short_reads_prepped.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) + ADAPTERREMOVAL_PE(ch_adapterremoval_in.paired, []) + ADAPTERREMOVAL_SE(ch_adapterremoval_in.single, []) - ch_versions = ch_versions.mix(ADAPTERREMOVAL_PE.out.versions.first(), ADAPTERREMOVAL_SE.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings) - ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_SE.out.settings) + ch_short_reads_prepped = Channel.empty() + ch_short_reads_prepped = ch_short_reads_prepped.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) + + ch_versions = ch_versions.mix(ADAPTERREMOVAL_PE.out.versions.first(), ADAPTERREMOVAL_SE.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings) + ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_SE.out.settings) + } + } + else { + ch_short_reads_prepped = ch_raw_short_reads + } + + if (params.host_fasta) { + if (params.host_fasta_bowtie2index) { + ch_host_bowtie2index = file(params.host_fasta_bowtie2index, checkIfExists: true) + } + else { + BOWTIE2_HOST_REMOVAL_BUILD( + ch_host_fasta + ) + ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index + } } - } - else { - ch_short_reads_prepped = ch_raw_short_reads - } - if (params.host_fasta) { - if (params.host_fasta_bowtie2index) { - 
ch_host_bowtie2index = file(params.host_fasta_bowtie2index, checkIfExists: true) + if (params.host_fasta || params.host_genome) { + BOWTIE2_HOST_REMOVAL_ALIGN( + ch_short_reads_prepped, + ch_host_bowtie2index + ) + ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads + ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log) } else { - BOWTIE2_HOST_REMOVAL_BUILD( - ch_host_fasta + ch_short_reads_hostremoved = ch_short_reads_prepped + } + + if (!params.keep_phix) { + BOWTIE2_PHIX_REMOVAL_BUILD( + ch_phix_db_file ) - ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index + BOWTIE2_PHIX_REMOVAL_ALIGN( + ch_short_reads_hostremoved, + BOWTIE2_PHIX_REMOVAL_BUILD.out.index + ) + ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads + ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log) + } + else { + ch_short_reads_phixremoved = ch_short_reads_hostremoved } - } - if (params.host_fasta || params.host_genome) { - BOWTIE2_HOST_REMOVAL_ALIGN( - ch_short_reads_prepped, - ch_host_bowtie2index - ) - ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads - ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log) - } - else { - ch_short_reads_hostremoved = ch_short_reads_prepped - } + if (!(params.keep_phix && params.skip_clipping && !(params.host_genome || params.host_fasta))) { + FASTQC_TRIMMED( + ch_short_reads_phixremoved + ) + ch_versions = ch_versions.mix(FASTQC_TRIMMED.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMMED.out.zip) + } - if (!params.keep_phix) { - BOWTIE2_PHIX_REMOVAL_BUILD( - ch_phix_db_file - ) - BOWTIE2_PHIX_REMOVAL_ALIGN( - ch_short_reads_hostremoved, - BOWTIE2_PHIX_REMOVAL_BUILD.out.index - ) - 
ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads - ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log) - } - else { - ch_short_reads_phixremoved = ch_short_reads_hostremoved - } + // Run/Lane merging - if (!(params.keep_phix && params.skip_clipping && !(params.host_genome || params.host_fasta))) { - FASTQC_TRIMMED( - ch_short_reads_phixremoved - ) - ch_versions = ch_versions.mix(FASTQC_TRIMMED.out.versions) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMMED.out.zip) - } + ch_short_reads_forcat = ch_short_reads_phixremoved + .map { meta, reads -> + def meta_new = meta - meta.subMap('run') + [meta_new, reads] + } + .groupTuple() + .branch { meta, reads -> + cat: reads.size() >= 2 + skip_cat: true + } - // Run/Lane merging + CAT_FASTQ(ch_short_reads_forcat.cat.map { meta, reads -> [meta, reads.flatten()] }) - ch_short_reads_forcat = ch_short_reads_phixremoved - .map { meta, reads -> - def meta_new = meta - meta.subMap('run') - [meta_new, reads] - } - .groupTuple() - .branch { meta, reads -> - cat: reads.size() >= 2 - skip_cat: true + // Ensure we don't have nests of nests so that structure is in form expected for assembly + ch_short_reads_catskipped = ch_short_reads_forcat.skip_cat.map { meta, reads -> + def new_reads = meta.single_end ? reads[0] : reads.flatten() + [meta, new_reads] } - CAT_FASTQ(ch_short_reads_forcat.cat.map { meta, reads -> [meta, reads.flatten()] }) - - // Ensure we don't have nests of nests so that structure is in form expected for assembly - ch_short_reads_catskipped = ch_short_reads_forcat.skip_cat.map { meta, reads -> - def new_reads = meta.single_end ? 
reads[0] : reads.flatten() - [meta, new_reads] - } - - // Combine single run and multi-run-merged data - ch_short_reads = Channel.empty() - ch_short_reads = CAT_FASTQ.out.reads.mix(ch_short_reads_catskipped) - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) - - if (params.bbnorm) { - if (params.coassemble_group) { - // Interleave pairs, to be able to treat them as single ends when calling bbnorm. This prepares - // for dropping the single_end parameter, but keeps assembly modules as they are, i.e. not - // accepting a mix of single end and pairs. - SEQTK_MERGEPE( - ch_short_reads.filter { !it[0].single_end } - ) - ch_versions = ch_versions.mix(SEQTK_MERGEPE.out.versions.first()) - // Combine the interleaved pairs with any single end libraries. Set the meta.single_end to true (used by the bbnorm module). - ch_bbnorm = SEQTK_MERGEPE.out.reads - .mix(ch_short_reads.filter { it[0].single_end }) - .map { [[id: sprintf("group%s", it[0].group), group: it[0].group, single_end: true], it[1]] } - .groupTuple() + // Combine single run and multi-run-merged data + ch_short_reads = Channel.empty() + ch_short_reads = CAT_FASTQ.out.reads.mix(ch_short_reads_catskipped) + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) + + if (params.bbnorm) { + if (params.coassemble_group) { + // Interleave pairs, to be able to treat them as single ends when calling bbnorm. This prepares + // for dropping the single_end parameter, but keeps assembly modules as they are, i.e. not + // accepting a mix of single end and pairs. + SEQTK_MERGEPE( + ch_short_reads.filter { !it[0].single_end } + ) + ch_versions = ch_versions.mix(SEQTK_MERGEPE.out.versions.first()) + // Combine the interleaved pairs with any single end libraries. Set the meta.single_end to true (used by the bbnorm module). 
+ ch_bbnorm = SEQTK_MERGEPE.out.reads + .mix(ch_short_reads.filter { it[0].single_end }) + .map { [[id: sprintf("group%s", it[0].group), group: it[0].group, single_end: true], it[1]] } + .groupTuple() + } + else { + ch_bbnorm = ch_short_reads + } + BBMAP_BBNORM(ch_bbnorm) + ch_versions = ch_versions.mix(BBMAP_BBNORM.out.versions) + ch_short_reads_assembly = BBMAP_BBNORM.out.fastq } else { - ch_bbnorm = ch_short_reads + ch_short_reads_assembly = ch_short_reads } - BBMAP_BBNORM(ch_bbnorm) - ch_versions = ch_versions.mix(BBMAP_BBNORM.out.versions) - ch_short_reads_assembly = BBMAP_BBNORM.out.fastq } else { - ch_short_reads_assembly = ch_short_reads + ch_short_reads = ch_raw_short_reads.map { meta, reads -> + def meta_new = meta - meta.subMap('run') + [meta_new, reads] + } } emit: diff --git a/workflows/mag.nf b/workflows/mag.nf index 7a9ae1f6..97b5c4ca 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -188,24 +188,17 @@ workflow MAG { ================================================================================ */ - if (!params.assembly_input) { - SHORTREAD_PREPROCESSING( - ch_raw_short_reads, - ch_host_fasta, - ch_phix_db_file, - ch_metaeuk_db - ) + SHORTREAD_PREPROCESSING( + ch_raw_short_reads, + ch_host_fasta, + ch_phix_db_file, + ch_metaeuk_db + ) + + ch_versions = ch_versions.mix(SHORTREAD_PREPROCESSING.out.versions) + ch_short_reads = SHORTREAD_PREPROCESSING.out.short_reads + ch_short_reads_assembly = SHORTREAD_PREPROCESSING.out.short_reads_assembly - ch_versions = ch_versions.mix(SHORTREAD_PREPROCESSING.out.versions) - ch_short_reads = SHORTREAD_PREPROCESSING.out.short_reads - ch_short_reads_assembly = SHORTREAD_PREPROCESSING.out.short_reads_assembly - } - else { - ch_short_reads = ch_raw_short_reads.map { meta, reads -> - def meta_new = meta - meta.subMap('run') - [meta_new, reads] - } - } /* ================================================================================ From f2507b25cb997fde073c0ab81bc5a9b10dd9e57e Mon Sep 17 00:00:00 2001 From: 
Adam Rosenbaum Date: Mon, 25 Nov 2024 09:27:12 +0100 Subject: [PATCH 09/11] Format include statements --- subworkflows/local/shortread_preprocessing.nf | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index f961584b..9b7314ff 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -2,18 +2,18 @@ * SHORTREAD_PREPROCESSING: Preprocessing and QC for short reads */ -include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIMMED } from '../../modules/nf-core/fastqc/main' -include { FASTP } from '../../modules/nf-core/fastp/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../modules/nf-core/adapterremoval/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../../modules/nf-core/adapterremoval/main' -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' -include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' -include { SEQTK_MERGEPE } from '../../modules/nf-core/seqtk/mergepe/main' -include { BBMAP_BBNORM } from '../..//modules/nf-core/bbmap/bbnorm/main' +include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIMMED } from '../../modules/nf-core/fastqc/main' +include { FASTP } from '../../modules/nf-core/fastp/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../modules/nf-core/adapterremoval/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } 
from '../../modules/nf-core/adapterremoval/main' +include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' +include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' +include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' +include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' +include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' +include { SEQTK_MERGEPE } from '../../modules/nf-core/seqtk/mergepe/main' +include { BBMAP_BBNORM } from '../../modules/nf-core/bbmap/bbnorm/main' workflow SHORTREAD_PREPROCESSING { take: From 0b3c9ba731ff866198da10b9a911670c3d8a29da Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Mon, 25 Nov 2024 10:37:44 +0100 Subject: [PATCH 10/11] Move assembly-input logic back to main workflow --- subworkflows/local/shortread_preprocessing.nf | 235 +++++++++--------- workflows/mag.nf | 32 ++- 2 files changed, 132 insertions(+), 135 deletions(-) diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index 9b7314ff..53f5e7c1 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -25,7 +25,6 @@ workflow SHORTREAD_PREPROCESSING { main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - ch_short_reads_assembly = Channel.empty() FASTQC_RAW( ch_raw_short_reads @@ -34,150 +33,142 @@ workflow SHORTREAD_PREPROCESSING { ch_multiqc_files = ch_multiqc_files.mix(FASTQC_RAW.out.zip) ch_bowtie2_removal_host_multiqc = Channel.empty() - if (!params.assembly_input) { - if (!params.skip_clipping) { - if (params.clip_tool == 'fastp') { - FASTP( - ch_raw_short_reads, - [], - params.fastp_save_trimmed_fail, - [] - ) - ch_short_reads_prepped = FASTP.out.reads - ch_versions = 
ch_versions.mix(FASTP.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json) + if (!params.skip_clipping) { + if (params.clip_tool == 'fastp') { + FASTP( + ch_raw_short_reads, + [], + params.fastp_save_trimmed_fail, + [] + ) + ch_short_reads_prepped = FASTP.out.reads + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json) - } - else if (params.clip_tool == 'adapterremoval') { + } + else if (params.clip_tool == 'adapterremoval') { - // due to strange output file scheme in AR2, have to manually separate - // SE/PE to allow correct pulling of reads after. - ch_adapterremoval_in = ch_raw_short_reads.branch { - single: it[0]['single_end'] - paired: !it[0]['single_end'] - } + // due to strange output file scheme in AR2, have to manually separate + // SE/PE to allow correct pulling of reads after. + ch_adapterremoval_in = ch_raw_short_reads.branch { + single: it[0]['single_end'] + paired: !it[0]['single_end'] + } - ADAPTERREMOVAL_PE(ch_adapterremoval_in.paired, []) - ADAPTERREMOVAL_SE(ch_adapterremoval_in.single, []) + ADAPTERREMOVAL_PE(ch_adapterremoval_in.paired, []) + ADAPTERREMOVAL_SE(ch_adapterremoval_in.single, []) - ch_short_reads_prepped = Channel.empty() - ch_short_reads_prepped = ch_short_reads_prepped.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) + ch_short_reads_prepped = Channel.empty() + ch_short_reads_prepped = ch_short_reads_prepped.mix(ADAPTERREMOVAL_SE.out.singles_truncated, ADAPTERREMOVAL_PE.out.paired_truncated) - ch_versions = ch_versions.mix(ADAPTERREMOVAL_PE.out.versions.first(), ADAPTERREMOVAL_SE.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings) - ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_SE.out.settings) - } - } - else { - ch_short_reads_prepped = ch_raw_short_reads - } - - if (params.host_fasta) { - if (params.host_fasta_bowtie2index) { - ch_host_bowtie2index = 
file(params.host_fasta_bowtie2index, checkIfExists: true) - } - else { - BOWTIE2_HOST_REMOVAL_BUILD( - ch_host_fasta - ) - ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index - } + ch_versions = ch_versions.mix(ADAPTERREMOVAL_PE.out.versions.first(), ADAPTERREMOVAL_SE.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings) + ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_SE.out.settings) } + } + else { + ch_short_reads_prepped = ch_raw_short_reads + } - if (params.host_fasta || params.host_genome) { - BOWTIE2_HOST_REMOVAL_ALIGN( - ch_short_reads_prepped, - ch_host_bowtie2index - ) - ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads - ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log) + if (params.host_fasta) { + if (params.host_fasta_bowtie2index) { + ch_host_bowtie2index = file(params.host_fasta_bowtie2index, checkIfExists: true) } else { - ch_short_reads_hostremoved = ch_short_reads_prepped - } - - if (!params.keep_phix) { - BOWTIE2_PHIX_REMOVAL_BUILD( - ch_phix_db_file + BOWTIE2_HOST_REMOVAL_BUILD( + ch_host_fasta ) - BOWTIE2_PHIX_REMOVAL_ALIGN( - ch_short_reads_hostremoved, - BOWTIE2_PHIX_REMOVAL_BUILD.out.index - ) - ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads - ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) - ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log) - } - else { - ch_short_reads_phixremoved = ch_short_reads_hostremoved + ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index } + } - if (!(params.keep_phix && params.skip_clipping && !(params.host_genome || params.host_fasta))) { - FASTQC_TRIMMED( - ch_short_reads_phixremoved - ) - ch_versions = ch_versions.mix(FASTQC_TRIMMED.out.versions) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMMED.out.zip) - } + if (params.host_fasta || 
params.host_genome) { + BOWTIE2_HOST_REMOVAL_ALIGN( + ch_short_reads_prepped, + ch_host_bowtie2index + ) + ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads + ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log) + } + else { + ch_short_reads_hostremoved = ch_short_reads_prepped + } - // Run/Lane merging + if (!params.keep_phix) { + BOWTIE2_PHIX_REMOVAL_BUILD( + ch_phix_db_file + ) + BOWTIE2_PHIX_REMOVAL_ALIGN( + ch_short_reads_hostremoved, + BOWTIE2_PHIX_REMOVAL_BUILD.out.index + ) + ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads + ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log) + } + else { + ch_short_reads_phixremoved = ch_short_reads_hostremoved + } - ch_short_reads_forcat = ch_short_reads_phixremoved - .map { meta, reads -> - def meta_new = meta - meta.subMap('run') - [meta_new, reads] - } - .groupTuple() - .branch { meta, reads -> - cat: reads.size() >= 2 - skip_cat: true - } + if (!(params.keep_phix && params.skip_clipping && !(params.host_genome || params.host_fasta))) { + FASTQC_TRIMMED( + ch_short_reads_phixremoved + ) + ch_versions = ch_versions.mix(FASTQC_TRIMMED.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_TRIMMED.out.zip) + } - CAT_FASTQ(ch_short_reads_forcat.cat.map { meta, reads -> [meta, reads.flatten()] }) + // Run/Lane merging - // Ensure we don't have nests of nests so that structure is in form expected for assembly - ch_short_reads_catskipped = ch_short_reads_forcat.skip_cat.map { meta, reads -> - def new_reads = meta.single_end ? 
reads[0] : reads.flatten() - [meta, new_reads] + ch_short_reads_forcat = ch_short_reads_phixremoved + .map { meta, reads -> + def meta_new = meta - meta.subMap('run') + [meta_new, reads] + } + .groupTuple() + .branch { meta, reads -> + cat: reads.size() >= 2 + skip_cat: true } - // Combine single run and multi-run-merged data - ch_short_reads = Channel.empty() - ch_short_reads = CAT_FASTQ.out.reads.mix(ch_short_reads_catskipped) - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) - - if (params.bbnorm) { - if (params.coassemble_group) { - // Interleave pairs, to be able to treat them as single ends when calling bbnorm. This prepares - // for dropping the single_end parameter, but keeps assembly modules as they are, i.e. not - // accepting a mix of single end and pairs. - SEQTK_MERGEPE( - ch_short_reads.filter { !it[0].single_end } - ) - ch_versions = ch_versions.mix(SEQTK_MERGEPE.out.versions.first()) - // Combine the interleaved pairs with any single end libraries. Set the meta.single_end to true (used by the bbnorm module). - ch_bbnorm = SEQTK_MERGEPE.out.reads - .mix(ch_short_reads.filter { it[0].single_end }) - .map { [[id: sprintf("group%s", it[0].group), group: it[0].group, single_end: true], it[1]] } - .groupTuple() - } - else { - ch_bbnorm = ch_short_reads - } - BBMAP_BBNORM(ch_bbnorm) - ch_versions = ch_versions.mix(BBMAP_BBNORM.out.versions) - ch_short_reads_assembly = BBMAP_BBNORM.out.fastq + CAT_FASTQ(ch_short_reads_forcat.cat.map { meta, reads -> [meta, reads.flatten()] }) + + // Ensure we don't have nests of nests so that structure is in form expected for assembly + ch_short_reads_catskipped = ch_short_reads_forcat.skip_cat.map { meta, reads -> + def new_reads = meta.single_end ? 
reads[0] : reads.flatten() + [meta, new_reads] + } + + // Combine single run and multi-run-merged data + ch_short_reads = Channel.empty() + ch_short_reads = CAT_FASTQ.out.reads.mix(ch_short_reads_catskipped) + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) + + if (params.bbnorm) { + if (params.coassemble_group) { + // Interleave pairs, to be able to treat them as single ends when calling bbnorm. This prepares + // for dropping the single_end parameter, but keeps assembly modules as they are, i.e. not + // accepting a mix of single end and pairs. + SEQTK_MERGEPE( + ch_short_reads.filter { !it[0].single_end } + ) + ch_versions = ch_versions.mix(SEQTK_MERGEPE.out.versions.first()) + // Combine the interleaved pairs with any single end libraries. Set the meta.single_end to true (used by the bbnorm module). + ch_bbnorm = SEQTK_MERGEPE.out.reads + .mix(ch_short_reads.filter { it[0].single_end }) + .map { [[id: sprintf("group%s", it[0].group), group: it[0].group, single_end: true], it[1]] } + .groupTuple() } else { - ch_short_reads_assembly = ch_short_reads + ch_bbnorm = ch_short_reads } + BBMAP_BBNORM(ch_bbnorm) + ch_versions = ch_versions.mix(BBMAP_BBNORM.out.versions) + ch_short_reads_assembly = BBMAP_BBNORM.out.fastq } else { - ch_short_reads = ch_raw_short_reads.map { meta, reads -> - def meta_new = meta - meta.subMap('run') - [meta_new, reads] - } + ch_short_reads_assembly = ch_short_reads } emit: diff --git a/workflows/mag.nf b/workflows/mag.nf index 97b5c4ca..7f4ae3ec 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -188,17 +188,26 @@ workflow MAG { ================================================================================ */ - SHORTREAD_PREPROCESSING( - ch_raw_short_reads, - ch_host_fasta, - ch_phix_db_file, - ch_metaeuk_db - ) + if (!params.assembly_input) { + SHORTREAD_PREPROCESSING( + ch_raw_short_reads, + ch_host_fasta, + ch_phix_db_file, + ch_metaeuk_db + ) - ch_versions = ch_versions.mix(SHORTREAD_PREPROCESSING.out.versions) - 
ch_short_reads = SHORTREAD_PREPROCESSING.out.short_reads - ch_short_reads_assembly = SHORTREAD_PREPROCESSING.out.short_reads_assembly + ch_versions = ch_versions.mix(SHORTREAD_PREPROCESSING.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.multiqc_files.collect { it[1] }.ifEmpty([])) + ch_short_reads = SHORTREAD_PREPROCESSING.out.short_reads + ch_short_reads_assembly = SHORTREAD_PREPROCESSING.out.short_reads_assembly + } + else { + ch_short_reads = ch_raw_short_reads.map { meta, reads -> + def meta_new = meta - meta.subMap('run') + [meta_new, reads] + } + } /* ================================================================================ @@ -213,6 +222,7 @@ workflow MAG { ) ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.multiqc_files.collect { it[1] }.ifEmpty([])) ch_long_reads = LONGREAD_PREPROCESSING.out.long_reads /* @@ -869,10 +879,6 @@ workflow MAG { ) ) - // Add all files from preprocessing to the MultiQC input channel - ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.multiqc_files.collect { it[1] }.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.multiqc_files.collect { it[1] }.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(CENTRIFUGE_KREPORT.out.kreport.collect { it[1] }.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2.out.report.collect { it[1] }.ifEmpty([])) From 2262d6040d6d9ac4d7f54f7b8dbfa2aaaf262ca5 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 29 Nov 2024 07:24:39 +0100 Subject: [PATCH 11/11] Remove unused bowtie2 multiqc channel --- subworkflows/local/shortread_preprocessing.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index 53f5e7c1..ad33b56f 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ 
b/subworkflows/local/shortread_preprocessing.nf @@ -32,7 +32,6 @@ workflow SHORTREAD_PREPROCESSING { ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC_RAW.out.zip) - ch_bowtie2_removal_host_multiqc = Channel.empty() if (!params.skip_clipping) { if (params.clip_tool == 'fastp') { FASTP(