last subworkflow for now; will likely move chimera removal into another one at a later point
h3abionet · Jan 13, 2025 · cfd0f83 · cfd0f83
1 parent 30d80f0
commit cfd0f83
Show file tree

Hide file tree

Showing 10 changed files with 167 additions and 165 deletions.
diff --git a/conf/modules.config b/conf/modules.config
@@ -242,19 +242,19 @@ process {
         ]
     }
 
-    withName: SEQTABLE2TEXT {
+    withName: DADA2_SEQTABLE2TEXT {
         publishDir = [
             path: { "${params.outdir}/TSV" },
             mode: params.publish_dir_mode,
-            pattern: '*.txt'
+            pattern: 'seqtab_final.txt'
         ]
     }
 
     withName: DADA2_TAXTABLE2TEXT {
         publishDir = [
             path: { "${params.outdir}/TSV" },
             mode: params.publish_dir_mode,
-            pattern: '*.txt'
+            pattern: 'tax_final*.txt'
         ]
     }
 

diff --git a/conf/test.config b/conf/test.config
@@ -25,5 +25,6 @@ params {
     trim_rev   = 25
     reference = 'https://file-server.igb.illinois.edu/~cjfields/TADA/silva_nr99_v138.1_train_set.fa.gz'
     phylo_tool  = 'fasttree'
+    to_QIIME2   = true
     // species   = 'https://file-server.igb.illinois.edu/~cjfields/TADA/silva_species_assignment_v138.1.fa.gz'
 }
diff --git a/nextflow.config b/nextflow.config
@@ -16,19 +16,26 @@ params {
     genome                     = null
     igenomes_base              = 's3://ngi-igenomes/igenomes/'
     igenomes_ignore            = false
-    fasta                      = null// MultiQC options
+    fasta                      = null // MultiQC options
     multiqc_config             = null
     multiqc_title              = null
     multiqc_logo               = null
     max_multiqc_email_size     = '25.MB'
     multiqc_methods_description = null
 
     // TODO: this needs to be removed or made more specific
-    amplicon = "16S" 
-    quality_binning = false  // set to true if using binned qualities (NovaSeq, PacBio Revio)
-    quality_bins = ""
+    amplicon = "16S"
+
+    // loessErrfun, PacBioErrfun, makeBinnedQualErrfun, loessErrfun_mod1, loessErrfun_mod2, loessErrfun_mod3, loessErrfun_mod4
+    error_function = "loessErrfun"
+
+    quality_binning = false // set to true if using binned qualities (NovaSeq, PacBio Revio)
+
+    // quality_bins must be set when quality_binning is true and error_function is 'makeBinnedQualErrfun'
+
+    quality_bins = "" 
     amplicon_type = "overlapping"
-    platform = "illumina"
+    platform = "illumina" // illumina, pacbio; 454 and others could be added
 
     // QC
     skip_FASTQC = false  // set to run this step by default, this can fail with large sample #'s
@@ -63,9 +70,6 @@ params {
     // I think we can make these bool 'false' as above with R coercion (either through as.logical or using optparse in an Rscript)
     rmPhiX = false
 
-    // Error model
-    // custom_error_model = 'illumina' // NYI, thinking about best way to implement this
-
     // ASV inference pooling
     pool = "pseudo"
 

diff --git a/subworkflows/local/dada2_denoise.nf b/subworkflows/local/dada2_denoise.nf
@@ -5,6 +5,7 @@ include { DADA_INFER                            } from '../../modules/local/dada
 include { POOLED_SEQTABLE                       } from '../../modules/local/pooledseqtable'
 include { DADA2_REMOVE_CHIMERAS                 } from '../../modules/local/removechimeras'
 include { RENAME_ASVS                           } from '../../modules/local/renameasvs'
+include { DADA2_SEQTABLE2TEXT                   } from '../../modules/local/seqtable2txt'
 
 workflow DADA2_DENOISE {
 
@@ -55,7 +56,12 @@ workflow DADA2_DENOISE {
         POOLED_SEQTABLE.out.filtered_seqtable
     )
 
+    DADA2_SEQTABLE2TEXT(
+        RENAME_ASVS.out.seqtable_renamed
+    )
+
     emit:
+    seqtab2qiime = DADA2_SEQTABLE2TEXT.out.seqtab2qiime
     nonchimeric_asvs = RENAME_ASVS.out.nonchimeric_asvs
     seqtable_renamed = RENAME_ASVS.out.seqtable_renamed
     readmap = RENAME_ASVS.out.readmap

diff --git a/subworkflows/local/generate_output.nf b/subworkflows/local/generate_output.nf
@@ -1,36 +1,66 @@
-// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
-//               https://github.com/nf-core/modules/tree/master/subworkflows
-//               You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
-//               https://nf-co.re/join
-// TODO nf-core: A subworkflow SHOULD import at least two modules
-
-include { SAMTOOLS_SORT      } from '../../../modules/nf-core/samtools/sort/main'
-include { SAMTOOLS_INDEX     } from '../../../modules/nf-core/samtools/index/main'
+include { BIOM                   } from '../../modules/local/biom'
+include { QIIME2_FEATURETABLE    } from '../../modules/local/qiime2featuretable'
+include { QIIME2_TAXTABLE        } from '../../modules/local/qiime2taxtable'
+include { QIIME2_SEQUENCE        } from '../../modules/local/qiime2seqs'
+include { QIIME2_ALIGNMENT       } from '../../modules/local/qiime2aln'
+include { QIIME2_TREE            } from '../../modules/local/qiime2tree'
+include { SESSION_INFO           } from '../../modules/local/rsessioninfo'
 
 workflow GENERATE_OUTPUT {
 
+    // TODO: I'd like to have this simply be TSV files (no RDS)
+    //       so we can generate from other subworkflows if needed
     take:
-    // TODO nf-core: edit input (take) channels
-    ch_bam // channel: [ val(meta), [ bam ] ]
+    seq_table_rds
+    seq_table_qiime
+    tax_table_rds
+    tax_table_tsv
+    asvs
+    alignment
+    unrooted_tree
+    rooted_tree
 
     main:
-
     ch_versions = Channel.empty()
 
-    // TODO nf-core: substitute modules here for the modules of your subworkflow
+    if (params.to_BIOM) {
+        BIOM(
+            seq_table_rds,
+            tax_table_rds
+        )
+    }
 
-    SAMTOOLS_SORT ( ch_bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
+    if (params.to_QIIME2) {
+        QIIME2_FEATURETABLE(
+            seq_table_qiime
+        )
 
-    SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+        QIIME2_TAXTABLE(
+            tax_table_tsv
+        )
 
-    emit:
-    // TODO nf-core: edit emitted channels
-    bam      = SAMTOOLS_SORT.out.bam           // channel: [ val(meta), [ bam ] ]
-    bai      = SAMTOOLS_INDEX.out.bai          // channel: [ val(meta), [ bai ] ]
-    csi      = SAMTOOLS_INDEX.out.csi          // channel: [ val(meta), [ csi ] ]
+        QIIME2_SEQUENCE(
+            asvs
+        )
+
+        if (!params.skip_alignment) {
+            QIIME2_ALIGNMENT(
+                alignment
+            )
+        }
 
+        if (!params.skip_tree) {
+            QIIME2_TREE(
+                unrooted_tree,
+                rooted_tree
+            )
+        }
+    }
+
+    // TODO: May become redundant with versions
+    SESSION_INFO()
+
+    emit:
     versions = ch_versions                     // channel: [ versions.yml ]
 }
 
diff --git a/subworkflows/local/phylogeny.nf b/subworkflows/local/phylogeny.nf
@@ -1,36 +1,49 @@
-// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
-//               https://github.com/nf-core/modules/tree/master/subworkflows
-//               You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
-//               https://nf-co.re/join
-// TODO nf-core: A subworkflow SHOULD import at least two modules
-
-include { SAMTOOLS_SORT      } from '../../../modules/nf-core/samtools/sort/main'
-include { SAMTOOLS_INDEX     } from '../../../modules/nf-core/samtools/index/main'
+include { DECIPHER               } from '../../modules/local/decipher'
+include { PHANGORN               } from '../../modules/local/phangorn'
+include { FASTTREE               } from '../../modules/local/fasttree'
+include { ROOT_TREE              } from '../../modules/local/roottree'
 
 workflow PHYLOGENY {
-
     take:
-    // TODO nf-core: edit input (take) channels
-    ch_bam // channel: [ val(meta), [ bam ] ]
+    asvs
 
     main:
 
+    ch_alignment = Channel.empty()
+    ch_unrooted_tree = Channel.empty()
+    ch_rooted_tree = Channel.empty()
     ch_versions = Channel.empty()
 
-    // TODO nf-core: substitute modules here for the modules of your subworkflow
-
-    SAMTOOLS_SORT ( ch_bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
-
-    SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+    if (!params.skip_alignment) {
+        DECIPHER(
+            asvs
+        )
+        ch_alignment = DECIPHER.out.alignment
+        if (!params.skip_tree) {
+            if (params.phylo_tool == 'phangorn') {
+                PHANGORN(
+                    ch_alignment
+                )
+                ch_unrooted_tree = PHANGORN.out.treeGTR
+            } else if (params.phylo_tool == 'fasttree') {
+                FASTTREE(
+                    ch_alignment
+                )
+                ch_unrooted_tree = FASTTREE.out.treeGTR
+            }
+
+            ROOT_TREE(
+                ch_unrooted_tree,
+                params.phylo_tool
+            )
+            ch_rooted_tree = ROOT_TREE.out.rooted_tree
+        }
+    }
 
     emit:
-    // TODO nf-core: edit emitted channels
-    bam      = SAMTOOLS_SORT.out.bam           // channel: [ val(meta), [ bam ] ]
-    bai      = SAMTOOLS_INDEX.out.bai          // channel: [ val(meta), [ bai ] ]
-    csi      = SAMTOOLS_INDEX.out.csi          // channel: [ val(meta), [ csi ] ]
-
-    versions = ch_versions                     // channel: [ versions.yml ]
+    ch_alignment
+    ch_unrooted_tree
+    ch_rooted_tree
+    versions = ch_versions // channel: [ versions.yml ]
 }
 
diff --git a/subworkflows/local/pre_qc.nf b/subworkflows/local/pre_qc.nf
@@ -4,6 +4,7 @@
 //               https://nf-co.re/join
 // TODO nf-core: A subworkflow SHOULD import at least two modules
 
+include { FASTQC                 } from '../../modules/nf-core/fastqc/main'
 include { PLOT_QUALITY_PROFILE   } from '../../modules/local/plotqualityprofile'
 include { VSEARCH_EESTATS        } from '../../modules/local/vsearch_eestats'
 include { VSEARCH_OVERLAP        } from '../../modules/local/vsearchoverlap'
@@ -20,13 +21,22 @@ workflow PRE_QC {
 
     main:
     ch_versions = Channel.empty()
+    ch_multiqc_files = Channel.empty()
+
+    FASTQC (
+        ch_samplesheet
+    )
+
+    ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]})
+    ch_versions = ch_versions.mix(FASTQC.out.versions.first())
 
     if (!skip_merging) {
         VSEARCH_OVERLAP(
             ch_samplesheet
         )
 
         ch_versions = ch_versions.mix(VSEARCH_OVERLAP.out.versions.first())
+
         MERGE_OVERLAP_CHECK(
             VSEARCH_OVERLAP.out.merged_log.collect()
         )
@@ -54,4 +64,5 @@ workflow PRE_QC {
 
     emit:
     versions = ch_versions                     // channel: [ versions.yml ]
+    zip = FASTQC.out.zip
 }
diff --git a/subworkflows/local/qualitycontrol.nf b/subworkflows/local/qualitycontrol.nf
@@ -1,36 +1,30 @@
-// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
-//               https://github.com/nf-core/modules/tree/master/subworkflows
-//               You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
-//               https://nf-co.re/join
-// TODO nf-core: A subworkflow SHOULD import at least two modules
+include { READ_TRACKING          } from '../../modules/local/readtracking'
+include { PLOT_MERGED_HEATMAP    } from '../../modules/local/plotmerged'
+include { PLOT_ASV_DIST          } from '../../modules/local/plotasvlen'
 
-include { SAMTOOLS_SORT      } from '../../../modules/nf-core/samtools/sort/main'
-include { SAMTOOLS_INDEX     } from '../../../modules/nf-core/samtools/index/main'
-
-workflow QUALITYCONTROL {
+workflow QUALITY_CONTROL {
 
     take:
-    // TODO nf-core: edit input (take) channels
-    ch_bam // channel: [ val(meta), [ bam ] ]
+    ch_readtracking
+    merged_seqs
+    filtered_seqtable
 
     main:
-
     ch_versions = Channel.empty()
 
-    // TODO nf-core: substitute modules here for the modules of your subworkflow
+    READ_TRACKING(
+        ch_readtracking.collect()
+    )
 
-    SAMTOOLS_SORT ( ch_bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
+    PLOT_MERGED_HEATMAP(
+        merged_seqs
+    )
 
-    SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+    PLOT_ASV_DIST(
+        filtered_seqtable
+    )
 
     emit:
-    // TODO nf-core: edit emitted channels
-    bam      = SAMTOOLS_SORT.out.bam           // channel: [ val(meta), [ bam ] ]
-    bai      = SAMTOOLS_INDEX.out.bai          // channel: [ val(meta), [ bai ] ]
-    csi      = SAMTOOLS_INDEX.out.csi          // channel: [ val(meta), [ csi ] ]
-
     versions = ch_versions                     // channel: [ versions.yml ]
 }
 
diff --git a/subworkflows/local/taxonomy.nf b/subworkflows/local/taxonomy.nf
@@ -12,7 +12,8 @@ workflow TAXONOMY {
     ch_versions = Channel.empty()
     ch_taxtab = Channel.empty()
     ch_metrics =  Channel.empty()
-
+    // TODO: eventually this will have multiple options for
+    //       taxonomic assignment
     DADA2_ASSIGN_TAXA_SPECIES(
         readmap,
         ref_file,