From cfd0f835171922f4a7527ae976ba1c74e5487d71 Mon Sep 17 00:00:00 2001
From: Chris Fields <cjfields@illinois.edu>
Date: Mon, 13 Jan 2025 17:21:11 -0600
Subject: [PATCH] last subworkflow for now; will likely move chimera removal
 into another one at a later point

---
 conf/modules.config                   |   6 +-
 conf/test.config                      |   1 +
 nextflow.config                       |  20 +++--
 subworkflows/local/dada2_denoise.nf   |   6 ++
 subworkflows/local/generate_output.nf |  72 +++++++++++-----
 subworkflows/local/phylogeny.nf       |  61 ++++++++------
 subworkflows/local/pre_qc.nf          |  11 +++
 subworkflows/local/qualitycontrol.nf  |  38 ++++-----
 subworkflows/local/taxonomy.nf        |   3 +-
 workflows/tada.nf                     | 114 +++++++-------------------
 10 files changed, 167 insertions(+), 165 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 958fba7..6c5d9a0 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -242,11 +242,11 @@ process {
         ]
     }
 
-    withName: SEQTABLE2TEXT {
+    withName: DADA2_SEQTABLE2TEXT {
         publishDir = [
             path: { "${params.outdir}/TSV" },
             mode: params.publish_dir_mode,
-            pattern: '*.txt'
+            pattern: 'seqtab_final.txt'
         ]
     }
 
@@ -254,7 +254,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/TSV" },
             mode: params.publish_dir_mode,
-            pattern: '*.txt'
+            pattern: 'tax_final*.txt'
         ]
     }
 
diff --git a/conf/test.config b/conf/test.config
index 11e1deb..e6d079d 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -25,5 +25,6 @@ params {
     trim_rev   = 25
     reference = 'https://file-server.igb.illinois.edu/~cjfields/TADA/silva_nr99_v138.1_train_set.fa.gz'
     phylo_tool  = 'fasttree'
+    to_QIIME2   = true
     // species   = 'https://file-server.igb.illinois.edu/~cjfields/TADA/silva_species_assignment_v138.1.fa.gz'
 }
diff --git a/nextflow.config b/nextflow.config
index a92b56e..01f2ca8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -16,7 +16,7 @@ params {
     genome                     = null
     igenomes_base              = 's3://ngi-igenomes/igenomes/'
     igenomes_ignore            = false
-    fasta                      = null// MultiQC options
+    fasta                      = null // MultiQC options
     multiqc_config             = null
     multiqc_title              = null
     multiqc_logo               = null
@@ -24,11 +24,18 @@ params {
     multiqc_methods_description = null
 
     // TODO: this needs to be removed or made more specific
-    amplicon = "16S" 
-    quality_binning = false  // set to true if using binned qualities (NovaSeq, PacBio Revio)
-    quality_bins = ""
+    amplicon = "16S"
+
+    // error_function options: loessErrfun, PacBioErrfun, makeBinnedQualErrfun, loessErrfun_mod1, loessErrfun_mod2, loessErrfun_mod3, loessErrfun_mod4
+    error_function = "loessErrfun"
+
+    quality_binning = false // set to true if using binned qualities (NovaSeq, PacBio Revio)
+
+    // quality_bins (below) must be set when quality_binning is true and error_function is 'makeBinnedQualErrfun'
+
+    quality_bins = "" 
     amplicon_type = "overlapping"
-    platform = "illumina"
+    platform = "illumina" // illumina, pacbio; 454 and others could be added
 
     // QC
     skip_FASTQC = false  // set to run this step by default, this can fail with large sample #'s
@@ -63,9 +70,6 @@ params {
     // I think we can make these bool 'false' as above with R coersion (either through as.logical or using optparse in a Rscript)
     rmPhiX = false
 
-    // Error model
-    // custom_error_model = 'illumina' // NYI, thinking about best way to implement this
-
     // ASV inference pooling
     pool = "pseudo"
 
diff --git a/subworkflows/local/dada2_denoise.nf b/subworkflows/local/dada2_denoise.nf
index 23f5475..4957ed1 100644
--- a/subworkflows/local/dada2_denoise.nf
+++ b/subworkflows/local/dada2_denoise.nf
@@ -5,6 +5,7 @@ include { DADA_INFER                            } from '../../modules/local/dada
 include { POOLED_SEQTABLE                       } from '../../modules/local/pooledseqtable'
 include { DADA2_REMOVE_CHIMERAS                 } from '../../modules/local/removechimeras'
 include { RENAME_ASVS                           } from '../../modules/local/renameasvs'
+include { DADA2_SEQTABLE2TEXT                   } from '../../modules/local/seqtable2txt'
 
 workflow DADA2_DENOISE {
 
@@ -55,7 +56,12 @@ workflow DADA2_DENOISE {
         POOLED_SEQTABLE.out.filtered_seqtable
     )
 
+    DADA2_SEQTABLE2TEXT(
+        RENAME_ASVS.out.seqtable_renamed
+    )
+
     emit:
+    seqtab2qiime = DADA2_SEQTABLE2TEXT.out.seqtab2qiime
     nonchimeric_asvs = RENAME_ASVS.out.nonchimeric_asvs
     seqtable_renamed = RENAME_ASVS.out.seqtable_renamed
     readmap = RENAME_ASVS.out.readmap
diff --git a/subworkflows/local/generate_output.nf b/subworkflows/local/generate_output.nf
index 393e8a8..1df4b8d 100644
--- a/subworkflows/local/generate_output.nf
+++ b/subworkflows/local/generate_output.nf
@@ -1,36 +1,66 @@
-// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
-//               https://github.com/nf-core/modules/tree/master/subworkflows
-//               You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
-//               https://nf-co.re/join
-// TODO nf-core: A subworkflow SHOULD import at least two modules
-
-include { SAMTOOLS_SORT      } from '../../../modules/nf-core/samtools/sort/main'
-include { SAMTOOLS_INDEX     } from '../../../modules/nf-core/samtools/index/main'
+include { BIOM                   } from '../../modules/local/biom'
+include { QIIME2_FEATURETABLE    } from '../../modules/local/qiime2featuretable'
+include { QIIME2_TAXTABLE        } from '../../modules/local/qiime2taxtable'
+include { QIIME2_SEQUENCE        } from '../../modules/local/qiime2seqs'
+include { QIIME2_ALIGNMENT       } from '../../modules/local/qiime2aln'
+include { QIIME2_TREE            } from '../../modules/local/qiime2tree'
+include { SESSION_INFO           } from '../../modules/local/rsessioninfo'
 
 workflow GENERATE_OUTPUT {
 
+    // TODO: accept only TSV inputs here (no RDS) so this subworkflow
+    //       can also be driven from other subworkflows if needed
     take:
-    // TODO nf-core: edit input (take) channels
-    ch_bam // channel: [ val(meta), [ bam ] ]
+    seq_table_rds
+    seq_table_qiime
+    tax_table_rds
+    tax_table_tsv
+    asvs
+    alignment
+    unrooted_tree
+    rooted_tree
 
     main:
-
     ch_versions = Channel.empty()
 
-    // TODO nf-core: substitute modules here for the modules of your subworkflow
+    if (params.to_BIOM) {
+        BIOM(
+            seq_table_rds,
+            tax_table_rds
+        )
+    }
 
-    SAMTOOLS_SORT ( ch_bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
+    if (params.to_QIIME2) {
+        QIIME2_FEATURETABLE(
+            seq_table_qiime
+        )
 
-    SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+        QIIME2_TAXTABLE(
+            tax_table_tsv
+        )
 
-    emit:
-    // TODO nf-core: edit emitted channels
-    bam      = SAMTOOLS_SORT.out.bam           // channel: [ val(meta), [ bam ] ]
-    bai      = SAMTOOLS_INDEX.out.bai          // channel: [ val(meta), [ bai ] ]
-    csi      = SAMTOOLS_INDEX.out.csi          // channel: [ val(meta), [ csi ] ]
+        QIIME2_SEQUENCE(
+            asvs
+        )
+
+        if (!params.skip_alignment) {
+            QIIME2_ALIGNMENT(
+                alignment
+            )
+        }
 
+        if (!params.skip_tree) {
+            QIIME2_TREE(
+                unrooted_tree,
+                rooted_tree
+            )
+        }
+    }
+
+    // TODO: SESSION_INFO may become redundant once tool versions are reported through ch_versions
+    SESSION_INFO()
+
+    emit:
     versions = ch_versions                     // channel: [ versions.yml ]
 }
 
diff --git a/subworkflows/local/phylogeny.nf b/subworkflows/local/phylogeny.nf
index 70d3095..e44f5da 100644
--- a/subworkflows/local/phylogeny.nf
+++ b/subworkflows/local/phylogeny.nf
@@ -1,36 +1,49 @@
-// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
-//               https://github.com/nf-core/modules/tree/master/subworkflows
-//               You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
-//               https://nf-co.re/join
-// TODO nf-core: A subworkflow SHOULD import at least two modules
-
-include { SAMTOOLS_SORT      } from '../../../modules/nf-core/samtools/sort/main'
-include { SAMTOOLS_INDEX     } from '../../../modules/nf-core/samtools/index/main'
+include { DECIPHER               } from '../../modules/local/decipher'
+include { PHANGORN               } from '../../modules/local/phangorn'
+include { FASTTREE               } from '../../modules/local/fasttree'
+include { ROOT_TREE              } from '../../modules/local/roottree'
 
 workflow PHYLOGENY {
-
     take:
-    // TODO nf-core: edit input (take) channels
-    ch_bam // channel: [ val(meta), [ bam ] ]
+    asvs
 
     main:
 
+    ch_alignment = Channel.empty()
+    ch_unrooted_tree = Channel.empty()
+    ch_rooted_tree = Channel.empty()
     ch_versions = Channel.empty()
 
-    // TODO nf-core: substitute modules here for the modules of your subworkflow
-
-    SAMTOOLS_SORT ( ch_bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
-
-    SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+    if (!params.skip_alignment) {
+        DECIPHER(
+            asvs
+        )
+        ch_alignment = DECIPHER.out.alignment
+        if (!params.skip_tree) {
+            if (params.phylo_tool == 'phangorn') {
+                PHANGORN(
+                    ch_alignment
+                )
+                ch_unrooted_tree = PHANGORN.out.treeGTR
+            } else if (params.phylo_tool == 'fasttree') {
+                FASTTREE(
+                    ch_alignment
+                )
+                ch_unrooted_tree = FASTTREE.out.treeGTR
+            }
+
+            ROOT_TREE(
+                ch_unrooted_tree,
+                params.phylo_tool
+            )
+            ch_rooted_tree = ROOT_TREE.out.rooted_tree
+        }
+    }
 
     emit:
-    // TODO nf-core: edit emitted channels
-    bam      = SAMTOOLS_SORT.out.bam           // channel: [ val(meta), [ bam ] ]
-    bai      = SAMTOOLS_INDEX.out.bai          // channel: [ val(meta), [ bai ] ]
-    csi      = SAMTOOLS_INDEX.out.csi          // channel: [ val(meta), [ csi ] ]
-
-    versions = ch_versions                     // channel: [ versions.yml ]
+    ch_alignment
+    ch_unrooted_tree
+    ch_rooted_tree
+    versions = ch_versions // channel: [ versions.yml ]
 }
 
diff --git a/subworkflows/local/pre_qc.nf b/subworkflows/local/pre_qc.nf
index e4aa683..4df8cc8 100644
--- a/subworkflows/local/pre_qc.nf
+++ b/subworkflows/local/pre_qc.nf
@@ -4,6 +4,7 @@
 //               https://nf-co.re/join
 // TODO nf-core: A subworkflow SHOULD import at least two modules
 
+include { FASTQC                 } from '../../modules/nf-core/fastqc/main'
 include { PLOT_QUALITY_PROFILE   } from '../../modules/local/plotqualityprofile'
 include { VSEARCH_EESTATS        } from '../../modules/local/vsearch_eestats'
 include { VSEARCH_OVERLAP        } from '../../modules/local/vsearchoverlap'
@@ -20,6 +21,14 @@ workflow PRE_QC {
 
     main:
     ch_versions = Channel.empty()
+    ch_multiqc_files = Channel.empty()
+
+    FASTQC (
+        ch_samplesheet
+    )
+    
+    ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]})
+    ch_versions = ch_versions.mix(FASTQC.out.versions.first())
 
     if (!skip_merging) {
         VSEARCH_OVERLAP(
@@ -27,6 +36,7 @@ workflow PRE_QC {
         )
 
         ch_versions = ch_versions.mix(VSEARCH_OVERLAP.out.versions.first())
+
         MERGE_OVERLAP_CHECK(
             VSEARCH_OVERLAP.out.merged_log.collect()
         )
@@ -54,4 +64,5 @@ workflow PRE_QC {
     
     emit:
     versions = ch_versions                     // channel: [ versions.yml ]
+    zip = FASTQC.out.zip
 }
diff --git a/subworkflows/local/qualitycontrol.nf b/subworkflows/local/qualitycontrol.nf
index 530cfa5..e18ab86 100644
--- a/subworkflows/local/qualitycontrol.nf
+++ b/subworkflows/local/qualitycontrol.nf
@@ -1,36 +1,30 @@
-// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
-//               https://github.com/nf-core/modules/tree/master/subworkflows
-//               You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
-//               https://nf-co.re/join
-// TODO nf-core: A subworkflow SHOULD import at least two modules
+include { READ_TRACKING          } from '../../modules/local/readtracking'
+include { PLOT_MERGED_HEATMAP    } from '../../modules/local/plotmerged'
+include { PLOT_ASV_DIST          } from '../../modules/local/plotasvlen'
 
-include { SAMTOOLS_SORT      } from '../../../modules/nf-core/samtools/sort/main'
-include { SAMTOOLS_INDEX     } from '../../../modules/nf-core/samtools/index/main'
-
-workflow QUALITYCONTROL {
+workflow QUALITY_CONTROL {
 
     take:
-    // TODO nf-core: edit input (take) channels
-    ch_bam // channel: [ val(meta), [ bam ] ]
+    ch_readtracking
+    merged_seqs
+    filtered_seqtable
 
     main:
-
     ch_versions = Channel.empty()
 
-    // TODO nf-core: substitute modules here for the modules of your subworkflow
+    READ_TRACKING(
+        ch_readtracking.collect()
+    )
 
-    SAMTOOLS_SORT ( ch_bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
+    PLOT_MERGED_HEATMAP(
+        merged_seqs
+    )
 
-    SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+    PLOT_ASV_DIST(
+        filtered_seqtable
+    )
 
     emit:
-    // TODO nf-core: edit emitted channels
-    bam      = SAMTOOLS_SORT.out.bam           // channel: [ val(meta), [ bam ] ]
-    bai      = SAMTOOLS_INDEX.out.bai          // channel: [ val(meta), [ bai ] ]
-    csi      = SAMTOOLS_INDEX.out.csi          // channel: [ val(meta), [ csi ] ]
-
     versions = ch_versions                     // channel: [ versions.yml ]
 }
 
diff --git a/subworkflows/local/taxonomy.nf b/subworkflows/local/taxonomy.nf
index 50d9b8a..c1d4e8c 100644
--- a/subworkflows/local/taxonomy.nf
+++ b/subworkflows/local/taxonomy.nf
@@ -12,7 +12,8 @@ workflow TAXONOMY {
     ch_versions = Channel.empty()
     ch_taxtab = Channel.empty()
     ch_metrics =  Channel.empty()
-
+    // TODO: eventually this will have multiple options for
+    //       taxonomic assignment
     DADA2_ASSIGN_TAXA_SPECIES(
         readmap,
         ref_file,
diff --git a/workflows/tada.nf b/workflows/tada.nf
index e51d546..9edabb9 100644
--- a/workflows/tada.nf
+++ b/workflows/tada.nf
@@ -4,37 +4,15 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { FASTQC                 } from '../modules/nf-core/fastqc/main'
 include { MULTIQC                } from '../modules/nf-core/multiqc/main'
 
 include { PRE_QC                 } from '../subworkflows/local/pre_qc'
 include { FILTER_AND_TRIM        } from '../subworkflows/local/filter_and_trim'
 include { DADA2_DENOISE          } from '../subworkflows/local/dada2_denoise'
 include { TAXONOMY               } from '../subworkflows/local/taxonomy'
-
-// TODO: Move into phylogenetic subworkflow
-// include { PHYLOGENETICS          } from '../subworkflows/local/taxonomic_assignment'
-include { DECIPHER               } from '../modules/local/decipher'
-include { PHANGORN               } from '../modules/local/phangorn'
-include { FASTTREE               } from '../modules/local/fasttree'
-include { ROOT_TREE              } from '../modules/local/roottree'
-
-// TODO: Move into subworkflow(s)
-// include { QUALITY_CONTROL        } from '../subworkflows/local/taxonomic_assignment'
-include { READ_TRACKING          } from '../modules/local/readtracking'
-include { PLOT_MERGED_HEATMAP    } from '../modules/local/plotmerged'
-include { PLOT_ASV_DIST          } from '../modules/local/plotasvlen'
-include { SESSION_INFO           } from '../modules/local/rsessioninfo'
-
-// TODO: Move into subworkflow(s)
-// include { OUTPUT                 } from '../subworkflows/local/taxonomic_assignment'
-include { DADA2_SEQTABLE2TEXT       } from '../modules/local/seqtable2txt'
-include { BIOM                   } from '../modules/local/biom'
-include { QIIME2_FEATURETABLE    } from '../modules/local/qiime2featuretable'
-include { QIIME2_TAXTABLE        } from '../modules/local/qiime2taxtable'
-include { QIIME2_SEQUENCE        } from '../modules/local/qiime2seqs'
-include { QIIME2_ALIGNMENT       } from '../modules/local/qiime2aln'
-include { QIIME2_TREE            } from '../modules/local/qiime2tree'
+include { PHYLOGENY              } from '../subworkflows/local/phylogeny'
+include { QUALITY_CONTROL        } from '../subworkflows/local/qualitycontrol'
+include { GENERATE_OUTPUT        } from '../subworkflows/local/generate_output'
 
 include { paramsSummaryMap       } from 'plugin/nf-validation'
 include { paramsSummaryMultiqc   } from '../subworkflows/nf-core/utils_nfcore_pipeline'
@@ -92,13 +70,10 @@ workflow TADA {
         exit 1, "--id_type can currently only be set to 'simple' or 'md5', got ${params.id_type}"
     }
 
-    FASTQC (
-        ch_samplesheet
-    )
+    // FASTQC (
+    //     ch_samplesheet
+    // )
     
-    ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]})
-    ch_versions = ch_versions.mix(FASTQC.out.versions.first())
-
     PRE_QC(
         ch_samplesheet,
         params.skip_ee,
@@ -106,6 +81,7 @@ workflow TADA {
         params.skip_dadaQC
     )
 
+    ch_multiqc_files = ch_multiqc_files.mix(PRE_QC.out.zip.collect{it[1]})
     ch_versions = ch_versions.mix(PRE_QC.out.versions)
     
     // ch_multiqc_files = ch_multiqc_files.mix(PLOTQUALITYPROFILE.out.zip.collect{it[1]})
@@ -162,71 +138,37 @@ workflow TADA {
         ch_metrics = TAXONOMY.out.ch_metrics
     }
     
-    // Subworkflows-Alignment + Phylogenetic Tree (optional)
-    DECIPHER(
-        DADA2_DENOISE.out.nonchimeric_asvs
-    )
-
-    ch_tree = Channel.empty()
-
-    if (params.phylo_tool == 'phangorn') {
-        PHANGORN(
-            DECIPHER.out.alignment
-        )
-        ch_tree = PHANGORN.out.treeGTR
-    } else if (params.phylo_tool == 'fasttree') {
-        FASTTREE(
-            DECIPHER.out.alignment
-        )
-        ch_tree = FASTTREE.out.treeGTR
-    }
-
-    ROOT_TREE(
-        ch_tree,
-        params.phylo_tool
-    )
+    PHYLOGENY(DADA2_DENOISE.out.nonchimeric_asvs)
 
     // Post-QC
-    READ_TRACKING(
-        ch_readtracking.collect()
-    )
-
-    PLOT_MERGED_HEATMAP(
-        DADA2_DENOISE.out.merged_seqs
-    )
-
-    PLOT_ASV_DIST(
+    QUALITY_CONTROL(
+        ch_readtracking,
+        DADA2_DENOISE.out.merged_seqs,
         DADA2_DENOISE.out.filtered_seqtable
     )
+    // READ_TRACKING(
+    //     ch_readtracking.collect()
+    // )
 
-    // Subworkflow - Outputs
+    // PLOT_MERGED_HEATMAP(
+    //     DADA2_DENOISE.out.merged_seqs
+    // )
 
-    // TODO: these could be moved into the 
-    DADA2_SEQTABLE2TEXT(
-        DADA2_DENOISE.out.seqtable_renamed
-    )
+    // PLOT_ASV_DIST(
+    //     DADA2_DENOISE.out.filtered_seqtable
+    // )
 
-    // TODO: BIOM should read in TSV files, not RDS
-    BIOM(
+    GENERATE_OUTPUT(
         DADA2_DENOISE.out.seqtable_renamed,
-        TAXONOMY.out.ch_taxtab_rds
+        DADA2_DENOISE.out.seqtab2qiime,
+        TAXONOMY.out.ch_taxtab_rds,
+        TAXONOMY.out.ch_taxtab,
+        DADA2_DENOISE.out.nonchimeric_asvs,
+        PHYLOGENY.out.ch_alignment,
+        PHYLOGENY.out.ch_unrooted_tree,
+        PHYLOGENY.out.ch_rooted_tree
     )
 
-    QIIME2_FEATURETABLE(DADA2_SEQTABLE2TEXT.out.seqtab2qiime)
-
-    QIIME2_TAXTABLE(ch_taxtab)
-
-    QIIME2_SEQUENCE(DADA2_DENOISE.out.nonchimeric_asvs)
-
-    QIIME2_ALIGNMENT(DECIPHER.out.alignment)
-
-    QIIME2_TREE(
-        ch_tree,
-        ROOT_TREE.out.rooted_tree
-    )
-
-    SESSION_INFO()
-
     //
     // Collate and save software versions
     //