Skip to content

Commit

Permalink
last subworkflow for now; will likely move chimera removal into anoth…
Browse files Browse the repository at this point in the history
…er one at a later point
  • Loading branch information
cjfields committed Jan 13, 2025
1 parent 30d80f0 commit cfd0f83
Show file tree
Hide file tree
Showing 10 changed files with 167 additions and 165 deletions.
6 changes: 3 additions & 3 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -242,19 +242,19 @@ process {
]
}

withName: SEQTABLE2TEXT {
withName: DADA2_SEQTABLE2TEXT {
publishDir = [
path: { "${params.outdir}/TSV" },
mode: params.publish_dir_mode,
pattern: '*.txt'
pattern: 'seqtab_final.txt'
]
}

withName: DADA2_TAXTABLE2TEXT {
publishDir = [
path: { "${params.outdir}/TSV" },
mode: params.publish_dir_mode,
pattern: '*.txt'
pattern: 'tax_final*.txt'
]
}

Expand Down
1 change: 1 addition & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,6 @@ params {
trim_rev = 25
reference = 'https://file-server.igb.illinois.edu/~cjfields/TADA/silva_nr99_v138.1_train_set.fa.gz'
phylo_tool = 'fasttree'
to_QIIME2 = true
// species = 'https://file-server.igb.illinois.edu/~cjfields/TADA/silva_species_assignment_v138.1.fa.gz'
}
20 changes: 12 additions & 8 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,26 @@ params {
genome = null
igenomes_base = 's3://ngi-igenomes/igenomes/'
igenomes_ignore = false
fasta = null// MultiQC options
fasta = null // MultiQC options
multiqc_config = null
multiqc_title = null
multiqc_logo = null
max_multiqc_email_size = '25.MB'
multiqc_methods_description = null

// TODO: this needs to be removed or made more specific
amplicon = "16S"
quality_binning = false // set to true if using binned qualities (NovaSeq, PacBio Revio)
quality_bins = ""
amplicon = "16S"

// loessErrfun, PacBioErrfun, makeBinnedQualErrfun, loessErrfun_mod1, loessErrfun_mod2, loessErrfun_mod3, loessErrfun_mod4
error_function = "loessErrfun"

quality_binning = false // set to true if using binned qualities (NovaSeq, PacBio Revio)

// quality_bins (below) must be set when quality_binning is true and error_function is 'makeBinnedQualErrfun'

quality_bins = ""
amplicon_type = "overlapping"
platform = "illumina"
platform = "illumina" // illumina, pacbio; 454 and others could be added

// QC
skip_FASTQC = false // set to run this step by default, this can fail with large sample #'s
Expand Down Expand Up @@ -63,9 +70,6 @@ params {
// I think we can make these bool 'false' as above with R coercion (either through as.logical or using optparse in an Rscript)
rmPhiX = false

// Error model
// custom_error_model = 'illumina' // NYI, thinking about best way to implement this

// ASV inference pooling
pool = "pseudo"

Expand Down
6 changes: 6 additions & 0 deletions subworkflows/local/dada2_denoise.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ include { DADA_INFER } from '../../modules/local/dada
include { POOLED_SEQTABLE } from '../../modules/local/pooledseqtable'
include { DADA2_REMOVE_CHIMERAS } from '../../modules/local/removechimeras'
include { RENAME_ASVS } from '../../modules/local/renameasvs'
include { DADA2_SEQTABLE2TEXT } from '../../modules/local/seqtable2txt'

workflow DADA2_DENOISE {

Expand Down Expand Up @@ -55,7 +56,12 @@ workflow DADA2_DENOISE {
POOLED_SEQTABLE.out.filtered_seqtable
)

DADA2_SEQTABLE2TEXT(
RENAME_ASVS.out.seqtable_renamed
)

emit:
seqtab2qiime = DADA2_SEQTABLE2TEXT.out.seqtab2qiime
nonchimeric_asvs = RENAME_ASVS.out.nonchimeric_asvs
seqtable_renamed = RENAME_ASVS.out.seqtable_renamed
readmap = RENAME_ASVS.out.readmap
Expand Down
72 changes: 51 additions & 21 deletions subworkflows/local/generate_output.nf
Original file line number Diff line number Diff line change
@@ -1,36 +1,66 @@
// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
// https://github.com/nf-core/modules/tree/master/subworkflows
// You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
// https://nf-co.re/join
// TODO nf-core: A subworkflow SHOULD import at least two modules

include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main'
include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main'
include { BIOM } from '../../modules/local/biom'
include { QIIME2_FEATURETABLE } from '../../modules/local/qiime2featuretable'
include { QIIME2_TAXTABLE } from '../../modules/local/qiime2taxtable'
include { QIIME2_SEQUENCE } from '../../modules/local/qiime2seqs'
include { QIIME2_ALIGNMENT } from '../../modules/local/qiime2aln'
include { QIIME2_TREE } from '../../modules/local/qiime2tree'
include { SESSION_INFO } from '../../modules/local/rsessioninfo'

workflow GENERATE_OUTPUT {

// TODO: I'd like to have this simply be TSV files (no RDS)
// so we can generate from other subworkflows if needed
take:
// TODO nf-core: edit input (take) channels
ch_bam // channel: [ val(meta), [ bam ] ]
seq_table_rds
seq_table_qiime
tax_table_rds
tax_table_tsv
asvs
alignment
unrooted_tree
rooted_tree

main:

ch_versions = Channel.empty()

// TODO nf-core: substitute modules here for the modules of your subworkflow
if (params.to_BIOM) {
BIOM(
seq_table_rds,
tax_table_rds
)
}

SAMTOOLS_SORT ( ch_bam )
ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
if (params.to_QIIME2) {
QIIME2_FEATURETABLE(
seq_table_qiime
)

SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
QIIME2_TAXTABLE(
tax_table_tsv
)

emit:
// TODO nf-core: edit emitted channels
bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ]
bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ]
csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ]
QIIME2_SEQUENCE(
asvs
)

if (!params.skip_alignment) {
QIIME2_ALIGNMENT(
alignment
)
}

if (!params.skip_tree) {
QIIME2_TREE(
unrooted_tree,
rooted_tree
)
}
}

// TODO: May become redundant with versions
SESSION_INFO()

emit:
versions = ch_versions // channel: [ versions.yml ]
}

61 changes: 37 additions & 24 deletions subworkflows/local/phylogeny.nf
Original file line number Diff line number Diff line change
@@ -1,36 +1,49 @@
// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
// https://github.com/nf-core/modules/tree/master/subworkflows
// You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
// https://nf-co.re/join
// TODO nf-core: A subworkflow SHOULD import at least two modules

include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main'
include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main'
include { DECIPHER } from '../../modules/local/decipher'
include { PHANGORN } from '../../modules/local/phangorn'
include { FASTTREE } from '../../modules/local/fasttree'
include { ROOT_TREE } from '../../modules/local/roottree'

workflow PHYLOGENY {

take:
// TODO nf-core: edit input (take) channels
ch_bam // channel: [ val(meta), [ bam ] ]
asvs

main:

ch_alignment = Channel.empty()
ch_unrooted_tree = Channel.empty()
ch_rooted_tree = Channel.empty()
ch_versions = Channel.empty()

// TODO nf-core: substitute modules here for the modules of your subworkflow

SAMTOOLS_SORT ( ch_bam )
ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())

SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
if (!params.skip_alignment) {
DECIPHER(
asvs
)
ch_alignment = DECIPHER.out.alignment
if (!params.skip_tree) {
if (params.phylo_tool == 'phangorn') {
PHANGORN(
ch_alignment
)
ch_unrooted_tree = PHANGORN.out.treeGTR
} else if (params.phylo_tool == 'fasttree') {
FASTTREE(
ch_alignment
)
ch_unrooted_tree = FASTTREE.out.treeGTR
}

ROOT_TREE(
ch_unrooted_tree,
params.phylo_tool
)
ch_rooted_tree = ROOT_TREE.out.rooted_tree
}
}

emit:
// TODO nf-core: edit emitted channels
bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ]
bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ]
csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ]

versions = ch_versions // channel: [ versions.yml ]
ch_alignment
ch_unrooted_tree
ch_rooted_tree
versions = ch_versions // channel: [ versions.yml ]
}

11 changes: 11 additions & 0 deletions subworkflows/local/pre_qc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
// https://nf-co.re/join
// TODO nf-core: A subworkflow SHOULD import at least two modules

include { FASTQC } from '../../modules/nf-core/fastqc/main'
include { PLOT_QUALITY_PROFILE } from '../../modules/local/plotqualityprofile'
include { VSEARCH_EESTATS } from '../../modules/local/vsearch_eestats'
include { VSEARCH_OVERLAP } from '../../modules/local/vsearchoverlap'
Expand All @@ -20,13 +21,22 @@ workflow PRE_QC {

main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()

FASTQC (
ch_samplesheet
)

ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]})
ch_versions = ch_versions.mix(FASTQC.out.versions.first())

if (!skip_merging) {
VSEARCH_OVERLAP(
ch_samplesheet
)

ch_versions = ch_versions.mix(VSEARCH_OVERLAP.out.versions.first())

MERGE_OVERLAP_CHECK(
VSEARCH_OVERLAP.out.merged_log.collect()
)
Expand Down Expand Up @@ -54,4 +64,5 @@ workflow PRE_QC {

emit:
versions = ch_versions // channel: [ versions.yml ]
zip = FASTQC.out.zip
}
38 changes: 16 additions & 22 deletions subworkflows/local/qualitycontrol.nf
Original file line number Diff line number Diff line change
@@ -1,36 +1,30 @@
// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
// https://github.com/nf-core/modules/tree/master/subworkflows
// You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
// https://nf-co.re/join
// TODO nf-core: A subworkflow SHOULD import at least two modules
include { READ_TRACKING } from '../../modules/local/readtracking'
include { PLOT_MERGED_HEATMAP } from '../../modules/local/plotmerged'
include { PLOT_ASV_DIST } from '../../modules/local/plotasvlen'

include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main'
include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main'

workflow QUALITYCONTROL {
workflow QUALITY_CONTROL {

take:
// TODO nf-core: edit input (take) channels
ch_bam // channel: [ val(meta), [ bam ] ]
ch_readtracking
merged_seqs
filtered_seqtable

main:

ch_versions = Channel.empty()

// TODO nf-core: substitute modules here for the modules of your subworkflow
READ_TRACKING(
ch_readtracking.collect()
)

SAMTOOLS_SORT ( ch_bam )
ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
PLOT_MERGED_HEATMAP(
merged_seqs
)

SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
PLOT_ASV_DIST(
filtered_seqtable
)

emit:
// TODO nf-core: edit emitted channels
bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ]
bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ]
csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ]

versions = ch_versions // channel: [ versions.yml ]
}

3 changes: 2 additions & 1 deletion subworkflows/local/taxonomy.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ workflow TAXONOMY {
ch_versions = Channel.empty()
ch_taxtab = Channel.empty()
ch_metrics = Channel.empty()

// TODO: eventually this will have multiple options for
// taxonomic assignment
DADA2_ASSIGN_TAXA_SPECIES(
readmap,
ref_file,
Expand Down
Loading

0 comments on commit cfd0f83

Please sign in to comment.