-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquasiflow2.nf
201 lines (168 loc) · 7.41 KB
/
quasiflow2.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#!/usr/bin/env nextflow
/*
========================================================================================
Q U A S I F L O W P I P E L I N E
========================================================================================
A Nextflow pipeline for analysis of NGS-based HIV Drug Resistance data
----------------------------------------------------------------------------------------
*/
// Print the pipeline usage text through the Nextflow logger.
// The text interpolates params.version and lists the --reads/--outdir options,
// the HyDRA tuning options and the remaining optional arguments.
def helpMessage() {
    // The text block is kept flush-left so the logged output is unchanged.
    def usageText = """
============================================================
nedjoni/QuasiFlow2 ~ version ${params.version}
============================================================
Usage:
The typical command for running the pipeline is as follows:
nextflow run QuasiFlow2 --reads <path to fastq files> --outdir <path to output directory>
Optional arguments:
--reads Path to input data (must be surrounded with quotes, default is the folder "fastq")
--outdir Path to directory where results will be saved (default - results).
HyDRA arguments (optional):
--reporting_threshold Minimum mutation frequency percent to report.
--consensus_pct The minimum percentage of a base needs to be incorporated into the consensus sequence.
--length_cutoff Reads that fall short of the specified length will be filtered out.
--score_cutoff Reads that have a median or mean quality score (depending on the score type specified) less than the score cutoff value will be filtered out.
--min_variant_qual Minimum quality for the variant to be considered later on in the pipeline.
--min_dp Minimum required read depth for the variant to be considered later on in the pipeline.
--min_ac The minimum required allele count for the variant to be considered later on in the pipeline.
--min_freq The minimum required frequency for a mutation to be considered in the drug resistance report.
Other arguments (optional):
--overwrite Set to true to overwrite previous reports (default - false).
--name Name of the run.
--email Email to receive notification once the run is done.
""".stripIndent()
    log.info(usageText)
}
// Help handling: `help` is given a false default here; when it is truthy the
// usage text is printed and the run stops with exit status 0.
params.help = false
if (params.help) {
    helpMessage()
    exit(0)
}
// Decide which run name to report.
// When workflow.runName is NOT of the form <lowercase letters>_<lowercase letters>
// it is used as the run name; otherwise params.name is kept.
// Per the original note, this catches a name given as either -name or --name.
def requestedName = params.name
custom_runName = (workflow.runName ==~ /[a-z]+_[a-z]+/) ? requestedName : workflow.runName
// Header log info: banner with the pipeline version and a pointer to --help
log.info "============================================================"
log.info " nedjoni/QuasiFlow2 ~ version ${params.version}"
log.info "============================================================"
log.info " Use parameter --help for the full list of parameters"
log.info "************************************************************"

// Collect the effective run settings so they can be printed as one aligned table
def summary = [:]
summary['Run Name'] = custom_runName ?: workflow.runName
summary['Reads'] = params.reads
summary['Output directory'] = params.outdir
summary['Reporting threshold'] = params.reporting_threshold
summary['Consensus percentage'] = params.consensus_pct
summary['Length cut off'] = params.length_cutoff
summary['Score cutoff'] = params.score_cutoff
summary['Minimum variant quality'] = params.min_variant_qual
summary['Minimum depth'] = params.min_dp
summary['Minimum allele count'] = params.min_ac
summary['Minimum mutation frequency'] = params.min_freq
if (workflow.revision) {
    summary['Pipeline Release'] = workflow.revision
}
// Use Nextflow's workflow metadata rather than $HOME/$USER/$PWD script variables:
// those are looked up by name at runtime and abort the run when a variable
// (e.g. USER inside some containers or schedulers) is not set.
summary['Current home'] = workflow.homeDir
summary['Current user'] = workflow.userName
summary['Current path'] = workflow.launchDir
summary['Script dir'] = workflow.projectDir
summary['Config Profile'] = workflow.profile
if (params.email) {
    summary['E-mail Address'] = params.email
}

// Pad every key to the longest key so the values line up in one column
// (a fixed width of 15 was shorter than most keys).
def keyWidth = summary.keySet().collect { it.length() }.max()
log.info summary.collect { k, v -> "${k.padRight(keyWidth)}: $v" }.join("\n")
log.info "========================================="
// Run FastQC on the two FASTQ files of one read pair.
// Input:  tuple (pairId, in_fastq) where in_fastq holds the pair's two files.
// Output: tuple (pairId, FastQC .zip files, FastQC .html files).
// Results are copied to ${params.outdir}/fastqc.
process runFastQC {
    tag "${pairId}"
    // The help text documents --overwrite (default false); honour it instead of
    // always passing false. A missing or false value keeps the previous behaviour.
    publishDir "${params.outdir}/fastqc", mode: "copy", overwrite: (params.overwrite ? true : false)

    input:
    tuple val(pairId), path(in_fastq)

    output:
    tuple val(pairId), path("*.zip"), path("*.html")

    script:
    """
    fastqc ${in_fastq[0]} ${in_fastq[1]}
    """
}
// Aggregate the FastQC results of all samples into a single MultiQC report.
// Input:  fastqc_results - the FastQC result files passed to multiqc.
// Output: raw_reads_multiqc_report.html (multiqc_report.html renamed),
//         copied to ${params.outdir}.
process runMultiQC {
    // The help text documents --overwrite (default false); honour it instead of
    // always passing false. A missing or false value keeps the previous behaviour.
    publishDir "${params.outdir}", mode: "copy", overwrite: (params.overwrite ? true : false)

    input:
    path fastqc_results

    output:
    path("raw_reads_multiqc_report.html")

    script:
    """
    multiqc ${fastqc_results} -o .
    mv multiqc_report.html raw_reads_multiqc_report.html
    """
}
// Trim one read pair with Trim Galore (--paired, -q 30, --dont_gzip).
// Input:  tuple (pairId, in_fastq) where in_fastq holds the pair's two files.
// Output: tuple (pairId, the *.fq files written to the task directory).
// Results are copied to ${params.outdir}/adaptors-trimmed-reads.
process runTrimGalore {
    tag "${pairId}"
    // The help text documents --overwrite (default false); honour it instead of
    // always passing false. A missing or false value keeps the previous behaviour.
    publishDir "${params.outdir}/adaptors-trimmed-reads", mode: "copy", overwrite: (params.overwrite ? true : false)

    input:
    tuple val(pairId), path(in_fastq)

    output:
    tuple val(pairId), path("*.fq") // the trimmed reads

    script:
    """
    trim_galore --dont_gzip -q 30 --paired ${in_fastq[0]} ${in_fastq[1]} -o .
    """
}
// Run `quasitools hydra` on one pair of trimmed reads.
// Input:  tuple (pairId, trimmed_reads) where trimmed_reads holds the two read files.
// Output: tuple (pairId, consensus FASTA, drug resistance report CSV,
//         mutation report AAVF, filtered FASTQ), each renamed to carry the pairId.
// The HyDRA thresholds are taken from the pipeline params of the same name.
// Results are copied to params.outdir.
process runHydra {
    tag "${pairId}"
    // The help text documents --overwrite (default false); honour it instead of
    // always passing false. A missing or false value keeps the previous behaviour.
    publishDir params.outdir, mode: "copy", overwrite: (params.overwrite ? true : false)

    input:
    tuple val(pairId), path(trimmed_reads)

    output:
    tuple val(pairId), path("consensus_${pairId}.fasta"), // consensus sequence
          path("dr_report_${pairId}.csv"),
          path("mutation_report_${pairId}.aavf"),
          path("filtered_${pairId}.fastq")

    script:
    """
    # Run Hydra
    quasitools hydra \
        ${trimmed_reads[0]} ${trimmed_reads[1]} \
        -o . \
        --generate_consensus \
        --reporting_threshold ${params.reporting_threshold} \
        --consensus_pct ${params.consensus_pct} \
        --length_cutoff ${params.length_cutoff} \
        --score_cutoff ${params.score_cutoff} \
        --min_variant_qual ${params.min_variant_qual} \
        --min_dp ${params.min_dp} \
        --min_ac ${params.min_ac} \
        --min_freq ${params.min_freq}
    mv consensus.fasta consensus_${pairId}.fasta
    mv dr_report.csv dr_report_${pairId}.csv
    mv mutation_report.aavf mutation_report_${pairId}.aavf
    mv filtered.fastq filtered_${pairId}.fastq
    """
}
// Main workflow block: FastQC + MultiQC on the raw reads, then Trim Galore,
// then HyDRA on the trimmed reads.
workflow {
    // Input channel of read pairs; stop with a clear message when nothing
    // matches params.reads.
    reads_channel = Channel
        .fromFilePairs(params.reads)
        .ifEmpty { error "Cannot find any reads matching: ${params.reads}" }
    reads_channel.view { "Read pairs: $it" } // Optional, for debugging purposes

    // Step 1: Run FastQC for each read pair
    fastqc_results_channel = runFastQC(reads_channel)

    // Step 2: Run MultiQC after collecting all FastQC results.
    // Only the second tuple element (the FastQC .zip files) is forwarded.
    multiqc_input = fastqc_results_channel
        .map { it[1] }
        .collect()
    runMultiQC(multiqc_input)

    // Step 3: Run Trim Galore for adapter and quality trimming
    trimmed_reads_channel = runTrimGalore(reads_channel)
    trimmed_reads_channel.view { "Trimmed reads: $it" }

    // Step 4: Run Hydra for mapping, consensus sequences, and mutation and drug resistance reports
    hydra_results_channel = runHydra(trimmed_reads_channel)
    hydra_results_channel.view { "Hydra results: $it" }
}