forked from NorwegianVeterinaryInstitute/Talos
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01_build_kraken_db.nf
131 lines (91 loc) · 2.89 KB
/
01_build_kraken_db.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
* This is a nextflow workflow to build a kraken database using the following databases
* Archaea
* Bacteria
* plasmids
* viral sequences
* human
* fungi
* protozoa
*/
/*
* pipeline input parameters
*/
// Start-up banner: reports the Nextflow work directory and the two
// kraken2-related parameters this script reads.
def startup_banner = """\
METAGENOMICS - N F P I P E L I N E
===================================
temporary - directory : ${workDir}
location of kraken2 directory : ${params.kraken2.path}
kraken2 database : ${params.kraken2_dir}
"""
log.info(startup_banner.stripIndent())
/*
* process names
*/
// Taxa whose libraries are downloaded to build the database.
// Full list kept for reference (currently disabled):
//taxons = Channel.from('archaea', 'plasmid','bacteria', 'viral','human','fungi', 'protozoa')
// NOTE(review): `taxons` is read by several processes below; a value channel can be
// read repeatedly, whereas a DSL1 queue channel from Channel.from can only be consumed
// once - confirm before re-enabling the full list.
def selected_taxon = 'viral'
taxons = Channel.value(selected_taxon)
// Download the taxonomy files needed by kraken2.
//
// Runs `kraken2-build --download-taxonomy` against the database directory named by
// params.kraken2_dir (relative to the task work directory).
// Output: the resulting `<kraken2_dir>/taxonomy` directory, emitted on `taxonomy_ch`
//         and published under params.kraken2.path using params.savemode.
process download_taxonomy {
    conda 'conda_yml/kraken2_env.yml'
    publishDir "${params.kraken2.path}", mode: "${params.savemode}"
    // Directive form (no `=`), as documented for process directives.
    // NOTE(review): presumably run locally because the download needs network access - confirm.
    executor 'local'

    output:
    file ("${params.kraken2_dir}/taxonomy") into taxonomy_ch

    """
    kraken2-build --download-taxonomy --threads 1 --db ${params.kraken2_dir}
    """
}
// Download the reference library for one taxon.
//
// Input:  `taxa`  - library name taken from the `taxons` channel; it is passed
//                   verbatim to `kraken2-build --download-library`.
// Output: the `<kraken2_dir>/library/<taxa>` directory, emitted on `downloads_ch`.
//
// The download is done with --no-masking; low-complexity masking is performed
// afterwards by the `masking_taxa` process.
process download_taxa {
    conda 'conda_yml/kraken2_env.yml'
    //publishDir "${params.kraken2.path}", mode: "${params.savemode}"
    tag "$taxa"
    // Directive form (no `=`), as documented for process directives.
    executor 'local'

    input:
    val taxa from taxons

    output:
    file ("${params.kraken2_dir}/library/$taxa") into downloads_ch

    """
    kraken2-build --download-library $taxa --threads 2 --no-masking --db ${params.kraken2_dir}
    """
}
// Mask low-complexity sequences, overwriting the previously downloaded unmasked library.
//
// Input:  `taxa`  - library name from the `taxons` channel.
//         the `<kraken2_dir>/library/<taxa>` directory produced by `download_taxa`.
// Output: the same directory, now holding the masked `library.fna` plus a
//         `library.fna.masked` marker file, emitted on `database_ch` and published
//         under params.kraken2.path using params.savemode.
process masking_taxa {
    conda 'conda_yml/kraken2_env.yml'
    publishDir "${params.kraken2.path}", mode: "${params.savemode}"
    tag "$taxa"
    // Directive form (no `=`), as documented for process directives.
    executor 'slurm'
    label 'small'

    input:
    val taxa from taxons
    file ("${params.kraken2_dir}/library/$taxa") from downloads_ch

    output:
    file ("${params.kraken2_dir}/library/$taxa") into database_ch

    """
    # Make a dustmasker failure fail the whole pipeline; otherwise sed's exit status
    # hides it and the mv below would replace library.fna with a truncated file.
    set -o pipefail
    cd ${params.kraken2_dir}/library/$taxa
    ls -lath
    # dustmasker writes masked bases in lower case; sed turns every lower-case
    # character on non-header lines into x
    dustmasker -in library.fna -outfmt fasta | sed -e '/^>/!s/[a-z]/x/g' > library.fna.tmp
    mv library.fna.tmp library.fna
    touch library.fna.masked
    cd -
    """
}
// Build the kraken2 database from the downloaded taxonomy and the masked libraries.
//
// Input:  `taxa`  - library name from the `taxons` channel.
//         the taxonomy directory from `taxonomy_ch`.
//         all masked library directories from `database_ch`, collected so the build
//         starts only after every library has been masked.
// Output: none declared; the database is built in place under
//         params.kraken2.path/params.kraken2_dir.
//
// NOTE(review): the commands below operate on the published location
// (params.kraken2.path/params.kraken2_dir) rather than on the staged inputs, so they
// rely on the upstream publishDir copies being complete when this task starts - confirm.
process build_Kraken2_db {
    conda 'conda_yml/kraken2_env.yml'
    // Directive form (no `=`), as documented for process directives.
    executor 'slurm'
    label 'large'
    label 'longtime'

    input:
    val taxa from taxons
    file ("${params.kraken2_dir}/taxonomy") from taxonomy_ch
    file ("${params.kraken2_dir}/library/$taxa") from database_ch.collect()

    """
    echo This is working
    ls ${params.kraken2.path}/${params.kraken2_dir}
    ls ${params.kraken2.path}/${params.kraken2_dir}/library
    # The build mode of kraken2-build is selected with the --build flag.
    kraken2-build --build --threads 8 --db ${params.kraken2.path}/${params.kraken2_dir}
    #kraken2-build --build --threads 24 --db ${params.kraken2.path}/${params.kraken2_dir}
    """
}