Skip to content

Commit 716faf2

Browse files
committed
Upgrade the blast/makeblastdb module (taken from nf-core)
1 parent 4258031 commit 716faf2

File tree

17 files changed

+247
-177
lines changed

17 files changed

+247
-177
lines changed

README.md

+4
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ Then import the desired module in your pipeline script:
4646
include { <subworkflow_name> } from '../subworkflows/ebi-metagenomics/<subworkflow_name>.nf'
4747
```
4848

49+
## nf-core modules
50+
51+
The [nf-core](https://nf-co.re/) team maintains a large number of high-quality modules, and our team contributes to them whenever we can. At the moment, [nf-core tools](https://github.com/nf-core/tools/) does not support subworkflows that install modules from different repositories ([#3083](https://github.com/nf-core/tools/pull/3083)). For that reason we have copied some modules from nf-core into this repo (a nasty hack, but it works). The nf-core team has been making impressive progress on supporting this use case (subworkflows with modules from different repositories), and we will remove the duplicated modules once that support is available. In the meantime, you will find duplicated modules from nf-core here.
52+
4953
## References
5054

5155
This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).

modules/ebi-metagenomics/blast/blastn/tests/main.nf.test

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ nextflow_process {
66
config "./nextflow.config"
77
tag "modules"
88
tag "modules_nfcore"
9+
tag "modules_ebimetagenomics"
910
tag "blast"
1011
tag "blast/blastn"
1112
tag "blast/makeblastdb"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
- bioconda::blast=2.15.0

modules/ebi-metagenomics/blast/makeblastdb/main.nf

+43-11
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,62 @@
11
process BLAST_MAKEBLASTDB {
2-
tag "$fasta"
2+
tag "$meta.id"
33
label 'process_medium'
44

5-
conda "bioconda::blast=2.13.0"
5+
conda "${moduleDir}/environment.yml"
66
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
7-
'https://depot.galaxyproject.org/singularity/blast:2.13.0--hf3cf87c_0' :
8-
'biocontainers/blast:2.13.0--hf3cf87c_0' }"
7+
'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1':
8+
'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }"
99

1010
input:
11-
path fasta
11+
tuple val(meta), path(fasta)
1212

1313
output:
14-
path 'blast_db' , emit: db
15-
path "versions.yml" , emit: versions
14+
tuple val(meta), path("${meta.id}"), emit: db
15+
path "versions.yml" , emit: versions
1616

1717
when:
1818
task.ext.when == null || task.ext.when
1919

2020
script:
2121
def args = task.ext.args ?: ''
22+
def prefix = task.ext.prefix ?: "${meta.id}"
23+
def is_compressed = fasta.getExtension() == "gz" ? true : false
24+
def fasta_name = is_compressed ? fasta.getBaseName() : fasta
2225
"""
26+
if [ "${is_compressed}" == "true" ]; then
27+
gzip -c -d ${fasta} > ${fasta_name}
28+
fi
29+
2330
makeblastdb \\
24-
-in $fasta \\
25-
$args
26-
mkdir blast_db
27-
mv ${fasta}* blast_db
31+
-in ${fasta_name} \\
32+
${args}
33+
mkdir ${prefix}
34+
mv ${fasta_name}* ${prefix}
35+
36+
cat <<-END_VERSIONS > versions.yml
37+
"${task.process}":
38+
blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//')
39+
END_VERSIONS
40+
"""
41+
42+
stub:
43+
def args = task.ext.args ?: ''
44+
def prefix = task.ext.prefix ?: "${meta.id}"
45+
def is_compressed = fasta.getExtension() == "gz" ? true : false
46+
def fasta_name = is_compressed ? fasta.getBaseName() : fasta
47+
"""
48+
touch ${fasta_name}.fasta
49+
touch ${fasta_name}.fasta.ndb
50+
touch ${fasta_name}.fasta.nhr
51+
touch ${fasta_name}.fasta.nin
52+
touch ${fasta_name}.fasta.njs
53+
touch ${fasta_name}.fasta.not
54+
touch ${fasta_name}.fasta.nsq
55+
touch ${fasta_name}.fasta.ntf
56+
touch ${fasta_name}.fasta.nto
57+
mkdir ${prefix}
58+
mv ${fasta_name}* ${prefix}
59+
2860
cat <<-END_VERSIONS > versions.yml
2961
"${task.process}":
3062
blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//')

modules/ebi-metagenomics/blast/makeblastdb/meta.yml

+27-10
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,37 @@ tools:
1212
documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs
1313
doi: 10.1016/S0022-2836(05)80360-2
1414
licence: ["US-Government-Work"]
15+
identifier: ""
1516
input:
16-
- fasta:
17-
type: file
18-
description: Input fasta file
19-
pattern: "*.{fa,fasta}"
17+
- - meta:
18+
type: map
19+
description: |
20+
Groovy Map containing sample information
21+
e.g. [ id:'test', single_end:false ]
22+
- fasta:
23+
type: file
24+
description: Input fasta file
25+
pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
2026
output:
2127
- db:
22-
type: directory
23-
description: Output directory containing blast database files
24-
pattern: "*"
28+
- meta:
29+
type: map
30+
description: |
31+
Groovy Map containing sample information
32+
e.g. [ id:'test', single_end:false ]
33+
- ${meta.id}:
34+
type: directory
35+
description: Output directory containing blast database files
36+
pattern: "*"
2537
- versions:
26-
type: file
27-
description: File containing software versions
28-
pattern: "versions.yml"
38+
- versions.yml:
39+
type: file
40+
description: File containing software versions
41+
pattern: "versions.yml"
2942
authors:
3043
- "@joseespinosa"
3144
- "@drpatelh"
45+
maintainers:
46+
- "@joseespinosa"
47+
- "@drpatelh"
48+
- "@vagkaratzas"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
nextflow_process {
2+
3+
name "Test Process BLAST_MAKEBLASTDB"
4+
script "../main.nf"
5+
process "BLAST_MAKEBLASTDB"
6+
config "./nextflow.config"
7+
tag "modules"
8+
tag "modules_nfcore"
9+
tag "modules_ebimetagenomics"
10+
tag "blast"
11+
tag "blast/makeblastdb"
12+
13+
test("Should build a blast db folder from a fasta file") {
14+
15+
when {
16+
process {
17+
"""
18+
input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
19+
"""
20+
}
21+
}
22+
23+
then {
24+
assertAll(
25+
{ assert process.success },
26+
{
27+
assert process.out.db.size() == 1
28+
29+
def all_files = ( new File(process.out.db[0][1]) ).listFiles()
30+
def stable_file_names = [
31+
'genome.fasta',
32+
'genome.fasta.ndb',
33+
'genome.fasta.nhr',
34+
'genome.fasta.not',
35+
'genome.fasta.nsq',
36+
'genome.fasta.ntf',
37+
'genome.fasta.nto'
38+
]
39+
40+
def stable_files = all_files.findAll { it.name in stable_file_names }.toSorted()
41+
42+
assert snapshot(
43+
all_files.collect { it.name }.toSorted(),
44+
stable_files,
45+
process.out.versions[0]
46+
).match()
47+
}
48+
)
49+
}
50+
51+
}
52+
53+
test("Should build a blast db folder from a zipped fasta file") {
54+
55+
when {
56+
process {
57+
"""
58+
input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ]
59+
"""
60+
}
61+
}
62+
63+
then {
64+
assertAll(
65+
{ assert process.success },
66+
{
67+
assert process.out.db.size() == 1
68+
69+
def all_files = ( new File(process.out.db[0][1]) ).listFiles()
70+
def stable_file_names = [
71+
'genome.fasta',
72+
'genome.fasta.ndb',
73+
'genome.fasta.nhr',
74+
'genome.fasta.not',
75+
'genome.fasta.nsq',
76+
'genome.fasta.ntf',
77+
'genome.fasta.nto'
78+
]
79+
80+
def stable_files = all_files.findAll { it.name in stable_file_names }.toSorted()
81+
82+
assert snapshot(
83+
all_files.collect { it.name }.toSorted(),
84+
stable_files,
85+
process.out.versions[0]
86+
).match()
87+
}
88+
)
89+
}
90+
91+
}
92+
93+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
2+
{
3+
"Should build a blast db folder from a fasta file": {
4+
"content": [
5+
[
6+
"genome.fasta",
7+
"genome.fasta.ndb",
8+
"genome.fasta.nhr",
9+
"genome.fasta.nin",
10+
"genome.fasta.njs",
11+
"genome.fasta.not",
12+
"genome.fasta.nsq",
13+
"genome.fasta.ntf",
14+
"genome.fasta.nto"
15+
],
16+
[
17+
"genome.fasta:md5,6e9fe4042a72f2345f644f239272b7e6",
18+
"genome.fasta.ndb:md5,0d553c830656469211de113c5022f06d",
19+
"genome.fasta.nhr:md5,f4b4ddb034fd3dd7b25c89e9d50c004e",
20+
"genome.fasta.not:md5,1e53e9d08f1d23af0299cfa87478a7bb",
21+
"genome.fasta.nsq:md5,982cbc7d9e38743b9b1037588862b9da",
22+
"genome.fasta.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb",
23+
"genome.fasta.nto:md5,33cdeccccebe80329f1fdbee7f5874cb"
24+
],
25+
"versions.yml:md5,cb63396fd8d8f4df57913b63452d6ba8"
26+
],
27+
"meta": {
28+
"nf-test": "0.8.4",
29+
"nextflow": "24.04.4"
30+
},
31+
"timestamp": "2024-08-09T15:40:32.52079"
32+
},
33+
"Should build a blast db folder from a zipped fasta file": {
34+
"content": [
35+
[
36+
"genome.fasta",
37+
"genome.fasta.gz",
38+
"genome.fasta.ndb",
39+
"genome.fasta.nhr",
40+
"genome.fasta.nin",
41+
"genome.fasta.njs",
42+
"genome.fasta.not",
43+
"genome.fasta.nsq",
44+
"genome.fasta.ntf",
45+
"genome.fasta.nto"
46+
],
47+
[
48+
"genome.fasta:md5,6e9fe4042a72f2345f644f239272b7e6",
49+
"genome.fasta.ndb:md5,0d553c830656469211de113c5022f06d",
50+
"genome.fasta.nhr:md5,f4b4ddb034fd3dd7b25c89e9d50c004e",
51+
"genome.fasta.not:md5,1e53e9d08f1d23af0299cfa87478a7bb",
52+
"genome.fasta.nsq:md5,982cbc7d9e38743b9b1037588862b9da",
53+
"genome.fasta.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb",
54+
"genome.fasta.nto:md5,33cdeccccebe80329f1fdbee7f5874cb"
55+
],
56+
"versions.yml:md5,cb63396fd8d8f4df57913b63452d6ba8"
57+
],
58+
"meta": {
59+
"nf-test": "0.8.4",
60+
"nextflow": "24.04.4"
61+
},
62+
"timestamp": "2024-08-09T15:40:37.45154"
63+
}
64+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
process {
2+
withName: BLAST_MAKEBLASTDB {
3+
ext.args = '-dbtype nucl'
4+
}
5+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
blast/makeblastdb:
2+
- modules/ebi-metagenomics/blast/makeblastdb/**

subworkflows/ebi-metagenomics/assembly_decontamination/tests/main.nf.test

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
nextflow_workflow {
2-
2+
33
name "Test subworkflow ASSEMBLY_DECONTAMINATION"
44
script "../main.nf"
55
config "./nextflow.config"

tests/config/pytest_modules.yml

-8
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,3 @@
1-
blast/blastp:
2-
- modules/ebi-metagenomics/blast/blastp/**
3-
- tests/modules/ebi-metagenomics/blast/blastp/**
4-
5-
blast/makeblastdb:
6-
- modules/ebi-metagenomics/blast/makeblastdb/**
7-
- tests/modules/ebi-metagenomics/blast/makeblastdb/**
8-
91
bmtagger/bmtagger:
102
- modules/ebi-metagenomics/bmtagger/bmtagger/**
113
- tests/modules/ebi-metagenomics/bmtagger/bmtagger/**

tests/modules/ebi-metagenomics/blast/blastp/main.nf

-42
This file was deleted.

tests/modules/ebi-metagenomics/blast/blastp/nextflow.config

-9
This file was deleted.

0 commit comments

Comments
 (0)