From d85ac9e3345c6b2871081b439479a705377a1851 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 15 Nov 2024 16:38:35 +0100 Subject: [PATCH 1/5] add silva=138.2 key for --dada_ref_taxonomy --- conf/ref_databases.config | 13 ++++++++++--- nextflow.config | 2 +- nextflow_schema.json | 5 +++-- .../local/utils_nfcore_ampliseq_pipeline/main.nf | 2 +- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/conf/ref_databases.config b/conf/ref_databases.config index e640179a..6d3687c4 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -178,11 +178,18 @@ params { taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species" } 'silva' { - title = "Silva 138.1 prokaryotic SSU" - file = [ "https://zenodo.org/record/4587955/files/silva_nr99_v138.1_wSpecies_train_set.fa.gz", "https://zenodo.org/record/4587955/files/silva_species_assignment_v138.1.fa.gz" ] + title = "Silva 138.2 prokaryotic SSU" + file = [ "https://zenodo.org/records/14169026/files/silva_nr99_v138.2_toSpecies_trainset.fa.gz", "https://zenodo.org/records/14169026/files/silva_v138.2_assignSpecies.fa.gz" ] citation = "Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO. The SILVA ribosomal RNA gene database project: improved data processing and web-based tools. Nucleic Acids Res. 2013 Jan;41(Database issue):D590-6. doi: 10.1093/nar/gks1219. Epub 2012 Nov 28. PMID: 23193283; PMCID: PMC3531112." fmtscript = "taxref_reformat_standard.sh" - dbversion = "SILVA v138.1 (https://zenodo.org/record/4587955)" + dbversion = "SILVA v138.2 (https://zenodo.org/records/14169026)" + } + 'silva=138.2' { + title = "Silva 138.2 prokaryotic SSU" + file = [ "https://zenodo.org/records/14169026/files/silva_nr99_v138.2_toSpecies_trainset.fa.gz", "https://zenodo.org/records/14169026/files/silva_v138.2_assignSpecies.fa.gz" ] + citation = "Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO. 
The SILVA ribosomal RNA gene database project: improved data processing and web-based tools. Nucleic Acids Res. 2013 Jan;41(Database issue):D590-6. doi: 10.1093/nar/gks1219. Epub 2012 Nov 28. PMID: 23193283; PMCID: PMC3531112." + fmtscript = "taxref_reformat_standard.sh" + dbversion = "SILVA v138.2 (https://zenodo.org/records/14169026)" } 'silva=138' { title = "Silva 138.1 prokaryotic SSU" diff --git a/nextflow.config b/nextflow.config index 1982ab26..64af5e64 100644 --- a/nextflow.config +++ b/nextflow.config @@ -109,7 +109,7 @@ params { skip_report = false // Database options - dada_ref_taxonomy = "silva=138" + dada_ref_taxonomy = "silva=138.2" dada_assign_taxlevels = null dada_ref_tax_custom = null dada_ref_tax_custom_sp = null diff --git a/nextflow_schema.json b/nextflow_schema.json index e0b263b2..41ff591c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -353,7 +353,7 @@ "type": "string", "help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . 
This will download the desired database, format it to produce a file that is compatible with DADA2's assignTaxonomy and another file that is compatible with DADA2's addSpecies.\n\nThe following databases are supported:\n- GTDB - Genome Taxonomy Database - 16S rRNA\n- SBDI-GTDB, a Sativa-vetted version of the GTDB 16S rRNA\n- PR2 - Protist Reference Ribosomal Database - 18S rRNA\n- RDP - Ribosomal Database Project - 16S rRNA\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- COIDB - eukaryotic Cytochrome Oxidase I (COI) from The Barcode of Life Data System (BOLD) - COI\n\nGenerally, using `gtdb`, `pr2`, `rdp`, `sbdi-gtdb`, `silva`, `coidb`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version.\n\nPlease note that commercial/non-academic entities [require licensing](https://www.arb-silva.de/silva-license-information) for SILVA v132 database (non-default) but not from v138 on (default).", "description": "Name of supported database, and optionally also version number", - "default": "silva=138", + "default": "silva=138.2", "enum": [ "coidb", "coidb=221216", @@ -378,8 +378,9 @@ "sbdi-gtdb=R06-RS202-3", "sbdi-gtdb=R06-RS202-1", "silva", - "silva=132", + "silva=138.2", "silva=138", + "silva=132", "unite-alleuk", "unite-alleuk=9.0", "unite-alleuk=8.3", diff --git a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf index 52da55d4..dd504e41 100644 --- a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf @@ -233,7 +233,7 @@ def validateInputParameters() { "pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0", "rdp","rdp=18", "sbdi-gtdb","sbdi-gtdb=R09-RS220-1","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1", - "silva","silva=138","silva=132", + "silva","silva=138.2","silva=138","silva=132", 
"unite-fungi","unite-fungi=10.0","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2", "unite-alleuk","unite-alleuk=10.0","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2" ] From 9af80aa64ceea6d94a11174efbc0c4e864e33503 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 15 Nov 2024 16:40:23 +0100 Subject: [PATCH 2/5] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ecf1f6b..0921344f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#798](https://github.com/nf-core/ampliseq/pull/798) - Added SILVA version 138.2 of DADA2 taxonomy database: `silva=13.2` or `silva` as parameter to `--dada2_ref_taxonomy` + ### `Changed` ### `Fixed` From fe6aaab3aa364174d9731d6f9932b3c007c9f471 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 15 Nov 2024 16:48:47 +0100 Subject: [PATCH 3/5] update docs --- docs/usage.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 3a340800..52319a2f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -214,7 +214,7 @@ Please note the following additional requirements: Taxonomic classification of ASVs can be performed with tools DADA2, SINTAX, Kraken2 or QIIME2. Multiple taxonomic reference databases are pre-configured for those tools, but user supplied databases are also supported for some tools. Alternatively (or in addition), phylogenetic placement can be used to extract taxonomic classifications. -In case multiple tools for taxonomic classification are executed in one pipeline run, only the taxonomic classification result of one tool is forwarded to downstream analysis with QIIME2. The priority is `phylogenetic placement` > `DADA2` > `SINTAX` > `Kraken2` > `QIIME2`. 
+In case multiple tools for taxonomic classification are executed in one pipeline run, only the taxonomic classification result of one tool is forwarded to downstream analysis with QIIME2. The priority is `phylogenetic placement` > `DADA2` > `SINTAX` > `Kraken2` > `QIIME2`; this order is by no means a recommendation for a specific tool but a technical limitation. Default setting for taxonomic classification is DADA2 with the SILVA reference taxonomy database. @@ -222,11 +222,11 @@ Pre-configured reference taxonomy databases are: | Database key | DADA2 | SINTAX | Kraken2 | QIIME2 | Target genes | | ------------ | ----- | ------ | ------- | ------ | --------------------------------------------- | -| silva | + | - | + | + | 16S rRNA | -| gtdb | +¹ | - | - | - | 16S rRNA | +| silva | +¹ | - | + | + | 16S rRNA | +| gtdb | +² | - | - | - | 16S rRNA | | sbdi-gtdb | + | - | - | - | 16S rRNA | | rdp | + | - | + | - | 16S rRNA | -| greengenes | - | - | + | (+)² | 16S rRNA | +| greengenes | - | - | + | (+)³ | 16S rRNA | | greengenes2 | - | - | - | + | 16S rRNA | | pr2 | + | - | - | - | 18S rRNA | | unite-fungi | + | + | - | - | eukaryotic nuclear ribosomal ITS region | @@ -235,9 +235,9 @@ Pre-configured reference taxonomy databases are: | midori2-co1 | + | - | - | - | eukaryotic Cytochrome Oxidase I (COI) | | phytoref | + | - | - | - | eukaryotic plastid 16S rRNA | | zehr-nifh | + | - | - | - | Nitrogenase iron protein NifH | -| standard | - | - | + | - | any in genomes of archaea, bacteria, viruses³ | +| standard | - | - | + | - | any in genomes of archaea, bacteria, viruses⁴ | -¹[`--dada_taxonomy_rc`](https://nf-co.re/ampliseq/parameters#dada_taxonomy_rc) is recommended; ²: de-replicated at 85%, only for testing purposes; ³: quality of results might vary +¹: As of Silva version 138, optimized for classification of Bacteria and Archaea, not suitable for Eukaryotes; ²: [`--dada_taxonomy_rc`](https://nf-co.re/ampliseq/parameters#dada_taxonomy_rc) is recommended; ³: de-replicated at
85%, only for testing purposes; ⁴: quality of results might vary Special features of taxonomic classification tools: From 286e75ed5531cacb62b2cfaee11bcf240bf57f7a Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 15 Nov 2024 17:41:13 +0100 Subject: [PATCH 4/5] fix typo --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0921344f..61f8695f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [#798](https://github.com/nf-core/ampliseq/pull/798) - Added SILVA version 138.2 of DADA2 taxonomy database: `silva=13.2` or `silva` as parameter to `--dada2_ref_taxonomy` +- [#798](https://github.com/nf-core/ampliseq/pull/798) - Added SILVA version 138.2 of DADA2 taxonomy database: `silva=138.2` or `silva` as parameter to `--dada_ref_taxonomy` ### `Changed` From bc8b1467458457c322d11721e2763ea2a237b177 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 15 Nov 2024 17:41:34 +0100 Subject: [PATCH 5/5] modify ref tax parsing --- bin/taxref_reformat_standard.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/taxref_reformat_standard.sh b/bin/taxref_reformat_standard.sh index e9585a81..008b73ce 100755 --- a/bin/taxref_reformat_standard.sh +++ b/bin/taxref_reformat_standard.sh @@ -5,4 +5,4 @@ gunzip -c *train*gz > assignTaxonomy.fna -# and the file for add species, identified by containing "species" in the name, is renamed -mv *species*gz addSpecies.fna.gz +# and the file for add species, identified by containing "assign" in the name, is renamed +mv *assign*gz addSpecies.fna.gz