From d85ac9e3345c6b2871081b439479a705377a1851 Mon Sep 17 00:00:00 2001
From: d4straub <daniel.straub@uni-tuebingen.de>
Date: Fri, 15 Nov 2024 16:38:35 +0100
Subject: [PATCH 1/5] add silva=138.2 key for --dada_ref_taxonomy

---
 conf/ref_databases.config                           | 13 ++++++++++---
 nextflow.config                                     |  2 +-
 nextflow_schema.json                                |  5 +++--
 .../local/utils_nfcore_ampliseq_pipeline/main.nf    |  2 +-
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/conf/ref_databases.config b/conf/ref_databases.config
index e640179a..6d3687c4 100644
--- a/conf/ref_databases.config
+++ b/conf/ref_databases.config
@@ -178,11 +178,18 @@ params {
             taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species"
         }
         'silva' {
-            title = "Silva 138.1 prokaryotic SSU"
-            file = [ "https://zenodo.org/record/4587955/files/silva_nr99_v138.1_wSpecies_train_set.fa.gz", "https://zenodo.org/record/4587955/files/silva_species_assignment_v138.1.fa.gz" ]
+            title = "Silva 138.2 prokaryotic SSU"
+            file = [ "https://zenodo.org/records/14169026/files/silva_nr99_v138.2_toSpecies_trainset.fa.gz", "https://zenodo.org/records/14169026/files/silva_v138.2_assignSpecies.fa.gz" ]
             citation = "Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO. The SILVA ribosomal RNA gene database project: improved data processing and web-based tools. Nucleic Acids Res. 2013 Jan;41(Database issue):D590-6. doi: 10.1093/nar/gks1219. Epub 2012 Nov 28. PMID: 23193283; PMCID: PMC3531112."
             fmtscript = "taxref_reformat_standard.sh"
-            dbversion = "SILVA v138.1 (https://zenodo.org/record/4587955)"
+            dbversion = "SILVA v138.2 (https://zenodo.org/records/14169026)"
+        }
+        'silva=138.2' {
+            title = "Silva 138.2 prokaryotic SSU"
+            file = [ "https://zenodo.org/records/14169026/files/silva_nr99_v138.2_toSpecies_trainset.fa.gz", "https://zenodo.org/records/14169026/files/silva_v138.2_assignSpecies.fa.gz" ]
+            citation = "Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO. The SILVA ribosomal RNA gene database project: improved data processing and web-based tools. Nucleic Acids Res. 2013 Jan;41(Database issue):D590-6. doi: 10.1093/nar/gks1219. Epub 2012 Nov 28. PMID: 23193283; PMCID: PMC3531112."
+            fmtscript = "taxref_reformat_standard.sh"
+            dbversion = "SILVA v138.2 (https://zenodo.org/records/14169026)"
         }
         'silva=138' {
             title = "Silva 138.1 prokaryotic SSU"
diff --git a/nextflow.config b/nextflow.config
index 1982ab26..64af5e64 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -109,7 +109,7 @@ params {
     skip_report            = false
 
     // Database options
-    dada_ref_taxonomy        = "silva=138"
+    dada_ref_taxonomy        = "silva=138.2"
     dada_assign_taxlevels    = null
     dada_ref_tax_custom      = null
     dada_ref_tax_custom_sp   = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index e0b263b2..41ff591c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -353,7 +353,7 @@
                     "type": "string",
                     "help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . This will download the desired database, format it to produce a file that is compatible with DADA2's assignTaxonomy and another file that is compatible with DADA2's addSpecies.\n\nThe following databases are supported:\n- GTDB - Genome Taxonomy Database - 16S rRNA\n- SBDI-GTDB, a Sativa-vetted version of the GTDB 16S rRNA\n- PR2 - Protist Reference Ribosomal Database - 18S rRNA\n- RDP - Ribosomal Database Project - 16S rRNA\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- COIDB - eukaryotic Cytochrome Oxidase I (COI) from The Barcode of Life Data System (BOLD) - COI\n\nGenerally, using `gtdb`, `pr2`, `rdp`, `sbdi-gtdb`, `silva`, `coidb`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version.\n\nPlease note that commercial/non-academic entities [require licensing](https://www.arb-silva.de/silva-license-information) for SILVA v132 database (non-default) but not from v138 on (default).",
                     "description": "Name of supported database, and optionally also version number",
-                    "default": "silva=138",
+                    "default": "silva=138.2",
                     "enum": [
                         "coidb",
                         "coidb=221216",
@@ -378,8 +378,9 @@
                         "sbdi-gtdb=R06-RS202-3",
                         "sbdi-gtdb=R06-RS202-1",
                         "silva",
-                        "silva=132",
+                        "silva=138.2",
                         "silva=138",
+                        "silva=132",
                         "unite-alleuk",
                         "unite-alleuk=9.0",
                         "unite-alleuk=8.3",
diff --git a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf
index 52da55d4..dd504e41 100644
--- a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf
@@ -233,7 +233,7 @@ def validateInputParameters() {
         "pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0",
         "rdp","rdp=18",
         "sbdi-gtdb","sbdi-gtdb=R09-RS220-1","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1",
-        "silva","silva=138","silva=132",
+        "silva","silva=138.2","silva=138","silva=132",
         "unite-fungi","unite-fungi=10.0","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2",
         "unite-alleuk","unite-alleuk=10.0","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"
     ]

From 9af80aa64ceea6d94a11174efbc0c4e864e33503 Mon Sep 17 00:00:00 2001
From: d4straub <daniel.straub@uni-tuebingen.de>
Date: Fri, 15 Nov 2024 16:40:23 +0100
Subject: [PATCH 2/5] update CHANGELOG

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1ecf1f6b..0921344f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Added`
 
+- [#798](https://github.com/nf-core/ampliseq/pull/798) - Added SILVA version 138.2 of DADA2 taxonomy database: `silva=13.2` or `silva` as parameter to `--dada2_ref_taxonomy`
+
 ### `Changed`
 
 ### `Fixed`

From fe6aaab3aa364174d9731d6f9932b3c007c9f471 Mon Sep 17 00:00:00 2001
From: d4straub <daniel.straub@uni-tuebingen.de>
Date: Fri, 15 Nov 2024 16:48:47 +0100
Subject: [PATCH 3/5] update docs

---
 docs/usage.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 3a340800..52319a2f 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -214,7 +214,7 @@ Please note the following additional requirements:
 
 Taxonomic classification of ASVs can be performed with tools DADA2, SINTAX, Kraken2 or QIIME2. Multiple taxonomic reference databases are pre-configured for those tools, but user supplied databases are also supported for some tools. Alternatively (or in addition), phylogenetic placement can be used to extract taxonomic classifications.
 
-In case multiple tools for taxonomic classification are executed in one pipeline run, only the taxonomic classification result of one tool is forwarded to downstream analysis with QIIME2. The priority is `phylogenetic placement` > `DADA2` > `SINTAX` > `Kraken2` > `QIIME2`.
+In case multiple tools for taxonomic classification are executed in one pipeline run, only the taxonomic classification result of one tool is forwarded to downstream analysis with QIIME2. The priority is `phylogenetic placement` > `DADA2` > `SINTAX` > `Kraken2` > `QIIME2`. This order is by no means a recommendation for a specific tool but reflects a technical limitation.
 
 Default setting for taxonomic classification is DADA2 with the SILVA reference taxonomy database.
 
@@ -222,11 +222,11 @@ Pre-configured reference taxonomy databases are:
 
 | Database key | DADA2 | SINTAX | Kraken2 | QIIME2 | Target genes                                  |
 | ------------ | ----- | ------ | ------- | ------ | --------------------------------------------- |
-| silva        | +     | -      | +       | +      | 16S rRNA                                      |
-| gtdb         | +¹    | -      | -       | -      | 16S rRNA                                      |
+| silva        | +¹    | -      | +       | +      | 16S rRNA                                      |
+| gtdb         | +²    | -      | -       | -      | 16S rRNA                                      |
 | sbdi-gtdb    | +     | -      | -       | -      | 16S rRNA                                      |
 | rdp          | +     | -      | +       | -      | 16S rRNA                                      |
-| greengenes   | -     | -      | +       | (+)²   | 16S rRNA                                      |
+| greengenes   | -     | -      | +       | (+)³   | 16S rRNA                                      |
 | greengenes2  | -     | -      | -       | +      | 16S rRNA                                      |
 | pr2          | +     | -      | -       | -      | 18S rRNA                                      |
 | unite-fungi  | +     | +      | -       | -      | eukaryotic nuclear ribosomal ITS region       |
@@ -235,9 +235,9 @@ Pre-configured reference taxonomy databases are:
 | midori2-co1  | +     | -      | -       | -      | eukaryotic Cytochrome Oxidase I (COI)         |
 | phytoref     | +     | -      | -       | -      | eukaryotic plastid 16S rRNA                   |
 | zehr-nifh    | +     | -      | -       | -      | Nitrogenase iron protein NifH                 |
-| standard     | -     | -      | +       | -      | any in genomes of archaea, bacteria, viruses³ |
+| standard     | -     | -      | +       | -      | any in genomes of archaea, bacteria, viruses⁴ |
 
-¹[`--dada_taxonomy_rc`](https://nf-co.re/ampliseq/parameters#dada_taxonomy_rc) is recommended; ²: de-replicated at 85%, only for testing purposes; ³: quality of results might vary
+¹: As of SILVA version 138, optimized for classification of Bacteria and Archaea and not suitable for Eukaryotes; ²: [`--dada_taxonomy_rc`](https://nf-co.re/ampliseq/parameters#dada_taxonomy_rc) is recommended; ³: de-replicated at 85%, only for testing purposes; ⁴: quality of results might vary
 
 Special features of taxonomic classification tools:
 

From 286e75ed5531cacb62b2cfaee11bcf240bf57f7a Mon Sep 17 00:00:00 2001
From: d4straub <daniel.straub@uni-tuebingen.de>
Date: Fri, 15 Nov 2024 17:41:13 +0100
Subject: [PATCH 4/5] fix typo

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0921344f..61f8695f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Added`
 
-- [#798](https://github.com/nf-core/ampliseq/pull/798) - Added SILVA version 138.2 of DADA2 taxonomy database: `silva=13.2` or `silva` as parameter to `--dada2_ref_taxonomy`
+- [#798](https://github.com/nf-core/ampliseq/pull/798) - Added SILVA version 138.2 of DADA2 taxonomy database: `silva=138.2` or `silva` as parameter to `--dada_ref_taxonomy`
 
 ### `Changed`
 

From bc8b1467458457c322d11721e2763ea2a237b177 Mon Sep 17 00:00:00 2001
From: d4straub <daniel.straub@uni-tuebingen.de>
Date: Fri, 15 Nov 2024 17:41:34 +0100
Subject: [PATCH 5/5] modify ref tax parsing

---
 bin/taxref_reformat_standard.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/taxref_reformat_standard.sh b/bin/taxref_reformat_standard.sh
index e9585a81..008b73ce 100755
--- a/bin/taxref_reformat_standard.sh
+++ b/bin/taxref_reformat_standard.sh
@@ -5,4 +5,4 @@
 gunzip -c *train*gz > assignTaxonomy.fna
 
-# and the file for add species, identified by containing "species" in the name, is renamed
-mv *species*gz addSpecies.fna.gz
+# and the file for add species, identified by containing "assign" in the name, is renamed
+mv *assign*gz addSpecies.fna.gz