From 9c34d48a9015fd3158ab85ff1fc0e213366c5cbb Mon Sep 17 00:00:00 2001
From: jasmezz <jasmin.frangenberg@hki-jena.de>
Date: Mon, 12 Aug 2024 13:47:06 +0200
Subject: [PATCH 1/8] Update hamronization fargene input channel, optimize
 fargene publish_dir

---
 conf/modules.config       | 4 ++--
 docs/output.md            | 2 +-
 subworkflows/local/arg.nf | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 28a74a28..96b1eb98 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -279,13 +279,13 @@ process {
                 path: { "${params.outdir}/arg/fargene/${meta.id}" },
                 mode: params.publish_dir_mode,
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-                pattern: "*/{predictedGenes,retrievedFragments}/*"
+                pattern: "*/{hmmsearchresults,predictedGenes,retrievedFragments}/*"
             ],
             [
                 path: { "${params.outdir}/arg/fargene/${meta.id}/" },
                 mode: params.publish_dir_mode,
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-                pattern: "*/{hmmsearchresults,tmpdir}/*",
+                pattern: "*/{tmpdir}/*",
                 enabled: params.arg_fargene_savetmpfiles
             ]
         ]
diff --git a/docs/output.md b/docs/output.md
index 08be618e..9f71278a 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -327,7 +327,7 @@ Output Summaries:
 - `fargene/`
   - `fargene_analysis.log`: logging output that fARGene produced during its run
   - `<sample_name>/`:
-    - `hmmsearchresults/`: output from intermediate hmmsearch step (only if `--arg_fargene_savetmpfiles` supplied)
+    - `hmmsearchresults/`: output from intermediate hmmsearch step
     - `predictedGenes/`:
       - `*-filtered.fasta`: nucleotide sequences of predicted ARGs
       - `*-filtered-peptides.fasta`: amino acid sequences of predicted ARGs
diff --git a/subworkflows/local/arg.nf b/subworkflows/local/arg.nf
index 8761109e..81dffb72 100644
--- a/subworkflows/local/arg.nf
+++ b/subworkflows/local/arg.nf
@@ -85,7 +85,7 @@ workflow ARG {
 
         // Reporting
         // Note: currently hardcoding versions, has to be updated with every fARGene-update
-        HAMRONIZATION_FARGENE( FARGENE.out.hmm.transpose(), 'tsv', '0.1', '0.1' )
+        HAMRONIZATION_FARGENE( FARGENE.out.hmm_genes.transpose(), 'tsv', '0.1', '0.1' )
         ch_versions = ch_versions.mix( HAMRONIZATION_FARGENE.out.versions )
         ch_input_to_hamronization_summarize = ch_input_to_hamronization_summarize.mix( HAMRONIZATION_FARGENE.out.tsv )
     }

From 8f16bd79e345ec11a83fcc3da54fd7f14a2dd881 Mon Sep 17 00:00:00 2001
From: jasmezz <jasmin.frangenberg@hki-jena.de>
Date: Mon, 12 Aug 2024 13:54:23 +0200
Subject: [PATCH 2/8] Update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a9cf09b..c23ad5d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -60,6 +60,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#406](https://github.com/nf-core/funcscan/pull/406) Fixed prediction tools not being executed if annotation workflow skipped. (by @jasmezz)
 - [#407](https://github.com/nf-core/funcscan/pull/407) Fixed comBGC bug when parsing multiple antiSMASH files. (by @jasmezz)
 - [#409](https://github.com/nf-core/funcscan/pull/409) Fixed argNorm overwriting its output for DeepARG. (by @jasmezz, @jfy133)
+- [#411](https://github.com/nf-core/funcscan/pull/411) Optimised hAMRonization input: only required gene annotations from fARGene output are parsed. (by @jasmezz)
 
 ### `Dependencies`
 

From 30417bf3518ccdc5b06b3e66d68f73a310bf80d0 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates" <jfy133@gmail.com>
Date: Mon, 12 Aug 2024 19:01:39 +0200
Subject: [PATCH 3/8] Update fargene module

---
 modules.json                                  |  12 +-
 modules/nf-core/fargene/main.nf               |  32 +-
 modules/nf-core/fargene/meta.yml              |   6 +-
 modules/nf-core/fargene/tests/main.nf.test    |  61 ++--
 .../nf-core/fargene/tests/main.nf.test.snap   | 275 +++++++++++++++++-
 5 files changed, 353 insertions(+), 33 deletions(-)

diff --git a/modules.json b/modules.json
index c50f5186..21bc54fa 100644
--- a/modules.json
+++ b/modules.json
@@ -55,6 +55,11 @@
                         "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d",
                         "installed_by": ["modules"]
                     },
+                    "argnorm": {
+                        "branch": "master",
+                        "git_sha": "e4fc46af5ec30070e6aef780aba14f89a28caa88",
+                        "installed_by": ["modules"]
+                    },
                     "bakta/bakta": {
                         "branch": "master",
                         "git_sha": "9d0f89b445e1f5b2fb30476f4be9a8b519c07846",
@@ -87,7 +92,7 @@
                     },
                     "fargene": {
                         "branch": "master",
-                        "git_sha": "a7231cbccb86535529e33859e05d19ac93f3ea04",
+                        "git_sha": "9cf6f5e4ad9cc11a670a94d56021f1c4f9a91ec1",
                         "installed_by": ["modules"]
                     },
                     "gecco/run": {
@@ -205,11 +210,6 @@
                         "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d",
                         "installed_by": ["modules"],
                         "patch": "modules/nf-core/untar/untar.diff"
-                    },
-                    "argnorm": {
-                        "branch": "master",
-                        "git_sha": "e4fc46af5ec30070e6aef780aba14f89a28caa88",
-                        "installed_by": ["modules"]
                     }
                 }
             },
diff --git a/modules/nf-core/fargene/main.nf b/modules/nf-core/fargene/main.nf
index b2feb86a..42aa2ca2 100644
--- a/modules/nf-core/fargene/main.nf
+++ b/modules/nf-core/fargene/main.nf
@@ -17,6 +17,7 @@ process FARGENE {
     tuple val(meta), path("*.log")                                                                               , emit: log
     tuple val(meta), path("${prefix}/results_summary.txt")                                                       , emit: txt
     tuple val(meta), path("${prefix}/hmmsearchresults/*.out")                                    , optional: true, emit: hmm
+    tuple val(meta), path("${prefix}/hmmsearchresults/retrieved-*.out")                          , optional: true, emit: hmm_genes
     tuple val(meta), path("${prefix}/predictedGenes/predicted-orfs.fasta")                       , optional: true, emit: orfs
     tuple val(meta), path("${prefix}/predictedGenes/predicted-orfs-amino.fasta")                 , optional: true, emit: orfs_amino
     tuple val(meta), path("${prefix}/predictedGenes/retrieved-contigs.fasta")                    , optional: true, emit: contigs
@@ -24,7 +25,7 @@ process FARGENE {
     tuple val(meta), path("${prefix}/predictedGenes/*filtered.fasta")                            , optional: true, emit: filtered
     tuple val(meta), path("${prefix}/predictedGenes/*filtered-peptides.fasta")                   , optional: true, emit: filtered_pept
     tuple val(meta), path("${prefix}/retrievedFragments/all_retrieved_*.fastq")                  , optional: true, emit: fragments
-    tuple val(meta), path("${prefix}/retrievedFragments/retrievedFragments/trimmedReads/*.fasta"), optional: true, emit: trimmed
+    tuple val(meta), path("${prefix}/retrievedFragments/trimmedReads/*.fasta")                   , optional: true, emit: trimmed
     tuple val(meta), path("${prefix}/spades_assembly/*")                                         , optional: true, emit: spades
     tuple val(meta), path("${prefix}/tmpdir/*.fasta")                                            , optional: true, emit: metagenome
     tuple val(meta), path("${prefix}/tmpdir/*.out")                                              , optional: true, emit: tmp
@@ -50,4 +51,33 @@ process FARGENE {
         fargene: $VERSION
     END_VERSIONS
     """
+
+    stub:
+    def args = task.ext.args   ?: ''
+    prefix   = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    """
+    touch ${prefix}.log
+    mkdir -p ${prefix}/{hmmsearchresults,predictedGenes,retrievedFragments}
+    mkdir -p ${prefix}/retrievedFragments/trimmedReads/
+
+    touch ${prefix}/results_summary.txt
+    touch ${prefix}/hmmsearchresults/retrieved-${prefix}.out
+    touch ${prefix}/hmmsearchresults/${prefix}.out
+    touch ${prefix}/predictedGenes/predicted-orfs.fasta
+    touch ${prefix}/predictedGenes/predicted-orfs-amino.fasta
+    touch ${prefix}/predictedGenes/retrieved-contigs.fasta
+    touch ${prefix}/predictedGenes/retrieved-contigs-peptides.fasta
+    touch ${prefix}/predictedGenes/${prefix}-filtered.fasta
+    touch ${prefix}/predictedGenes/${prefix}-filtered-peptides.fasta
+    touch ${prefix}/retrievedFragments/all_retrieved_${prefix}.fastq
+    touch ${prefix}/retrievedFragments/trimmedReads/${prefix}.fasta
+
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fargene: $VERSION
+    END_VERSIONS
+    """
+
 }
diff --git a/modules/nf-core/fargene/meta.yml b/modules/nf-core/fargene/meta.yml
index 8f1540b1..9fc5ce0f 100644
--- a/modules/nf-core/fargene/meta.yml
+++ b/modules/nf-core/fargene/meta.yml
@@ -46,8 +46,12 @@ output:
       pattern: "*.{txt}"
   - hmm:
       type: file
-      description: output from hmmsearch
+      description: output from hmmsearch (both single gene annotations + contigs)
       pattern: "*.{out}"
+  - hmm_genes:
+      type: file
+      description: output from hmmsearch (single gene annotations only)
+      pattern: "retrieved-*.{out}"
   - orfs:
       type: file
       description: open reading frames (ORFs)
diff --git a/modules/nf-core/fargene/tests/main.nf.test b/modules/nf-core/fargene/tests/main.nf.test
index 4e2c5353..924405c6 100644
--- a/modules/nf-core/fargene/tests/main.nf.test
+++ b/modules/nf-core/fargene/tests/main.nf.test
@@ -9,23 +9,52 @@ nextflow_process {
     tag "fargene"
     tag "gunzip"
 
+    setup {
+
+        run("GUNZIP") {
+            script "../../gunzip/main.nf"
+            process {
+                """
+                input[0] =  Channel.fromList([
+                    tuple([ id:'test', single_end:false ], // meta map
+                    file(params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true))
+                ])
+                """
+            }
+        }
+    }
+
     test("fargene - bacteroides fragilis - contigs.fa.gz") {
 
-        setup {
-
-            run("GUNZIP") {
-                script "../../gunzip/main.nf"
-                process {
-                    """
-                    input[0] =  Channel.fromList([
-                        tuple([ id:'test', single_end:false ], // meta map
-                        file(params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true))
-                    ])
-                    """
+        when {
+            process {
+                """
+                input[0] = GUNZIP.out.gunzip
+                input[1] = 'class_a'
+                """
+            }
+        }
+
+        then {
+            assertAll {
+                { assert process.success }
+                { assert snapshot(
+                            process.out.txt,
+                            path(process.out.log[0][1]).readLines().last().contains("Output can be found in"),
+                            path(process.out.hmm[0][1]).readLines().last().contains("[ok]"),
+                            file(process.out.tmp[0][1].find { file(it).name == "tmp.out" }).readLines().last().contains("[ok]"),
+                            process.out.versions
+                    ).match()
                 }
             }
         }
 
+    }
+
+    test("fargene - bacteroides fragilis - contigs.fa.gz - stub") {
+
+        options "-stub"
+
         when {
             process {
                 """
@@ -36,12 +65,10 @@ nextflow_process {
         }
 
         then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(process.out.versions).match("versions") },
-                { assert snapshot(process.out.txt).match("txt") },
-                { assert path(process.out.log.get(0).get(1)).exists() },
-            )
+            assertAll {
+                { assert process.success }
+                { assert snapshot(process.out).match() }
+            }
         }
 
     }
diff --git a/modules/nf-core/fargene/tests/main.nf.test.snap b/modules/nf-core/fargene/tests/main.nf.test.snap
index 0e28de41..54724f1b 100644
--- a/modules/nf-core/fargene/tests/main.nf.test.snap
+++ b/modules/nf-core/fargene/tests/main.nf.test.snap
@@ -1,5 +1,262 @@
 {
-    "txt": {
+    "fargene - bacteroides fragilis - contigs.fa.gz - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "results_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "10": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "all_retrieved_test.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "11": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "12": [
+                    
+                ],
+                "13": [
+                    
+                ],
+                "14": [
+                    
+                ],
+                "15": [
+                    "versions.yml:md5,a146d432794c87b5850fb7c4cbee11fc"
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "retrieved-test.out:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "retrieved-test.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "4": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "predicted-orfs.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "5": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "predicted-orfs-amino.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "6": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "retrieved-contigs.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "7": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "retrieved-contigs-peptides.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "8": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test-filtered.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "9": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test-filtered-peptides.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "contigs": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "retrieved-contigs.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "contigs_pept": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "retrieved-contigs-peptides.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "filtered": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test-filtered.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "filtered_pept": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test-filtered-peptides.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "fragments": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "all_retrieved_test.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "hmm": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "retrieved-test.out:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "hmm_genes": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "retrieved-test.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "metagenome": [
+                    
+                ],
+                "orfs": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "predicted-orfs.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "orfs_amino": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "predicted-orfs-amino.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "spades": [
+                    
+                ],
+                "tmp": [
+                    
+                ],
+                "trimmed": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "txt": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "results_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,a146d432794c87b5850fb7c4cbee11fc"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-08-12T15:08:54.830926802"
+    },
+    "fargene - bacteroides fragilis - contigs.fa.gz": {
         "content": [
             [
                 [
@@ -9,16 +266,18 @@
                     },
                     "results_summary.txt:md5,690d351cfc52577263ef4cfab1c81f50"
                 ]
-            ]
-        ],
-        "timestamp": "2023-11-28T16:42:10.29998128"
-    },
-    "versions": {
-        "content": [
+            ],
+            true,
+            true,
+            true,
             [
                 "versions.yml:md5,a146d432794c87b5850fb7c4cbee11fc"
             ]
         ],
-        "timestamp": "2023-11-28T16:42:10.290144568"
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-08-12T15:24:49.384451483"
     }
 }
\ No newline at end of file

From 272905bed8f55515c20eae2799a658e4d9cf5ec0 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates" <jfy133@gmail.com>
Date: Tue, 13 Aug 2024 11:31:50 +0200
Subject: [PATCH 4/8] Tweak CHANGELOG

---
 CHANGELOG.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c23ad5d7..0cad94d0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Breaking change`
 
-[#391](https://github.com/nf-core/funcscan/pull/391) Made all "database" parameter names consistent, skip hmmsearch by default. (by @jasmezz)
+- [#391](https://github.com/nf-core/funcscan/pull/391) Made all "database" parameter names consistent, skip hmmsearch by default. (by @jasmezz)
 
 | Old parameter                                    | New parameter                           |
 | ------------------------------------------------ | --------------------------------------- |
@@ -27,6 +27,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 | `amp_skip_hmmsearch`                             | `amp_run_hmmsearch`                     |
 | `bgc_skip_hmmsearch`                             | `bgc_run_hmmsearch`                     |
 
+- [#343](https://github.com/nf-core/funcscan/pull/343) Standardized the resulting workflow summary tables to always start with 'sample_id\tcontig_id\t..'. Reformatted the output of `hamronization/summarize` module. (by @darcy220606)
+- [#411](https://github.com/nf-core/funcscan/pull/411) Optimised hAMRonization input: only high-quality hits from fARGene output are reported. (by @jasmezz, @jfy133)
+
 ### `Added`
 
 - [#322](https://github.com/nf-core/funcscan/pull/322) Updated all modules: introduce environment.yml files. (by @jasmezz)
@@ -44,7 +47,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Fixed`
 
-- [#343](https://github.com/nf-core/funcscan/pull/343) Standardized the resulting workflow summary tables to always start with 'sample_id\tcontig_id\t..'. Reformatted the output of `hamronization/summarize` module. (by @darcy220606)
 - [#348](https://github.com/nf-core/funcscan/pull/348) Updated samplesheet for pipeline tests to 'samplesheet_reduced.csv' with smaller datasets to reduce resource consumption. Updated prodigal module to fix pigz issue. Removed `tests/` from `.gitignore`. (by @darcy220606)
 - [#362](https://github.com/nf-core/funcscan/pull/362) Save annotations from bakta in subdirectories per sample. (by @jasmezz)
 - [#363](https://github.com/nf-core/funcscan/pull/363) Removed warning from DeepBGC usage docs. (by @jasmezz)
@@ -53,14 +55,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#376](https://github.com/nf-core/funcscan/pull/376) Fixed an occasional RGI process failure when certain files not produced. (❤️ to @amizeranschi for reporting, fix by @amizeranschi & @jfy133)
 - [#386](https://github.com/nf-core/funcscan/pull/386) Updated DeepBGC module to fix output file names, separate annotation step for all BGC tools, add warning if no BGCs found, fix MultiQC reporting of annotation workflow. (by @jfy133, @jasmezz)
 - [#392](https://github.com/nf-core/funcscan/pull/392) & [#397](https://github.com/nf-core/funcscan/pull/397) Fixed a docker/singularity only error appearing when running with conda. (❤️ to @ewissel for reporting, fix by @jfy33 & @jasmezz)
-- [#394](https://github.com/nf-core/funcscan/pull/394) Fixed BGC input channel: pre-annotated input is picked up correctly now. (by @jfy133, @jasmezz)
 - [#391](https://github.com/nf-core/funcscan/pull/391) Skip hmmmsearch by default to not crash pipeline if user provides no HMM files, updated docs. (by @jasmezz)
 - [#397](https://github.com/nf-core/funcscan/pull/397) Removed deprecated AMPcombi module, fixed variable name in BGC workflow, updated minor parts in docs (usage, parameter schema). (by @jasmezz)
 - [#402](https://github.com/nf-core/funcscan/pull/402) Fixed BGC length calculation for antiSMASH hits by comBGC. (by @jasmezz)
 - [#406](https://github.com/nf-core/funcscan/pull/406) Fixed prediction tools not being executed if annotation workflow skipped. (by @jasmezz)
 - [#407](https://github.com/nf-core/funcscan/pull/407) Fixed comBGC bug when parsing multiple antiSMASH files. (by @jasmezz)
 - [#409](https://github.com/nf-core/funcscan/pull/409) Fixed argNorm overwriting its output for DeepARG. (by @jasmezz, @jfy133)
-- [#411](https://github.com/nf-core/funcscan/pull/411) Optimised hAMRonization input: only required gene annotations from fARGene output are parsed. (by @jasmezz)
 
 ### `Dependencies`
 

From 56278bd57be507400cbd37bb009c1a56a25e1844 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates" <jfy133@gmail.com>
Date: Tue, 13 Aug 2024 14:25:50 +0200
Subject: [PATCH 5/8] Update nf-test as we reduce number of fargene hits

---
 tests/test_preannotated.nf.test.snap | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_preannotated.nf.test.snap b/tests/test_preannotated.nf.test.snap
index cbaa48d3..b1843ff4 100644
--- a/tests/test_preannotated.nf.test.snap
+++ b/tests/test_preannotated.nf.test.snap
@@ -106,13 +106,13 @@
     },
     "hamronization": {
         "content": [
-            "hamronization_combined_report.tsv:md5,bded2a60a7c2cb28ec35aa5cdcb85de5"
+            "hamronization_combined_report.tsv:md5,69a16cdf66a817c2ed1a725ecce02d5b"
         ],
         "meta": {
-            "nf-test": "0.9.0",
-            "nextflow": "24.04.3"
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.4"
         },
-        "timestamp": "2024-07-27T08:11:25.408635625"
+        "timestamp": "2024-08-13T14:22:38.851885764"
     },
     "abricate": {
         "content": [

From 4b3ed324c95418f4e8527d1ca360593a97b2010e Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates" <jfy133@gmail.com>
Date: Thu, 15 Aug 2024 13:51:43 +0200
Subject: [PATCH 6/8] Add more specific descriptions of what directories should
 go into each database directory

---
 docs/usage.md | 103 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 65 insertions(+), 38 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index f58afa9a..36a3ef12 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -44,6 +44,31 @@ work            # Directory containing temporary files required for the run
 # Other nextflow hidden files, eg. history of pipeline runs and old logs
 ```
 
+If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file.
+
+Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`.
+
+:::warning
+Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
+:::
+
+The above pipeline run specified with a params file in yaml format:
+
+```bash
+nextflow run nf-core/funcscan -profile docker -params-file params.yaml
+```
+
+with `params.yaml` containing:
+
+```yaml
+input: './samplesheet.csv'
+outdir: './results/'
+genome: 'GRCh37'
+<...>
+```
+
+You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
+
 ## Samplesheet input
 
 nf-core/funcscan takes FASTA files as input, typically contigs or whole genome sequences. To supply these to the pipeline, you will need to create a samplesheet with information about the samples you would like to analyse. Use this parameter to specify its location.
@@ -95,13 +120,15 @@ The implementation of some tools in the pipeline may have some particular behavi
 
 MMseqs2 is currently the only taxonomic classification tool used in the pipeline to assign a taxonomic lineage to the input contigs. The database used to assign the taxonomic lineage can either be:
 
-- a custom based database created by the user using `mmseqs createdb` externally and beforehand. If this flag is assigned, this database takes precedence over the default database in `--mmseqs_db_id`.
+- A custom database created by the user using `mmseqs createdb` externally and beforehand. If this flag is assigned, this database takes precedence over the default database in `--taxa_classification_mmseqs_db_id`.
 
   ```bash
-  --taxa_classification_mmseqs_db 'path/to/mmsesqs_custom_database/dir'
+  --taxa_classification_mmseqs_db '<path>/<to>/<mmseqs_custom_database>/<directory>'
   ```
 
-- an MMseqs2 ready database. These databases were compiled by the developers of MMseqs2 and can be called using their labels. All available options can be found [here](https://github.com/soedinglab/MMseqs2/wiki#downloading-databases). Only use those databases that have taxonomy files available (i.e., Taxonomy == Yes). By default mmseqs2 in the pipeline uses '[Kalamari](https://github.com/lskatz/Kalamari)', and runs an aminoacid based alignment. However, if the user requires a more comprehensive taxonomic classification, we recommend the use of [GTDB](https://gtdb.ecogenomic.org/), but for that please remember to increase the memory, CPU threads and time required for the process `MMSEQS_TAXONOMY`.
+  The contents of the directory should have files such as `<dbname>.version` and `<dbname>.taxonomy` in the top level.
+
+- An MMseqs2 ready database. These databases were compiled by the developers of MMseqs2 and can be called using their labels. All available options can be found [here](https://github.com/soedinglab/MMseqs2/wiki#downloading-databases). Only use those databases that have taxonomy files available (i.e., Taxonomy == Yes). By default, MMseqs2 in the pipeline uses '[Kalamari](https://github.com/lskatz/Kalamari)', and runs an amino acid-based alignment. However, if the user requires a more comprehensive taxonomic classification, we recommend the use of [GTDB](https://gtdb.ecogenomic.org/), but for that please remember to increase the memory, CPU threads and time required for the process `MMSEQS_TAXONOMY`.
 
   ```bash
   --taxa_classification_mmseqs_db_id 'Kalamari'
@@ -146,9 +173,11 @@ tar xvzf db.tar.gz
 And then passed to the pipeline with:
 
 ```bash
---annotation_bakta_db /<path>/<to>/db/
+--annotation_bakta_db /<path>/<to>/<db>/
 ```
 
+The contents of the directory should have files such as `*.dmnd` in the top level.
+
 :::info
 The flag `--save_db` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future.
 :::
@@ -174,9 +203,11 @@ Ensure to wrap this path in double quotes if using an asterisk, to ensure Nextfl
 For AMPcombi, nf-core/funcscan will by default download the most recent version of the [DRAMP](http://dramp.cpu-bioinfor.org/) database as a reference database for aligning the AMP hits in the AMP workflow. However, the user can also supply their own custom AMP database by following the guidelines in [AMPcombi](https://github.com/Darcy220606/AMPcombi). This can then be passed to the pipeline with:
 
 ```bash
---amp_ampcombi_db '/<path>/<to>/<amp_ref_database>
+--amp_ampcombi_db '/<path>/<to>/<ampcombi_database>'
 ```
 
+The contents of the directory should have files such as `*.dmnd` and `*.fasta` in the top level.
+
 :::warning
 The pipeline will automatically run Pyrodigal instead of Prodigal if the parameters `--run_annotation_tool prodigal --run_amp_screening` are both provided. This is due to an incompatibility issue of Prodigal's output `.gbk` file with multiple downstream tools.
 :::
@@ -210,11 +241,16 @@ conda activate abricate
 
 ## Download the bacmet2 database
 abricate-get_db --db bacmet2 ## the logging will tell you where the database is downloaded to, e.g. /home/<user>/bin/miniconda3/envs/abricate/db/bacmet2/sequences
+```
 
-## Run nextflow
-nextflow run nf-core/funcscan -r <version> -profile docker --input samplesheet.csv --outdir <outdir> --run_arg_screening --arg_abricate_db /home/<user>/bin/miniconda3/envs/abricate/db/ --arg_abricate_db_id bacmet2
+The resulting directory and database name can be passed to the pipeline as follows:
+
+```bash
+--arg_abricate_db /<path>/<to>/<abricate>/db/ --arg_abricate_db_id bacmet2
 ```
 
+The contents of the directory should have a directory named with the database name in the top level (e.g. `bacmet2/`).
+
 ### AMRFinderPlus
 
 AMRFinderPlus relies on NCBI's curated Reference Gene Database and curated collection of Hidden Markov Models.
@@ -222,9 +258,11 @@ AMRFinderPlus relies on NCBI's curated Reference Gene Database and curated colle
 nf-core/funcscan will download this database for you, unless the path to a local version is given with:
 
 ```bash
---arg_amrfinderplus_db '/<path>/<to>/<amrfinderplus_db>/'
+--arg_amrfinderplus_db '/<path>/<to>/<amrfinderplus_db>/latest'
 ```
 
+You must give the `latest` directory to the pipeline, and the contents of the directory should include files such as `*.ndb`, `*.nhr`, `versions.txt` etc. in the top level.
+
 To obtain a local version of the database:
 
 1. Install AMRFinderPlus from [bioconda](https://bioconda.github.io/recipes/ncbi-amrfinderplus/README.html?highlight=amrfinderplus). To ensure database compatibility, please use the same version as is used in your nf-core/funcscan release (check version in file `<installation>/<path>/funcscan/modules/nf-core/amrfinderplus/run/environment.yml`).
@@ -284,6 +322,8 @@ You can then supply the path to resulting database directory with:
 --arg_deeparg_db '/<path>/<to>/<deeparg>/<db>/'
 ```
 
+The contents of the directory should include directories such as `database`, `model`, and files such as `deeparg.gz` etc. in the top level.
+
 Note that if you supply your own database that is not downloaded by the pipeline, make sure to also supply `--arg_deeparg_db_version` along
 with the version number so hAMRonization will correctly display the database version in the summary report.
 
@@ -304,6 +344,8 @@ You can then supply the path to resulting database directory with:
 --arg_rgi_db '/<path>/<to>/<card>/'
 ```
 
+The contents of the directory should include files such as `card.json`, `aro_index.tsv`, `snps.txt` etc. in the top level.
+
 :::info
 The flag `--save_db` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future.
 :::
@@ -324,24 +366,34 @@ To supply the database directories to the pipeline:
 
 ```bash
 --bgc_antismash_db '/<path>/<to>/<antismash>/<db>/'
---bgc_antismash_installdir '/<path>/<to>/<antismash>/<dir>/'
+--bgc_antismash_installdir '/<path>/<to>/<antismash>/<dir>/antismash'
 ```
 
-Note that the names of the supplied folders must differ from each other (e.g. `antismash_db` and `antismash_dir`). If they are not provided, the databases will be auto-downloaded upon each BGC screening run of the pipeline.
+The contents of the database directory should include directories such as `as-js/`, `clusterblast/`, `clustercompare/` etc. in the top level.
+The contents of the installation directory should include directories such as `common/`, `config/` and files such as `custom_typing.py`, `custom_typing.pyi` etc. in the top level.
 
 :::info
-The flag `--save_db` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future.
+If installing with conda, the installation directory will be `lib/python3.10/site-packages/antismash` from the base directory of your conda install or conda environment directory.
 :::
 
+Note that the names of the two required folders must differ from each other (i.e., the `--bgc_antismash_db` directory must not be called `antismash`).
+If they are not provided, the databases will be auto-downloaded upon each BGC screening run of the pipeline.
+
 :::info
-If installing with conda, the installation directory will be `lib/python3.10/site-packages/antismash` from the base directory of your conda install or conda environment directory.
+The flag `--save_db` saves the pipeline-downloaded databases in your results directory. You can then move these to a central cache directory of your choice for re-use in the future.
 :::
 
 ### DeepBGC
 
 DeepBGC relies on trained models and Pfams to run its analysis. nf-core/funcscan will download these databases for you. If the flag `--save_db` is set, the downloaded files will be stored in the output directory under `databases/deepbgc/`.
 
-Alternatively, if you already downloaded the database locally with `deepbgc download`, you can indicate the path to the database folder with `--bgc_deepbgc_db <path>/<to>/<deepbgc_db>/`. The folder has to contain the subfolders as in the database folder downloaded by `deepbgc download`:
+Alternatively, if you already downloaded the database locally with `deepbgc download`, you can indicate the path to the database folder with:
+
+```bash
+--bgc_deepbgc_db <path>/<to>/<deepbgc_db>/
+```
+
+The contents of the database directory should include directories such as `common`, `0.1.0` in the top level.
 
 ```console
 deepbgc_db/
@@ -354,31 +406,6 @@ deepbgc_db/
     └── myDetectors*.pkl
 ```
 
-If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file.
-
-Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`.
-
-:::warning
-Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
-:::
-
-The above pipeline run specified with a params file in yaml format:
-
-```bash
-nextflow run nf-core/funcscan -profile docker -params-file params.yaml
-```
-
-with `params.yaml` containing:
-
-```yaml
-input: './samplesheet.csv'
-outdir: './results/'
-genome: 'GRCh37'
-<...>
-```
-
-You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
-
 ## Updating the pipeline
 
 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

From c820d20a38bd1ea4ccc3a6c5940e9d4cbf3cabc5 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates" <jfy133@gmail.com>
Date: Thu, 15 Aug 2024 14:10:00 +0200
Subject: [PATCH 7/8] Add exact db files list to schema too

---
 nextflow_schema.json | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index c9b4a45a..2235fca3 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -99,7 +99,7 @@
                 "taxa_classification_mmseqs_db": {
                     "type": "string",
                     "description": "Specify a path to MMseqs2-formatted database.",
-                    "help_text": "Specify a path to a database that is prepared in MMseqs2 format as detailed in the [documentation](https://mmseqs.com/latest/userguide.pdf).",
+                    "help_text": "Specify a path to a database that is prepared in MMseqs2 format as detailed in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\nThe contents of the directory should have files such as `<dbname>.version` and `<dbname>.taxonomy` in the top level.",
                     "fa_icon": "fas fa-database"
                 },
                 "taxa_classification_mmseqs_db_id": {
@@ -214,7 +214,7 @@
                     "type": "string",
                     "fa_icon": "fas fa-database",
                     "description": "Specify a path to a local copy of a BAKTA database.",
-                    "help_text": "If a local copy of a BAKTA database exists, specify the path to that database which is prepared in a BAKTA format. Otherwise this will be downloaded for you."
+                    "help_text": "If a local copy of a BAKTA database exists, specify the path to that database which is prepared in a BAKTA format. Otherwise this will be downloaded for you.\n\nThe contents of the directory should have files such as `*.dmnd` in the top level."
                 },
                 "annotation_bakta_db_downloadtype": {
                     "type": "string",
@@ -371,7 +371,7 @@
                     "default": "Bacteria",
                     "fa_icon": "fas fa-crown",
                     "description": "Specify the kingdom that the input represents.",
-                    "help_text": "Specifies the kingdom that the input sample is derived from and/or you wish to screen for\n\n> ⚠️ Prokka cannot annotate Eukaryotes.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--kingdom`",
+                    "help_text": "Specifies the kingdom that the input sample is derived from and/or you wish to screen for\n\n> \u26a0\ufe0f Prokka cannot annotate Eukaryotes.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--kingdom`",
                     "enum": ["Archaea", "Bacteria", "Mitochondria", "Viruses"]
                 },
                 "annotation_prokka_gcode": {
@@ -387,12 +387,12 @@
                     "type": "integer",
                     "default": 1,
                     "description": "Minimum contig size required for annotation (bp).",
-                    "help_text": "Specify the minimum contig lengths to carry out annotations on. The Prokka developers recommend that this should be ≥ 200 bp, if you plan to submit such annotations to NCBI.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--mincontiglen`",
+                    "help_text": "Specify the minimum contig lengths to carry out annotations on. The Prokka developers recommend that this should be \u2265 200 bp, if you plan to submit such annotations to NCBI.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--mincontiglen`",
                     "fa_icon": "fas fa-ruler-horizontal"
                 },
                 "annotation_prokka_evalue": {
                     "type": "number",
-                    "default": 0.000001,
+                    "default": 1e-6,
                     "description": "E-value cut-off.",
                     "help_text": "Specifiy the maximum E-value used for filtering the alignment hits.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--evalue`",
                     "fa_icon": "fas fa-sort-amount-down"
@@ -629,7 +629,7 @@
                 "amp_ampcombi_db": {
                     "type": "string",
                     "description": "Path to AMPcombi reference database directory (DRAMP).",
-                    "help_text": "AMPcombi uses the 'general AMPs' dataset of the [DRAMP database](http://dramp.cpu-bioinfor.org/downloads/) for taxonomic classification. If you have a local version of it, you can provide the path to the directory(!) that contains the following reference database files:\n1. fasta file with `.fasta` file extension\n2. the corresponding table with with functional and taxonomic classifications in `.tsv` file extension.\n\nFor more information check the AMPcombi [documentation](https://github.com/Darcy220606/AMPcombi).",
+                    "help_text": "AMPcombi uses the 'general AMPs' dataset of the [DRAMP database](http://dramp.cpu-bioinfor.org/downloads/) for taxonomic classification. If you have a local version of it, you can provide the path to the directory(!) that contains the following reference database files:\n1. fasta file with `.fasta` file extension\n2. the corresponding table with functional and taxonomic classifications in `.tsv` file extension.\n\nThe contents of the directory should have files such as `*.dmnd` and `*.fasta` in the top level.\n\nFor more information check the AMPcombi [documentation](https://github.com/Darcy220606/AMPcombi).",
                     "fa_icon": "fas fa-address-book"
                 },
                 "amp_ampcombi_parsetables_cutoff": {
@@ -783,7 +783,7 @@
                 "arg_amrfinderplus_db": {
                     "type": "string",
                     "fa_icon": "fas fa-layer-group",
-                    "help_text": "Specify the path to a local version of the ARMFinderPlus database. If no input is given, the pipeline will download the database for you.\n\n See the nf-core/funcscan usage [documentation](https://nf-co.re/funcscan/usage) for more information.\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--database`",
+                    "help_text": "Specify the path to a local version of the AMRFinderPlus database.\n\nYou must give the `latest` directory to the pipeline, and the contents of the directory should include files such as `*.ndb`, `*.nhr`, `versions.txt` etc. in the top level.\n\nIf no input is given, the pipeline will download the database for you.\n\n See the nf-core/funcscan usage [documentation](https://nf-co.re/funcscan/usage) for more information.\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--database`",
                     "description": "Specify the path to a local version of the ARMFinderPlus database."
                 },
                 "arg_amrfinderplus_identmin": {
@@ -840,7 +840,7 @@
                     "type": "string",
                     "fa_icon": "fas fa-database",
                     "description": "Specify the path to the DeepARG database.",
-                    "help_text": "Specify the path to a local version of the DeepARG database (see the pipelines' usage [documentation](https://nf-co.re/funcscan/dev/docs/usage#databases-and-reference-files)). If no input is given, the module will download the database for you, however this is not recommended, as the database is large and this will take time.\n\n> Modifies tool parameter(s):\n> - DeepARG: `--data-path`"
+                    "help_text": "Specify the path to a local version of the DeepARG database (see the pipelines' usage [documentation](https://nf-co.re/funcscan/dev/docs/usage#databases-and-reference-files)).\n\nThe contents of the directory should include directories such as `database`, `model`, and files such as `deeparg.gz` etc. in the top level.\n\nIf no input is given, the module will download the database for you, however this is not recommended, as the database is large and this will take time.\n\n> Modifies tool parameter(s):\n> - DeepARG: `--data-path`"
                 },
                 "arg_deeparg_db_version": {
                     "type": "integer",
@@ -966,7 +966,7 @@
                     "type": "string",
                     "description": "Path to user-defined local CARD database.",
                     "fa_icon": "fas fa-database",
-                    "help_text": "You can pre-download the CARD database to your machine and pass the path of it to this parameter.\n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#rgi) for details on how to download this.\n\n>  Modifies tool parameter(s):\n> - RGI_CARDANNOTATION: `--input`"
+                    "help_text": "You can pre-download the CARD database to your machine and pass the path of it to this parameter.\n\nThe contents of the directory should include files such as `card.json`, `aro_index.tsv`, `snps.txt` etc. in the top level.\n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#rgi) for details on how to download this.\n\n>  Modifies tool parameter(s):\n> - RGI_CARDANNOTATION: `--input`"
                 },
                 "arg_rgi_savejson": {
                     "type": "boolean",
@@ -990,14 +990,14 @@
                 },
                 "arg_rgi_includeloose": {
                     "type": "boolean",
-                    "description": "Include all of loose, strict and perfect hits (i.e. ≥ 95% identity) found by RGI.",
+                    "description": "Include all of loose, strict and perfect hits (i.e. \u2265 95% identity) found by RGI.",
                     "help_text": "When activated RGI output will include 'Loose' hits in addition to 'Strict' and 'Perfect' hits. The 'Loose' algorithm works outside of the detection model cut-offs to provide detection of new, emergent threats and more distant homologs of AMR genes, but will also catalog homologous sequences and spurious partial matches that may not have a role in AMR.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_loose`",
                     "fa_icon": "far fa-hand-scissors"
                 },
                 "arg_rgi_includenudge": {
                     "type": "boolean",
                     "description": "Suppresses the default behaviour of RGI with `--arg_rgi_includeloose`.",
-                    "help_text": "This flag suppresses the default behaviour of RGI, by listing all 'Loose' matches of ≥ 95% identity as 'Strict' or 'Perfect', regardless of alignment length.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_nudge`",
+                    "help_text": "This flag suppresses the default behaviour of RGI, by listing all 'Loose' matches of \u2265 95% identity as 'Strict' or 'Perfect', regardless of alignment length.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_nudge`",
                     "fa_icon": "fas fa-hand-scissors"
                 },
                 "arg_rgi_lowquality": {
@@ -1047,7 +1047,7 @@
                     "type": "string",
                     "description": "Path to user-defined local ABRicate database directory for using custom databases.",
                     "fa_icon": "far fa-folder-open",
-                    "help_text": "Supply this only if you want to use additional custom databases you yourself have added to your ABRicate installation following the instructions [here](https://github.com/tseemann/abricate?tab=readme-ov-file#making-your-own-database).You must also specify the name of the custom database with `--arg_abricate_db_id`.\n\n> Modifies tool parameter(s):\n> - ABRicate: `--datadir`"
+                    "help_text": "Supply this only if you want to use additional custom databases you yourself have added to your ABRicate installation following the instructions [here](https://github.com/tseemann/abricate?tab=readme-ov-file#making-your-own-database).\n\nThe contents of the directory should have a directory named with the database name in the top level (e.g. `bacmet2/`).\n\nYou must also specify the name of the custom database with `--arg_abricate_db_id`.\n\n> Modifies tool parameter(s):\n> - ABRicate: `--datadir`"
                 },
                 "arg_abricate_minid": {
                     "type": "integer",
@@ -1137,13 +1137,13 @@
                     "type": "string",
                     "description": "Path to user-defined local antiSMASH database.",
                     "fa_icon": "fas fa-database",
-                    "help_text": "It is recommend to pre-download the antiSMASH databases to your machine and pass the path of it to this parameter, as this can take a long time to download - particularly when running lots of pipeline runs. \n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#antismash-1) for details on how to download this. If running with docker or singularity, please also check `--bgc_antismash_installdir` for important information."
+                    "help_text": "It is recommended to pre-download the antiSMASH databases to your machine and pass the path of it to this parameter, as this can take a long time to download - particularly when running lots of pipeline runs.\n\nThe contents of the database directory should include directories such as `as-js/`, `clusterblast/`, `clustercompare/` etc. in the top level.\n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#antismash-1) for details on how to download this. If running with docker or singularity, please also check `--bgc_antismash_installdir` for important information."
                 },
                 "bgc_antismash_installdir": {
                     "type": "string",
                     "description": "Path to user-defined local antiSMASH directory. Only required when running with docker/singularity.",
                     "fa_icon": "far fa-folder-open",
-                    "help_text": "This is required when running with **docker and singularity** (not required for conda), due to attempted 'modifications' of files during database checks in the installation directory, something that cannot be done in immutable docker/singularity containers.\n\nTherefore, a local installation directory needs to be mounted (including all modified files from the downloading step) to the container as a workaround.\n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#antismash-1) for details on how to download this. If running with docker or singularity, please also check `--bgc_antismash_installdir` for important information."
+                    "help_text": "This is required when running with **docker and singularity** (not required for conda), due to attempted 'modifications' of files during database checks in the installation directory, something that cannot be done in immutable docker/singularity containers.\n\nTherefore, a local installation directory needs to be mounted (including all modified files from the downloading step) to the container as a workaround.\n\nThe contents of the installation directory should include directories such as `common/`, `config/` and files such as `custom_typing.py`, `custom_typing.pyi` etc. in the top level.\n\nSee the pipeline [documentation](https://nf-co.re/funcscan/usage#antismash-1) for details on how to download this. If running with docker or singularity, please also check `--bgc_antismash_installdir` for important information."
                 },
                 "bgc_antismash_contigminlength": {
                     "type": "integer",
@@ -1192,14 +1192,14 @@
                 },
                 "bgc_antismash_pfam2go": {
                     "type": "boolean",
-                    "default": "false",
+                    "default": false,
                     "description": "Run Pfam to Gene Ontology mapping module.",
                     "help_text": "This maps the proteins to Pfam database to annotate BGC modules with functional information based on the protein families they contain. For more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--pfam2go`",
                     "fa_icon": "fas fa-search"
                 },
                 "bgc_antismash_rre": {
                     "type": "boolean",
-                    "default": "false",
+                    "default": false,
                     "description": "Run RREFinder precision mode on all RiPP gene clusters.",
                     "help_text": "This enables the prediction of regulatory elements on the BGC that help in the control of protein expression. For more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--rre`",
                     "fa_icon": "fas fa-search"
@@ -1214,7 +1214,7 @@
                 },
                 "bgc_antismash_tfbs": {
                     "type": "boolean",
-                    "default": "false",
+                    "default": false,
                     "description": "Run TFBS finder on all gene clusters.",
                     "help_text": "This enables the prediction of transcription factor binding sites which control the gene expression. For more information see the antiSMASH [documentation](https://docs.antismash.secondarymetabolites.org/).\n\n> Modifies tool parameter(s):\n> - antiSMASH: `--tfbs`",
                     "fa_icon": "fas fa-search"
@@ -1237,7 +1237,7 @@
                     "type": "string",
                     "fa_icon": "fas fa-database",
                     "description": "Path to local DeepBGC database folder.",
-                    "help_text": "For more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s)\n> -  DeepBGC: environment variable `DEEPBGC_DOWNLOADS_DIR`"
+                    "help_text": "The contents of the database directory should include directories such as `common`, `0.1.0` in the top level.\n\nFor more information see the DeepBGC [documentation](https://github.com/Merck/deepbgc).\n\n> Modifies tool parameter(s)\n> -  DeepBGC: environment variable `DEEPBGC_DOWNLOADS_DIR`"
                 },
                 "bgc_deepbgc_score": {
                     "type": "number",
@@ -1332,7 +1332,7 @@
                     "type": "number",
                     "description": "The p-value cutoff for protein domains to be included.",
                     "fa_icon": "fas fa-filter",
-                    "default": 0.000000001,
+                    "default": 1e-9,
                     "help_text": "The p-value cutoff for protein domains to be included.\n\nFor more information see the GECCO [documentation](https://github.com/zellerlab/GECCO).\n\n> Modifies tool parameter(s):\n> - GECCO: `--pfilter`"
                 },
                 "bgc_gecco_threshold": {

From 5fb7be906a0451cb1854843d26f7a1d98b114e01 Mon Sep 17 00:00:00 2001
From: jasmezz <jasmin.frangenberg@hki-jena.de>
Date: Tue, 20 Aug 2024 09:32:29 +0200
Subject: [PATCH 8/8] Fix param typo in schema.json [skip ci]

---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 2235fca3..ca01d496 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -1040,7 +1040,7 @@
                     "type": "string",
                     "default": "ncbi",
                     "fa_icon": "fas fa-database",
-                    "description": "Specify the name of the ABRicate database to use. Names of non-default databases can be supplied if `--arg_abricate_localdbdir` provided.",
+                    "description": "Specify the name of the ABRicate database to use. Names of non-default databases can be supplied if `--arg_abricate_db` is provided.",
                     "help_text": "Specifies which database to use from dedicated list of databases available by ABRicate.\n\nDefault supported are one of: `argannot`, `card`, `ecoh`, `ecoli_vf`, `megares`, `ncbi`, `plasmidfinder`, `resfinder`, `vfdb`. Other options can be supplied if you have installed a custom one within the directory you have supplied to `--arg_abricate_db`.\n\nFor more information check the ABRicate [documentation](https://github.com/tseemann/abricate).\n\n> Modifies tool parameter(s):\n> - ABRicate: `--db`"
                 },
                 "arg_abricate_db": {