Bump fgbio versions and swap to nftest (#5624)

* Convert callduplex and callmolecular to nftest and update version
* Convert zipperbams
* Convert groupreadsbyumi
* Convert sortbam and add stubs
* Convert filterconsensusreads
* Update snapshot
* Update modules/nf-core/fgbio/callmolecularconsensusreads/main.nf
* Add collision checks, swap test data paths
* Fix conda version and linting
* Apply suggestions from code review
  Co-authored-by: Nils Homer <nh13@users.noreply.github.com>
* Update modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml
  Co-authored-by: Nils Homer <nh13@users.noreply.github.com>
* Update duplex snap

---------

Co-authored-by: Nils Homer <nh13@users.noreply.github.com>
Co-authored-by: Maxime U Garcia <max.u.garcia@gmail.com>
nf-core · Jul 2, 2024 · 2fc7438 · 2fc7438
1 parent a460c26
commit 2fc7438
Show file tree

Hide file tree

Showing 60 changed files with 1,215 additions and 331 deletions.
diff --git a/modules/nf-core/fgbio/callduplexconsensusreads/environment.yml b/modules/nf-core/fgbio/callduplexconsensusreads/environment.yml
@@ -4,4 +4,4 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - bioconda::fgbio=2.0.2
+  - bioconda::fgbio=2.2.1
diff --git a/modules/nf-core/fgbio/callduplexconsensusreads/main.nf b/modules/nf-core/fgbio/callduplexconsensusreads/main.nf
@@ -4,26 +4,24 @@ process FGBIO_CALLDUPLEXCONSENSUSREADS {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/fgbio:2.0.2--hdfd78af_0' :
-        'biocontainers/fgbio:2.0.2--hdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/fgbio:2.2.1--hdfd78af_0' :
+        'biocontainers/fgbio:2.2.1--hdfd78af_0' }"
 
     input:
-    tuple val(meta), path(bam)
-    // please note:
-    // --min-reads is a required argument with no default
-    // --min-input-base-quality is a required argument with no default
-    // make sure they are specified via ext.args in your config
+    tuple val(meta), path(grouped_bam)
+    val min_reads
+    val min_baseq
 
     output:
     tuple val(meta), path("${prefix}.bam"), emit: bam
-    path "versions.yml"                     , emit: versions
+    path "versions.yml"                   , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
-    prefix = task.ext.prefix ?: "${meta.id}_consensus"
+    prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
 
     def mem_gb = 8
     if (!task.memory) {
@@ -35,6 +33,7 @@ process FGBIO_CALLDUPLEXCONSENSUSREADS {
             mem_gb = task.memory.giga - 1
         }
     }
+    if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
 
     """
     fgbio \\
@@ -43,8 +42,10 @@ process FGBIO_CALLDUPLEXCONSENSUSREADS {
         --async-io=true \\
         --compression=1 \\
         CallDuplexConsensusReads \\
-        --input $bam \\
+        --input $grouped_bam \\
         --output ${prefix}.bam \\
+        --min-reads ${min_reads} \\
+        --min-input-base-quality ${min_baseq} \\
         --threads ${task.cpus} \\
         $args
 
@@ -53,4 +54,17 @@ process FGBIO_CALLDUPLEXCONSENSUSREADS {
         fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
     END_VERSIONS
     """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
+    if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    """
+    touch ${prefix}.bam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+
 }
diff --git a/modules/nf-core/fgbio/callduplexconsensusreads/meta.yml b/modules/nf-core/fgbio/callduplexconsensusreads/meta.yml
@@ -10,18 +10,23 @@ tools:
       homepage: http://fulcrumgenomics.github.io/fgbio/
       documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/CallDuplexConsensusReads.html
       tool_dev_url: https://github.com/fulcrumgenomics/fgbio
-      licence: "['MIT']"
+      licence: ["MIT"]
 input:
-  # Only when we have meta
   - meta:
       type: map
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - bam:
+  - grouped_bam:
       type: file
-      description: BAM/SAM file
+      description: BAM/SAM file, grouped by UMI
       pattern: "*.{bam,sam}"
+  - min_reads:
+      type: string
+      description: Minimum number of raw/original reads to build each consensus read.  Can be a space delimited list of 1-3 values.  See fgbio documentation for more details.
+  - min_baseq:
+      type: integer
+      description: Ignore bases in raw reads that have Q below this value
 output:
   - meta:
       type: map

diff --git a/modules/nf-core/fgbio/callduplexconsensusreads/tests/main.nf.test b/modules/nf-core/fgbio/callduplexconsensusreads/tests/main.nf.test
@@ -0,0 +1,62 @@
+nextflow_process {
+
+    name "Test Process FGBIO_CALLDUPLEXCONSENSUSREADS"
+    script "../main.nf"
+    process "FGBIO_CALLDUPLEXCONSENSUSREADS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "fgbio"
+    tag "fgbio/callduplexconsensusreads"
+
+    test("homo_sapiens - bam") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam', checkIfExists: true)
+                ]
+                input[1] = 3
+                input[2] = 20
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - stub") {
+
+        options "-stub" // run the process' stub: block instead of the real fgbio command
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam', checkIfExists: true)
+                ]
+                input[1] = 3
+                input[2] = 20
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/fgbio/callduplexconsensusreads/tests/main.nf.test.snap b/modules/nf-core/fgbio/callduplexconsensusreads/tests/main.nf.test.snap
@@ -0,0 +1,72 @@
+{
+    "homo_sapiens - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test_consensus_unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,7277dba1bc055b578eb6d8d6af43b128"
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test_consensus_unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,7277dba1bc055b578eb6d8d6af43b128"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-07-02T17:44:41.656625835"
+    },
+    "homo_sapiens - bam": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test_consensus_unmapped.bam:md5,4f0e87feb7601d06617c9f29d7aec352"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,7277dba1bc055b578eb6d8d6af43b128"
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test_consensus_unmapped.bam:md5,4f0e87feb7601d06617c9f29d7aec352"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,7277dba1bc055b578eb6d8d6af43b128"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-17T06:05:28.894178772"
+    }
+}
diff --git a/modules/nf-core/fgbio/callduplexconsensusreads/tests/tags.yml b/modules/nf-core/fgbio/callduplexconsensusreads/tests/tags.yml
@@ -0,0 +1,2 @@
+fgbio/callduplexconsensusreads:
+  - "modules/nf-core/fgbio/callduplexconsensusreads/**"
diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/environment.yml b/modules/nf-core/fgbio/callmolecularconsensusreads/environment.yml
@@ -4,4 +4,4 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - bioconda::fgbio=2.0.2
+  - bioconda::fgbio=2.2.1
diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/main.nf b/modules/nf-core/fgbio/callmolecularconsensusreads/main.nf
@@ -4,11 +4,13 @@ process FGBIO_CALLMOLECULARCONSENSUSREADS {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/fgbio:2.0.2--hdfd78af_0' :
-        'biocontainers/fgbio:2.0.2--hdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/fgbio:2.2.1--hdfd78af_0' :
+        'biocontainers/fgbio:2.2.1--hdfd78af_0' }"
 
     input:
-    tuple val(meta), path(bam)
+    tuple val(meta), path(grouped_bam)
+    val min_reads
+    val min_baseq
 
     output:
     tuple val(meta), path("*.bam"), emit: bam
@@ -19,19 +21,44 @@ process FGBIO_CALLMOLECULARCONSENSUSREADS {
 
     script:
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    def prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
+    def mem_gb = 8
+    if (!task.memory) {
+        log.info '[fgbio CallMolecularConsensusReads] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
+    } else {
+        mem_gb = task.memory.giga
+    }
+    if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
     """
     fgbio \\
+        -Xmx${mem_gb}g \\
         --tmp-dir=. \\
+        --async-io=true \\
+        --compression=1 \\
         CallMolecularConsensusReads \\
-        --input $bam \\
+        --input $grouped_bam \\
+        --output ${prefix}.bam \\
+        --min-reads ${min_reads} \\
+        --min-input-base-quality ${min_baseq} \\
         --threads ${task.cpus} \\
-        $args \\
-        --output ${prefix}.bam
+        $args;
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
     END_VERSIONS
     """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
+    if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    """
+    touch ${prefix}.bam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+
 }
diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml b/modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml
@@ -4,7 +4,6 @@ keywords:
   - UMIs
   - consensus sequence
   - bam
-  - sam
 tools:
   - fgbio:
       description: Tools for working with genomic and high throughput sequencing data.
@@ -17,11 +16,17 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false, collapse:false ]
-  - bam:
+  - grouped_bam:
       type: file
       description: |
-        The input SAM or BAM file.
+        The input SAM or BAM file, grouped by UMIs
       pattern: "*.{bam,sam}"
+  - min_reads:
+      type: integer
+      description: Minimum number of original reads to build each consensus read.
+  - min_baseq:
+      type: integer
+      description: Ignore bases in raw reads that have Q below this value.
 output:
   - meta:
       type: map