Skip to content

Commit

Permalink
Bump fgbio versions and swap to nftest (#5624)
Browse files Browse the repository at this point in the history
* Convert callduplex and callmolecular to nftest and update version

* Convert zipperbams

* Convert groupreadsbyumi

* Convert sortbam and add stubs

* Convert filterconsensusreads

* Update snapshot

* Update modules/nf-core/fgbio/callmolecularconsensusreads/main.nf

* Add collision checks, swap test data paths

* Fix conda version and linting

* Apply suggestions from code review

Co-authored-by: Nils Homer <nh13@users.noreply.github.com>

* Update modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml

Co-authored-by: Nils Homer <nh13@users.noreply.github.com>

* Update duplex snap

---------

Co-authored-by: Nils Homer <nh13@users.noreply.github.com>
Co-authored-by: Maxime U Garcia <max.u.garcia@gmail.com>
  • Loading branch information
3 people authored Jul 2, 2024
1 parent a460c26 commit 2fc7438
Show file tree
Hide file tree
Showing 60 changed files with 1,215 additions and 331 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- bioconda::fgbio=2.0.2
- bioconda::fgbio=2.2.1
34 changes: 24 additions & 10 deletions modules/nf-core/fgbio/callduplexconsensusreads/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,24 @@ process FGBIO_CALLDUPLEXCONSENSUSREADS {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/fgbio:2.0.2--hdfd78af_0' :
'biocontainers/fgbio:2.0.2--hdfd78af_0' }"
'https://depot.galaxyproject.org/singularity/fgbio:2.2.1--hdfd78af_0' :
'biocontainers/fgbio:2.2.1--hdfd78af_0' }"

input:
tuple val(meta), path(bam)
// please note:
// --min-reads is a required argument with no default
// --min-input-base-quality is a required argument with no default
// make sure they are specified via ext.args in your config
tuple val(meta), path(grouped_bam)
val min_reads
val min_baseq

output:
tuple val(meta), path("${prefix}.bam"), emit: bam
path "versions.yml" , emit: versions
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}_consensus"
prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"

def mem_gb = 8
if (!task.memory) {
Expand All @@ -35,6 +33,7 @@ process FGBIO_CALLDUPLEXCONSENSUSREADS {
mem_gb = task.memory.giga - 1
}
}
if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"

"""
fgbio \\
Expand All @@ -43,8 +42,10 @@ process FGBIO_CALLDUPLEXCONSENSUSREADS {
--async-io=true \\
--compression=1 \\
CallDuplexConsensusReads \\
--input $bam \\
--input $grouped_bam \\
--output ${prefix}.bam \\
--min-reads ${min_reads} \\
--min-input-base-quality ${min_baseq} \\
--threads ${task.cpus} \\
$args
Expand All @@ -53,4 +54,17 @@ process FGBIO_CALLDUPLEXCONSENSUSREADS {
fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
END_VERSIONS
"""

stub:
prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
touch ${prefix}.bam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
END_VERSIONS
"""

}
11 changes: 8 additions & 3 deletions modules/nf-core/fgbio/callduplexconsensusreads/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,23 @@ tools:
homepage: http://fulcrumgenomics.github.io/fgbio/
documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/CallDuplexConsensusReads.html
tool_dev_url: https://github.com/fulcrumgenomics/fgbio
licence: "['MIT']"
licence: ["MIT"]
input:
# Only when we have meta
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- grouped_bam:
type: file
description: BAM/SAM file
description: BAM/SAM file, grouped by UMI
pattern: "*.{bam,sam}"
- min_reads:
type: string
description: Minimum number of raw/original reads to build each consensus read. Can be a space delimited list of 1-3 values. See fgbio documentation for more details.
- min_baseq:
type: integer
description: Ignore bases in raw reads that have Q below this value
output:
- meta:
type: map
Expand Down
62 changes: 62 additions & 0 deletions modules/nf-core/fgbio/callduplexconsensusreads/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// nf-test suite for the FGBIO_CALLDUPLEXCONSENSUSREADS process defined in ../main.nf.
nextflow_process {

name "Test Process FGBIO_CALLDUPLEXCONSENSUSREADS"
script "../main.nf"
process "FGBIO_CALLDUPLEXCONSENSUSREADS"

// Tags attached to this suite.
tag "modules"
tag "modules_nfcore"
tag "fgbio"
tag "fgbio/callduplexconsensusreads"

// Real run on a duplex UMI-grouped paired-end BAM.
// Inputs follow the order of the process `input:` declarations:
// input[0] = [meta, grouped_bam], input[1] = min_reads, input[2] = min_baseq.
test("homo_sapiens - bam") {

when {
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam', checkIfExists: true)
]
input[1] = 3
input[2] = 20
"""
}
}

// Passes only if the process succeeded and all output channels match the stored snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Same inputs as the test above, but run with the "-stub" option.
// NOTE(review): presumably this exercises the process `stub:` block (a `touch` of the
// output BAM) instead of the real fgbio command - the stub snapshot holds the md5 of an empty file.
test("homo_sapiens - stub") {

options "-stub"

when {
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam', checkIfExists: true)
]
input[1] = 3
input[2] = 20
"""
}
}

// Passes only if the process succeeded and all output channels match the stored snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{
"homo_sapiens - stub": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"test_consensus_unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,7277dba1bc055b578eb6d8d6af43b128"
],
"bam": [
[
{
"id": "test",
"single_end": false
},
"test_consensus_unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,7277dba1bc055b578eb6d8d6af43b128"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-07-02T17:44:41.656625835"
},
"homo_sapiens - bam": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"test_consensus_unmapped.bam:md5,4f0e87feb7601d06617c9f29d7aec352"
]
],
"1": [
"versions.yml:md5,7277dba1bc055b578eb6d8d6af43b128"
],
"bam": [
[
{
"id": "test",
"single_end": false
},
"test_consensus_unmapped.bam:md5,4f0e87feb7601d06617c9f29d7aec352"
]
],
"versions": [
"versions.yml:md5,7277dba1bc055b578eb6d8d6af43b128"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-17T06:05:28.894178772"
}
}
2 changes: 2 additions & 0 deletions modules/nf-core/fgbio/callduplexconsensusreads/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Associates the fgbio/callduplexconsensusreads tag with every path under this module's directory.
fgbio/callduplexconsensusreads:
- "modules/nf-core/fgbio/callduplexconsensusreads/**"
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- bioconda::fgbio=2.0.2
- bioconda::fgbio=2.2.1
41 changes: 34 additions & 7 deletions modules/nf-core/fgbio/callmolecularconsensusreads/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ process FGBIO_CALLMOLECULARCONSENSUSREADS {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/fgbio:2.0.2--hdfd78af_0' :
'biocontainers/fgbio:2.0.2--hdfd78af_0' }"
'https://depot.galaxyproject.org/singularity/fgbio:2.2.1--hdfd78af_0' :
'biocontainers/fgbio:2.2.1--hdfd78af_0' }"

input:
tuple val(meta), path(bam)
tuple val(meta), path(grouped_bam)
val min_reads
val min_baseq

output:
tuple val(meta), path("*.bam"), emit: bam
Expand All @@ -19,19 +21,44 @@ process FGBIO_CALLMOLECULARCONSENSUSREADS {

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
def mem_gb = 8
if (!task.memory) {
log.info '[fgbio CallMolecularConsensusReads] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
} else {
mem_gb = task.memory.giga
}
if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
fgbio \\
-Xmx${mem_gb}g \\
--tmp-dir=. \\
--async-io=true \\
--compression=1 \\
CallMolecularConsensusReads \\
--input $bam \\
--input $grouped_bam \\
--output ${prefix}.bam \\
--min-reads ${min_reads} \\
--min-input-base-quality ${min_baseq} \\
--threads ${task.cpus} \\
$args \\
--output ${prefix}.bam
$args;
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
END_VERSIONS
"""

stub:
prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
touch ${prefix}.bam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
END_VERSIONS
"""

}
11 changes: 8 additions & 3 deletions modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ keywords:
- UMIs
- consensus sequence
- bam
- sam
tools:
- fgbio:
description: Tools for working with genomic and high throughput sequencing data.
Expand All @@ -17,11 +16,17 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false, collapse:false ]
- bam:
- grouped_bam:
type: file
description: |
The input SAM or BAM file.
The input SAM or BAM file, grouped by UMIs
pattern: "*.{bam,sam}"
- min_reads:
type: integer
description: Minimum number of original reads to build each consensus read.
- min_baseq:
type: integer
description: Ignore bases in raw reads that have Q below this value.
output:
- meta:
type: map
Expand Down
Loading

0 comments on commit 2fc7438

Please sign in to comment.