NOTE(review): line breaks and indentation of this patch were reconstructed from a
whitespace-collapsed copy. Per-hunk line counts match the @@ headers, but leading and
alignment whitespace on context/removed lines is assumed -- confirm against the target
files, or apply with `git apply --ignore-whitespace`. A few comments/wordings on added
lines were clarified, so the post-image blob ids on the `index` lines predate those edits.

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0a6208330f..e4575c4d69 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -33,27 +33,28 @@ jobs:
         test:
           - "aligner"
           - "annotation"
-          - "default"
           - "cnvkit"
           - "controlfreec"
           - "deepvariant"
+          - "default"
           - "freebayes"
           - "gatk4_spark"
           - "haplotypecaller"
           - "manta"
           - "markduplicates"
-          - "mutect2"
           - "msisensorpro"
-          # - 'save_bam_mapped'
+          - "mutect2"
           - "prepare_recalibration"
           - "recalibrate"
-          - "variantcalling_channel"
+          # - 'save_bam_mapped'
           - "skip_markduplicates"
+          - "split_fastq"
           - "strelka"
           - "strelkabp"
-          - "split_fastq"
           - "targeted"
+          - "tiddit"
           - "tumor_normal_pair"
+          - "variantcalling_channel"
 
     steps:
       - name: Check out pipeline code
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 47021b9e00..aa715d71f9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -60,6 +60,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#572](https://github.com/nf-core/sarek/pull/572) - Adjusted subway map svg for firefox compatibility
 - [#578](https://github.com/nf-core/sarek/pull/578) - Updated module deeptools/bamcoverage
 - [#585](https://github.com/nf-core/sarek/pull/585) - Remove explicit BAM to CRAM conversion after MarkduplicatesSpark; tool does it internally
+- [#581](https://github.com/nf-core/sarek/pull/581) - `TIDDIT` is updated to `3.1.0`
 
 ### Fixed
 
@@ -87,6 +88,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#567](https://github.com/nf-core/sarek/pull/567) - Fix interval name resolving during scatter/gather by moving logic to modules.config causing name to be correctly resolved on process execution; also fixed duplicate naming when variant callers produce multiple vcf files by adding field `type` to `meta` map
 - [#585](https://github.com/nf-core/sarek/pull/585) - Fix Spark usage for GATK4 modules
 - [#587](https://github.com/nf-core/sarek/pull/587) - Fix issue with VEP extra files
+- [#581](https://github.com/nf-core/sarek/pull/581) - `TIDDIT` is back: germline structural variant calling is re-enabled
 - [#590](https://github.com/nf-core/sarek/pull/590) - Fix empty folders during scatter/gather
 
 ### Deprecated
diff --git a/conf/modules.config b/conf/modules.config
index f276b567a9..711229aa05 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -652,12 +652,24 @@ process{
         ]
     }
 
-    // withName: 'TIDDIT_SV' {
-    //     publishDir = [
-    //         mode: params.publish_dir_mode,
-    //         path: { "${params.outdir}/variant_calling/${meta.id}/tiddit" }
-    //     ]
-    // }
+    // TIDDIT: --skip_assembly is passed whenever no BWA index is supplied
+    withName: 'TIDDIT_SV' {
+        ext.when = { params.tools && params.tools.contains('tiddit') }
+        ext.args = { bwa_index ? "" : "--skip_assembly" }
+        publishDir = [
+            mode: params.publish_dir_mode,
+            path: { "${params.outdir}/variant_calling/${meta.id}/tiddit" },
+            pattern: "*tab",
+        ]
+    }
+    withName: 'TABIX_BGZIP_TIDDIT_SV' {
+        ext.prefix = { "${meta.id}.vcf" }
+        publishDir = [
+            mode: params.publish_dir_mode,
+            path: { "${params.outdir}/variant_calling/${meta.id}/tiddit" },
+            pattern: "*{vcf.gz,vcf.gz.tbi}"
+        ]
+    }
 
     // TUMOR_VARIANT_CALLING
 
diff --git a/modules.json b/modules.json
index e323da9ddb..d1cd9d1052 100644
--- a/modules.json
+++ b/modules.json
@@ -232,7 +232,7 @@
                 "git_sha": "b3e9b88e80880f450ad79a95b2b7aa05e1de5484"
             },
             "tiddit/sv": {
-                "git_sha": "57cb730e78634673fb254a77606e014ce942734c"
+                "git_sha": "b689b8ed88a9f89eb2f7c75d3eb0bace77ade109"
             },
             "trimgalore": {
                 "git_sha": "85ec13ff1fc2196c5a507ea497de468101baabed"
diff --git a/modules/nf-core/modules/tiddit/sv/main.nf b/modules/nf-core/modules/tiddit/sv/main.nf
index b3e3813c5f..2e876ef1ae 100644
--- a/modules/nf-core/modules/tiddit/sv/main.nf
+++ b/modules/nf-core/modules/tiddit/sv/main.nf
@@ -2,21 +2,20 @@ process TIDDIT_SV {
     tag "$meta.id"
     label 'process_medium'
 
-    conda (params.enable_conda ? "bioconda::tiddit=2.12.1" : null)
+    conda (params.enable_conda ? "bioconda::tiddit=3.1.0" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/tiddit:2.12.1--py38h1773678_0' :
-        'quay.io/biocontainers/tiddit:2.12.1--py38h1773678_0' }"
+        'https://depot.galaxyproject.org/singularity/tiddit:3.1.0--py39h59fae87_1' :
+        'quay.io/biocontainers/tiddit:3.1.0--py39h59fae87_1' }"
 
     input:
-    tuple val(meta), path(bam)
+    tuple val(meta), path(input), path(input_index)
     path  fasta
-    path  fai
+    path  bwa_index
 
     output:
-    tuple val(meta), path("*.vcf")        , emit: vcf
-    tuple val(meta), path("*.ploidy.tab") , emit: ploidy
-    tuple val(meta), path("*.signals.tab"), emit: signals
-    path  "versions.yml"                  , emit: versions
+    tuple val(meta), path("*.vcf")         , emit: vcf
+    tuple val(meta), path("*.ploidies.tab"), emit: ploidy
+    path  "versions.yml"                   , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -24,18 +23,21 @@ process TIDDIT_SV {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def reference = fasta ? "--ref $fasta" : ""
+    def bwa_command = bwa_index ? "[[ -d $bwa_index ]] && for i in $bwa_index/*; do [[ -f $fasta && ! \"\$i\" =~ .*\"$fasta\".* ]] && ln -s \$i ${fasta}.\${i##*.} || ln -s \$i .; done" : ""
+
     """
+    $bwa_command
+
     tiddit \\
         --sv \\
         $args \\
-        --bam $bam \\
-        $reference \\
+        --bam $input \\
+        --ref $fasta \\
         -o $prefix
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*TIDDIT-//; s/ .*\$//')
+        tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*tiddit-//; s/ .*\$//')
     END_VERSIONS
     """
 
@@ -43,12 +45,11 @@ process TIDDIT_SV {
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
     touch ${prefix}.vcf
-    touch ${prefix}.ploidy.tab
-    touch ${prefix}.signals.tab
+    touch ${prefix}.ploidies.tab
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*TIDDIT-//; s/ .*\$//')
+        tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*tiddit-//; s/ .*\$//')
     END_VERSIONS
     """
 }
diff --git a/modules/nf-core/modules/tiddit/sv/meta.yml b/modules/nf-core/modules/tiddit/sv/meta.yml
index fc307081c8..8b41c69cf5 100644
--- a/modules/nf-core/modules/tiddit/sv/meta.yml
+++ b/modules/nf-core/modules/tiddit/sv/meta.yml
@@ -17,14 +17,22 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: BAM/CRAM file
+      pattern: "*.{bam,cram}"
+  - index:
+      type: file
+      description: BAM/CRAM index file
+      pattern: "*.{bai,crai}"
   - fasta:
       type: file
       description: Input FASTA file
       pattern: "*.{fasta,fa}"
-  - fai:
+  - bwa_index:
       type: file
-      description: FASTA index file
-      pattern: "*.{fai}"
+      description: BWA genome index files
+      pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}"
 output:
   - meta:
       type: map
@@ -38,11 +46,7 @@ output:
   - ploidy:
       type: file
       description: tab
-      pattern: "*.{ploidy.tab}"
-  - signals:
-      type: file
-      description: tab
-      pattern: "*.{signals.tab}"
+      pattern: "*.{ploidies.tab}"
   - versions:
       type: file
       description: File containing software versions
diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf
index 7ad38a2a18..e29e48d874 100644
--- a/subworkflows/local/germline_variant_calling.nf
+++ b/subworkflows/local/germline_variant_calling.nf
@@ -2,20 +2,21 @@
 // GERMLINE VARIANT CALLING
 //
 
+include { RUN_CNVKIT_GERMLINE } from '../nf-core/variantcalling/cnvkit/germline/main.nf'
 include { RUN_DEEPVARIANT     } from '../nf-core/variantcalling/deepvariant/main.nf'
 include { RUN_FREEBAYES       } from '../nf-core/variantcalling/freebayes/main.nf'
 include { RUN_HAPLOTYPECALLER } from '../nf-core/variantcalling/haplotypecaller/main.nf'
 include { RUN_MANTA_GERMLINE  } from '../nf-core/variantcalling/manta/germline/main.nf'
 include { RUN_STRELKA_SINGLE  } from '../nf-core/variantcalling/strelka/single/main.nf'
-include { RUN_CNVKIT_GERMLINE } from '../nf-core/variantcalling/cnvkit/germline/main.nf'
-//include { TIDDIT              } from './variantcalling/tiddit.nf'
+include { RUN_TIDDIT          } from '../nf-core/variantcalling/tiddit/main.nf'
 
 workflow GERMLINE_VARIANT_CALLING {
     take:
         tools             // Mandatory, list of tools to apply
         cram_recalibrated // channel: [mandatory] cram
-        dbsnp
-        dbsnp_tbi         // channel: []
+        bwa               // channel: [optional] bwa index for TIDDIT; when empty ([]) TIDDIT_SV gets --skip_assembly
+        dbsnp             // channel: [mandatory] dbsnp
+        dbsnp_tbi         // channel: [mandatory] dbsnp_tbi
         known_sites
         known_sites_tbi
         dict              // channel: [mandatory] dict
@@ -28,15 +29,16 @@ workflow GERMLINE_VARIANT_CALLING {
 
     main:
 
-    ch_versions = Channel.empty()
+    ch_versions         = Channel.empty()
 
     //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config
-    deepvariant_vcf      = Channel.empty()
-    freebayes_vcf        = Channel.empty()
-    haplotypecaller_vcf  = Channel.empty()
-    genotype_gvcf        = Channel.empty()
-    manta_vcf            = Channel.empty()
-    strelka_vcf          = Channel.empty()
+    deepvariant_vcf     = Channel.empty()
+    freebayes_vcf       = Channel.empty()
+    genotype_gvcf       = Channel.empty()
+    haplotypecaller_vcf = Channel.empty()
+    manta_vcf           = Channel.empty()
+    strelka_vcf         = Channel.empty()
+    tiddit_vcf          = Channel.empty()
 
     // Remap channel with intervals
     cram_recalibrated_intervals = cram_recalibrated.combine(intervals)
@@ -137,15 +139,23 @@ workflow GERMLINE_VARIANT_CALLING {
     }
 
     //TIDDIT
-    //TODO
+    if (tools.contains('tiddit')){
+        RUN_TIDDIT(cram_recalibrated,
+            fasta,
+            bwa)
+
+        tiddit_vcf = RUN_TIDDIT.out.tiddit_vcf
+        ch_versions = ch_versions.mix(RUN_TIDDIT.out.versions)
+    }
 
     emit:
     deepvariant_vcf
     freebayes_vcf
-    haplotypecaller_vcf
     genotype_gvcf
+    haplotypecaller_vcf
     manta_vcf
     strelka_vcf
+    tiddit_vcf
 
     versions = ch_versions
 }
diff --git a/subworkflows/nf-core/variantcalling/tiddit/main.nf b/subworkflows/nf-core/variantcalling/tiddit/main.nf
index c21e2e845a..7988614b40 100644
--- a/subworkflows/nf-core/variantcalling/tiddit/main.nf
+++ b/subworkflows/nf-core/variantcalling/tiddit/main.nf
@@ -1,35 +1,32 @@
 include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../../../modules/nf-core/modules/tabix/bgziptabix/main'
 include { TIDDIT_SV                                 } from '../../../../modules/nf-core/modules/tiddit/sv/main'
 
-//TODO: UNDER CONSTRUCTIONS
 workflow RUN_TIDDIT {
     take:
-
+        cram_recalibrated
+        fasta
+        bwa
 
     main:
 
     ch_versions = Channel.empty()
-    // if (tools.contains('tiddit')) {
-    //     TODO: Update tiddit on bioconda, the current version does not support cram usage, needs newest version:
-    //     https://github.com/SciLifeLab/TIDDIT/issues/82#issuecomment-1022103264
-    //     Issue opened, either this week or end of february
-
-    //     TIDDIT_SV(
-    //         cram_recalibrated,
-    //         fasta,
-    //         fasta_fai
-    //     )
-
-    //     TABIX_BGZIP_TIDDIT_SV(TIDDIT_SV.out.vcf)
-    //     tiddit_vcf_gz_tbi = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi
-    //     tiddit_ploidy = TIDDIT_SV.out.ploidy
-    //     tiddit_signals = TIDDIT_SV.out.signals
-    //     tiddit_wig = TIDDIT_SV.out.wig
-    //     tiddit_gc_wig = TIDDIT_SV.out.gc_wig
-
-    //     ch_versions = ch_versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions)
-    //     ch_versions = ch_versions.mix(TIDDIT_SV.out.versions)
-    // }
+
+    TIDDIT_SV(
+        cram_recalibrated,
+        fasta,
+        bwa
+    )
+
+    TABIX_BGZIP_TIDDIT_SV(TIDDIT_SV.out.vcf)
+    tiddit_ploidy = TIDDIT_SV.out.ploidy
+    tiddit_vcf_gz = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi.map{ meta, gz, tbi -> [meta, gz]}
+
+    ch_versions = ch_versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions)
+    ch_versions = ch_versions.mix(TIDDIT_SV.out.versions)
+
     emit:
     versions = ch_versions
+
+    tiddit_vcf = tiddit_vcf_gz
+    tiddit_ploidy
 }
diff --git a/tests/test_tools.yml b/tests/test_tools.yml
index c88ddb98f2..a2b3f4e58e 100644
--- a/tests/test_tools.yml
+++ b/tests/test_tools.yml
@@ -70,6 +70,17 @@
     - path: results/variant_calling/sample1/cnvkit/test.paired_end.recalibrated.sorted.cns
     - path: results/variant_calling/sample1/cnvkit/test.paired_end.recalibrated.sorted.call.cns
 
+- name: Run variant calling on germline sample with tiddit
+  command: nextflow run main.nf -profile test,tools_germline,docker --tools tiddit -c ./tests/nextflow.config
+  tags:
+    - tiddit
+    - germline
+    - variant_calling
+  files:
+    - path: results/variant_calling/sample1/tiddit/sample1.ploidies.tab
+    - path: results/variant_calling/sample1/tiddit/sample1.vcf.gz
+    - path: results/variant_calling/sample1/tiddit/sample1.vcf.gz.tbi
+
 - name: Run variant calling on somatic samples with controlfreec
   command: nextflow run main.nf -profile test,tools_somatic,docker --tools controlfreec -c ./tests/nextflow.config
   tags:
diff --git a/workflows/sarek.nf b/workflows/sarek.nf
index 07acfa7d03..a253c4d364 100644
--- a/workflows/sarek.nf
+++ b/workflows/sarek.nf
@@ -759,6 +759,7 @@ workflow SAREK {
         GERMLINE_VARIANT_CALLING(
             params.tools,
             cram_variant_calling_status_normal,
+            [], // bwa index for TIDDIT: none supplied here, so TIDDIT_SV runs with --skip_assembly
             dbsnp,
             dbsnp_tbi,
             known_sites,
@@ -817,6 +818,7 @@ workflow SAREK {
         vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.freebayes_vcf)
         vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.haplotypecaller_vcf)
         vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.manta_vcf)
+        vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.tiddit_vcf)
         vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.strelka_vcf)
         vcf_to_annotate = vcf_to_annotate.mix(TUMOR_ONLY_VARIANT_CALLING.out.freebayes_vcf)
         vcf_to_annotate = vcf_to_annotate.mix(TUMOR_ONLY_VARIANT_CALLING.out.mutect2_vcf)