Skip to content
This repository has been archived by the owner on Jan 27, 2020. It is now read-only.

Compress and index annotated VCF files #599

Merged
merged 7 commits into from
Jun 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 78 additions & 38 deletions annotate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,17 @@ vcfNotToAnnotate = Channel.create()
if (annotateVCF == []) {
// Gather the compressed VCFs written by every supported variant caller and
// route them either to vcfToAnnotate or to vcfNotToAnnotate.
//
// Every emitted item is a 4-element list:
//   [0] annotator     - 'none' here, since nothing has annotated the file yet
//   [1] variantCaller - name of the tool that produced the VCF
//   [2] vcf           - path of the VCF file
//   [3] idx           - index slot, null for these inputs
Channel.empty().mix(
  Channel.fromPath("${directoryMap.haplotypecaller}/*.vcf.gz")
    .flatten().map{vcf -> ['none', 'haplotypecaller', vcf, null]},
  Channel.fromPath("${directoryMap.manta}/*SV.vcf.gz")
    .flatten().map{vcf -> ['none', 'manta', vcf, null]},
  Channel.fromPath("${directoryMap.mutect1}/*.vcf.gz")
    .flatten().map{vcf -> ['none', 'mutect1', vcf, null]},
  Channel.fromPath("${directoryMap.mutect2}/*.vcf.gz")
    .flatten().map{vcf -> ['none', 'mutect2', vcf, null]},
  Channel.fromPath("${directoryMap.strelka}/*{somatic,variants}*.vcf.gz")
    .flatten().map{vcf -> ['none', 'strelka', vcf, null]},
  Channel.fromPath("${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz")
    .flatten().map{vcf -> ['none', 'strelkabp', vcf, null]}
).choice(vcfToAnnotate, vcfNotToAnnotate) {
  // Index 0 -> vcfToAnnotate, index 1 -> vcfNotToAnnotate.
  // The tool filter must look at the variant caller (it[1]); it[0] is the
  // annotator slot and is always 'none' for the items built above, so testing
  // it[0] would reject every VCF as soon as annotateTools is non-empty.
  (annotateTools == [] || it[1] in annotateTools) ? 0 : 1
}
Expand All @@ -95,9 +95,9 @@ if (annotateVCF == []) {
annotateVCF.each{ list += ",${it}" }
list = list.substring(1)
if (StringUtils.countMatches("${list}", ",") == 0) vcfToAnnotate = Channel.fromPath("${list}")
.map{vcf -> ['userspecified',vcf]}
.map{vcf -> ['none', 'userspecified', vcf, null]}
else vcfToAnnotate = Channel.fromPath("{$list}")
.map{vcf -> ['userspecified',vcf]}
.map{vcf -> ['none', 'userspecified', vcf, null]}
} else exit 1, "specify only tools or files to annotate, not both"

vcfNotToAnnotate.close()
Expand All @@ -110,7 +110,7 @@ process RunBcftoolsStats {
publishDir directoryMap.bcftoolsStats, mode: 'link'

input:
set variantCaller, file(vcf) from vcfForBCFtools
set annotator, variantCaller, file(vcf), file(idx) from vcfForBCFtools

output:
file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport
Expand All @@ -121,8 +121,8 @@ process RunBcftoolsStats {
}

if (params.verbose) bcfReport = bcfReport.view {
"BCFTools stats report:\n\
File : [${it.fileName}]"
"BCFTools stats report:\n" +
"File : [${it.fileName}]"
}

process RunVcftools {
Expand All @@ -131,7 +131,7 @@ process RunVcftools {
publishDir directoryMap.vcftools, mode: 'link'

input:
set variantCaller, file(vcf) from vcfForVCFtools
set annotator, variantCaller, file(vcf), file(idx) from vcfForVCFtools

output:
file ("${vcf.baseName}.*") into vcfReport
Expand All @@ -142,63 +142,77 @@ process RunVcftools {
}

if (params.verbose) vcfReport = vcfReport.view {
"VCFTools stats report:\n\
File : [${it.fileName}]"
"VCFTools stats report:\n" +
"Files : [${it.fileName}]"
}

process RunSnpeff {
tag {vcf}

publishDir params.outDir , saveAs: { it == "${vcf.baseName}.snpEff.csv" ? "${directoryMap.snpeffReports}/${it}" : "${directoryMap.snpeff}/${it}" }, mode: 'link'
publishDir params.outDir, mode: 'link', saveAs: {
if (it == "${vcf.baseName}.snpEff.csv") "${directoryMap.snpeffReports}/${it}"
else if (it == "${vcf.baseName}.snpEff.ann.vcf") null
else "${directoryMap.snpeff}/${it}"
}

input:
set variantCaller, file(vcf) from vcfForSnpeff
set annotator, variantCaller, file(vcf), file(idx) from vcfForSnpeff
val snpeffDb from Channel.value(params.genomes[params.genome].snpeffDb)

output:
set file("${vcf.baseName}.snpEff.ann.vcf"), file("${vcf.baseName}.snpEff.genes.txt"), file("${vcf.baseName}.snpEff.csv"), file("${vcf.baseName}.snpEff.summary.html") into snpeffReport
set variantCaller,file("${vcf.baseName}.snpEff.ann.vcf") into snpEffOutputVCFs
set file("${vcf.baseName}.snpEff.genes.txt"), file("${vcf.baseName}.snpEff.csv"), file("${vcf.baseName}.snpEff.summary.html") into snpeffOutput
set val("snpeff"), variantCaller, file("${vcf.baseName}.snpEff.ann.vcf") into snpeffVCF

when: 'snpeff' in tools || 'merge' in tools

script:
"""
java -Xmx${task.memory.toGiga()}g \
-jar \$SNPEFF_HOME/snpEff.jar \
${snpeffDb} \
-csvStats ${vcf.baseName}.snpEff.csv \
-nodownload \
-canon \
-v \
${vcf} \
> ${vcf.baseName}.snpEff.ann.vcf
-jar \$SNPEFF_HOME/snpEff.jar \
${snpeffDb} \
-csvStats ${vcf.baseName}.snpEff.csv \
-nodownload \
-canon \
-v \
${vcf} \
> ${vcf.baseName}.snpEff.ann.vcf

mv snpEff_summary.html ${vcf.baseName}.snpEff.summary.html
"""
}

if (params.verbose) snpeffReport = snpeffReport.view {
"snpEff report:\n\
File : ${it.fileName}"
if (params.verbose) snpeffOutput = snpeffOutput.view {
"snpEff report:\n" +
"File : ${it.fileName}"
}

// When we are running in the 'merge' mode (first snpEff, then VEP)
// we have to exchange the channels

if('merge' in tools) {
vcfForVep = snpEffOutputVCFs
// When running in the 'merge' mode
// snpEff output is used as VEP input
// Used a feedback loop from vcfCompressed
// https://github.com/nextflow-io/patterns/tree/master/feedback-loop

vcfCompressed = Channel.create()

vcfForVep = Channel.empty().mix(
vcfCompressed.until({it[0]!="snpeff"})
)
}

process RunVEP {
tag {vcf}

publishDir directoryMap.vep, mode: 'link'
publishDir params.outDir, mode: 'link', saveAs: {
if (it == "${vcf.baseName}.vep.summary.html") "${directoryMap.vep}/${it}"
else null
}

input:
set variantCaller, file(vcf) from vcfForVep
set annotator, variantCaller, file(vcf), file(idx) from vcfForVep

output:
set file("${vcf.baseName}.vep.ann.vcf"), file("${vcf.baseName}.vep.summary.html") into vepReport
set val("vep"), variantCaller, file("${vcf.baseName}.vep.ann.vcf") into vepVCF
file("${vcf.baseName}.vep.summary.html") into vepReport

when: 'vep' in tools || 'merge' in tools

Expand All @@ -222,8 +236,34 @@ process RunVEP {
}

if (params.verbose) vepReport = vepReport.view {
"VEP report:\n\
Files : ${it.fileName}"
"VEP report:\n" +
"Files : ${it.fileName}"
}

vcfToCompress = snpeffVCF.mix(vepVCF)

// Compress each annotated VCF with bgzip and index the result with tabix.
//
// Input : [annotator, variantCaller, vcf] items from vcfToCompress, which is
//         snpeffVCF mixed with vepVCF (annotator is "snpeff" or "vep").
// Output: [annotator, variantCaller, *.vcf.gz, *.vcf.gz.tbi] items, sent to
//         both vcfCompressed and vcfCompressedoutput.
process CompressVCF {
// Label each task with the annotator and the file being compressed
tag {"${annotator} - ${vcf}"}

// Publish into the directoryMap entry whose key is the annotator value
publishDir "${directoryMap."$annotator"}", mode: 'link'

input:
set annotator, variantCaller, file(vcf) from vcfToCompress

output:
// The same item is emitted into two channels so one copy can be consumed
// elsewhere while the other is available for verbose reporting
set annotator, variantCaller, file("*.vcf.gz"), file("*.vcf.gz.tbi") into (vcfCompressed, vcfCompressedoutput)

// bgzip reads the VCF on stdin and writes <vcf>.gz; tabix is then run on that
// file and is expected to create the .tbi index declared in the output block
script:
"""
bgzip < ${vcf} > ${vcf}.gz
tabix ${vcf}.gz
"""
}

// In verbose mode, log every compressed VCF together with its index.
// Items are [annotator, variantCaller, compressed VCF, index] as emitted by CompressVCF.
if (params.verbose) {
  vcfCompressedoutput = vcfCompressedoutput.view { entry ->
    def compressedVcf = entry[2]
    def vcfIndex = entry[3]
    "${entry[0]} VCF:\n" +
    "File : ${compressedVcf.fileName}\n" +
    "Index : ${vcfIndex.fileName}"
  }
}

process GetVersionBCFtools {
Expand Down
1 change: 1 addition & 0 deletions configuration/containers.config
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ process {
$BuildPicardIndex.container = "${params.repository}/picard:${params.tag}"
$BuildSAMToolsIndex.container = "${params.repository}/sarek:${params.tag}"
$BuildVCFIndex.container = "${params.repository}/igvtools:${params.tag}"
$CompressVCF.container = "${params.repository}/sarek:${params.tag}"
$ConcatVCF.container = "${params.repository}/sarek:${params.tag}"
$CreateRecalibrationTable.container = "${params.repository}/gatk:${params.tag}"
$GetVersionAll.container = "${params.repository}/qctools:${params.tag}"
Expand Down
1 change: 1 addition & 0 deletions configuration/singularity-path.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ process {
$BuildPicardIndex.container = "${params.containerPath}/picard-${params.tag}.img"
$BuildSAMToolsIndex.container = "${params.containerPath}/sarek-${params.tag}.img"
$BuildVCFIndex.container = "${params.containerPath}/igvtools-${params.tag}.img"
$CompressVCF.container = "${params.containerPath}/sarek-${params.tag}.img"
$ConcatVCF.container = "${params.containerPath}/sarek-${params.tag}.img"
$CreateRecalibrationTable.container = "${params.containerPath}/gatk-${params.tag}.img"
$GetVersionAll.container = "${params.containerPath}/qctools-${params.tag}.img"
Expand Down
2 changes: 1 addition & 1 deletion scripts/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ then
ANNOTATOR=VEP
elif [[ ALL,ANNOTATEALL =~ $TEST ]]
then
ANNOTATOR=snpEFF,VEP
ANNOTATOR=merge,snpEFF,VEP
fi
if [[ $PROFILE == docker ]] && [[ $TRAVIS == true ]]
then
Expand Down