From 707a4c444057b809094a19cd3da0738198452285 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 14 Jun 2018 12:04:30 +0200 Subject: [PATCH 1/6] add merge option to ANNOTATEALL test --- scripts/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test.sh b/scripts/test.sh index ccc595dc6f..a9f340d7c4 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -125,7 +125,7 @@ then ANNOTATOR=VEP elif [[ ALL,ANNOTATEALL =~ $TEST ]] then - ANNOTATOR=snpEFF,VEP + ANNOTATOR=merge,snpEFF,VEP fi if [[ $PROFILE == docker ]] && [[ $TRAVIS == true ]] then From f437d3e7bcef146a209561bf32156b0f84f3159c Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 14 Jun 2018 12:05:14 +0200 Subject: [PATCH 2/6] add index + compress annotated vcf --- annotate.nf | 70 ++++++++++++++++++++++++--- configuration/containers.config | 2 + configuration/singularity-path.config | 2 + 3 files changed, 67 insertions(+), 7 deletions(-) diff --git a/annotate.nf b/annotate.nf index 84a5cc9e92..7e324d4424 100644 --- a/annotate.nf +++ b/annotate.nf @@ -149,15 +149,19 @@ if (params.verbose) vcfReport = vcfReport.view { process RunSnpeff { tag {vcf} - publishDir params.outDir , saveAs: { it == "${vcf.baseName}.snpEff.csv" ? 
"${directoryMap.snpeffReports}/${it}" : "${directoryMap.snpeff}/${it}" }, mode: 'link' + publishDir params.outDir, mode: 'link', saveAs: { + if (it == "${vcf.baseName}.snpEff.csv") "${directoryMap.snpeffReports}/${it}" + else if (it == "${vcf.baseName}.snpEff.ann.vcf") null + else "${directoryMap.snpeff}/${it}" + } input: set variantCaller, file(vcf) from vcfForSnpeff val snpeffDb from Channel.value(params.genomes[params.genome].snpeffDb) output: - set file("${vcf.baseName}.snpEff.ann.vcf"), file("${vcf.baseName}.snpEff.genes.txt"), file("${vcf.baseName}.snpEff.csv"), file("${vcf.baseName}.snpEff.summary.html") into snpeffReport - set variantCaller,file("${vcf.baseName}.snpEff.ann.vcf") into snpEffOutputVCFs + set file("${vcf.baseName}.snpEff.genes.txt"), file("${vcf.baseName}.snpEff.csv"), file("${vcf.baseName}.snpEff.summary.html") into snpeffOutput + set variantCaller,file("${vcf.baseName}.snpEff.ann.vcf") into snpeffVCF when: 'snpeff' in tools || 'merge' in tools @@ -177,7 +181,7 @@ process RunSnpeff { """ } -if (params.verbose) snpeffReport = snpeffReport.view { +if (params.verbose) snpeffOutput = snpeffOutput.view { "snpEff report:\n\ File : ${it.fileName}" } @@ -185,20 +189,48 @@ if (params.verbose) snpeffReport = snpeffReport.view { // When we are running in the 'merge' mode (first snpEff, then VEP) // we have to exchange the channels +process CompressSnpeffVCF { + tag {vcf} + + publishDir directoryMap.snpeff, mode: 'link' + + input: + set variantCaller, file(vcf) from snpeffVCF + + output: + set variantCaller, file("*.vcf.gz") into snpeffVCFcompressed + file("*.vcf.gz.tbi") + + script: + """ + cat ${vcf} | bgzip > ${vcf}.gz + tabix ${vcf}.gz + """ +} + +if (params.verbose) snpeffVCFcompressed = snpeffVCFcompressed.view { + "snpEff VCF:\n\ + File : ${it[1].fileName}" +} + if('merge' in tools) { - vcfForVep = snpEffOutputVCFs + vcfForVep = snpeffVCFcompressed } process RunVEP { tag {vcf} - publishDir directoryMap.vep, mode: 'link' + publishDir params.outDir, 
mode: 'link', saveAs: { + if (it == "${vcf.baseName}.vep.summary.html") "${directoryMap.vep}/${it}" + else null + } input: set variantCaller, file(vcf) from vcfForVep output: - set file("${vcf.baseName}.vep.ann.vcf"), file("${vcf.baseName}.vep.summary.html") into vepReport + file("${vcf.baseName}.vep.ann.vcf") into vepVCF + file("${vcf.baseName}.vep.summary.html") into vepReport when: 'vep' in tools || 'merge' in tools @@ -226,6 +258,30 @@ if (params.verbose) vepReport = vepReport.view { Files : ${it.fileName}" } +process CompressVEPvcf { + tag {vcf} + + publishDir directoryMap.vep, mode: 'link' + + input: + file(vcf) from vepVCF + + output: + file("*.vcf.gz") into vepVCFcompressed + file("*.vcf.gz.tbi") + + script: + """ + cat ${vcf} | bgzip > ${vcf}.gz + tabix ${vcf}.gz + """ +} + +if (params.verbose) vepVCFcompressed = vepVCFcompressed.view { + "VEP VCF:\n\ + File : ${it.fileName}" +} + process GetVersionBCFtools { publishDir directoryMap.version, mode: 'link' output: file("v_*.txt") diff --git a/configuration/containers.config b/configuration/containers.config index a0fcd82ffe..32382dc020 100644 --- a/configuration/containers.config +++ b/configuration/containers.config @@ -12,6 +12,8 @@ process { $BuildPicardIndex.container = "${params.repository}/picard:${params.tag}" $BuildSAMToolsIndex.container = "${params.repository}/sarek:${params.tag}" $BuildVCFIndex.container = "${params.repository}/igvtools:${params.tag}" + $CompressSnpeffVCF.container = "${params.repository}/sarek:${params.tag}" + $CompressVEPvcf.container = "${params.repository}/sarek:${params.tag}" $ConcatVCF.container = "${params.repository}/sarek:${params.tag}" $CreateRecalibrationTable.container = "${params.repository}/gatk:${params.tag}" $GetVersionAll.container = "${params.repository}/qctools:${params.tag}" diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config index 6874c61984..3cec3132e6 100644 --- a/configuration/singularity-path.config +++ 
b/configuration/singularity-path.config @@ -17,6 +17,8 @@ process { $BuildPicardIndex.container = "${params.containerPath}/picard-${params.tag}.img" $BuildSAMToolsIndex.container = "${params.containerPath}/sarek-${params.tag}.img" $BuildVCFIndex.container = "${params.containerPath}/igvtools-${params.tag}.img" + $CompressSnpeffVCF.container = "${params.containerPath}/sarek-${params.tag}.img" + $CompressVEPvcf.container = "${params.containerPath}/sarek-${params.tag}.img" $ConcatVCF.container = "${params.containerPath}/sarek-${params.tag}.img" $CreateRecalibrationTable.container = "${params.containerPath}/gatk-${params.tag}.img" $GetVersionAll.container = "${params.containerPath}/qctools-${params.tag}.img" From abc10ecf31c42833f6f9789dfc182558425f2996 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 14 Jun 2018 13:09:12 +0200 Subject: [PATCH 3/6] remove useless use of cat thanks @marcelm --- annotate.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/annotate.nf b/annotate.nf index 7e324d4424..9a56616d49 100644 --- a/annotate.nf +++ b/annotate.nf @@ -203,7 +203,7 @@ process CompressSnpeffVCF { script: """ - cat ${vcf} | bgzip > ${vcf}.gz + bgzip < ${vcf} > ${vcf}.gz tabix ${vcf}.gz """ } @@ -272,7 +272,7 @@ process CompressVEPvcf { script: """ - cat ${vcf} | bgzip > ${vcf}.gz + bgzip < ${vcf} > ${vcf}.gz tabix ${vcf}.gz """ } From abbf6d3856e43bfacd425e62301873eba98fb368 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 15 Jun 2018 15:43:13 +0200 Subject: [PATCH 4/6] use feedback loop to use only one process to compress VCF --- annotate.nf | 115 +++++++++++--------------- configuration/containers.config | 3 +- configuration/singularity-path.config | 3 +- 3 files changed, 51 insertions(+), 70 deletions(-) diff --git a/annotate.nf b/annotate.nf index 6605a175b1..4685bb7e9e 100644 --- a/annotate.nf +++ b/annotate.nf @@ -76,17 +76,17 @@ vcfNotToAnnotate = Channel.create() if (annotateVCF == []) { Channel.empty().mix( 
Channel.fromPath("${directoryMap.haplotypecaller}/*.vcf.gz") - .flatten().map{vcf -> ['haplotypecaller',vcf]}, + .flatten().map{vcf -> ['none', 'haplotypecaller', vcf, null]}, Channel.fromPath("${directoryMap.manta}/*SV.vcf.gz") - .flatten().map{vcf -> ['manta',vcf]}, + .flatten().map{vcf -> ['none', 'manta', vcf, null]}, Channel.fromPath("${directoryMap.mutect1}/*.vcf.gz") - .flatten().map{vcf -> ['mutect1',vcf]}, + .flatten().map{vcf -> ['none', 'mutect1', vcf, null]}, Channel.fromPath("${directoryMap.mutect2}/*.vcf.gz") - .flatten().map{vcf -> ['mutect2',vcf]}, + .flatten().map{vcf -> ['none', 'mutect2', vcf, null]}, Channel.fromPath("${directoryMap.strelka}/*{somatic,variants}*.vcf.gz") - .flatten().map{vcf -> ['strelka',vcf]}, + .flatten().map{vcf -> ['none', 'strelka', vcf, null]}, Channel.fromPath("${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz") - .flatten().map{vcf -> ['strelkabp',vcf]} + .flatten().map{vcf -> ['none', 'strelkabp', vcf, null]} ).choice(vcfToAnnotate, vcfNotToAnnotate) { annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 
0 : 1 } @@ -95,9 +95,9 @@ if (annotateVCF == []) { annotateVCF.each{ list += ",${it}" } list = list.substring(1) if (StringUtils.countMatches("${list}", ",") == 0) vcfToAnnotate = Channel.fromPath("${list}") - .map{vcf -> ['userspecified',vcf]} + .map{vcf -> ['none', 'userspecified', vcf, null]} else vcfToAnnotate = Channel.fromPath("{$list}") - .map{vcf -> ['userspecified',vcf]} + .map{vcf -> ['none', 'userspecified', vcf, null]} } else exit 1, "specify only tools or files to annotate, not both" vcfNotToAnnotate.close() @@ -110,7 +110,7 @@ process RunBcftoolsStats { publishDir directoryMap.bcftoolsStats, mode: 'link' input: - set variantCaller, file(vcf) from vcfForBCFtools + set annotator, variantCaller, file(vcf), file(idx) from vcfForBCFtools output: file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport @@ -121,8 +121,8 @@ process RunBcftoolsStats { } if (params.verbose) bcfReport = bcfReport.view { - "BCFTools stats report:\n\ - File : [${it.fileName}]" + "BCFTools stats report:\n" + + "File : [${it.fileName}]" } process RunVcftools { @@ -131,7 +131,7 @@ process RunVcftools { publishDir directoryMap.vcftools, mode: 'link' input: - set variantCaller, file(vcf) from vcfForVCFtools + set annotator, variantCaller, file(vcf), file(idx) from vcfForVCFtools output: file ("${vcf.baseName}.*") into vcfReport @@ -142,8 +142,8 @@ process RunVcftools { } if (params.verbose) vcfReport = vcfReport.view { - "VCFTools stats report:\n\ - File : [${it.fileName}]" + "VCFTools stats report:\n" + + "Files : [${it.fileName}]" } process RunSnpeff { @@ -156,65 +156,46 @@ process RunSnpeff { } input: - set variantCaller, file(vcf) from vcfForSnpeff + set annotator, variantCaller, file(vcf), file(idx) from vcfForSnpeff val snpeffDb from Channel.value(params.genomes[params.genome].snpeffDb) output: set file("${vcf.baseName}.snpEff.genes.txt"), file("${vcf.baseName}.snpEff.csv"), file("${vcf.baseName}.snpEff.summary.html") into snpeffOutput - set 
variantCaller,file("${vcf.baseName}.snpEff.ann.vcf") into snpeffVCF + set val("snpeff"), variantCaller, file("${vcf.baseName}.snpEff.ann.vcf") into snpeffVCF when: 'snpeff' in tools || 'merge' in tools script: """ java -Xmx${task.memory.toGiga()}g \ - -jar \$SNPEFF_HOME/snpEff.jar \ - ${snpeffDb} \ - -csvStats ${vcf.baseName}.snpEff.csv \ - -nodownload \ - -canon \ - -v \ - ${vcf} \ - > ${vcf.baseName}.snpEff.ann.vcf + -jar \$SNPEFF_HOME/snpEff.jar \ + ${snpeffDb} \ + -csvStats ${vcf.baseName}.snpEff.csv \ + -nodownload \ + -canon \ + -v \ + ${vcf} \ + > ${vcf.baseName}.snpEff.ann.vcf mv snpEff_summary.html ${vcf.baseName}.snpEff.summary.html """ } if (params.verbose) snpeffOutput = snpeffOutput.view { - "snpEff report:\n\ - File : ${it.fileName}" + "snpEff report:\n" + + "File : ${it.fileName}" } -// When we are running in the 'merge' mode (first snpEff, then VEP) -// we have to exchange the channels - -process CompressSnpeffVCF { - tag {vcf} - - publishDir directoryMap.snpeff, mode: 'link' - - input: - set variantCaller, file(vcf) from snpeffVCF - - output: - set variantCaller, file("*.vcf.gz") into snpeffVCFcompressed - file("*.vcf.gz.tbi") - - script: - """ - bgzip < ${vcf} > ${vcf}.gz - tabix ${vcf}.gz - """ -} +if('merge' in tools) { + // When running in the 'merge' mode + // snpEff output is used as VEP input + // vcfCompressed is in the mix as well, only if it came out of snpEff -if (params.verbose) snpeffVCFcompressed = snpeffVCFcompressed.view { - "snpEff VCF:\n\ - File : ${it[1].fileName}" -} + vcfCompressed = Channel.create() -if('merge' in tools) { - vcfForVep = snpeffVCFcompressed + vcfForVep = Channel.empty().mix( + vcfCompressed.until({it[0]!="snpeff"}) + ) } process RunVEP { @@ -226,10 +207,10 @@ process RunVEP { } input: - set variantCaller, file(vcf) from vcfForVep + set annotator, variantCaller, file(vcf), file(idx) from vcfForVep output: - file("${vcf.baseName}.vep.ann.vcf") into vepVCF + set val("vep"), variantCaller, 
file("${vcf.baseName}.vep.ann.vcf") into vepVCF file("${vcf.baseName}.vep.summary.html") into vepReport when: 'vep' in tools || 'merge' in tools @@ -254,21 +235,22 @@ process RunVEP { } if (params.verbose) vepReport = vepReport.view { - "VEP report:\n\ - Files : ${it.fileName}" + "VEP report:\n" + + "Files : ${it.fileName}" } -process CompressVEPvcf { - tag {vcf} +vcfToCompress = snpeffVCF.mix(vepVCF) + +process CompressVCF { + tag {"${annotator} - ${vcf}"} - publishDir directoryMap.vep, mode: 'link' + publishDir "${directoryMap."$annotator"}", mode: 'link' input: - file(vcf) from vepVCF + set annotator, variantCaller, file(vcf) from vcfToCompress output: - file("*.vcf.gz") into vepVCFcompressed - file("*.vcf.gz.tbi") + set annotator, variantCaller, file("*.vcf.gz"), file("*.vcf.gz.tbi") into (vcfCompressed, vcfCompressedoutput) script: """ @@ -277,9 +259,10 @@ process CompressVEPvcf { """ } -if (params.verbose) vepVCFcompressed = vepVCFcompressed.view { - "VEP VCF:\n\ - File : ${it.fileName}" +if (params.verbose) vcfCompressedoutput = vcfCompressedoutput.view { + "${it[0]} VCF:\n" + + "File : ${it[2].fileName}\n" + + "Index : ${it[3].fileName}" } process GetVersionBCFtools { diff --git a/configuration/containers.config b/configuration/containers.config index 32382dc020..07b862c441 100644 --- a/configuration/containers.config +++ b/configuration/containers.config @@ -12,8 +12,7 @@ process { $BuildPicardIndex.container = "${params.repository}/picard:${params.tag}" $BuildSAMToolsIndex.container = "${params.repository}/sarek:${params.tag}" $BuildVCFIndex.container = "${params.repository}/igvtools:${params.tag}" - $CompressSnpeffVCF.container = "${params.repository}/sarek:${params.tag}" - $CompressVEPvcf.container = "${params.repository}/sarek:${params.tag}" + $CompressVCF.container = "${params.repository}/sarek:${params.tag}" $ConcatVCF.container = "${params.repository}/sarek:${params.tag}" $CreateRecalibrationTable.container = 
"${params.repository}/gatk:${params.tag}" $GetVersionAll.container = "${params.repository}/qctools:${params.tag}" diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config index 3cec3132e6..7efea0292f 100644 --- a/configuration/singularity-path.config +++ b/configuration/singularity-path.config @@ -17,8 +17,7 @@ process { $BuildPicardIndex.container = "${params.containerPath}/picard-${params.tag}.img" $BuildSAMToolsIndex.container = "${params.containerPath}/sarek-${params.tag}.img" $BuildVCFIndex.container = "${params.containerPath}/igvtools-${params.tag}.img" - $CompressSnpeffVCF.container = "${params.containerPath}/sarek-${params.tag}.img" - $CompressVEPvcf.container = "${params.containerPath}/sarek-${params.tag}.img" + $CompressVCF.container = "${params.containerPath}/sarek-${params.tag}.img" $ConcatVCF.container = "${params.containerPath}/sarek-${params.tag}.img" $CreateRecalibrationTable.container = "${params.containerPath}/gatk-${params.tag}.img" $GetVersionAll.container = "${params.containerPath}/qctools-${params.tag}.img" From 1bfd19b4e03c5c2608e7a1e373b200469af2bfce Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 15 Jun 2018 15:43:44 +0200 Subject: [PATCH 5/6] better comment --- annotate.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/annotate.nf b/annotate.nf index 4685bb7e9e..5ef48c2a1d 100644 --- a/annotate.nf +++ b/annotate.nf @@ -189,7 +189,7 @@ if (params.verbose) snpeffOutput = snpeffOutput.view { if('merge' in tools) { // When running in the 'merge' mode // snpEff output is used as VEP input - // vcfCompressed is in the mix as well, only if it came out of snpEff + // vcfCompressed is in the mix, only if it came out of snpEff vcfCompressed = Channel.create() From 1b8d674d66cd26b5f8ea09b4484e650c976254cc Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 15 Jun 2018 15:44:37 +0200 Subject: [PATCH 6/6] better comment --- annotate.nf | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/annotate.nf b/annotate.nf index 5ef48c2a1d..e5405b4c04 100644 --- a/annotate.nf +++ b/annotate.nf @@ -189,7 +189,8 @@ if (params.verbose) snpeffOutput = snpeffOutput.view { if('merge' in tools) { // When running in the 'merge' mode // snpEff output is used as VEP input - // vcfCompressed is in the mix, only if it came out of snpEff + // This is done with a feedback loop from vcfCompressed, see: + // https://github.com/nextflow-io/patterns/tree/master/feedback-loop vcfCompressed = Channel.create()