From 7084a73166edef43717350512fb42715f6c50cfc Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 7 Jun 2018 17:21:05 +0200 Subject: [PATCH] Add new feature to get software versions for MultiQC --- annotate.nf | 77 ++++++++++----- bin/scrape_tool_versions.py | 78 +++++++++++++++ buildReferences.nf | 23 ++--- configuration/containers.config | 15 +++ configuration/singularity-path.config | 16 +++- germlineVC.nf | 98 ++++++++----------- lib/QC.groovy | 100 ++++++++++++++++++++ lib/SarekUtils.groovy | 37 +++++++- main.nf | 88 +++++++++++------ runMultiQC.nf | 70 +++++++------- scripts/test.sh | 2 +- somaticVC.nf | 131 +++++++++++++++----------- 12 files changed, 517 insertions(+), 218 deletions(-) create mode 100755 bin/scrape_tool_versions.py create mode 100644 lib/QC.groovy diff --git a/annotate.nf b/annotate.nf index 9414a29470..84a5cc9e92 100644 --- a/annotate.nf +++ b/annotate.nf @@ -57,7 +57,7 @@ tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} annotateTools = params.annotateTools ? params.annotateTools.split(',').collect{it.trim().toLowerCase()} : [] annotateVCF = params.annotateVCF ? 
params.annotateVCF.split(',').collect{it.trim()} : [] -directoryMap = defineDirectoryMap() +directoryMap = SarekUtils.defineDirectoryMap(params.outDir) toolList = defineToolList() if (!SarekUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' @@ -102,7 +102,7 @@ if (annotateVCF == []) { vcfNotToAnnotate.close() -(vcfForBCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(3) +(vcfForBCFtools, vcfForVCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(4) process RunBcftoolsStats { tag {vcf} @@ -117,10 +117,7 @@ process RunBcftoolsStats { when: !params.noReports - script: - """ - bcftools stats ${vcf} > ${vcf.baseName}.bcf.tools.stats.out - """ + script: QC.bcftools(vcf) } if (params.verbose) bcfReport = bcfReport.view { @@ -128,6 +125,27 @@ if (params.verbose) bcfReport = bcfReport.view { File : [${it.fileName}]" } +process RunVcftools { + tag {vcf} + + publishDir directoryMap.vcftools, mode: 'link' + + input: + set variantCaller, file(vcf) from vcfForVCFtools + + output: + file ("${vcf.baseName}.*") into vcfReport + + when: !params.noReports + + script: QC.vcftools(vcf) +} + +if (params.verbose) vcfReport = vcfReport.view { + "VCFTools stats report:\n\ + File : [${it.fileName}]" +} + process RunSnpeff { tag {vcf} @@ -208,6 +226,34 @@ if (params.verbose) vepReport = vepReport.view { Files : ${it.fileName}" } +process GetVersionBCFtools { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: !params.noReports + script: QC.getVersionBCFtools() +} + +process GetVersionSnpEFF { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: 'snpeff' in tools || 'merge' in tools + script: QC.getVersionSnpEFF() +} + +process GetVersionVCFtools { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: !params.noReports + script: QC.getVersionVCFtools() +} + +process GetVersionVEP { + publishDir directoryMap.version, mode: 'link' + output: 
file("v_*.txt") + when: 'vep' in tools || 'merge' in tools + script: QC.getVersionVEP() +} + /* ================================================================================ = F U N C T I O N S = @@ -219,26 +265,11 @@ def checkUppmaxProject() { return !(workflow.profile == 'slurm' && !params.project) } -def defineDirectoryMap() { - return [ - 'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller", - 'manta' : "${params.outDir}/VariantCalling/Manta", - 'mutect1' : "${params.outDir}/VariantCalling/MuTect1", - 'mutect2' : "${params.outDir}/VariantCalling/MuTect2", - 'strelka' : "${params.outDir}/VariantCalling/Strelka", - 'strelkabp' : "${params.outDir}/VariantCalling/StrelkaBP", - 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", - 'snpeffReports' : "${params.outDir}/Reports/SnpEff", - 'snpeff' : "${params.outDir}/Annotation/SnpEff", - 'vep' : "${params.outDir}/Annotation/VEP" - ] -} - def defineToolList() { return [ + 'merge', 'snpeff', - 'vep', - 'merge' + 'vep' ] } diff --git a/bin/scrape_tool_versions.py b/bin/scrape_tool_versions.py new file mode 100755 index 0000000000..fcc61c4c7f --- /dev/null +++ b/bin/scrape_tool_versions.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +from __future__ import print_function +from collections import OrderedDict +import re + +regexes = { + 'AlleleCount': ['v_allelecount.txt', r"(\S+)"], + 'ASCAT': ['v_ascat.txt', r"(\d\.\d+)"], + 'bcftools': ['v_bcftools.txt', r"bcftools (\S+)"], + 'BWA': ['v_bwa.txt', r"Version: (\S+)"], + 'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"], + 'GATK': ['v_gatk.txt', r"GATK version(\S+)"], + 'htslib': ['v_samtools.txt', r"htslib (\S+)"], + 'Manta': ['v_manta.txt', r"([0-9.]+)"], + 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], + 'Nextflow': ['v_nextflow.txt', r"(\S+)"], + 'FreeBayes': ['v_freebayes.txt', r"version: v(\d\.\d\.\d+)"], + 'Picard': ['v_picard.txt', r"Picard version:(\d\.\d\.\d+)"], + 'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"], + 'R': ['v_r.txt', 
r"R version (\S+)"], + 'samtools': ['v_samtools.txt', r"samtools (\S+)"], + 'Sarek': ['v_sarek.txt', r"(\S+)"], + 'SnpEff': ['v_snpeff.txt', r"version SnpEff (\S+)"], + 'Strelka': ['v_strelka.txt', r"([0-9.]+)"], + 'vcftools': ['v_vcftools.txt', r"([0-9.]+)"], + 'VEP': ['v_vep.txt', r"ensembl-vep : (\S+)"], +} +results = OrderedDict() +results['Sarek'] = 'N/A' +results['Nextflow'] = 'N/A' +results['BWA'] = 'N/A' +results['samtools'] = 'N/A' +results['htslib'] = 'N/A' +results['GATK'] = 'N/A' +results['Picard'] = 'N/A' +results['Manta'] = 'N/A' +results['Strelka'] = 'N/A' +results['FreeBayes'] = 'N/A' +results['AlleleCount'] = 'N/A' +results['R'] = 'N/A' +results['ASCAT'] = 'N/A' +results['SnpEff'] = 'N/A' +results['VEP'] = 'N/A' +results['FastQC'] = 'N/A' +results['Qualimap'] = 'N/A' +results['bcftools'] = 'N/A' +results['vcftools'] = 'N/A' +results['MultiQC'] = 'N/A' + +# Search each file using its regex +for k, v in regexes.items(): + try: + with open(v[0]) as x: + versions = x.read() + match = re.search(v[1], versions) + if match: + results[k] = "v {}".format(match.group(1)) + except Exception as FileNotFoundError: + print("No such file:", v[0]) + +# Remove empty keys (defining them above ensures correct order) +for k in ['Sarek', 'Nextflow', 'BWA', 'samtools', 'htslib', 'GATK', 'Picard', 'Manta', 'Strelka', 'FreeBayes', 'AlleleCount', 'R', 'ASCAT', 'SnpEff', 'VEP', 'FastQC', 'Qualimap', 'bcftools', 'vcftools', 'MultiQC']: + if results[k] == 'N/A': + del(results[k]) + +# Dump to YAML +print (''' +id: 'Sarek' +order: -1000 +section_href: 'https://github.com/SciLifeLab/Sarek' +plot_type: 'html' +description: 'tool versions are collected at run time from output.' +data: | +
+ <dl class="dl-horizontal"> +''') +for k,v in results.items(): + print(" <dt>{}</dt><dd>{}</dd>".format(k,v)) +print (" </dl>") diff --git a/buildReferences.nf b/buildReferences.nf index 75e6b916d3..33c8863082 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -98,27 +98,24 @@ if (params.verbose) ch_decompressedFiles = ch_decompressedFiles.view { } ch_fastaFile = Channel.create() -ch_otherFiles = Channel.create() -ch_vcfFiles = Channel.create() +ch_fastaForBWA = Channel.create() +ch_fastaForPicard = Channel.create() +ch_fastaForSAMTools = Channel.create() +ch_otherFile = Channel.create() +ch_vcfFile = Channel.create() ch_decompressedFiles - .choice(ch_fastaFile, ch_vcfFiles, ch_otherFiles) { + .choice(ch_fastaFile, ch_vcfFile, ch_otherFile) { it =~ ".fasta" ? 0 : it =~ ".vcf" ? 1 : 2} -(ch_fastaFile, ch_fastaFileToKeep) = ch_fastaFile.into(2) -(ch_vcfFiles, ch_vcfFilesToKeep) = ch_vcfFiles.into(2) +(ch_fastaForBWA, ch_fastaForPicard, ch_fastaForSAMTools, ch_fastaFileToKeep) = ch_fastaFile.into(4) +(ch_vcfFile, ch_vcfFileToKeep) = ch_vcfFile.into(2) ch_notCompressedfiles - .mix(ch_otherFiles, ch_fastaFileToKeep, ch_vcfFilesToKeep) + .mix(ch_fastaFileToKeep, ch_vcfFileToKeep, ch_otherFile) .collectFile(storeDir: params.outDir) -ch_fastaForBWA = Channel.create() -ch_fastaForPicard = Channel.create() -ch_fastaForSAMTools = Channel.create() - -ch_fastaFile.into(ch_fastaForBWA,ch_fastaForPicard,ch_fastaForSAMTools) - process BuildBWAindexes { tag {f_reference} @@ -193,7 +190,7 @@ process BuildVCFIndex { publishDir params.outDir, mode: 'link' input: - file(f_reference) from ch_vcfFiles + file(f_reference) from ch_vcfFile output: file("${f_reference}.idx") into ch_vcfIndex diff --git a/configuration/containers.config b/configuration/containers.config index ea772f22d5..a0fcd82ffe 100644 --- a/configuration/containers.config +++ b/configuration/containers.config @@ -14,6 +14,21 @@ process { $BuildVCFIndex.container = "${params.repository}/igvtools:${params.tag}" $ConcatVCF.container = "${params.repository}/sarek:${params.tag}" $CreateRecalibrationTable.container =
"${params.repository}/gatk:${params.tag}" + $GetVersionAll.container = "${params.repository}/qctools:${params.tag}" + $GetVersionAlleleCount.container = "${params.repository}/runallelecount:${params.tag}" + $GetVersionASCAT.container = "${params.repository}/r-base:${params.tag}" + $GetVersionBamQC.container = "${params.repository}/qctools:${params.tag}" + $GetVersionBCFtools.container = "${params.repository}/sarek:${params.tag}" + $GetVersionBWAsamtools.container = "${params.repository}/sarek:${params.tag}" + $GetVersionFastQC.container = "${params.repository}/qctools:${params.tag}" + $GetVersionFreeBayes.container = "${params.repository}/freebayes:${params.tag}" + $GetVersionGATK.container = "${params.repository}/gatk:${params.tag}" + $GetVersionManta.container = "${params.repository}/sarek:${params.tag}" + $GetVersionPicard.container = "${params.repository}/picard:${params.tag}" + $GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"} + $GetVersionStrelka.container = "${params.repository}/sarek:${params.tag}" + $GetVersionVCFtools.container = "${params.repository}/qctools:${params.tag}" + $GetVersionVEP.container = {params.genome == 'GRCh38' ? 
"${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"} $IndelRealigner.container = "${params.repository}/gatk:${params.tag}" $MapReads.container = "${params.repository}/sarek:${params.tag}" $MarkDuplicates.container = "${params.repository}/picard:${params.tag}" diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config index 2257afd2a3..6874c61984 100644 --- a/configuration/singularity-path.config +++ b/configuration/singularity-path.config @@ -19,7 +19,21 @@ process { $BuildVCFIndex.container = "${params.containerPath}/igvtools-${params.tag}.img" $ConcatVCF.container = "${params.containerPath}/sarek-${params.tag}.img" $CreateRecalibrationTable.container = "${params.containerPath}/gatk-${params.tag}.img" - $GenerateMultiQCconfig.container = "${params.containerPath}/qctools-${params.tag}.img" + $GetVersionAll.container = "${params.containerPath}/qctools-${params.tag}.img" + $GetVersionAlleleCount.container = "${params.containerPath}/runallelecount-${params.tag}.img" + $GetVersionASCAT.container = "${params.containerPath}/r-base-${params.tag}.img" + $GetVersionBamQC.container = "${params.containerPath}/qctools-${params.tag}.img" + $GetVersionBCFtools.container = "${params.containerPath}/sarek-${params.tag}.img" + $GetVersionBWAsamtools.container = "${params.containerPath}/sarek-${params.tag}.img" + $GetVersionFastQC.container = "${params.containerPath}/qctools-${params.tag}.img" + $GetVersionFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img" + $GetVersionGATK.container = "${params.containerPath}/gatk-${params.tag}.img" + $GetVersionManta.container = "${params.containerPath}/sarek-${params.tag}.img" + $GetVersionPicard.container = "${params.containerPath}/picard-${params.tag}.img" + $GetVersionSnpeff.container = {params.genome == 'GRCh38' ? 
"${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"} + $GetVersionStrelka.container = "${params.containerPath}/sarek-${params.tag}.img" + $GetVersionVCFtools.container = "${params.containerPath}/qctools-${params.tag}.img" + $GetVersionVEP.container = {params.genome == 'GRCh38' ? "${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"} $IndelRealigner.container = "${params.containerPath}/gatk-${params.tag}.img" $MapReads.container = "${params.containerPath}/sarek-${params.tag}.img" $MarkDuplicates.container = "${params.containerPath}/picard-${params.tag}.img" diff --git a/germlineVC.nf b/germlineVC.nf index afb1611d14..f6c87d9a25 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -61,7 +61,7 @@ if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project ${bam}.samtools.stats.out - """ + script: QC.samtoolsStats(bam) } if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { @@ -157,14 +154,7 @@ process RunBamQC { when: !params.noReports && !params.noBAMQC - script: - """ - qualimap --java-mem-size=${task.memory.toGiga()}G \ - bamqc \ - -bam ${bam} \ - -outdir ${idSample} \ - -outformat HTML - """ + script: QC.bamQC(bam,idSample,task.memory) } if (params.verbose) bamQCreport = bamQCreport.view { @@ -579,10 +569,7 @@ process RunBcftoolsStats { when: !params.noReports - script: - """ - bcftools stats ${vcf} > ${vcf.baseName}.bcf.tools.stats.out - """ + script: QC.bcftools(vcf) } if (params.verbose) bcfReport = bcfReport.view { @@ -605,28 +592,7 @@ process RunVcftools { when: !params.noReports - script: - """ - vcftools \ - --gzvcf ${vcf} \ - --relatedness2 \ - --out ${vcf.baseName} - - vcftools \ - --gzvcf ${vcf} \ - --TsTv-by-count \ - --out ${vcf.baseName} - - vcftools \ - --gzvcf ${vcf} \ - --TsTv-by-qual \ - --out ${vcf.baseName} - - vcftools \ - --gzvcf ${vcf} \ - --FILTER-summary \ - --out ${vcf.baseName} - """ + 
script: QC.vcftools(vcf) } if (params.verbose) vcfReport = vcfReport.view { @@ -635,6 +601,42 @@ if (params.verbose) vcfReport = vcfReport.view { } vcfReport.close() + +process GetVersionGATK { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: 'haplotypecaller' in tools && !params.onlyQC + script: QC.getVersionGATK() +} + +process GetVersionStrelka { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: 'strelka' in tools && !params.onlyQC + script: QC.getVersionStrelka() +} + +process GetVersionManta { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: 'manta' in tools && !params.onlyQC + script: QC.getVersionManta() +} + +process GetVersionBCFtools { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: !params.noReports + script: QC.getVersionBCFtools() +} + +process GetVersionVCFtools { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: !params.noReports + script: QC.getVersionVCFtools() +} + /* ================================================================================ = F U N C T I O N S = @@ -690,24 +692,6 @@ def checkUppmaxProject() { return !(workflow.profile == 'slurm' && !params.project) } -def defineDirectoryMap() { - return [ - 'recalibrated' : "${params.outDir}/Preprocessing/Recalibrated", - 'bamQC' : "${params.outDir}/Reports/bamQC", - 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", - 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats", - 'vcftools' : "${params.outDir}/Reports/VCFTools", - 'ascat' : "${params.outDir}/VariantCalling/Ascat", - 'freebayes' : "${params.outDir}/VariantCalling/FreeBayes", - 'gvcf-hc' : "${params.outDir}/VariantCalling/HaplotypeCallerGVCF", - 'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller", - 'manta' : "${params.outDir}/VariantCalling/Manta", - 'mutect1' : "${params.outDir}/VariantCalling/MuTect1", - 'mutect2' : 
"${params.outDir}/VariantCalling/MuTect2", - 'strelka' : "${params.outDir}/VariantCalling/Strelka" - ] -} - def defineReferenceMap() { if (!(params.genome in params.genomes)) exit 1, "Genome ${params.genome} not found in configuration" return [ diff --git a/lib/QC.groovy b/lib/QC.groovy new file mode 100644 index 0000000000..96ef79fa6f --- /dev/null +++ b/lib/QC.groovy @@ -0,0 +1,100 @@ +class QC { +// Run bamQC on vcf file + static def bamQC(bam, idSample, mem) { + """ + qualimap --java-mem-size=${mem.toGiga()}G \ + bamqc \ + -bam ${bam} \ + -outdir ${idSample} \ + -outformat HTML + """ + } + +// Run bcftools on vcf file + static def bcftools(vcf) { + """ + bcftools stats ${vcf} > ${vcf.baseName}.bcf.tools.stats.out + """ + } + +// Run samtools stats on bam file + static def samtoolsStats(bam) { + """ + samtools stats ${bam} > ${bam}.samtools.stats.out + """ + } + +// Run vcftools on vcf file + static def vcftools(vcf) { + """ + vcftools \ + --gzvcf ${vcf} \ + --relatedness2 \ + --out ${vcf.baseName} + + vcftools \ + --gzvcf ${vcf} \ + --TsTv-by-count \ + --out ${vcf.baseName} + + vcftools \ + --gzvcf ${vcf} \ + --TsTv-by-qual \ + --out ${vcf.baseName} + + vcftools \ + --gzvcf ${vcf} \ + --FILTER-summary \ + --out ${vcf.baseName} + """ + } + +// Get BCFtools version + static def getVersionBCFtools() { + """ + bcftools version > v_bcftools.txt + """ + } + +// Get GATK version + static def getVersionGATK() { + """ + echo "GATK version"\$(java -jar \$GATK_HOME/GenomeAnalysisTK.jar --version 2>&1) > v_gatk.txt + """ + } + +// Get Manta version + static def getVersionManta() { + """ + cat \$MANTA_INSTALL_PATH/lib/python/configBuildTimeInfo.py | grep workflowVersion > v_manta.txt + """ + } + +// Get SnpEFF version + static def getVersionSnpEFF() { + """ + echo "SNPEFF version"\$(java -jar \$SNPEFF_HOME/snpEff.jar -h 2>&1) > v_snpeff.txt + """ + } + +// Get Strelka version + static def getVersionStrelka() { + """ + cat 
\$STRELKA_INSTALL_PATH/lib/python/configBuildTimeInfo.py | grep workflowVersion > v_strelka.txt + """ + } + +// Get VCFtools version + static def getVersionVCFtools() { + """ + vcftools --version > v_vcftools.txt + """ + } + +// Get VEP version + static def getVersionVEP() { + """ + vep --help > v_vep.txt + """ + } +} diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy index 5af73fcadc..ba59b3bf06 100644 --- a/lib/SarekUtils.groovy +++ b/lib/SarekUtils.groovy @@ -1,6 +1,6 @@ class MyUtils { + // Check if params is in this given list static def checkParams(it) { - // Check if params is in this given list return it in [ 'annotate-tools', 'annotate-VCF', @@ -67,13 +67,13 @@ class MyUtils { 'version'] } + // Loop through all parameters to check their existence and spelling static def checkParameterList(list, realList) { - // Loop through all parameters to check their existence and spelling return list.every{ checkParameterExistence(it, realList) } } + // Check parameter existence static def checkParameterExistence(it, list) { - // Check parameter existence if (!list.contains(it)) { println("Unknown parameter: ${it}") return false @@ -81,8 +81,37 @@ class MyUtils { return true } + // Define map of directories + static def defineDirectoryMap(outDir) { + return [ + 'nonRealigned' : "${outDir}/Preprocessing/NonRealigned", + 'nonRecalibrated' : "${outDir}/Preprocessing/NonRecalibrated", + 'recalibrated' : "${outDir}/Preprocessing/Recalibrated", + 'ascat' : "${outDir}/VariantCalling/Ascat", + 'freebayes' : "${outDir}/VariantCalling/FreeBayes", + 'gvcf-hc' : "${outDir}/VariantCalling/HaplotypeCallerGVCF", + 'haplotypecaller' : "${outDir}/VariantCalling/HaplotypeCaller", + 'manta' : "${outDir}/VariantCalling/Manta", + 'mutect1' : "${outDir}/VariantCalling/MuTect1", + 'mutect2' : "${outDir}/VariantCalling/MuTect2", + 'strelka' : "${outDir}/VariantCalling/Strelka", + 'strelkabp' : "${outDir}/VariantCalling/StrelkaBP", + 'snpeff' : "${outDir}/Annotation/SnpEff", + 'vep' : 
"${outDir}/Annotation/VEP", + 'bamQC' : "${outDir}/Reports/bamQC", + 'bcftoolsStats' : "${outDir}/Reports/BCFToolsStats", + 'fastQC' : "${outDir}/Reports/FastQC", + 'markDuplicatesQC' : "${outDir}/Reports/MarkDuplicates", + 'multiQC' : "${outDir}/Reports/MultiQC", + 'samtoolsStats' : "${outDir}/Reports/SamToolsStats", + 'snpeffReports' : "${outDir}/Reports/SnpEff", + 'vcftools' : "${outDir}/Reports/VCFTools", + 'version' : "${outDir}/Reports/ToolsVersion" + ] + } + + // Compare params to list of verified params static def isAllowedParams(params) { - // Compare params to list of verified params final test = true params.each{ if (!checkParams(it.toString().split('=')[0])) { diff --git a/main.nf b/main.nf index 52711628ea..8fc1baaaa5 100644 --- a/main.nf +++ b/main.nf @@ -63,7 +63,7 @@ if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project ${bam}.samtools.stats.out - """ + script: QC.samtoolsStats(bam) } if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { @@ -575,14 +576,7 @@ process RunBamQC { when: !params.noReports && !params.noBAMQC - script: - """ - qualimap --java-mem-size=${task.memory.toGiga()}G \ - bamqc \ - -bam ${bam} \ - -outdir ${idSample} \ - -outformat HTML - """ + script: QC.bamQC(bam,idSample,task.memory) } if (params.verbose) bamQCreport = bamQCreport.view { @@ -590,6 +584,59 @@ if (params.verbose) bamQCreport = bamQCreport.view { Dir : [${it.fileName}]" } +process GetVersionBamQC { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: !params.noReports && !params.noBAMQC + + script: + """ + qualimap --version &> v_qualimap.txt + """ +} + +process GetVersionBWAsamtools { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: step == 'mapping' && !params.onlyQC + + script: + """ + bwa &> v_bwa.txt 2>&1 || true + samtools --version &> v_samtools.txt + """ +} + +process GetVersionFastQC { + publishDir directoryMap.version, mode: 'link' + output: + 
file("v_fastqc.txt") + when: step == 'mapping' && !params.noReports + + script: + """ + fastqc -v > v_fastqc.txt + """ +} + +process GetVersionGATK { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: !params.onlyQC + script: QC.getVersionGATK() +} + +process GetVersionPicard { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: step == 'mapping' && !params.onlyQC + + script: + """ + echo "Picard version:"\$(java -jar \$PICARD_HOME/picard.jar MarkDuplicates --version 2>&1) > v_picard.txt + """ +} + /* ================================================================================ = F U N C T I O N S = @@ -646,19 +693,6 @@ def checkExactlyOne(list) { return n == 1 } -def defineDirectoryMap() { - return [ - 'nonRealigned' : "${params.outDir}/Preprocessing/NonRealigned", - 'nonRecalibrated' : "${params.outDir}/Preprocessing/NonRecalibrated", - 'recalibrated' : "${params.outDir}/Preprocessing/Recalibrated", - 'bamQC' : "${params.outDir}/Reports/bamQC", - 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", - 'fastQC' : "${params.outDir}/Reports/FastQC", - 'markDuplicatesQC' : "${params.outDir}/Reports/MarkDuplicates", - 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats" - ] -} - def defineReferenceMap() { if (!(params.genome in params.genomes)) exit 1, "Genome ${params.genome} not found in configuration" return [ diff --git a/runMultiQC.nf b/runMultiQC.nf index fe90ce222f..5e87625424 100644 --- a/runMultiQC.nf +++ b/runMultiQC.nf @@ -52,7 +52,7 @@ if (params.help) exit 0, helpMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " -directoryMap = defineDirectoryMap() +directoryMap = SarekUtils.defineDirectoryMap(params.outDir) /* ================================================================================ = P R O C E S S E S = @@ -61,41 +61,28 @@ directoryMap = defineDirectoryMap() startMessage() -process GenerateMultiQCconfig { +process GetVersionAll { publishDir directoryMap.multiQC, mode: 'link' input: + file(versions) from Channel.fromPath("${directoryMap.version}/*").collect() output: - file("multiqc_config.yaml") into multiQCconfig + file ("tool_versions_mqc.yaml") into versionsForMultiQC when: !params.noReports script: """ - touch multiqc_config.yaml - echo "custom_logo: ${baseDir}/doc/images/Sarek_no_Border.png" >> multiqc_config.yaml - echo "custom_logo_url: http://opensource.scilifelab.se/projects/sarek" >> multiqc_config.yaml - echo "custom_logo_title: 'Sarek'" >> multiqc_config.yaml - echo "report_header_info:" >> multiqc_config.yaml - echo "- Sarek version: ${params.version}" >> multiqc_config.yaml - echo "- Contact Name: ${params.callName}" >> multiqc_config.yaml - echo "- Contact E-mail: ${params.contactMail}" >> multiqc_config.yaml - echo "- Directory: ${workflow.launchDir}" >> multiqc_config.yaml - echo "- Genome: "${params.genome} >> multiqc_config.yaml - echo "top_modules:" >> multiqc_config.yaml - echo "- 'fastqc'" >> multiqc_config.yaml - echo "- 'picard'" >> multiqc_config.yaml - echo "- 'samtools'" >> multiqc_config.yaml - echo "- 'qualimap'" >> multiqc_config.yaml - echo "- 'bcftools'" >> multiqc_config.yaml - echo "- 'vcftools'" >> multiqc_config.yaml - echo "- 'snpeff'" >> multiqc_config.yaml + echo "${params.version}" &> v_sarek.txt + echo "${workflow.nextflow.version}" &> v_nextflow.txt + multiqc --version &> v_multiqc.txt + scrape_tool_versions.py &> tool_versions_mqc.yaml """ } -if (params.verbose && !params.noReports) multiQCconfig = multiQCconfig.view { - "MultiQC config:\n\ +if (params.verbose && !params.noReports) versionsForMultiQC = 
versionsForMultiQC.view { + "MultiQC tools version:\n\ File : [${it.fileName}]" } @@ -108,14 +95,15 @@ reportsForMultiQC = Channel.empty() Channel.fromPath("${directoryMap.samtoolsStats}/*"), Channel.fromPath("${directoryMap.snpeffReports}/*"), Channel.fromPath("${directoryMap.vcftools}/*"), - multiQCconfig ).collect() process RunMultiQC { publishDir directoryMap.multiQC, mode: 'link' input: - file ('*') from reportsForMultiQC + file (multiqcConfig) from createMultiQCconfig() + file (reports) from reportsForMultiQC + file (versions) from versionsForMultiQC output: set file("*multiqc_report.html"), file("*multiqc_data") into multiQCReport @@ -145,17 +133,27 @@ def checkUppmaxProject() { return !(workflow.profile == 'slurm' && !params.project) } -def defineDirectoryMap() { - return [ - 'bamQC' : "${params.outDir}/Reports/bamQC", - 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", - 'fastQC' : "${params.outDir}/Reports/FastQC", - 'markDuplicatesQC' : "${params.outDir}/Reports/MarkDuplicates", - 'multiQC' : "${params.outDir}/Reports/MultiQC", - 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats", - 'snpeffReports' : "${params.outDir}/Reports/SnpEff", - 'vcftools' : "${params.outDir}/Reports/VCFTools" - ] +def createMultiQCconfig() { + def file = workDir.resolve('multiqc_config.yaml') + file.text = """ + custom_logo: ${baseDir}/doc/images/Sarek_no_Border.png + custom_logo_url: http://opensource.scilifelab.se/projects/sarek + custom_logo_title: 'Sarek' + report_header_info: + - Contact Name: ${params.callName} + - Contact E-mail: ${params.contactMail} + - Genome: ${params.genome} + top_modules: + - 'fastqc' + - 'picard' + - 'samtools' + - 'qualimap' + - 'bcftools' + - 'vcftools' + - 'snpeff' + """.stripIndent() + + return file } def grabRevision() { diff --git a/scripts/test.sh b/scripts/test.sh index 71f164a900..ccc595dc6f 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -55,7 +55,7 @@ function clean_repo() { if [[ $TRAVIS == false ]] && [[ $KEEP 
== false ]] then echo "$(tput setaf 1)Cleaning directory$(tput sgr0)" - rm -rf work .nextflow* Preprocessing Reports Annotation VariantCalling Results + rm -rf work .nextflow* Annotation Preprocessing Reports Results VariantCalling fi } diff --git a/somaticVC.nf b/somaticVC.nf index 20d15b7e92..a8f698876b 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -66,7 +66,7 @@ if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project ${bam}.samtools.stats.out - """ + script: QC.samtoolsStats(bam) } if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { @@ -162,14 +159,7 @@ process RunBamQC { when: !params.noReports && !params.noBAMQC - script: - """ - qualimap --java-mem-size=${task.memory.toGiga()}G \ - bamqc \ - -bam ${bam} \ - -outdir ${idSample} \ - -outformat HTML - """ + script: QC.bamQC(bam,idSample,task.memory) } if (params.verbose) bamQCreport = bamQCreport.view { @@ -826,10 +816,7 @@ process RunBcftoolsStats { when: !params.noReports - script: - """ - bcftools stats ${vcf} > ${vcf.baseName}.bcf.tools.stats.out - """ + script: QC.bcftools(vcf) } if (params.verbose) bcfReport = bcfReport.view { @@ -852,28 +839,7 @@ process RunVcftools { when: !params.noReports - script: - """ - vcftools \ - --gzvcf ${vcf} \ - --relatedness2 \ - --out ${vcf.baseName} - - vcftools \ - --gzvcf ${vcf} \ - --TsTv-by-count \ - --out ${vcf.baseName} - - vcftools \ - --gzvcf ${vcf} \ - --TsTv-by-qual \ - --out ${vcf.baseName} - - vcftools \ - --gzvcf ${vcf} \ - --FILTER-summary \ - --out ${vcf.baseName} - """ + script: QC.vcftools(vcf) } if (params.verbose) vcfReport = vcfReport.view { @@ -882,6 +848,76 @@ if (params.verbose) vcfReport = vcfReport.view { } vcfReport.close() + +process GetVersionGATK { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: !params.onlyQC + script: QC.getVersionGATK() +} + +process GetVersionFreeBayes { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: 'freebayes' in 
tools && !params.onlyQC + + script: + """ + freebayes --version > v_freebayes.txt + """ +} + +process GetVersionAlleleCount { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: 'ascat' in tools && !params.onlyQC + + script: + """ + alleleCounter --version > v_allelecount.txt + """ +} + +process GetVersionASCAT { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: 'ascat' in tools && !params.onlyQC + + script: + """ + R --version > v_r.txt + cat ${baseDir}/scripts/ascat.R | grep "ASCAT version" > v_ascat.txt + """ +} + +process GetVersionStrelka { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: 'strelka' in tools && !params.onlyQC + script: QC.getVersionStrelka() +} + +process GetVersionManta { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: 'manta' in tools && !params.onlyQC + script: QC.getVersionManta() +} + +process GetVersionBCFtools { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: !params.noReports + script: QC.getVersionBCFtools() +} + +process GetVersionVCFtools { + publishDir directoryMap.version, mode: 'link' + output: file("v_*.txt") + when: !params.noReports + script: QC.getVersionVCFtools() +} + /* ================================================================================ = F U N C T I O N S = @@ -937,23 +973,6 @@ def checkUppmaxProject() { return !(workflow.profile == 'slurm' && !params.project) } -def defineDirectoryMap() { - return [ - 'recalibrated' : "${params.outDir}/Preprocessing/Recalibrated", - 'bamQC' : "${params.outDir}/Reports/bamQC", - 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", - 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats", - 'vcftools' : "${params.outDir}/Reports/VCFTools", - 'ascat' : "${params.outDir}/VariantCalling/Ascat", - 'freebayes' : "${params.outDir}/VariantCalling/FreeBayes", - 'manta' : "${params.outDir}/VariantCalling/Manta", - 
'mutect1' : "${params.outDir}/VariantCalling/MuTect1", - 'mutect2' : "${params.outDir}/VariantCalling/MuTect2", - 'strelka' : "${params.outDir}/VariantCalling/Strelka", - 'strelkabp' : "${params.outDir}/VariantCalling/StrelkaBP" - ] -} - def defineReferenceMap() { if (!(params.genome in params.genomes)) exit 1, "Genome ${params.genome} not found in configuration" return [