Skip to content
This repository has been archived by the owner on Jan 27, 2020. It is now read-only.

Commit

Permalink
Merge pull request #595 from MaxUlysse/MultiQC
Browse files Browse the repository at this point in the history
Get the versions of the software used in the MultiQC report
  • Loading branch information
Szilveszter Juhos authored Jun 12, 2018
2 parents f2d1f5d + 7084a73 commit f41a3bd
Show file tree
Hide file tree
Showing 12 changed files with 517 additions and 218 deletions.
77 changes: 54 additions & 23 deletions annotate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()}
annotateTools = params.annotateTools ? params.annotateTools.split(',').collect{it.trim().toLowerCase()} : []
annotateVCF = params.annotateVCF ? params.annotateVCF.split(',').collect{it.trim()} : []

directoryMap = defineDirectoryMap()
directoryMap = SarekUtils.defineDirectoryMap(params.outDir)
toolList = defineToolList()

if (!SarekUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information'
Expand Down Expand Up @@ -102,7 +102,7 @@ if (annotateVCF == []) {

vcfNotToAnnotate.close()

(vcfForBCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(3)
(vcfForBCFtools, vcfForVCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(4)

process RunBcftoolsStats {
tag {vcf}
Expand All @@ -117,17 +117,35 @@ process RunBcftoolsStats {

when: !params.noReports

script:
"""
bcftools stats ${vcf} > ${vcf.baseName}.bcf.tools.stats.out
"""
script: QC.bcftools(vcf)
}

// When params.verbose is set, pass bcfReport through view() so the file name
// of each item is logged; the channel is re-assigned to the viewed channel.
if (params.verbose) bcfReport = bcfReport.view {
"BCFTools stats report:\n\
File : [${it.fileName}]"
}

// Runs the script returned by QC.vcftools(vcf) on every VCF received from
// vcfForVCFtools and collects the resulting files into vcfReport.
process RunVcftools {
// Tag each task with the VCF it is processing.
tag {vcf}

// Publish outputs into directoryMap.vcftools using publishDir mode 'link'.
publishDir directoryMap.vcftools, mode: 'link'

input:
// Each item is a (variantCaller, VCF file) pair; variantCaller is not used below.
set variantCaller, file(vcf) from vcfForVCFtools

output:
// Every file named "<VCF base name>.<anything>" is sent to vcfReport.
file ("${vcf.baseName}.*") into vcfReport

// Skipped entirely when params.noReports is set.
when: !params.noReports

// The actual command line is defined in QC.vcftools (not visible here).
script: QC.vcftools(vcf)
}

// When params.verbose is set, pass vcfReport through view() so the file name
// of each item is logged; the channel is re-assigned to the viewed channel.
if (params.verbose) vcfReport = vcfReport.view {
"VCFTools stats report:\n\
File : [${it.fileName}]"
}

process RunSnpeff {
tag {vcf}

Expand Down Expand Up @@ -208,6 +226,34 @@ if (params.verbose) vepReport = vepReport.view {
Files : ${it.fileName}"
}

// Runs the script returned by QC.getVersionBCFtools() and publishes the
// v_*.txt file(s) it produces into directoryMap.version.
// Presumably this records the BCFtools version for the MultiQC report; the
// command itself lives in QC and is not visible here.
process GetVersionBCFtools {
publishDir directoryMap.version, mode: 'link'
output: file("v_*.txt")
// Skipped when params.noReports is set.
when: !params.noReports
script: QC.getVersionBCFtools()
}

// Runs the script returned by QC.getVersionSnpEFF() and publishes the
// v_*.txt file(s) it produces into directoryMap.version.
// NOTE(review): the container configuration files select this process as
// `$GetVersionSnpeff` (different capitalisation). Confirm the selector and
// the process name match, otherwise the container setting may not apply.
process GetVersionSnpEFF {
publishDir directoryMap.version, mode: 'link'
output: file("v_*.txt")
// Only runs when 'snpeff' or 'merge' is among the requested tools.
when: 'snpeff' in tools || 'merge' in tools
script: QC.getVersionSnpEFF()
}

// Runs the script returned by QC.getVersionVCFtools() and publishes the
// v_*.txt file(s) it produces into directoryMap.version.
// Presumably this records the VCFtools version for the MultiQC report; the
// command itself lives in QC and is not visible here.
process GetVersionVCFtools {
publishDir directoryMap.version, mode: 'link'
output: file("v_*.txt")
// Skipped when params.noReports is set.
when: !params.noReports
script: QC.getVersionVCFtools()
}

// Runs the script returned by QC.getVersionVEP() and publishes the
// v_*.txt file(s) it produces into directoryMap.version.
process GetVersionVEP {
publishDir directoryMap.version, mode: 'link'
output: file("v_*.txt")
// Only runs when 'vep' or 'merge' is among the requested tools.
when: 'vep' in tools || 'merge' in tools
script: QC.getVersionVEP()
}

/*
================================================================================
= F U N C T I O N S =
Expand All @@ -219,26 +265,11 @@ def checkUppmaxProject() {
return !(workflow.profile == 'slurm' && !params.project)
}

def defineDirectoryMap() {
return [
'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller",
'manta' : "${params.outDir}/VariantCalling/Manta",
'mutect1' : "${params.outDir}/VariantCalling/MuTect1",
'mutect2' : "${params.outDir}/VariantCalling/MuTect2",
'strelka' : "${params.outDir}/VariantCalling/Strelka",
'strelkabp' : "${params.outDir}/VariantCalling/StrelkaBP",
'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
'snpeffReports' : "${params.outDir}/Reports/SnpEff",
'snpeff' : "${params.outDir}/Annotation/SnpEff",
'vep' : "${params.outDir}/Annotation/VEP"
]
}

def defineToolList() {
return [
'merge',
'snpeff',
'vep',
'merge'
'vep'
]
}

Expand Down
78 changes: 78 additions & 0 deletions bin/scrape_tool_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/usr/bin/env python
from __future__ import print_function
from collections import OrderedDict
import re

# Tool name -> [version file written by the pipeline, regex whose first
# capture group is the version string].
regexes = {
    'AlleleCount': ['v_allelecount.txt', r"(\S+)"],
    'ASCAT': ['v_ascat.txt', r"(\d\.\d+)"],
    'bcftools': ['v_bcftools.txt', r"bcftools (\S+)"],
    'BWA': ['v_bwa.txt', r"Version: (\S+)"],
    'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"],
    'GATK': ['v_gatk.txt', r"GATK version(\S+)"],
    'htslib': ['v_samtools.txt', r"htslib (\S+)"],
    'Manta': ['v_manta.txt', r"([0-9.]+)"],
    'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"],
    'Nextflow': ['v_nextflow.txt', r"(\S+)"],
    'FreeBayes': ['v_freebayes.txt', r"version: v(\d\.\d\.\d+)"],
    'Picard': ['v_picard.txt', r"Picard version:(\d\.\d\.\d+)"],
    'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"],
    'R': ['v_r.txt', r"R version (\S+)"],
    'samtools': ['v_samtools.txt', r"samtools (\S+)"],
    'Sarek': ['v_sarek.txt', r"(\S+)"],
    'SnpEff': ['v_snpeff.txt', r"version SnpEff (\S+)"],
    'Strelka': ['v_strelka.txt', r"([0-9.]+)"],
    'vcftools': ['v_vcftools.txt', r"([0-9.]+)"],
    'VEP': ['v_vep.txt', r"ensembl-vep : (\S+)"],
}

# Order in which tools are listed in the report. Kept as a single list so the
# display order is defined in exactly one place.
TOOL_ORDER = [
    'Sarek', 'Nextflow', 'BWA', 'samtools', 'htslib', 'GATK', 'Picard',
    'Manta', 'Strelka', 'FreeBayes', 'AlleleCount', 'R', 'ASCAT', 'SnpEff',
    'VEP', 'FastQC', 'Qualimap', 'bcftools', 'vcftools', 'MultiQC',
]

# Header of the MultiQC custom-content section. The HTML that follows
# ``data: |`` must be indented, otherwise it is not part of the YAML literal
# block.
REPORT_HEADER = '''
id: 'Sarek'
order: -1000
section_href: 'https://github.com/SciLifeLab/Sarek'
plot_type: 'html'
description: 'tool versions are collected at run time from output.'
data: |
    <dl class="dl-horizontal" style="margin-bottom:0;">
'''


def scrape_versions(directory="."):
    """Collect the tool versions found in the ``v_*.txt`` files of *directory*.

    Each tool's version file is searched with the tool's regex from
    ``regexes``. Tools whose file cannot be read, or whose regex does not
    match, are left out of the result.

    Returns an ``OrderedDict`` mapping tool name to ``"v <version>"``, in
    ``TOOL_ORDER`` order.
    """
    import os
    import sys

    found = OrderedDict()
    for tool in TOOL_ORDER:
        filename, pattern = regexes[tool]
        try:
            with open(os.path.join(directory, filename)) as handle:
                content = handle.read()
        except (IOError, OSError):
            # A missing file just means the tool was not run. Report it on
            # stderr so the message cannot end up inside the YAML on stdout.
            print("No such file:", filename, file=sys.stderr)
            continue
        match = re.search(pattern, content)
        if match:
            found[tool] = "v {}".format(match.group(1))
    return found


def render_report(results):
    """Return the MultiQC YAML section listing *results* as an HTML ``<dl>``.

    *results* maps tool name to version string; entries are rendered in
    iteration order. The returned text has no trailing newline.
    """
    lines = [REPORT_HEADER]
    lines.extend(
        "        <dt>{}</dt><dd>{}</dd>".format(tool, version)
        for tool, version in results.items()
    )
    lines.append("    </dl>")
    return "\n".join(lines)


def main():
    """Scrape the current directory and dump the report to stdout."""
    print(render_report(scrape_versions()))


if __name__ == "__main__":
    main()
23 changes: 10 additions & 13 deletions buildReferences.nf
Original file line number Diff line number Diff line change
Expand Up @@ -98,27 +98,24 @@ if (params.verbose) ch_decompressedFiles = ch_decompressedFiles.view {
}

ch_fastaFile = Channel.create()
ch_otherFiles = Channel.create()
ch_vcfFiles = Channel.create()
ch_fastaForBWA = Channel.create()
ch_fastaForPicard = Channel.create()
ch_fastaForSAMTools = Channel.create()
ch_otherFile = Channel.create()
ch_vcfFile = Channel.create()

ch_decompressedFiles
.choice(ch_fastaFile, ch_vcfFiles, ch_otherFiles) {
.choice(ch_fastaFile, ch_vcfFile, ch_otherFile) {
it =~ ".fasta" ? 0 :
it =~ ".vcf" ? 1 : 2}

(ch_fastaFile, ch_fastaFileToKeep) = ch_fastaFile.into(2)
(ch_vcfFiles, ch_vcfFilesToKeep) = ch_vcfFiles.into(2)
(ch_fastaForBWA, ch_fastaForPicard, ch_fastaForSAMTools, ch_fastaFileToKeep) = ch_fastaFile.into(4)
(ch_vcfFile, ch_vcfFileToKeep) = ch_vcfFile.into(2)

ch_notCompressedfiles
.mix(ch_otherFiles, ch_fastaFileToKeep, ch_vcfFilesToKeep)
.mix(ch_fastaFileToKeep, ch_vcfFileToKeep, ch_otherFile)
.collectFile(storeDir: params.outDir)

ch_fastaForBWA = Channel.create()
ch_fastaForPicard = Channel.create()
ch_fastaForSAMTools = Channel.create()

ch_fastaFile.into(ch_fastaForBWA,ch_fastaForPicard,ch_fastaForSAMTools)

process BuildBWAindexes {
tag {f_reference}

Expand Down Expand Up @@ -193,7 +190,7 @@ process BuildVCFIndex {
publishDir params.outDir, mode: 'link'

input:
file(f_reference) from ch_vcfFiles
file(f_reference) from ch_vcfFile

output:
file("${f_reference}.idx") into ch_vcfIndex
Expand Down
15 changes: 15 additions & 0 deletions configuration/containers.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@ process {
$BuildVCFIndex.container = "${params.repository}/igvtools:${params.tag}"
$ConcatVCF.container = "${params.repository}/sarek:${params.tag}"
$CreateRecalibrationTable.container = "${params.repository}/gatk:${params.tag}"
$GetVersionAll.container = "${params.repository}/qctools:${params.tag}"
$GetVersionAlleleCount.container = "${params.repository}/runallelecount:${params.tag}"
$GetVersionASCAT.container = "${params.repository}/r-base:${params.tag}"
$GetVersionBamQC.container = "${params.repository}/qctools:${params.tag}"
$GetVersionBCFtools.container = "${params.repository}/sarek:${params.tag}"
$GetVersionBWAsamtools.container = "${params.repository}/sarek:${params.tag}"
$GetVersionFastQC.container = "${params.repository}/qctools:${params.tag}"
$GetVersionFreeBayes.container = "${params.repository}/freebayes:${params.tag}"
$GetVersionGATK.container = "${params.repository}/gatk:${params.tag}"
$GetVersionManta.container = "${params.repository}/sarek:${params.tag}"
$GetVersionPicard.container = "${params.repository}/picard:${params.tag}"
$GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"}
$GetVersionStrelka.container = "${params.repository}/sarek:${params.tag}"
$GetVersionVCFtools.container = "${params.repository}/qctools:${params.tag}"
$GetVersionVEP.container = {params.genome == 'GRCh38' ? "${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"}
$IndelRealigner.container = "${params.repository}/gatk:${params.tag}"
$MapReads.container = "${params.repository}/sarek:${params.tag}"
$MarkDuplicates.container = "${params.repository}/picard:${params.tag}"
Expand Down
16 changes: 15 additions & 1 deletion configuration/singularity-path.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,21 @@ process {
$BuildVCFIndex.container = "${params.containerPath}/igvtools-${params.tag}.img"
$ConcatVCF.container = "${params.containerPath}/sarek-${params.tag}.img"
$CreateRecalibrationTable.container = "${params.containerPath}/gatk-${params.tag}.img"
$GenerateMultiQCconfig.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionAll.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionAlleleCount.container = "${params.containerPath}/runallelecount-${params.tag}.img"
$GetVersionASCAT.container = "${params.containerPath}/r-base-${params.tag}.img"
$GetVersionBamQC.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionBCFtools.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionBWAsamtools.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionFastQC.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img"
$GetVersionGATK.container = "${params.containerPath}/gatk-${params.tag}.img"
$GetVersionManta.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionPicard.container = "${params.containerPath}/picard-${params.tag}.img"
$GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"}
$GetVersionStrelka.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionVCFtools.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionVEP.container = {params.genome == 'GRCh38' ? "${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"}
$IndelRealigner.container = "${params.containerPath}/gatk-${params.tag}.img"
$MapReads.container = "${params.containerPath}/sarek-${params.tag}.img"
$MarkDuplicates.container = "${params.containerPath}/picard-${params.tag}.img"
Expand Down
Loading

0 comments on commit f41a3bd

Please sign in to comment.