Skip to content
This repository has been archived by the owner on Jan 27, 2020. It is now read-only.

Get the versions of the software used in the MultiQC report #595

Merged
merged 1 commit into from
Jun 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 54 additions & 23 deletions annotate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()}
annotateTools = params.annotateTools ? params.annotateTools.split(',').collect{it.trim().toLowerCase()} : []
annotateVCF = params.annotateVCF ? params.annotateVCF.split(',').collect{it.trim()} : []

directoryMap = defineDirectoryMap()
directoryMap = SarekUtils.defineDirectoryMap(params.outDir)
toolList = defineToolList()

if (!SarekUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information'
Expand Down Expand Up @@ -102,7 +102,7 @@ if (annotateVCF == []) {

vcfNotToAnnotate.close()

(vcfForBCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(3)
(vcfForBCFtools, vcfForVCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(4)

process RunBcftoolsStats {
tag {vcf}
Expand All @@ -117,17 +117,35 @@ process RunBcftoolsStats {

when: !params.noReports

script:
"""
bcftools stats ${vcf} > ${vcf.baseName}.bcf.tools.stats.out
"""
script: QC.bcftools(vcf)
}

// When params.verbose is set, re-bind bcfReport to a channel that also prints
// a short message with the file name of every item passing through it.
if (params.verbose) bcfReport = bcfReport.view {
"BCFTools stats report:\n\
File : [${it.fileName}]"
}

// Runs the command built by QC.vcftools(vcf) for every VCF received from
// vcfForVCFtools and publishes the results under directoryMap.vcftools.
// The process is skipped when params.noReports is set.
process RunVcftools {
tag {vcf}

publishDir directoryMap.vcftools, mode: 'link'

input:
// variantCaller arrives with each VCF but is not referenced in this process.
set variantCaller, file(vcf) from vcfForVCFtools

output:
// Every file sharing the VCF's base name is emitted on vcfReport.
file ("${vcf.baseName}.*") into vcfReport

when: !params.noReports

// NOTE(review): QC.vcftools is defined outside this view; presumably it
// returns the vcftools command line - confirm against the QC helper class.
script: QC.vcftools(vcf)
}

// When params.verbose is set, re-bind vcfReport to a channel that also prints
// a short message with the file name of every item passing through it.
if (params.verbose) vcfReport = vcfReport.view {
"VCFTools stats report:\n\
File : [${it.fileName}]"
}

process RunSnpeff {
tag {vcf}

Expand Down Expand Up @@ -208,6 +226,34 @@ if (params.verbose) vepReport = vepReport.view {
Files : ${it.fileName}"
}

// Writes the v_*.txt version file(s) produced by QC.getVersionBCFtools() and
// publishes them under directoryMap.version. Skipped when params.noReports is set.
// NOTE(review): the QC helper is defined outside this view; the exact file
// name it writes cannot be confirmed from here.
process GetVersionBCFtools {
publishDir directoryMap.version, mode: 'link'
output: file("v_*.txt")
when: !params.noReports
script: QC.getVersionBCFtools()
}

// Writes the v_*.txt version file(s) produced by QC.getVersionSnpEFF() and
// publishes them under directoryMap.version. Runs only when 'snpeff' or
// 'merge' is among the requested tools.
//
// The process is named GetVersionSnpeff (lower-case "eff") because the
// container configuration selects it as $GetVersionSnpeff; with a different
// casing the container assignment would not apply to this process.
process GetVersionSnpeff {
  publishDir directoryMap.version, mode: 'link'
  output: file("v_*.txt")
  when: 'snpeff' in tools || 'merge' in tools
  script: QC.getVersionSnpEFF()
}

// Writes the v_*.txt version file(s) produced by QC.getVersionVCFtools() and
// publishes them under directoryMap.version. Skipped when params.noReports is set.
// NOTE(review): the QC helper is defined outside this view; the exact file
// name it writes cannot be confirmed from here.
process GetVersionVCFtools {
publishDir directoryMap.version, mode: 'link'
output: file("v_*.txt")
when: !params.noReports
script: QC.getVersionVCFtools()
}

// Writes the v_*.txt version file(s) produced by QC.getVersionVEP() and
// publishes them under directoryMap.version. Runs only when 'vep' or 'merge'
// is among the requested tools.
// NOTE(review): the QC helper is defined outside this view; the exact file
// name it writes cannot be confirmed from here.
process GetVersionVEP {
publishDir directoryMap.version, mode: 'link'
output: file("v_*.txt")
when: 'vep' in tools || 'merge' in tools
script: QC.getVersionVEP()
}

/*
================================================================================
= F U N C T I O N S =
Expand All @@ -219,26 +265,11 @@ def checkUppmaxProject() {
return !(workflow.profile == 'slurm' && !params.project)
}

// Builds the lookup table from a short key (tool or report name) to the
// directory, below params.outDir, where that tool's results are published.
def defineDirectoryMap() {
  // Shared prefixes, computed once so each entry only names its leaf folder.
  def variantCallingDir = "${params.outDir}/VariantCalling"
  def reportsDir = "${params.outDir}/Reports"
  def annotationDir = "${params.outDir}/Annotation"

  def directories = [
    'haplotypecaller' : "${variantCallingDir}/HaplotypeCaller",
    'manta'           : "${variantCallingDir}/Manta",
    'mutect1'         : "${variantCallingDir}/MuTect1",
    'mutect2'         : "${variantCallingDir}/MuTect2",
    'strelka'         : "${variantCallingDir}/Strelka",
    'strelkabp'       : "${variantCallingDir}/StrelkaBP",
    'bcftoolsStats'   : "${reportsDir}/BCFToolsStats",
    'snpeffReports'   : "${reportsDir}/SnpEff",
    'snpeff'          : "${annotationDir}/SnpEff",
    'vep'             : "${annotationDir}/VEP"
  ]
  return directories
}

// Returns the tool names this script accepts; the requested tools are
// validated against this list with SarekUtils.checkParameterList.
// Each name appears exactly once and every element is comma-separated.
def defineToolList() {
  return [
    'merge',
    'snpeff',
    'vep'
  ]
}

Expand Down
78 changes: 78 additions & 0 deletions bin/scrape_tool_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/usr/bin/env python
from __future__ import print_function
from collections import OrderedDict
import re

# Placeholder markup for a tool whose version is unknown. Tools without a
# detected version are simply left out of the report.
NOT_AVAILABLE = '<span style="color:#999999;">N/A</span>'

# Tool name -> [version file, regex whose first group captures the version].
# Entries are listed in the order the tools appear in the report, so this
# table is the single source of truth for both lookup and ordering.
regexes = OrderedDict([
    ('Sarek', ['v_sarek.txt', r"(\S+)"]),
    ('Nextflow', ['v_nextflow.txt', r"(\S+)"]),
    ('BWA', ['v_bwa.txt', r"Version: (\S+)"]),
    ('samtools', ['v_samtools.txt', r"samtools (\S+)"]),
    ('htslib', ['v_samtools.txt', r"htslib (\S+)"]),
    ('GATK', ['v_gatk.txt', r"GATK version(\S+)"]),
    ('Picard', ['v_picard.txt', r"Picard version:(\d\.\d\.\d+)"]),
    ('Manta', ['v_manta.txt', r"([0-9.]+)"]),
    ('Strelka', ['v_strelka.txt', r"([0-9.]+)"]),
    ('FreeBayes', ['v_freebayes.txt', r"version: v(\d\.\d\.\d+)"]),
    ('AlleleCount', ['v_allelecount.txt', r"(\S+)"]),
    ('R', ['v_r.txt', r"R version (\S+)"]),
    ('ASCAT', ['v_ascat.txt', r"(\d\.\d+)"]),
    ('SnpEff', ['v_snpeff.txt', r"version SnpEff (\S+)"]),
    ('VEP', ['v_vep.txt', r"ensembl-vep : (\S+)"]),
    ('FastQC', ['v_fastqc.txt', r"FastQC v(\S+)"]),
    ('Qualimap', ['v_qualimap.txt', r"QualiMap v.(\S+)"]),
    ('bcftools', ['v_bcftools.txt', r"bcftools (\S+)"]),
    ('vcftools', ['v_vcftools.txt', r"([0-9.]+)"]),
    ('MultiQC', ['v_multiqc.txt', r"multiqc, version (\S+)"]),
])

# Leading part of the report. The HTML under "data: |" is a YAML literal
# block scalar, so its lines must be indented deeper than the "data" key.
REPORT_HEADER = '''
id: 'Sarek'
order: -1000
section_href: 'https://github.com/SciLifeLab/Sarek'
plot_type: 'html'
description: 'tool versions are collected at run time from output.'
data: |
    <dl class="dl-horizontal" style="margin-bottom:0;">
'''


def read_version(path, pattern):
    """Return the version captured by *pattern* in the file at *path*.

    Returns ``None`` when the file cannot be read or the pattern does not
    match. Only I/O and decoding failures are handled; any other exception
    is a programming error and is allowed to propagate.
    """
    try:
        with open(path) as handle:
            content = handle.read()
    except (IOError, OSError):
        # IOError rather than FileNotFoundError keeps Python 2 compatibility.
        # NOTE(review): this message goes to stdout together with the report;
        # confirm that whatever consumes the output tolerates the extra line.
        print("No such file:", path)
        return None
    except UnicodeError:
        print("Could not decode file:", path)
        return None
    match = re.search(pattern, content)
    return match.group(1) if match else None


def collect_versions(specs=None):
    """Return an ordered mapping of tool name to ``"v <version>"``.

    *specs* maps a tool name to ``[version file, regex]`` and defaults to the
    module-level ``regexes`` table. Tools whose version cannot be determined
    are omitted; the remaining ones keep the order of *specs*.
    """
    if specs is None:
        specs = regexes
    found = OrderedDict()
    for tool, (filename, pattern) in specs.items():
        version = read_version(filename, pattern)
        if version is not None:
            found[tool] = "v {}".format(version)
    return found


def build_report(results):
    """Return the report text for *results* (tool name -> version string)."""
    rows = "".join(
        "        <dt>{}</dt><dd>{}</dd>\n".format(tool, version)
        for tool, version in results.items()
    )
    # The blank line after the header mirrors print() ending the header line.
    return REPORT_HEADER + "\n" + rows + "    </dl>"


def main():
    """Scrape every known version file and print the report to stdout."""
    print(build_report(collect_versions()))


if __name__ == "__main__":
    main()
23 changes: 10 additions & 13 deletions buildReferences.nf
Original file line number Diff line number Diff line change
Expand Up @@ -98,27 +98,24 @@ if (params.verbose) ch_decompressedFiles = ch_decompressedFiles.view {
}

ch_fastaFile = Channel.create()
ch_otherFiles = Channel.create()
ch_vcfFiles = Channel.create()
ch_fastaForBWA = Channel.create()
ch_fastaForPicard = Channel.create()
ch_fastaForSAMTools = Channel.create()
ch_otherFile = Channel.create()
ch_vcfFile = Channel.create()

ch_decompressedFiles
.choice(ch_fastaFile, ch_vcfFiles, ch_otherFiles) {
.choice(ch_fastaFile, ch_vcfFile, ch_otherFile) {
it =~ ".fasta" ? 0 :
it =~ ".vcf" ? 1 : 2}

(ch_fastaFile, ch_fastaFileToKeep) = ch_fastaFile.into(2)
(ch_vcfFiles, ch_vcfFilesToKeep) = ch_vcfFiles.into(2)
(ch_fastaForBWA, ch_fastaForPicard, ch_fastaForSAMTools, ch_fastaFileToKeep) = ch_fastaFile.into(4)
(ch_vcfFile, ch_vcfFileToKeep) = ch_vcfFile.into(2)

ch_notCompressedfiles
.mix(ch_otherFiles, ch_fastaFileToKeep, ch_vcfFilesToKeep)
.mix(ch_fastaFileToKeep, ch_vcfFileToKeep, ch_otherFile)
.collectFile(storeDir: params.outDir)

ch_fastaForBWA = Channel.create()
ch_fastaForPicard = Channel.create()
ch_fastaForSAMTools = Channel.create()

ch_fastaFile.into(ch_fastaForBWA,ch_fastaForPicard,ch_fastaForSAMTools)

process BuildBWAindexes {
tag {f_reference}

Expand Down Expand Up @@ -193,7 +190,7 @@ process BuildVCFIndex {
publishDir params.outDir, mode: 'link'

input:
file(f_reference) from ch_vcfFiles
file(f_reference) from ch_vcfFile

output:
file("${f_reference}.idx") into ch_vcfIndex
Expand Down
15 changes: 15 additions & 0 deletions configuration/containers.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@ process {
$BuildVCFIndex.container = "${params.repository}/igvtools:${params.tag}"
$ConcatVCF.container = "${params.repository}/sarek:${params.tag}"
$CreateRecalibrationTable.container = "${params.repository}/gatk:${params.tag}"
$GetVersionAll.container = "${params.repository}/qctools:${params.tag}"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may be possible to tidy up these configs considerably with Nextflow's new `label` directive (available only in recent Nextflow versions).

$GetVersionAlleleCount.container = "${params.repository}/runallelecount:${params.tag}"
$GetVersionASCAT.container = "${params.repository}/r-base:${params.tag}"
$GetVersionBamQC.container = "${params.repository}/qctools:${params.tag}"
$GetVersionBCFtools.container = "${params.repository}/sarek:${params.tag}"
$GetVersionBWAsamtools.container = "${params.repository}/sarek:${params.tag}"
$GetVersionFastQC.container = "${params.repository}/qctools:${params.tag}"
$GetVersionFreeBayes.container = "${params.repository}/freebayes:${params.tag}"
$GetVersionGATK.container = "${params.repository}/gatk:${params.tag}"
$GetVersionManta.container = "${params.repository}/sarek:${params.tag}"
$GetVersionPicard.container = "${params.repository}/picard:${params.tag}"
$GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"}
$GetVersionStrelka.container = "${params.repository}/sarek:${params.tag}"
$GetVersionVCFtools.container = "${params.repository}/qctools:${params.tag}"
$GetVersionVEP.container = {params.genome == 'GRCh38' ? "${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"}
$IndelRealigner.container = "${params.repository}/gatk:${params.tag}"
$MapReads.container = "${params.repository}/sarek:${params.tag}"
$MarkDuplicates.container = "${params.repository}/picard:${params.tag}"
Expand Down
16 changes: 15 additions & 1 deletion configuration/singularity-path.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,21 @@ process {
$BuildVCFIndex.container = "${params.containerPath}/igvtools-${params.tag}.img"
$ConcatVCF.container = "${params.containerPath}/sarek-${params.tag}.img"
$CreateRecalibrationTable.container = "${params.containerPath}/gatk-${params.tag}.img"
$GenerateMultiQCconfig.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionAll.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionAlleleCount.container = "${params.containerPath}/runallelecount-${params.tag}.img"
$GetVersionASCAT.container = "${params.containerPath}/r-base-${params.tag}.img"
$GetVersionBamQC.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionBCFtools.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionBWAsamtools.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionFastQC.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img"
$GetVersionGATK.container = "${params.containerPath}/gatk-${params.tag}.img"
$GetVersionManta.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionPicard.container = "${params.containerPath}/picard-${params.tag}.img"
$GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"}
$GetVersionStrelka.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionVCFtools.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionVEP.container = {params.genome == 'GRCh38' ? "${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"}
$IndelRealigner.container = "${params.containerPath}/gatk-${params.tag}.img"
$MapReads.container = "${params.containerPath}/sarek-${params.tag}.img"
$MarkDuplicates.container = "${params.containerPath}/picard-${params.tag}.img"
Expand Down
Loading