Skip to content

Commit

Permalink
Fixes/publish failed qc stats (#29)
Browse files Browse the repository at this point in the history
* Publish files for failed QC jobs

* Fix docker image name in README.md

* Fix qc-failed jobs error message

* Submit counters only for imputation jobs

---------

Co-authored-by: Lukas Forer <lukas.forer@i-med.ac.at>
  • Loading branch information
seppinho and lukfor committed Sep 27, 2024
1 parent 2029f96 commit 122c53a
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 19 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ params.imputation.window = 100000
### Build docker image locally

```
docker build -t genepi/imputation-docker:latest .
docker build -t genepi/imputationserver2:latest .
```
### Run testcases
Expand Down
33 changes: 18 additions & 15 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ workflow {
INPUT_VALIDATION.out.validation_report,
site_files_ch.collect()
)

// abort the workflow with an explicit error when QC emitted no chunk metafiles (i.e. the QC step failed)
QUALITY_CONTROL.out.qc_metafiles.ifEmpty {
error "QC step failed"
}

if (params.mode == 'imputation') {

Expand Down Expand Up @@ -141,32 +146,30 @@ workflow {
workflow.onComplete {
//TODO: use templates
//TODO: move in EmailHelper class
//see https://www.nextflow.io/docs/latest/mail.html for configuration etc...
// Nfcore Template: https://github.com/nf-core/rnaseq/blob/b89fac32650aacc86fcda9ee77e00612a1d77066/lib/NfcoreTemplate.groovy#L155

if (!workflow.success) {
def statusMessage = workflow.exitStatus != null ? "failed" : "canceled"
if (!workflow.success) {
def statusMessage = workflow.exitStatus != null || workflow.errorReport == "QC step failed" ? "failed" : "canceled"
if (params.send_mail && params.user.email != null){
sendMail{
to "${params.user.email}"
subject "[${params.service.name}] Job ${params.project} ${statusMessage}"
body "Dear ${params.user.name}, \n Your job has been ${statusMessage}.\n\n More details can be found at the following link: ${params.service.url}/index.html#!jobs/${params.project}"
}
}
println "::error:: Imputation failed."
println "::error:: Imputation job ${statusMessage}."
return
}

//submit counters on success
println "::submit-counter name=samples::"
println "::submit-counter name=genotypes::"
println "::submit-counter name=chromosomes::"
println "::submit-counter name=runs::"

println "::set-value-and-submit name=reference_panel::${params.refpanel.id}"
println "::set-value-and-submit name=phasing_engine::${phasing_engine}"
println "::set-value-and-submit name=genome_build::${params.build}"
//submit counters for successful imputation jobs
if (params.mode == 'imputation') {
println "::submit-counter name=samples::"
println "::submit-counter name=genotypes::"
println "::submit-counter name=chromosomes::"
println "::submit-counter name=runs::"

println "::set-value-and-submit name=reference_panel::${params.refpanel.id}"
println "::set-value-and-submit name=phasing_engine::${phasing_engine}"
println "::set-value-and-submit name=genome_build::${params.build}"
}

// imputation job
if (params.merge_results === true && params.encryption.enabled === true) {
Expand Down
14 changes: 11 additions & 3 deletions modules/local/quality_control/quality_control_vcf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ process QUALITY_CONTROL_VCF {
val(panel_version)

output:
path("${metaFilesDir}/*"), emit: chunks_csv
path("${metaFilesDir}/*"), emit: chunks_csv, optional: true
path("${chunksDir}/*"), emit: chunks_vcf
path("${statisticsDir}/*"), optional: true
path("maf.txt"), emit: maf_file, optional: true
Expand Down Expand Up @@ -56,10 +56,18 @@ process QUALITY_CONTROL_VCF {
--report qc_report.txt \
$chain \
$vcf_files
exit_code_a=\$?
cat qc_report.txt
exit \$exit_code_a
# Check if QC step failed
if [[ \$exit_code_a -ne 0 ]]; then
rm -rf ${metaFilesDir}
fi
cat qc_report.txt
# Always exit 0 so that the QC files still get published
exit 0
"""

}
25 changes: 25 additions & 0 deletions tests/data/refpanels/hapmap2/cloudgene_with_qcfilters.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Test reference panel definition: HapMap 2 (chr20) with an additional
# qcFilter section, used to exercise the QC-failure path in the test suite.
# NOTE(review): nesting was reconstructed from a flattened listing; the
# placement of `populations` and `qcFilter` under `properties` is assumed
# from the usual reference-panel layout - confirm against the repository file.
name: HapMap 2
description: HapMap2 Reference Panel for Michigan Imputation Server
version: 2.0.0
website: http://imputationserver.sph.umich.edu
category: RefPanel
id: hapmap2-chr20

properties:
  id: hapmap2-chr20
  build: hg19
  genotypes: ${CLOUDGENE_APP_LOCATION}/msavs/hapmap_r22.chr$chr.CEU.hg19.msav
  sites: ${CLOUDGENE_APP_LOCATION}/sites/hapmap_r22.chr$chr.CEU.hg19_impute.sites.gz
  mapEagle: ${CLOUDGENE_APP_LOCATION}/map/genetic_map_hg19_withX.txt.gz
  refEagle: ${CLOUDGENE_APP_LOCATION}/bcfs/hapmap_r22.chr$chr.CEU.hg19.recode.bcf
  refBeagle: ${CLOUDGENE_APP_LOCATION}/bcfs/hapmap_r22.chr$chr.CEU.hg19.recode.bref3
  mapBeagle: ${CLOUDGENE_APP_LOCATION}/map/plink.chr$chr.GRCh37.map
  populations:
    - id: eur
      name: EUR
      samples: 60
    # "off" is quoted so YAML does not parse it as a boolean
    - id: "off"
      name: Off
      samples: -1
  # presumably the minimum number of SNPs a chunk needs to pass QC - TODO confirm
  qcFilter:
    minSnps: 10000
29 changes: 29 additions & 0 deletions tests/main.qc.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,35 @@ nextflow_pipeline {

}

test("Should fail QC-only but publish files") {

    // Runs the pipeline in "qc-only" mode with the reference panel definition
    // that carries a qcFilter section (cloudgene_with_qcfilters.yaml).
    when {
        params {
            project = "test-job"
            build = "hg19"
            files = "$projectDir/tests/data/input/chr20-phased/*.vcf.gz"
            allele_frequency_population = "eur"
            mode = "qc-only"
            refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2/cloudgene_with_qcfilters.yaml"
            output = "${outputDir}"
        }
    }

    then {
        // The run itself is expected to end in a failure ...
        assert workflow.failed

        // ... but the QC statistics must still have been written to the output directory.
        def typedOnlySites = file("${outputDir}/statistics/snps-typed-only.txt")
        assert typedOnlySites.exists()

        // The QC report content is compared against the stored snapshot.
        def qcReport = file("${outputDir}/qc_report.txt")
        assert snapshot(qcReport.text).match()
    }

}

test("Should run QC-only without population") {

when {
Expand Down
10 changes: 10 additions & 0 deletions tests/main.qc.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
{
"Should fail QC-only but publish files": {
"content": [
"::log:: Reference Panel Ranges: genome-wide\n::message:: Calculating QC Statistics\n::group type=message::\n<b>Statistics:</b>\nAlternative allele frequency > 0.5 sites: 2,296\nReference Overlap: 99.00 %\nMatch: 7,735\nAllele switch: 0\nStrand flip: 0\nStrand flip and allele switch: 0\nA/T, C/G genotypes: 0\n<b>Filtered sites:</b>\nFilter flag set: 0\nInvalid alleles: 0\nMultiallelic sites: 0\nDuplicated sites: 0\nNonSNP sites: 0\nMonomorphic sites: 11\nAllele mismatch: 0\nSNPs call rate < 90%: 0\n::endgroup::\n::group type=error::\nExcluded sites in total: 11\nRemaining sites in total: 7,735\nSee snps-excluded.txt for details\nTyped only sites: 78\nSee typed-only.txt for details\n\n<b>Warning:</b> 4 Chunk(s) excluded: < 10000 SNPs (see chunks-excluded.txt for details).\n\nRemaining chunk(s): 0\n\n<b>Error:</b> No chunks passed the QC step. Imputation cannot be started!\n::endgroup::\n"
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.2"
},
"timestamp": "2024-09-26T16:41:25.904156"
},
"Should write typed only sites": {
"content": [
"::log:: Reference Panel Ranges: genome-wide\n::message:: Calculating QC Statistics\n::group type=message::\n<b>Statistics:</b>\nAlternative allele frequency > 0.5 sites: 2,296\nReference Overlap: 99.00 %\nMatch: 7,735\nAllele switch: 0\nStrand flip: 0\nStrand flip and allele switch: 0\nA/T, C/G genotypes: 0\n<b>Filtered sites:</b>\nFilter flag set: 0\nInvalid alleles: 0\nMultiallelic sites: 0\nDuplicated sites: 0\nNonSNP sites: 0\nMonomorphic sites: 11\nAllele mismatch: 0\nSNPs call rate < 90%: 0\n::endgroup::\n::group type=warning::\nExcluded sites in total: 11\nRemaining sites in total: 7,735\nSee snps-excluded.txt for details\nTyped only sites: 78\nSee typed-only.txt for details\n\n::endgroup::\n",
Expand Down

0 comments on commit 122c53a

Please sign in to comment.