From cf29773f27fca21c27d23399f1a23b8f830afb26 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 May 2019 13:53:41 +0200 Subject: [PATCH 1/6] fix Jenkins tests --- Jenkinsfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index c8a8e4252d..97c43e3384 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -13,9 +13,10 @@ pipeline { } stage('Build') { steps { - sh "git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data" - sh "nextflow run build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link -ansi-log false" - sh "rm -rf work/ references/pipeline_info .nextflow*" + sh "rm -rf data" + sh "git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data" + sh "nextflow run build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link -ansi-log false" + sh "rm -rf work/ references/pipeline_info .nextflow*" } } stage('SampleDir') { From 578e239bf795dace76cc957c3da1bf84682e6be1 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 May 2019 13:53:48 +0200 Subject: [PATCH 2/6] sort ansi codes --- build.nf | 25 +++++++++++++------------ main.nf | 11 ++++++----- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/build.nf b/build.nf index c5238c483e..6c7e19e5e3 100644 --- a/build.nf +++ b/build.nf @@ -349,15 +349,16 @@ process DownloadCADD { def nfcoreHeader(){ // Log colors ANSI codes - c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_blue = params.monochrome_logs ? '' : "\033[0;34m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; + c_reset = params.monochrome_logs ? '' : "\033[0m"; c_dim = params.monochrome_logs ? '' : "\033[2m"; + c_black = params.monochrome_logs ? '' : "\033[0;30m"; + c_red = params.monochrome_logs ? '' : "\033[0;31m"; c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; + c_blue = params.monochrome_logs ? '' : "\033[0;34m"; c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; c_white = params.monochrome_logs ? '' : "\033[0;37m"; - c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; return """ ${c_dim}----------------------------------------------------${c_reset} ${c_green},--.${c_black}/${c_green},-.${c_reset} @@ -365,12 +366,12 @@ def nfcoreHeader(){ ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} ${c_green}`._,._,\'${c_reset} - ____ _____ _ - .' _ `. / ____| | | - / |\\`-_ \\ | (___ ___ _ __ __ | | __ - | | \\ `-| \\___ \\/__ \\| ´__/ _\\| |/ / - \\ | \\ / ____) | __ | | | __| < - `|____\\' |_____/\\____|_| \\__/|_|\\_\\ + ${c_black} ____ ${c_blue} _____ _ ${c_reset} + ${c_black} .' ${c_green}_${c_black} `. 
${c_blue} / ____| | | ${c_reset} + ${c_black} / ${c_green}|\\${c_white}`-_${c_black} \\ ${c_blue} | (___ ___ _ __ __ | | __ ${c_reset} + ${c_black} | ${c_green}| \\ ${c_white}`-${c_black}| ${c_blue} \\___ \\/__ \\| ´__/ _\\| |/ / ${c_reset} + ${c_black} \\ ${c_green}| \\ ${c_black}/ ${c_blue} ____) | __ | | | __| < ${c_reset} + ${c_black} `${c_green}|${c_black}____${c_green}\\${c_black}' ${c_blue} |_____/\\____|_| \\__/|_|\\_\\ ${c_reset} ${c_purple} nf-core/sarek v${workflow.manifest.version}${c_reset} ${c_dim}----------------------------------------------------${c_reset} @@ -409,4 +410,4 @@ def checkFile(it) { final f = file(it) if (!f.exists()) exit 1, "Missing file: ${it}, see --help for more information" return true -} \ No newline at end of file +} diff --git a/main.nf b/main.nf index cd3d3ac7a9..596385569f 100644 --- a/main.nf +++ b/main.nf @@ -1115,15 +1115,16 @@ workflow.onComplete { def nfcoreHeader(){ // Log colors ANSI codes - c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_blue = params.monochrome_logs ? '' : "\033[0;34m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; + c_reset = params.monochrome_logs ? '' : "\033[0m"; c_dim = params.monochrome_logs ? '' : "\033[2m"; + c_black = params.monochrome_logs ? '' : "\033[0;30m"; + c_red = params.monochrome_logs ? '' : "\033[0;31m"; c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; + c_blue = params.monochrome_logs ? '' : "\033[0;34m"; c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; c_white = params.monochrome_logs ? '' : "\033[0;37m"; - c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; return """ ${c_dim}----------------------------------------------------${c_reset} ${c_green},--.${c_black}/${c_green},-.${c_reset} From ed447ba0de7a8bd585af3594aac180292b1eca4a Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 May 2019 14:44:09 +0200 Subject: [PATCH 3/6] add tests --- .travis.yml | 2 +- Jenkinsfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index b3847aab0e..77bfdcdc0a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,4 +35,4 @@ install: script: - git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data - nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link --max_memory 7.GB --max_cpus 2 -ansi-log false - - nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --sampleDir data/testdata/tiny/normal --tools HaplotypeCaller,Manta,Strelka --igenomes_base references --publishDirMode link --max_memory 7.GB --max_cpus 2 -ansi-log false + - nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --sampleDir data/testdata/tsv/tiny-manta.tsv --tools HaploTypeCaller,Manta,Strelka,MuTecT2,FreeBayes --igenomes_base references --publishDirMode link --max_memory 7.GB --max_cpus 2 -ansi-log false diff --git a/Jenkinsfile b/Jenkinsfile index 97c43e3384..630e2e2e88 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -27,7 +27,7 @@ pipeline { } stage('Multiple') { steps { - sh "nextflow run main.nf -profile docker --sample data/testdata/tsv/tiny-multiple.tsv --tools HaplotypeCaller,Manta,Strelka --genome smallGRCh37 --igenomes_base references --publishDirMode link -ansi-log false" + sh "nextflow run main.nf -profile docker --sample 
data/testdata/tsv/tiny-multiple.tsv --tools HaploTypeCaller,Manta,Strelka,MuTecT2,FreeBayes --genome smallGRCh37 --igenomes_base references --publishDirMode link -ansi-log false" sh "rm -rf work/ .nextflow* results/" } } From d93b562e9128cbecc2414320ffb16531c406a77c Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 May 2019 14:44:37 +0200 Subject: [PATCH 4/6] add Manta, Strelka, StrelkaBP, MuTecT2, Freebayes, Ascat, Controlfreec --- main.nf | 606 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 566 insertions(+), 40 deletions(-) diff --git a/main.nf b/main.nf index 596385569f..39fb4e8804 100644 --- a/main.nf +++ b/main.nf @@ -46,6 +46,7 @@ def helpMessage() { --step Specify starting step Available: Mapping, Recalibrate, VariantCalling Default: Mapping + --strelkaBP Use Manta candidateSmallIndels for Strelka as Best Practice --targetBED target BED file for targeted sequencing --tools Specify tools to use for variant calling Available: HaplotypeCaller, @@ -97,6 +98,7 @@ params.sample = null params.sampleDir = null params.sequencing_center = null params.step = 'mapping' +params.strelkaBP = true params.targetBED = null params.tools = null @@ -189,6 +191,7 @@ if (params.step) summary['Step'] = params.step if (params.tools) summary['Tools'] = tools.join(', ') if (params.noReports) summary['Reports'] = params.noReports if (params.noGVCF) summary['GVCF'] = params.noGVCF +if (params.strelkaBP) summary['Strelka BP'] = params.strelkaBP if (params.sequencing_center) summary['Sequencing Center'] = params.sequencing_center summary['Nucleotides/s'] = params.nucleotidesPerSecond summary['Output dir'] = params.outdir @@ -516,7 +519,7 @@ bedIntervals = bedIntervals bedIntervals = bedIntervals.dump(tag:'bedintervals') -(bedIntervalsBR, bedIntervalsHC) = bedIntervals.into(2) +(bedIntervalsBR, bedIntervalsHC, bedIntervalsForMpileup, bedIntervals) = bedIntervals.into(4) bamForBaseRecalibrator = mdBam.combine(bedIntervalsBR) @@ -724,12 +727,12 @@ recalibratedBam = recalibratedBam.dump(tag:'BAM') // Manta will be run in Germline mode, or in Tumor mode depending on status // HaplotypeCaller and Strelka will be run for Normal and Tumor samples -(bamsForSingleManta, bamsForSingleStrelka, recalibratedBam) = recalibratedBam.into(3) +(bamsForSingleManta, bamsForSingleStrelka, recalibratedBamTemp, recalibratedBam) = recalibratedBam.into(4) // To speed Variant Callers up we are chopping the reference into smaller pieces // Do variant calling by this intervals, and re-merge the VCFs -bamsForHC = recalibratedBam.combine(bedIntervalsHC) +bamsForHC = recalibratedBamTemp.combine(bedIntervalsHC) process RunHaplotypecaller { tag {idSample + "-" + intervalBed.baseName} @@ -745,8 +748,8 @@ process RunHaplotypecaller { ]) output: - set val("HaplotypeCallerGVCF"), idPatient, status, idSample, file("${intervalBed.baseName}_${idSample}.g.vcf") into hcGenomicVCF - set idPatient, status, idSample, file(intervalBed), file("${intervalBed.baseName}_${idSample}.g.vcf") into vcfsToGenotype + set val("HaplotypeCallerGVCF"), idPatient, idSample, file("${intervalBed.baseName}_${idSample}.g.vcf") into hcGenomicVCF + set idPatient, idSample, file(intervalBed), file("${intervalBed.baseName}_${idSample}.g.vcf") into vcfsToGenotype when: 'haplotypecaller' in tools @@ -763,7 +766,7 @@ process RunHaplotypecaller { """ } -hcGenomicVCF = hcGenomicVCF.groupTuple(by:[0,1,2,3]) +hcGenomicVCF = hcGenomicVCF.groupTuple(by:[0,1,2]) if (params.noGVCF) hcGenomicVCF.close() @@ -771,7 +774,7 @@ process RunGenotypeGVCFs { tag {idSample 
+ "-" + intervalBed.baseName} input: - set idPatient, status, idSample, file(intervalBed), file(gvcf) from vcfsToGenotype + set idPatient, idSample, file(intervalBed), file(gvcf) from vcfsToGenotype set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex) from Channel.value([ referenceMap.genomeFile, referenceMap.genomeIndex, @@ -781,7 +784,7 @@ process RunGenotypeGVCFs { ]) output: - set val("HaplotypeCaller"), idPatient, status, idSample, file("${intervalBed.baseName}_${idSample}.vcf") into hcGenotypedVCF + set val("HaplotypeCaller"), idPatient, idSample, file("${intervalBed.baseName}_${idSample}.vcf") into hcGenotypedVCF when: 'haplotypecaller' in tools @@ -801,40 +804,11 @@ process RunGenotypeGVCFs { """ } -hcGenotypedVCF = hcGenotypedVCF.groupTuple(by:[0,1,2,3]) +hcGenotypedVCF = hcGenotypedVCF.groupTuple(by:[0,1,2]) // we are merging the VCFs that are called separatelly for different intervals // so we can have a single sorted VCF containing all the calls for a given caller -vcfsToMerge = hcGenomicVCF.mix(hcGenotypedVCF) - -vcfsToMerge = vcfsToMerge.dump(tag:'VCFsToMerge') - -process ConcatVCF { - tag {variantCaller + "-" + idSample} - - publishDir "${params.outdir}/VariantCalling/${idSample}/${"$variantCaller"}", mode: params.publishDirMode - - input: - set variantCaller, idPatient, status, idSample, file(vcFiles) from vcfsToMerge - file(genomeIndex) from Channel.value(referenceMap.genomeIndex) - file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") - - output: - // we have this funny *_* pattern to avoid copying the raw calls to publishdir - set variantCaller, idPatient, status, idSample, file("*_*.vcf.gz"), file("*_*.vcf.gz.tbi") into vcfConcatenated - - when: 'haplotypecaller' in tools - - script: - if (variantCaller == 'HaplotypeCaller') outputFile = "${variantCaller}_${idSample}.vcf" - else if (variantCaller == 'HaplotypeCallerGVCF') outputFile = "haplotypecaller_${idSample}.g.vcf" - options = params.targetBED ? "-t ${targetBED}" : "" - """ - concatenateVCFs.sh -i ${genomeIndex} -c ${task.cpus} -o ${outputFile} ${options} - """ -} - process RunSingleStrelka { tag {idSample} @@ -924,9 +898,537 @@ process RunSingleManta { singleMantaOutput = singleMantaOutput.dump(tag:'Single Manta') +/* +======================================================================================== + SOMATIC VARIANT CALLING +======================================================================================== +*/ + +// separate recalibrateBams by status +bamsNormal = Channel.create() +bamsTumor = Channel.create() + +recalibratedBam + .choice(bamsTumor, bamsNormal) {it[1] == 0 ? 
1 : 0} + +// Ascat, Control-FREEC, Manta Tumor-only SV +bamsForAscat = Channel.create() +bamsForMpileup = Channel.create() +bamsForSingleManta = Channel.create() + +(bamsTumorTemp, bamsTumor) = bamsTumor.into(2) +(bamsNormalTemp, bamsNormal) = bamsNormal.into(2) +(bamsForAscat, bamsForMpileup, bamsForSingleManta) = bamsNormalTemp.mix(bamsTumorTemp).into(3) + +// Removing status because not relevant anymore +bamsNormal = bamsNormal.map { idPatient, status, idSample, bam, bai -> [idPatient, idSample, bam, bai] } +bamsTumor = bamsTumor.map { idPatient, status, idSample, bam, bai -> [idPatient, idSample, bam, bai] } + +bamsAll = bamsNormal.join(bamsTumor) + +// Manta and Strelka +(bamsForManta, bamsForStrelka, bamsForStrelkaBP, bamsAll) = bamsAll.into(4) + +bamsTumorNormalIntervals = bamsAll.spread(bedIntervals) +bamsForMpileup = bamsForMpileup.spread(bedIntervalsForMpileup) + +// MuTect2, FreeBayes +( bamsFMT2, bamsFFB) = bamsTumorNormalIntervals.into(3) + +// This will give as a list of unfiltered calls for MuTect2. +process RunMutect2 { + tag {idSampleTumor + "_vs_" + idSampleNormal + "-" + intervalBed.baseName} + + input: + set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from bamsFMT2 + set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.genomeDict, + referenceMap.dbsnp, + referenceMap.dbsnpIndex + ]) + + output: + set val("MuTect2"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into mutect2Output + + when: 'mutect2' in tools + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()}g" \ + Mutect2 \ + -R ${genomeFile}\ + -I ${bamTumor} -tumor ${idSampleTumor} \ + -I ${bamNormal} -normal ${idSampleNormal} \ + -L ${intervalBed} \ + -O ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf + """ +} + +mutect2Output = mutect2Output.groupTuple(by:[0,1,2,3]) + +process RunFreeBayes { + tag {idSampleTumor + "_vs_" + idSampleNormal + "-" + intervalBed.baseName} + + input: + set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from bamsFFB + file(genomeFile) from Channel.value(referenceMap.genomeFile) + file(genomeIndex) from Channel.value(referenceMap.genomeIndex) + + output: + set val("FreeBayes"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into freebayesOutput + + when: 'freebayes' in tools + + script: + """ + freebayes \ + -f ${genomeFile} \ + --pooled-continuous \ + --pooled-discrete \ + --genotype-qualities \ + --report-genotype-likelihood-max \ + --allele-balance-priors-off \ + --min-alternate-fraction 0.03 \ + --min-repeat-entropy 1 \ + --min-alternate-count 2 \ + -t ${intervalBed} \ + ${bamTumor} \ + ${bamNormal} > ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf + """ +} + +freebayesOutput = freebayesOutput.groupTuple(by:[0,1,2,3]) + +vcfsToMerge = mutect2Output.mix(freebayesOutput, hcGenotypedVCF) + +vcfsToMerge = vcfsToMerge.dump(tag:'VCF to merge') + +process ConcatVCF { + tag {variantCaller + "-" + idSample} + + publishDir "${params.outdir}/VariantCalling/${idSample}/${"$variantCaller"}", mode: params.publishDirMode + + input: + set variantCaller, idPatient, idSample, file(vcFiles) from 
vcfsToMerge + file(genomeIndex) from Channel.value(referenceMap.genomeIndex) + file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") + + output: + // we have this funny *_* pattern to avoid copying the raw calls to publishdir + set variantCaller, idPatient, idSample, file("*_*.vcf.gz"), file("*_*.vcf.gz.tbi") into vcfConcatenated + + when: ('haplotypecaller' in tools || 'mutect2' in tools || 'freebayes' in tools) + + script: + if (variantCaller == 'HaplotypeCallerGVCF') outputFile = "haplotypecaller_${idSample}.g.vcf" + else outputFile = "${variantCaller}_${idSample}.vcf" + + options = params.targetBED ? "-t ${targetBED}" : "" + """ + concatenateVCFs.sh -i ${genomeIndex} -c ${task.cpus} -o ${outputFile} ${options} + """ +} + +vcfConcatenated = vcfConcatenated.dump(tag:'VCF') + +process RunStrelka { + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Strelka", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForStrelka + file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") + set file(genomeFile), file(genomeIndex), file(genomeDict) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.genomeDict + ]) + + output: + set val("Strelka"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf.gz"), file("*.vcf.gz.tbi") into strelkaOutput + + when: 'strelka' in tools + + script: + beforeScript = params.targetBED ? "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" + options = params.targetBED ? "--exome --callRegions call_targets.bed.gz" : "" + """ + ${beforeScript} + configureStrelkaSomaticWorkflow.py \ + --tumor ${bamTumor} \ + --normal ${bamNormal} \ + --referenceFasta ${genomeFile} \ + ${options} \ + --runDir Strelka + + python Strelka/runWorkflow.py -m local -j ${task.cpus} + mv Strelka/results/variants/somatic.indels.vcf.gz Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz + mv Strelka/results/variants/somatic.indels.vcf.gz.tbi Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz.tbi + mv Strelka/results/variants/somatic.snvs.vcf.gz Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz + mv Strelka/results/variants/somatic.snvs.vcf.gz.tbi Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz.tbi + """ +} + +strelkaOutput = strelkaOutput.dump(tag:'Strelka') + +process RunManta { + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Manta", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForManta + file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") + set file(genomeFile), file(genomeIndex) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex + ]) + + output: + set val("Manta"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf.gz"), file("*.vcf.gz.tbi") into mantaOutput + set idPatient, idSampleNormal, idSampleTumor, file("*.candidateSmallIndels.vcf.gz"), file("*.candidateSmallIndels.vcf.gz.tbi") into mantaToStrelka + + when: 'manta' in tools + + script: + beforeScript = params.targetBED ? 
"bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" + options = params.targetBED ? "--exome --callRegions call_targets.bed.gz" : "" + """ + ${beforeScript} + configManta.py \ + --normalBam ${bamNormal} \ + --tumorBam ${bamTumor} \ + --reference ${genomeFile} \ + ${options} \ + --runDir Manta + + python Manta/runWorkflow.py -m local -j ${task.cpus} + + mv Manta/results/variants/candidateSmallIndels.vcf.gz \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.candidateSmallIndels.vcf.gz + mv Manta/results/variants/candidateSmallIndels.vcf.gz.tbi \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.candidateSmallIndels.vcf.gz.tbi + mv Manta/results/variants/candidateSV.vcf.gz \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.candidateSV.vcf.gz + mv Manta/results/variants/candidateSV.vcf.gz.tbi \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.candidateSV.vcf.gz.tbi + mv Manta/results/variants/diploidSV.vcf.gz \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.diploidSV.vcf.gz + mv Manta/results/variants/diploidSV.vcf.gz.tbi \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.diploidSV.vcf.gz.tbi + mv Manta/results/variants/somaticSV.vcf.gz \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.somaticSV.vcf.gz + mv Manta/results/variants/somaticSV.vcf.gz.tbi \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.somaticSV.vcf.gz.tbi + """ +} + +mantaOutput = mantaOutput.dump(tag:'Manta') + +bamsForStrelkaBP = bamsForStrelkaBP.map { + idPatientNormal, idSampleNormal, bamNormal, baiNormal, idSampleTumor, bamTumor, baiTumor -> + [idPatientNormal, idSampleNormal, idSampleTumor, bamNormal, baiNormal, bamTumor, baiTumor] +}.join(mantaToStrelka, by:[0,1,2]).map { + idPatientNormal, idSampleNormal, idSampleTumor, bamNormal, baiNormal, bamTumor, baiTumor, mantaCSI, mantaCSIi -> + [idPatientNormal, idSampleNormal, bamNormal, baiNormal, idSampleTumor, bamTumor, baiTumor, mantaCSI, mantaCSIi] +} + +process RunStrelkaBP { + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Strelka", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(mantaCSI), file(mantaCSIi) from bamsForStrelkaBP + file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") + set file(genomeFile), file(genomeIndex), file(genomeDict) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.genomeDict + ]) + + output: + set val("Strelka"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf.gz"), file("*.vcf.gz.tbi") into strelkaBPOutput + + when: 'strelka' in tools && 'manta' in tools && params.strelkaBP + + script: + beforeScript = params.targetBED ? "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" + options = params.targetBED ? 
"--exome --callRegions call_targets.bed.gz" : "" + """ + ${beforeScript} + configureStrelkaSomaticWorkflow.py \ + --tumor ${bamTumor} \ + --normal ${bamNormal} \ + --referenceFasta ${genomeFile} \ + --indelCandidates ${mantaCSI} \ + ${options} \ + --runDir Strelka + + python Strelka/runWorkflow.py -m local -j ${task.cpus} + + mv Strelka/results/variants/somatic.indels.vcf.gz \ + StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz + mv Strelka/results/variants/somatic.indels.vcf.gz.tbi \ + StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz.tbi + mv Strelka/results/variants/somatic.snvs.vcf.gz \ + StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz + mv Strelka/results/variants/somatic.snvs.vcf.gz.tbi \ + StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz.tbi + """ +} + +strelkaBPOutput = strelkaBPOutput.dump(tag:'Strelka BP') + +// Run commands and code from Malin Larsson +// Based on Jesper Eisfeldt's code +process RunAlleleCount { + tag {idSample} + + input: + set idPatient, status, idSample, file(bam), file(bai) from bamsForAscat + set file(acLoci), file(genomeFile), file(genomeIndex), file(genomeDict) from Channel.value([ + referenceMap.acLoci, + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.genomeDict + ]) + + output: + set idPatient, status, idSample, file("${idSample}.alleleCount") into alleleCountOutput + + when: 'ascat' in tools + + script: + """ + alleleCounter \ + -l ${acLoci} \ + -r ${genomeFile} \ + -b ${bam} \ + -o ${idSample}.alleleCount; + """ +} + +alleleCountNormal = Channel.create() +alleleCountTumor = Channel.create() + +alleleCountOutput + .choice(alleleCountTumor, alleleCountNormal) {it[1] == 0 ? 1 : 0} + +alleleCountOutput = alleleCountNormal.combine(alleleCountTumor) + +alleleCountOutput = alleleCountOutput.map { + idPatientNormal, statusNormal, idSampleNormal, alleleCountNormal, + idPatientTumor, statusTumor, idSampleTumor, alleleCountTumor -> + [idPatientNormal, idSampleNormal, idSampleTumor, alleleCountNormal, alleleCountTumor] +} + +// R script from Malin Larssons bitbucket repo: +// https://bitbucket.org/malinlarsson/somatic_wgs_pipeline +process RunConvertAlleleCounts { + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/ASCAT", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, idSampleTumor, file(alleleCountNormal), file(alleleCountTumor) from alleleCountOutput + + output: + set idPatient, idSampleNormal, idSampleTumor, file("${idSampleNormal}.BAF"), file("${idSampleNormal}.LogR"), file("${idSampleTumor}.BAF"), file("${idSampleTumor}.LogR") into convertAlleleCountsOutput + + when: 'ascat' in tools + + script: + gender = patientGenders[idPatient] + """ + convertAlleleCounts.r ${idSampleTumor} ${alleleCountTumor} ${idSampleNormal} ${alleleCountNormal} ${gender} + """ +} + +// R scripts from Malin Larssons bitbucket repo: +// https://bitbucket.org/malinlarsson/somatic_wgs_pipeline +process RunAscat { + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/ASCAT", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, idSampleTumor, file(bafNormal), file(logrNormal), file(bafTumor), file(logrTumor) from convertAlleleCountsOutput + file(acLociGC) from Channel.value([referenceMap.acLociGC]) + + output: + set val("ASCAT"), idPatient, idSampleNormal, idSampleTumor, 
file("${idSampleTumor}.*.{png,txt}") into ascatOutput + + when: 'ascat' in tools + + script: + """ + # get rid of "chr" string if there is any + for f in *BAF *LogR; do sed 's/chr//g' \$f > tmpFile; mv tmpFile \$f;done + run_ascat.r ${bafTumor} ${logrTumor} ${bafNormal} ${logrNormal} ${idSampleTumor} ${baseDir} ${acLociGC} + """ +} + +ascatOutput.dump(tag:'ASCAT') + +process RunMpileup { + tag {idSample + "-" + intervalBed.baseName} + + input: + set idPatient, status, idSample, file(bam), file(bai), file(intervalBed) from bamsForMpileup + set file(genomeFile), file(genomeIndex) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex + ]) + + output: + set idPatient, status, idSample, file("${idSample}_${intervalBed.baseName}.pileup.gz") into mpileupToMerge + + when: ('controlfreec' in tools || 'mpileup' in tools) + + script: + """ + samtools mpileup \ + -f ${genomeFile} ${bam} \ + -l ${intervalBed} \ + | bgzip --threads ${task.cpus} -c > ${idSample}_${intervalBed.baseName}.pileup.gz + """ +} + +mpileupToMerge = mpileupToMerge.groupTuple(by:[0,1,2]) + +process MergeMpileup { + tag {idSample} + + publishDir params.outdir, mode: params.publishDirMode, saveAs: { it == "${idSample}.pileup.gz" ? "VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/mpileup/${it}" : '' } + + input: + set idPatient, status, idSample, file(mpileup) from mpileupToMerge + + output: + set idPatient, status, idSample, file("${idSample}.pileup.gz") into mpileupOutput + + when: ('controlfreec' in tools || 'mpileup' in tools) + + script: + """ + for i in `ls -1v *.pileup.gz`; + do zcat \$i >> ${idSample}.pileup + done + bgzip --threads ${task.cpus} -c ${idSample}.pileup > ${idSample}.pileup.gz + rm ${idSample}.pileup + """ +} + +mpileupOutput = mpileupOutput.dump(tag:'mpileup') + +mpileupNormal = Channel.create() +mpileupTumor = Channel.create() + +mpileupOutput + .choice(mpileupTumor, mpileupNormal) {it[1] == 0 ? 
1 : 0} + +mpileupOutput = mpileupNormal.combine(mpileupTumor) + +mpileupOutput = mpileupOutput.map { + idPatientNormal, statusNormal, idSampleNormal, mpileupNormal, + idPatientTumor, statusTumor, idSampleTumor, mpileupTumor -> + [idPatientNormal, idSampleNormal, idSampleTumor, mpileupNormal, mpileupTumor] +} + +process RunControlFreec { + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/controlFREEC", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, idSampleTumor, file(mpileupNormal), file(mpileupTumor) from mpileupOutput + set file(genomeFile), file(genomeIndex), file(dbsnp), file(dbsnpIndex), file(chrDir), file(chrLength) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.dbsnp, + referenceMap.dbsnpIndex, + referenceMap.chrDir, + referenceMap.chrLength + ]) + + output: + set idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}.pileup.gz_CNVs"), file("${idSampleTumor}.pileup.gz_ratio.txt"), file("${idSampleTumor}.pileup.gz_normal_CNVs"), file("${idSampleTumor}.pileup.gz_normal_ratio.txt"), file("${idSampleTumor}.pileup.gz_BAF.txt"), file("${idSampleNormal}.pileup.gz_BAF.txt") into controlFreecOutputVisualization + set file("*.pileup.gz*"), file("${idSampleTumor}_vs_${idSampleNormal}.config.txt") into controlFreecOutput + + when: 'controlfreec' in tools + + script: + config = "${idSampleTumor}_vs_${idSampleNormal}.config.txt" + gender = patientGenders[idPatient] + """ + touch ${config} + echo "[general]" >> ${config} + echo "BedGraphOutput = TRUE" >> ${config} + echo "chrFiles = \${PWD}/${referenceMap.chrDir.fileName}" >> ${config} + echo "chrLenFile = \${PWD}/${referenceMap.chrLength.fileName}" >> ${config} + echo "coefficientOfVariation = 0.05" >> ${config} + echo "contaminationAdjustment = TRUE" >> ${config} + echo "forceGCcontentNormalization = 0" >> ${config} + echo "maxThreads = ${task.cpus}" >> ${config} + echo "minimalSubclonePresence = 20" >> ${config} + echo "ploidy = 2,3,4" >> ${config} + echo "sex = ${gender}" >> ${config} + echo "window = 50000" >> ${config} + echo "" >> ${config} + + echo "[control]" >> ${config} + echo "inputFormat = pileup" >> ${config} + echo "mateFile = \${PWD}/${mpileupNormal}" >> ${config} + echo "mateOrientation = FR" >> ${config} + echo "" >> ${config} + + echo "[sample]" >> ${config} + echo "inputFormat = pileup" >> ${config} + echo "mateFile = \${PWD}/${mpileupTumor}" >> ${config} + echo "mateOrientation = FR" >> ${config} + echo "" >> ${config} + + echo "[BAF]" >> ${config} + echo "SNPfile = ${referenceMap.dbsnp.fileName}" >> ${config} + + freec -conf ${config} + """ +} + +process RunControlFreecVisualization { + + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/controlFREEC", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, idSampleTumor, file(cnvTumor), file(ratioTumor), file(cnvNormal), file(ratioNormal), file(bafTumor), file(bafNormal) from controlFreecOutputVisualization + + output: + set file("*.txt"), file("*.png"), file("*.bed") into controlFreecOutputFinal + + when: 'controlfreec' in tools + + """ + cat /opt/conda/envs/sarek-2.3/bin/assess_significance.R | R --slave --args ${cnvTumor} ${ratioTumor} + cat /opt/conda/envs/sarek-2.3/bin/assess_significance.R | R --slave --args ${cnvNormal} ${ratioNormal} + cat /opt/conda/envs/sarek-2.3/bin/makeGraph.R | R --slave --args 2 
${ratioTumor} ${bafTumor} + cat /opt/conda/envs/sarek-2.3/bin/makeGraph.R | R --slave --args 2 ${ratioNormal} ${bafNormal} + perl /opt/conda/envs/sarek-2.3/bin/freec2bed.pl -f ${ratioTumor} > ${idSampleTumor}.bed + perl /opt/conda/envs/sarek-2.3/bin/freec2bed.pl -f ${ratioNormal} > ${idSampleNormal}.bed + """ +} + +(strelkaIndels, strelkaSNVS) = strelkaOutput.into(2) +(mantaSomaticSV, mantaDiploidSV) = mantaOutput.into(2) + vcfForQC = Channel.empty().mix( vcfConcatenated.map { - variantcaller, idPatient, status, idSample, vcf, tbi -> + variantcaller, idPatient, idSample, vcf, tbi -> [variantcaller, idPatient, idSample, vcf] }, singleStrelkaOutput.map { @@ -936,6 +1438,22 @@ vcfForQC = Channel.empty().mix( singleMantaOutput.map { variantcaller, idPatient, idSample, vcf, tbi -> [variantcaller, idPatient, idSample, vcf[2]] + }, + mantaDiploidSV.map { + variantcaller, idPatient, idSample, vcf, tbi -> + [variantcaller, idPatient, idSample, vcf[2]] + }, + mantaSomaticSV.map { + variantcaller, idPatient, idSample, vcf, tbi -> + [variantcaller, idPatient, idSample, vcf[3]] + }, + strelkaIndels.map { + variantcaller, idPatient, idSample, vcf, tbi -> + [variantcaller, idPatient, idSample, vcf[0]] + }, + strelkaSNVS.map { + variantcaller, idPatient, idSample, vcf, tbi -> + [variantcaller, idPatient, idSample, vcf[1]] }) (vcfForBCFtools, vcfForVCFtools) = vcfForQC.into(2) @@ -1234,13 +1752,19 @@ def defineReferenceMap(step, tools) { 'knownIndelsIndex' : checkParamReturnFile("knownIndelsIndex") ) } + if ('controlfreec' in tools) { + referenceMap.putAll( + 'chrDir' : checkParamReturnFile("chrDir"), + 'chrLength' : checkParamReturnFile("chrLength") + ) + } if ('ascat' in tools) { referenceMap.putAll( 'acLoci' : checkParamReturnFile("acLoci"), 'acLociGC' : checkParamReturnFile("acLociGC") ) } - if ('mapping' in step || 'haplotypecaller' in tools || 'mutect2' in tools) { + if ('mapping' in step || 'haplotypecaller' in tools || 'mutect2' in tools || 'controlfreec' in tools) { referenceMap.putAll( 'dbsnp' : checkParamReturnFile("dbsnp"), 'dbsnpIndex' : checkParamReturnFile("dbsnpIndex") @@ -1263,9 +1787,11 @@ def defineStepList() { def defineToolList() { return [ 'ascat', + 'controlfreec', 'freebayes', 'haplotypecaller', 'manta', + 'mpileup', 'mutect2', 'strelka' ] From 45928fc0316a8758e7f437cc207ada9ce72567d0 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 May 2019 14:47:02 +0200 Subject: [PATCH 5/6] update docs --- main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 39fb4e8804..216f4070d6 100644 --- a/main.nf +++ b/main.nf @@ -49,7 +49,8 @@ def helpMessage() { --strelkaBP Use Manta candidateSmallIndels for Strelka as Best Practice --targetBED target BED file for targeted sequencing --tools Specify tools to use for variant calling - Available: HaplotypeCaller, + Available: ASCAT, ControlFREEC, FreeBayes, HaplotypeCaller + Manta, mpileup, MuTect2, Strelka References If not specified in the configuration file or you wish to overwrite any of the references. 
    --acLoci                      acLoci file

From a1a90141ee723b39c0741d3572f526c92d4a47f2 Mon Sep 17 00:00:00 2001
From: MaxUlysse
Date: Fri, 3 May 2019 14:51:48 +0200
Subject: [PATCH 6/6] fix travis tests

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 77bfdcdc0a..ee7db79aa5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,4 +35,4 @@ install:
 script:
   - git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data
   - nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link --max_memory 7.GB --max_cpus 2 -ansi-log false
-  - nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --sampleDir data/testdata/tsv/tiny-manta.tsv --tools HaploTypeCaller,Manta,Strelka,MuTecT2,FreeBayes --igenomes_base references --publishDirMode link --max_memory 7.GB --max_cpus 2 -ansi-log false
+  - nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --sample data/testdata/tsv/tiny-manta.tsv --tools HaploTypeCaller,Manta,Strelka,MuTecT2,FreeBayes --igenomes_base references --publishDirMode link --max_memory 7.GB --max_cpus 2 -ansi-log false
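
For reference, the combined effect of this series can be exercised in a single run. The sketch below is only illustrative and mirrors the Travis and Jenkins test commands in the patches above: it assumes the smallGRCh37 references were already built with build.nf into references/ and that the tumour/normal TSV from the sarek branch of nf-core/test-datasets is available locally; tool names appear to be matched case-insensitively (the when: blocks compare lowercase names), so the exact spellings are cosmetic.

# Illustrative invocation only, not part of the patch series; paths follow the CI tests above.
nextflow run main.nf -profile docker \
    --genome smallGRCh37 \
    --sample data/testdata/tsv/tiny-manta.tsv \
    --tools HaplotypeCaller,Manta,Strelka,Mutect2,FreeBayes \
    --igenomes_base references \
    --publishDirMode link \
    -ansi-log false

Because params.strelkaBP defaults to true, selecting both Manta and Strelka in --tools also triggers the RunStrelkaBP process added in PATCH 4, which feeds Manta's candidateSmallIndels into the Strelka somatic workflow.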