From cf29773f27fca21c27d23399f1a23b8f830afb26 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 May 2019 13:53:41 +0200 Subject: [PATCH 1/6] fix Jenkins tests --- Jenkinsfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index c8a8e4252d..97c43e3384 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -13,9 +13,10 @@ pipeline { } stage('Build') { steps { - sh "git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data" - sh "nextflow run build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link -ansi-log false" - sh "rm -rf work/ references/pipeline_info .nextflow*" + sh "rm -rf data" + sh "git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data" + sh "nextflow run build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link -ansi-log false" + sh "rm -rf work/ references/pipeline_info .nextflow*" } } stage('SampleDir') { From 578e239bf795dace76cc957c3da1bf84682e6be1 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 May 2019 13:53:48 +0200 Subject: [PATCH 2/6] sort ansi codes --- build.nf | 25 +++++++++++++------------ main.nf | 11 ++++++----- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/build.nf b/build.nf index c5238c483e..6c7e19e5e3 100644 --- a/build.nf +++ b/build.nf @@ -349,15 +349,16 @@ process DownloadCADD { def nfcoreHeader(){ // Log colors ANSI codes - c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_blue = params.monochrome_logs ? '' : "\033[0;34m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; + c_reset = params.monochrome_logs ? '' : "\033[0m"; c_dim = params.monochrome_logs ? '' : "\033[2m"; + c_black = params.monochrome_logs ? '' : "\033[0;30m"; + c_red = params.monochrome_logs ? '' : "\033[0;31m"; c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; + c_blue = params.monochrome_logs ? '' : "\033[0;34m"; c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; c_white = params.monochrome_logs ? '' : "\033[0;37m"; - c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; return """ ${c_dim}----------------------------------------------------${c_reset} ${c_green},--.${c_black}/${c_green},-.${c_reset} @@ -365,12 +366,12 @@ def nfcoreHeader(){ ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} ${c_green}`._,._,\'${c_reset} - ____ _____ _ - .' _ `. / ____| | | - / |\\`-_ \\ | (___ ___ _ __ __ | | __ - | | \\ `-| \\___ \\/__ \\| ´__/ _\\| |/ / - \\ | \\ / ____) | __ | | | __| < - `|____\\' |_____/\\____|_| \\__/|_|\\_\\ + ${c_black} ____ ${c_blue} _____ _ ${c_reset} + ${c_black} .' ${c_green}_${c_black} `. 
${c_blue} / ____| | | ${c_reset} + ${c_black} / ${c_green}|\\${c_white}`-_${c_black} \\ ${c_blue} | (___ ___ _ __ __ | | __ ${c_reset} + ${c_black} | ${c_green}| \\ ${c_white}`-${c_black}| ${c_blue} \\___ \\/__ \\| ´__/ _\\| |/ / ${c_reset} + ${c_black} \\ ${c_green}| \\ ${c_black}/ ${c_blue} ____) | __ | | | __| < ${c_reset} + ${c_black} `${c_green}|${c_black}____${c_green}\\${c_black}' ${c_blue} |_____/\\____|_| \\__/|_|\\_\\ ${c_reset} ${c_purple} nf-core/sarek v${workflow.manifest.version}${c_reset} ${c_dim}----------------------------------------------------${c_reset} @@ -409,4 +410,4 @@ def checkFile(it) { final f = file(it) if (!f.exists()) exit 1, "Missing file: ${it}, see --help for more information" return true -} \ No newline at end of file +} diff --git a/main.nf b/main.nf index cd3d3ac7a9..596385569f 100644 --- a/main.nf +++ b/main.nf @@ -1115,15 +1115,16 @@ workflow.onComplete { def nfcoreHeader(){ // Log colors ANSI codes - c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_blue = params.monochrome_logs ? '' : "\033[0;34m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; + c_reset = params.monochrome_logs ? '' : "\033[0m"; c_dim = params.monochrome_logs ? '' : "\033[2m"; + c_black = params.monochrome_logs ? '' : "\033[0;30m"; + c_red = params.monochrome_logs ? '' : "\033[0;31m"; c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; + c_blue = params.monochrome_logs ? '' : "\033[0;34m"; c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; c_white = params.monochrome_logs ? '' : "\033[0;37m"; - c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; return """ ${c_dim}----------------------------------------------------${c_reset} ${c_green},--.${c_black}/${c_green},-.${c_reset} From ed447ba0de7a8bd585af3594aac180292b1eca4a Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 May 2019 14:44:09 +0200 Subject: [PATCH 3/6] add tests --- .travis.yml | 2 +- Jenkinsfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index b3847aab0e..77bfdcdc0a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,4 +35,4 @@ install: script: - git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data - nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link --max_memory 7.GB --max_cpus 2 -ansi-log false - - nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --sampleDir data/testdata/tiny/normal --tools HaplotypeCaller,Manta,Strelka --igenomes_base references --publishDirMode link --max_memory 7.GB --max_cpus 2 -ansi-log false + - nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --sampleDir data/testdata/tsv/tiny-manta.tsv --tools HaploTypeCaller,Manta,Strelka,MuTecT2,FreeBayes --igenomes_base references --publishDirMode link --max_memory 7.GB --max_cpus 2 -ansi-log false diff --git a/Jenkinsfile b/Jenkinsfile index 97c43e3384..630e2e2e88 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -27,7 +27,7 @@ pipeline { } stage('Multiple') { steps { - sh "nextflow run main.nf -profile docker --sample data/testdata/tsv/tiny-multiple.tsv --tools HaplotypeCaller,Manta,Strelka --genome smallGRCh37 --igenomes_base references --publishDirMode link -ansi-log false" + sh "nextflow run main.nf -profile docker --sample 
data/testdata/tsv/tiny-multiple.tsv --tools HaploTypeCaller,Manta,Strelka,MuTecT2,FreeBayes --genome smallGRCh37 --igenomes_base references --publishDirMode link -ansi-log false" sh "rm -rf work/ .nextflow* results/" } } From d93b562e9128cbecc2414320ffb16531c406a77c Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 May 2019 14:44:37 +0200 Subject: [PATCH 4/6] add Manta, Strelka, StrelkaBP, MuTecT2, Freebayes, Ascat, Controlfreec --- main.nf | 606 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 566 insertions(+), 40 deletions(-) diff --git a/main.nf b/main.nf index 596385569f..39fb4e8804 100644 --- a/main.nf +++ b/main.nf @@ -46,6 +46,7 @@ def helpMessage() { --step Specify starting step Available: Mapping, Recalibrate, VariantCalling Default: Mapping + --strelkaBP Use Manta candidateSmallIndels for Strelka as Best Practice --targetBED target BED file for targeted sequencing --tools Specify tools to use for variant calling Available: HaplotypeCaller, @@ -97,6 +98,7 @@ params.sample = null params.sampleDir = null params.sequencing_center = null params.step = 'mapping' +params.strelkaBP = true params.targetBED = null params.tools = null @@ -189,6 +191,7 @@ if (params.step) summary['Step'] = params.step if (params.tools) summary['Tools'] = tools.join(', ') if (params.noReports) summary['Reports'] = params.noReports if (params.noGVCF) summary['GVCF'] = params.noGVCF +if (params.strelkaBP) summary['Strelka BP'] = params.strelkaBP if (params.sequencing_center) summary['Sequencing Center'] = params.sequencing_center summary['Nucleotides/s'] = params.nucleotidesPerSecond summary['Output dir'] = params.outdir @@ -516,7 +519,7 @@ bedIntervals = bedIntervals bedIntervals = bedIntervals.dump(tag:'bedintervals') -(bedIntervalsBR, bedIntervalsHC) = bedIntervals.into(2) +(bedIntervalsBR, bedIntervalsHC, bedIntervalsForMpileup, bedIntervals) = bedIntervals.into(4) bamForBaseRecalibrator = mdBam.combine(bedIntervalsBR) @@ -724,12 +727,12 @@ recalibratedBam = recalibratedBam.dump(tag:'BAM') // Manta will be run in Germline mode, or in Tumor mode depending on status // HaplotypeCaller and Strelka will be run for Normal and Tumor samples -(bamsForSingleManta, bamsForSingleStrelka, recalibratedBam) = recalibratedBam.into(3) +(bamsForSingleManta, bamsForSingleStrelka, recalibratedBamTemp, recalibratedBam) = recalibratedBam.into(4) // To speed Variant Callers up we are chopping the reference into smaller pieces // Do variant calling by this intervals, and re-merge the VCFs -bamsForHC = recalibratedBam.combine(bedIntervalsHC) +bamsForHC = recalibratedBamTemp.combine(bedIntervalsHC) process RunHaplotypecaller { tag {idSample + "-" + intervalBed.baseName} @@ -745,8 +748,8 @@ process RunHaplotypecaller { ]) output: - set val("HaplotypeCallerGVCF"), idPatient, status, idSample, file("${intervalBed.baseName}_${idSample}.g.vcf") into hcGenomicVCF - set idPatient, status, idSample, file(intervalBed), file("${intervalBed.baseName}_${idSample}.g.vcf") into vcfsToGenotype + set val("HaplotypeCallerGVCF"), idPatient, idSample, file("${intervalBed.baseName}_${idSample}.g.vcf") into hcGenomicVCF + set idPatient, idSample, file(intervalBed), file("${intervalBed.baseName}_${idSample}.g.vcf") into vcfsToGenotype when: 'haplotypecaller' in tools @@ -763,7 +766,7 @@ process RunHaplotypecaller { """ } -hcGenomicVCF = hcGenomicVCF.groupTuple(by:[0,1,2,3]) +hcGenomicVCF = hcGenomicVCF.groupTuple(by:[0,1,2]) if (params.noGVCF) hcGenomicVCF.close() @@ -771,7 +774,7 @@ process RunGenotypeGVCFs { tag {idSample 
+ "-" + intervalBed.baseName} input: - set idPatient, status, idSample, file(intervalBed), file(gvcf) from vcfsToGenotype + set idPatient, idSample, file(intervalBed), file(gvcf) from vcfsToGenotype set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex) from Channel.value([ referenceMap.genomeFile, referenceMap.genomeIndex, @@ -781,7 +784,7 @@ process RunGenotypeGVCFs { ]) output: - set val("HaplotypeCaller"), idPatient, status, idSample, file("${intervalBed.baseName}_${idSample}.vcf") into hcGenotypedVCF + set val("HaplotypeCaller"), idPatient, idSample, file("${intervalBed.baseName}_${idSample}.vcf") into hcGenotypedVCF when: 'haplotypecaller' in tools @@ -801,40 +804,11 @@ process RunGenotypeGVCFs { """ } -hcGenotypedVCF = hcGenotypedVCF.groupTuple(by:[0,1,2,3]) +hcGenotypedVCF = hcGenotypedVCF.groupTuple(by:[0,1,2]) // we are merging the VCFs that are called separatelly for different intervals // so we can have a single sorted VCF containing all the calls for a given caller -vcfsToMerge = hcGenomicVCF.mix(hcGenotypedVCF) - -vcfsToMerge = vcfsToMerge.dump(tag:'VCFsToMerge') - -process ConcatVCF { - tag {variantCaller + "-" + idSample} - - publishDir "${params.outdir}/VariantCalling/${idSample}/${"$variantCaller"}", mode: params.publishDirMode - - input: - set variantCaller, idPatient, status, idSample, file(vcFiles) from vcfsToMerge - file(genomeIndex) from Channel.value(referenceMap.genomeIndex) - file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") - - output: - // we have this funny *_* pattern to avoid copying the raw calls to publishdir - set variantCaller, idPatient, status, idSample, file("*_*.vcf.gz"), file("*_*.vcf.gz.tbi") into vcfConcatenated - - when: 'haplotypecaller' in tools - - script: - if (variantCaller == 'HaplotypeCaller') outputFile = "${variantCaller}_${idSample}.vcf" - else if (variantCaller == 'HaplotypeCallerGVCF') outputFile = "haplotypecaller_${idSample}.g.vcf" - options = params.targetBED ? "-t ${targetBED}" : "" - """ - concatenateVCFs.sh -i ${genomeIndex} -c ${task.cpus} -o ${outputFile} ${options} - """ -} - process RunSingleStrelka { tag {idSample} @@ -924,9 +898,537 @@ process RunSingleManta { singleMantaOutput = singleMantaOutput.dump(tag:'Single Manta') +/* +======================================================================================== + SOMATIC VARIANT CALLING +======================================================================================== +*/ + +// separate recalibrateBams by status +bamsNormal = Channel.create() +bamsTumor = Channel.create() + +recalibratedBam + .choice(bamsTumor, bamsNormal) {it[1] == 0 ? 
1 : 0} + +// Ascat, Control-FREEC, Manta Tumor-only SV +bamsForAscat = Channel.create() +bamsForMpileup = Channel.create() +bamsForSingleManta = Channel.create() + +(bamsTumorTemp, bamsTumor) = bamsTumor.into(2) +(bamsNormalTemp, bamsNormal) = bamsNormal.into(2) +(bamsForAscat, bamsForMpileup, bamsForSingleManta) = bamsNormalTemp.mix(bamsTumorTemp).into(3) + +// Removing status because not relevant anymore +bamsNormal = bamsNormal.map { idPatient, status, idSample, bam, bai -> [idPatient, idSample, bam, bai] } +bamsTumor = bamsTumor.map { idPatient, status, idSample, bam, bai -> [idPatient, idSample, bam, bai] } + +bamsAll = bamsNormal.join(bamsTumor) + +// Manta and Strelka +(bamsForManta, bamsForStrelka, bamsForStrelkaBP, bamsAll) = bamsAll.into(4) + +bamsTumorNormalIntervals = bamsAll.spread(bedIntervals) +bamsForMpileup = bamsForMpileup.spread(bedIntervalsForMpileup) + +// MuTect2, FreeBayes +( bamsFMT2, bamsFFB) = bamsTumorNormalIntervals.into(3) + +// This will give as a list of unfiltered calls for MuTect2. +process RunMutect2 { + tag {idSampleTumor + "_vs_" + idSampleNormal + "-" + intervalBed.baseName} + + input: + set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from bamsFMT2 + set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.genomeDict, + referenceMap.dbsnp, + referenceMap.dbsnpIndex + ]) + + output: + set val("MuTect2"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into mutect2Output + + when: 'mutect2' in tools + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()}g" \ + Mutect2 \ + -R ${genomeFile}\ + -I ${bamTumor} -tumor ${idSampleTumor} \ + -I ${bamNormal} -normal ${idSampleNormal} \ + -L ${intervalBed} \ + -O ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf + """ +} + +mutect2Output = mutect2Output.groupTuple(by:[0,1,2,3]) + +process RunFreeBayes { + tag {idSampleTumor + "_vs_" + idSampleNormal + "-" + intervalBed.baseName} + + input: + set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from bamsFFB + file(genomeFile) from Channel.value(referenceMap.genomeFile) + file(genomeIndex) from Channel.value(referenceMap.genomeIndex) + + output: + set val("FreeBayes"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into freebayesOutput + + when: 'freebayes' in tools + + script: + """ + freebayes \ + -f ${genomeFile} \ + --pooled-continuous \ + --pooled-discrete \ + --genotype-qualities \ + --report-genotype-likelihood-max \ + --allele-balance-priors-off \ + --min-alternate-fraction 0.03 \ + --min-repeat-entropy 1 \ + --min-alternate-count 2 \ + -t ${intervalBed} \ + ${bamTumor} \ + ${bamNormal} > ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf + """ +} + +freebayesOutput = freebayesOutput.groupTuple(by:[0,1,2,3]) + +vcfsToMerge = mutect2Output.mix(freebayesOutput, hcGenotypedVCF) + +vcfsToMerge = vcfsToMerge.dump(tag:'VCF to merge') + +process ConcatVCF { + tag {variantCaller + "-" + idSample} + + publishDir "${params.outdir}/VariantCalling/${idSample}/${"$variantCaller"}", mode: params.publishDirMode + + input: + set variantCaller, idPatient, idSample, file(vcFiles) from 
vcfsToMerge + file(genomeIndex) from Channel.value(referenceMap.genomeIndex) + file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") + + output: + // we have this funny *_* pattern to avoid copying the raw calls to publishdir + set variantCaller, idPatient, idSample, file("*_*.vcf.gz"), file("*_*.vcf.gz.tbi") into vcfConcatenated + + when: ('haplotypecaller' in tools || 'mutect2' in tools || 'freebayes' in tools) + + script: + if (variantCaller == 'HaplotypeCallerGVCF') outputFile = "haplotypecaller_${idSample}.g.vcf" + else outputFile = "${variantCaller}_${idSample}.vcf" + + options = params.targetBED ? "-t ${targetBED}" : "" + """ + concatenateVCFs.sh -i ${genomeIndex} -c ${task.cpus} -o ${outputFile} ${options} + """ +} + +vcfConcatenated = vcfConcatenated.dump(tag:'VCF') + +process RunStrelka { + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Strelka", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForStrelka + file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") + set file(genomeFile), file(genomeIndex), file(genomeDict) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.genomeDict + ]) + + output: + set val("Strelka"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf.gz"), file("*.vcf.gz.tbi") into strelkaOutput + + when: 'strelka' in tools + + script: + beforeScript = params.targetBED ? "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" + options = params.targetBED ? "--exome --callRegions call_targets.bed.gz" : "" + """ + ${beforeScript} + configureStrelkaSomaticWorkflow.py \ + --tumor ${bamTumor} \ + --normal ${bamNormal} \ + --referenceFasta ${genomeFile} \ + ${options} \ + --runDir Strelka + + python Strelka/runWorkflow.py -m local -j ${task.cpus} + mv Strelka/results/variants/somatic.indels.vcf.gz Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz + mv Strelka/results/variants/somatic.indels.vcf.gz.tbi Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz.tbi + mv Strelka/results/variants/somatic.snvs.vcf.gz Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz + mv Strelka/results/variants/somatic.snvs.vcf.gz.tbi Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz.tbi + """ +} + +strelkaOutput = strelkaOutput.dump(tag:'Strelka') + +process RunManta { + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Manta", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForManta + file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") + set file(genomeFile), file(genomeIndex) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex + ]) + + output: + set val("Manta"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf.gz"), file("*.vcf.gz.tbi") into mantaOutput + set idPatient, idSampleNormal, idSampleTumor, file("*.candidateSmallIndels.vcf.gz"), file("*.candidateSmallIndels.vcf.gz.tbi") into mantaToStrelka + + when: 'manta' in tools + + script: + beforeScript = params.targetBED ? 
"bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" + options = params.targetBED ? "--exome --callRegions call_targets.bed.gz" : "" + """ + ${beforeScript} + configManta.py \ + --normalBam ${bamNormal} \ + --tumorBam ${bamTumor} \ + --reference ${genomeFile} \ + ${options} \ + --runDir Manta + + python Manta/runWorkflow.py -m local -j ${task.cpus} + + mv Manta/results/variants/candidateSmallIndels.vcf.gz \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.candidateSmallIndels.vcf.gz + mv Manta/results/variants/candidateSmallIndels.vcf.gz.tbi \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.candidateSmallIndels.vcf.gz.tbi + mv Manta/results/variants/candidateSV.vcf.gz \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.candidateSV.vcf.gz + mv Manta/results/variants/candidateSV.vcf.gz.tbi \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.candidateSV.vcf.gz.tbi + mv Manta/results/variants/diploidSV.vcf.gz \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.diploidSV.vcf.gz + mv Manta/results/variants/diploidSV.vcf.gz.tbi \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.diploidSV.vcf.gz.tbi + mv Manta/results/variants/somaticSV.vcf.gz \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.somaticSV.vcf.gz + mv Manta/results/variants/somaticSV.vcf.gz.tbi \ + Manta_${idSampleTumor}_vs_${idSampleNormal}.somaticSV.vcf.gz.tbi + """ +} + +mantaOutput = mantaOutput.dump(tag:'Manta') + +bamsForStrelkaBP = bamsForStrelkaBP.map { + idPatientNormal, idSampleNormal, bamNormal, baiNormal, idSampleTumor, bamTumor, baiTumor -> + [idPatientNormal, idSampleNormal, idSampleTumor, bamNormal, baiNormal, bamTumor, baiTumor] +}.join(mantaToStrelka, by:[0,1,2]).map { + idPatientNormal, idSampleNormal, idSampleTumor, bamNormal, baiNormal, bamTumor, baiTumor, mantaCSI, mantaCSIi -> + [idPatientNormal, idSampleNormal, bamNormal, baiNormal, idSampleTumor, bamTumor, baiTumor, mantaCSI, mantaCSIi] +} + +process RunStrelkaBP { + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Strelka", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(mantaCSI), file(mantaCSIi) from bamsForStrelkaBP + file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") + set file(genomeFile), file(genomeIndex), file(genomeDict) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.genomeDict + ]) + + output: + set val("Strelka"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf.gz"), file("*.vcf.gz.tbi") into strelkaBPOutput + + when: 'strelka' in tools && 'manta' in tools && params.strelkaBP + + script: + beforeScript = params.targetBED ? "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" + options = params.targetBED ? 
"--exome --callRegions call_targets.bed.gz" : "" + """ + ${beforeScript} + configureStrelkaSomaticWorkflow.py \ + --tumor ${bamTumor} \ + --normal ${bamNormal} \ + --referenceFasta ${genomeFile} \ + --indelCandidates ${mantaCSI} \ + ${options} \ + --runDir Strelka + + python Strelka/runWorkflow.py -m local -j ${task.cpus} + + mv Strelka/results/variants/somatic.indels.vcf.gz \ + StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz + mv Strelka/results/variants/somatic.indels.vcf.gz.tbi \ + StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz.tbi + mv Strelka/results/variants/somatic.snvs.vcf.gz \ + StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz + mv Strelka/results/variants/somatic.snvs.vcf.gz.tbi \ + StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz.tbi + """ +} + +strelkaBPOutput = strelkaBPOutput.dump(tag:'Strelka BP') + +// Run commands and code from Malin Larsson +// Based on Jesper Eisfeldt's code +process RunAlleleCount { + tag {idSample} + + input: + set idPatient, status, idSample, file(bam), file(bai) from bamsForAscat + set file(acLoci), file(genomeFile), file(genomeIndex), file(genomeDict) from Channel.value([ + referenceMap.acLoci, + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.genomeDict + ]) + + output: + set idPatient, status, idSample, file("${idSample}.alleleCount") into alleleCountOutput + + when: 'ascat' in tools + + script: + """ + alleleCounter \ + -l ${acLoci} \ + -r ${genomeFile} \ + -b ${bam} \ + -o ${idSample}.alleleCount; + """ +} + +alleleCountNormal = Channel.create() +alleleCountTumor = Channel.create() + +alleleCountOutput + .choice(alleleCountTumor, alleleCountNormal) {it[1] == 0 ? 1 : 0} + +alleleCountOutput = alleleCountNormal.combine(alleleCountTumor) + +alleleCountOutput = alleleCountOutput.map { + idPatientNormal, statusNormal, idSampleNormal, alleleCountNormal, + idPatientTumor, statusTumor, idSampleTumor, alleleCountTumor -> + [idPatientNormal, idSampleNormal, idSampleTumor, alleleCountNormal, alleleCountTumor] +} + +// R script from Malin Larssons bitbucket repo: +// https://bitbucket.org/malinlarsson/somatic_wgs_pipeline +process RunConvertAlleleCounts { + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/ASCAT", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, idSampleTumor, file(alleleCountNormal), file(alleleCountTumor) from alleleCountOutput + + output: + set idPatient, idSampleNormal, idSampleTumor, file("${idSampleNormal}.BAF"), file("${idSampleNormal}.LogR"), file("${idSampleTumor}.BAF"), file("${idSampleTumor}.LogR") into convertAlleleCountsOutput + + when: 'ascat' in tools + + script: + gender = patientGenders[idPatient] + """ + convertAlleleCounts.r ${idSampleTumor} ${alleleCountTumor} ${idSampleNormal} ${alleleCountNormal} ${gender} + """ +} + +// R scripts from Malin Larssons bitbucket repo: +// https://bitbucket.org/malinlarsson/somatic_wgs_pipeline +process RunAscat { + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/ASCAT", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, idSampleTumor, file(bafNormal), file(logrNormal), file(bafTumor), file(logrTumor) from convertAlleleCountsOutput + file(acLociGC) from Channel.value([referenceMap.acLociGC]) + + output: + set val("ASCAT"), idPatient, idSampleNormal, idSampleTumor, 
file("${idSampleTumor}.*.{png,txt}") into ascatOutput + + when: 'ascat' in tools + + script: + """ + # get rid of "chr" string if there is any + for f in *BAF *LogR; do sed 's/chr//g' \$f > tmpFile; mv tmpFile \$f;done + run_ascat.r ${bafTumor} ${logrTumor} ${bafNormal} ${logrNormal} ${idSampleTumor} ${baseDir} ${acLociGC} + """ +} + +ascatOutput.dump(tag:'ASCAT') + +process RunMpileup { + tag {idSample + "-" + intervalBed.baseName} + + input: + set idPatient, status, idSample, file(bam), file(bai), file(intervalBed) from bamsForMpileup + set file(genomeFile), file(genomeIndex) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex + ]) + + output: + set idPatient, status, idSample, file("${idSample}_${intervalBed.baseName}.pileup.gz") into mpileupToMerge + + when: ('controlfreec' in tools || 'mpileup' in tools) + + script: + """ + samtools mpileup \ + -f ${genomeFile} ${bam} \ + -l ${intervalBed} \ + | bgzip --threads ${task.cpus} -c > ${idSample}_${intervalBed.baseName}.pileup.gz + """ +} + +mpileupToMerge = mpileupToMerge.groupTuple(by:[0,1,2]) + +process MergeMpileup { + tag {idSample} + + publishDir params.outdir, mode: params.publishDirMode, saveAs: { it == "${idSample}.pileup.gz" ? "VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/mpileup/${it}" : '' } + + input: + set idPatient, status, idSample, file(mpileup) from mpileupToMerge + + output: + set idPatient, status, idSample, file("${idSample}.pileup.gz") into mpileupOutput + + when: ('controlfreec' in tools || 'mpileup' in tools) + + script: + """ + for i in `ls -1v *.pileup.gz`; + do zcat \$i >> ${idSample}.pileup + done + bgzip --threads ${task.cpus} -c ${idSample}.pileup > ${idSample}.pileup.gz + rm ${idSample}.pileup + """ +} + +mpileupOutput = mpileupOutput.dump(tag:'mpileup') + +mpileupNormal = Channel.create() +mpileupTumor = Channel.create() + +mpileupOutput + .choice(mpileupTumor, mpileupNormal) {it[1] == 0 ? 
1 : 0} + +mpileupOutput = mpileupNormal.combine(mpileupTumor) + +mpileupOutput = mpileupOutput.map { + idPatientNormal, statusNormal, idSampleNormal, mpileupNormal, + idPatientTumor, statusTumor, idSampleTumor, mpileupTumor -> + [idPatientNormal, idSampleNormal, idSampleTumor, mpileupNormal, mpileupTumor] +} + +process RunControlFreec { + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/controlFREEC", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, idSampleTumor, file(mpileupNormal), file(mpileupTumor) from mpileupOutput + set file(genomeFile), file(genomeIndex), file(dbsnp), file(dbsnpIndex), file(chrDir), file(chrLength) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.dbsnp, + referenceMap.dbsnpIndex, + referenceMap.chrDir, + referenceMap.chrLength + ]) + + output: + set idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}.pileup.gz_CNVs"), file("${idSampleTumor}.pileup.gz_ratio.txt"), file("${idSampleTumor}.pileup.gz_normal_CNVs"), file("${idSampleTumor}.pileup.gz_normal_ratio.txt"), file("${idSampleTumor}.pileup.gz_BAF.txt"), file("${idSampleNormal}.pileup.gz_BAF.txt") into controlFreecOutputVisualization + set file("*.pileup.gz*"), file("${idSampleTumor}_vs_${idSampleNormal}.config.txt") into controlFreecOutput + + when: 'controlfreec' in tools + + script: + config = "${idSampleTumor}_vs_${idSampleNormal}.config.txt" + gender = patientGenders[idPatient] + """ + touch ${config} + echo "[general]" >> ${config} + echo "BedGraphOutput = TRUE" >> ${config} + echo "chrFiles = \${PWD}/${referenceMap.chrDir.fileName}" >> ${config} + echo "chrLenFile = \${PWD}/${referenceMap.chrLength.fileName}" >> ${config} + echo "coefficientOfVariation = 0.05" >> ${config} + echo "contaminationAdjustment = TRUE" >> ${config} + echo "forceGCcontentNormalization = 0" >> ${config} + echo "maxThreads = ${task.cpus}" >> ${config} + echo "minimalSubclonePresence = 20" >> ${config} + echo "ploidy = 2,3,4" >> ${config} + echo "sex = ${gender}" >> ${config} + echo "window = 50000" >> ${config} + echo "" >> ${config} + + echo "[control]" >> ${config} + echo "inputFormat = pileup" >> ${config} + echo "mateFile = \${PWD}/${mpileupNormal}" >> ${config} + echo "mateOrientation = FR" >> ${config} + echo "" >> ${config} + + echo "[sample]" >> ${config} + echo "inputFormat = pileup" >> ${config} + echo "mateFile = \${PWD}/${mpileupTumor}" >> ${config} + echo "mateOrientation = FR" >> ${config} + echo "" >> ${config} + + echo "[BAF]" >> ${config} + echo "SNPfile = ${referenceMap.dbsnp.fileName}" >> ${config} + + freec -conf ${config} + """ +} + +process RunControlFreecVisualization { + + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/controlFREEC", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, idSampleTumor, file(cnvTumor), file(ratioTumor), file(cnvNormal), file(ratioNormal), file(bafTumor), file(bafNormal) from controlFreecOutputVisualization + + output: + set file("*.txt"), file("*.png"), file("*.bed") into controlFreecOutputFinal + + when: 'controlfreec' in tools + + """ + cat /opt/conda/envs/sarek-2.3/bin/assess_significance.R | R --slave --args ${cnvTumor} ${ratioTumor} + cat /opt/conda/envs/sarek-2.3/bin/assess_significance.R | R --slave --args ${cnvNormal} ${ratioNormal} + cat /opt/conda/envs/sarek-2.3/bin/makeGraph.R | R --slave --args 2 
${ratioTumor} ${bafTumor} + cat /opt/conda/envs/sarek-2.3/bin/makeGraph.R | R --slave --args 2 ${ratioNormal} ${bafNormal} + perl /opt/conda/envs/sarek-2.3/bin/freec2bed.pl -f ${ratioTumor} > ${idSampleTumor}.bed + perl /opt/conda/envs/sarek-2.3/bin/freec2bed.pl -f ${ratioNormal} > ${idSampleNormal}.bed + """ +} + +(strelkaIndels, strelkaSNVS) = strelkaOutput.into(2) +(mantaSomaticSV, mantaDiploidSV) = mantaOutput.into(2) + vcfForQC = Channel.empty().mix( vcfConcatenated.map { - variantcaller, idPatient, status, idSample, vcf, tbi -> + variantcaller, idPatient, idSample, vcf, tbi -> [variantcaller, idPatient, idSample, vcf] }, singleStrelkaOutput.map { @@ -936,6 +1438,22 @@ vcfForQC = Channel.empty().mix( singleMantaOutput.map { variantcaller, idPatient, idSample, vcf, tbi -> [variantcaller, idPatient, idSample, vcf[2]] + }, + mantaDiploidSV.map { + variantcaller, idPatient, idSample, vcf, tbi -> + [variantcaller, idPatient, idSample, vcf[2]] + }, + mantaSomaticSV.map { + variantcaller, idPatient, idSample, vcf, tbi -> + [variantcaller, idPatient, idSample, vcf[3]] + }, + strelkaIndels.map { + variantcaller, idPatient, idSample, vcf, tbi -> + [variantcaller, idPatient, idSample, vcf[0]] + }, + strelkaSNVS.map { + variantcaller, idPatient, idSample, vcf, tbi -> + [variantcaller, idPatient, idSample, vcf[1]] }) (vcfForBCFtools, vcfForVCFtools) = vcfForQC.into(2) @@ -1234,13 +1752,19 @@ def defineReferenceMap(step, tools) { 'knownIndelsIndex' : checkParamReturnFile("knownIndelsIndex") ) } + if ('controlfreec' in tools) { + referenceMap.putAll( + 'chrDir' : checkParamReturnFile("chrDir"), + 'chrLength' : checkParamReturnFile("chrLength") + ) + } if ('ascat' in tools) { referenceMap.putAll( 'acLoci' : checkParamReturnFile("acLoci"), 'acLociGC' : checkParamReturnFile("acLociGC") ) } - if ('mapping' in step || 'haplotypecaller' in tools || 'mutect2' in tools) { + if ('mapping' in step || 'haplotypecaller' in tools || 'mutect2' in tools || 'controlfreec' in tools) { referenceMap.putAll( 'dbsnp' : checkParamReturnFile("dbsnp"), 'dbsnpIndex' : checkParamReturnFile("dbsnpIndex") @@ -1263,9 +1787,11 @@ def defineStepList() { def defineToolList() { return [ 'ascat', + 'controlfreec', 'freebayes', 'haplotypecaller', 'manta', + 'mpileup', 'mutect2', 'strelka' ] From 45928fc0316a8758e7f437cc207ada9ce72567d0 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 May 2019 14:47:02 +0200 Subject: [PATCH 5/6] update docs --- main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 39fb4e8804..216f4070d6 100644 --- a/main.nf +++ b/main.nf @@ -49,7 +49,8 @@ def helpMessage() { --strelkaBP Use Manta candidateSmallIndels for Strelka as Best Practice --targetBED target BED file for targeted sequencing --tools Specify tools to use for variant calling - Available: HaplotypeCaller, + Available: ASCAT, ControlFREEC, FreeBayes, HaplotypeCaller + Manta, mpileup, MuTect2, Strelka References If not specified in the configuration file or you wish to overwrite any of the references. 
    --acLoci                      acLoci file

From a1a90141ee723b39c0741d3572f526c92d4a47f2 Mon Sep 17 00:00:00 2001
From: MaxUlysse
Date: Fri, 3 May 2019 14:51:48 +0200
Subject: [PATCH 6/6] fix travis tests

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 77bfdcdc0a..ee7db79aa5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,4 +35,4 @@ install:
 script:
   - git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data
   - nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile docker --genome smallGRCh37 --refdir data/reference --outdir references --publishDirMode link --max_memory 7.GB --max_cpus 2 -ansi-log false
-  - nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --sampleDir data/testdata/tsv/tiny-manta.tsv --tools HaploTypeCaller,Manta,Strelka,MuTecT2,FreeBayes --igenomes_base references --publishDirMode link --max_memory 7.GB --max_cpus 2 -ansi-log false
+  - nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile docker --genome smallGRCh37 --sample data/testdata/tsv/tiny-manta.tsv --tools HaploTypeCaller,Manta,Strelka,MuTecT2,FreeBayes --igenomes_base references --publishDirMode link --max_memory 7.GB --max_cpus 2 -ansi-log false
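
For reference, the combined effect of this series can be exercised in a single run. The sketch below is only illustrative and mirrors the Travis and Jenkins test commands in the patches above: it assumes the smallGRCh37 references were already built with build.nf into references/ and that the tumour/normal TSV from the sarek branch of nf-core/test-datasets is available locally; tool names appear to be matched case-insensitively (the when: blocks compare lowercase names), so the exact spellings are cosmetic.

# Illustrative invocation only, not part of the patch series; paths follow the CI tests above.
nextflow run main.nf -profile docker \
    --genome smallGRCh37 \
    --sample data/testdata/tsv/tiny-manta.tsv \
    --tools HaplotypeCaller,Manta,Strelka,Mutect2,FreeBayes \
    --igenomes_base references \
    --publishDirMode link \
    -ansi-log false

Because params.strelkaBP defaults to true, selecting both Manta and Strelka in --tools also triggers the RunStrelkaBP process added in PATCH 4, which feeds Manta's candidateSmallIndels into the Strelka somatic workflow.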