-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathsetupDS.sh
78 lines (65 loc) · 5.05 KB
/
setupDS.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/bin/bash
# Set up the Duplex Sequencing snakemake pipeline from the current directory.
#
# Usage: setupDS.sh <maxCores>
#   maxCores  Maximum number of cores the pipeline may use (integer >= 1).
#
# Run this from the pipeline directory: the current working directory is
# recorded as snakeDir and relative paths are resolved against it.
#
# NOTE(review): this header used to say the first argument is the GATK 3.8.1
# jar file, but the script only ever reads $1 as maxCores. Confirm whether a
# GATK 3.8.1 download is still a prerequisite.
set -euo pipefail

snakeDir=$(pwd)

# Give a usage message instead of bash's bare "unbound variable" error
if (( $# < 1 )); then
  echo "Usage: ${0} <maxCores>" >&2
  echo "maxCores reflects the maximum number of cores that may be used by the pipeline." >&2
  exit 1
fi
maxCores="${1}"

# Sanity check on the value of maxCores.
# Match digits first so arbitrary text never reaches arithmetic evaluation.
if ! [[ "${maxCores}" =~ ^[0-9]+$ ]]; then
  echo "maxCores reflects the maximum number of cores that may be used by the pipeline." >&2
  echo "maxCores must be an integer >= 1; got '${maxCores}'" >&2
  exit 1
fi
# Force base 10 so a leading zero (e.g. "08") is not parsed as octal
maxCores=$(( 10#${maxCores} ))
if (( maxCores < 1 )); then
  echo "maxCores reflects the maximum number of cores that may be used by the pipeline." >&2
  echo "maxCores must be an integer >= 1; ${maxCores} < 1" >&2
  exit 1
fi
# Setup test case: write the sample sheet test/testConfig.csv
echo "Creating test config file"

# Print one sample row. The six test rows differ only in the sample name,
# the blast_db column, the recovery script and the runSSCS flag.
#   $1 - sample name   $2 - blast_db value
#   $3 - recovery script   $4 - runSSCS (TRUE/FALSE)
emitTestRow() {
  local sample="$1" blastDb="$2" recovery="$3" runSSCS="$4"
  printf '%s\n' "${sample},test,test,test,test,${snakeDir}/test/testRef/testRef.fa,${snakeDir}/test/testTarget/test.bed,NONE,${blastDb},9606,testData,testSeq1.fastq.gz,testSeq2.fastq.gz,0,3,200,0.7,0.02,8,1,8,150,ANNNNNNNNAGATCGGAAGAG,7,0,0,0.1,100,1,${recovery},10,GB,GT,none,${runSSCS},0"
}

# Write the header plus all six rows through a single redirect.
writeTestConfig() {
  local blastDb="${snakeDir}/test/testBlastDb/testBlastDb"
  {
    printf '%s\n' "sample,rglb,rgpl,rgpu,rgsm,reference,target_bed,maskBed,blast_db,targetTaxonId,baseDir,in1,in2,mqFilt,minMem,maxMem,cutOff,nCutOff,umiLen,spacerLen,locLen,readLen,adapterSeq,clipBegin,clipEnd,minClonal,maxClonal,minDepth,maxNs,recovery,cluster_dist,cm_outputs,cm_sumTypes,cm_filters,runSSCS,rerun_type"
    emitTestRow test1 "${blastDb}" noRecovery.sh FALSE
    emitTestRow test2 "${blastDb}" recoverAmbig.sh FALSE
    emitTestRow test3 "${blastDb}" recoverWrongSpecies.sh FALSE
    emitTestRow test4 "${blastDb}" recoverAll.sh FALSE
    # test5 is the only row without a BLAST database
    emitTestRow test5 none noRecovery.sh FALSE
    # test6 is the only row with runSSCS enabled
    emitTestRow test6 "${blastDb}" noRecovery.sh TRUE
  } > test/testConfig.csv
}
writeTestConfig
# Set up progConfig file (DS_progConfig.yaml in the current directory)
echo "Creating progConfig file"
# Unquoted delimiter: ${maxCores} is expanded, the double quotes stay literal
cat > DS_progConfig.yaml <<EOF
samples: test/testConfig.csv
maxCores: ${maxCores}
vardict_f: ".0000001"
vardict_nmfreq: ".0000001"
vardict_r: "1"
vardict_V: "0.00000000001"
vardict_adaptor: GCTCTTCCGATCT,CTCTTCCGATCT,TCTTCCGATCT,CTTCCGATCT,TTCCGATCT,TCCGATCT,CCGATCT,CGATCT
EOF
# Run the initializeEnvs target with --use-conda and the mamba frontend,
# using ${snakeDir}/.snakemake as the conda prefix.
echo "Configuring snakemake"
# The prefix is quoted so an install path containing spaces stays one argument
snakemake --cores 1 --use-conda --conda-frontend mamba --conda-prefix "${snakeDir}/.snakemake" -- initializeEnvs
# Generate ./DS, an executable wrapper that runs the pipeline's Snakefile
# with the sample sheet given as its first argument.
# Reads the globals snakeDir and maxCores.
writeRunScript() {
  local quotedDir
  # %q shell-escapes the path so the generated script still works when the
  # install directory contains spaces or shell metacharacters; plain paths
  # come out unchanged.
  quotedDir=$(printf '%q' "${snakeDir}")
  # \$ keeps a literal $ in the generated script; everything else expands now
  cat > DS <<EOF
#!/bin/bash

# This is a run script for the DS snakemake pipeline
inConfig="\$1"
snakemake -s ${quotedDir}/Snakefile --use-conda --keep-going -j ${maxCores} --conda-prefix ${quotedDir}/.snakemake --config samples="\${inConfig}"
EOF
  chmod a+x DS
}
echo "Creating run script"
writeRunScript
# Generate ./DS-dag, an executable wrapper that pipes `snakemake --dag`
# through `dot -Tpdf` and writes <config>_dag.pdf.
# Reads the globals snakeDir and maxCores.
writeDagScript() {
  local quotedDir
  # %q shell-escapes the path so the generated script still works when the
  # install directory contains spaces or shell metacharacters; plain paths
  # come out unchanged.
  quotedDir=$(printf '%q' "${snakeDir}")
  # \$ keeps a literal $ in the generated script; everything else expands now.
  # The output file name is quoted so a config path with spaces is one target.
  cat > DS-dag <<EOF
#!/bin/bash

# This is a script to draw the DAG of the DS snakemake pipeline as a PDF
inConfig="\$1"
snakemake -s ${quotedDir}/Snakefile --use-conda -j ${maxCores} --dag --conda-prefix ${quotedDir}/.snakemake --config samples="\${inConfig}" -- | dot -Tpdf > "\${inConfig}_dag.pdf"
EOF
  chmod a+x DS-dag
}
echo "Creating dag script"
writeDagScript
# Generate ./DS-unlock, an executable wrapper that runs `snakemake --unlock`
# against the pipeline's Snakefile. Reads the global snakeDir.
writeUnlockScript() {
  local quotedDir
  # %q shell-escapes the path so the generated script still works when the
  # install directory contains spaces or shell metacharacters; plain paths
  # come out unchanged.
  quotedDir=$(printf '%q' "${snakeDir}")
  # \$ keeps a literal $ in the generated script; everything else expands now
  cat > DS-unlock <<EOF
#!/bin/bash

# This is an unlock script for the DS snakemake pipeline
# Run this if the pipeline gets stuck locked for some reason
inConfig="\$1"
snakemake --unlock -s ${quotedDir}/Snakefile --use-conda -j 1 --conda-prefix ${quotedDir}/.snakemake --config samples="\${inConfig}"
EOF
  chmod a+x DS-unlock
}
echo "Creating unlock script"
writeUnlockScript
# Generate ./DS-clean, an executable wrapper that runs ResetSnakefile with
# the sample sheet given as its first argument. Reads the global snakeDir.
writeCleanScript() {
  local quotedDir
  # %q shell-escapes the path so the generated script still works when the
  # install directory contains spaces or shell metacharacters; plain paths
  # come out unchanged.
  quotedDir=$(printf '%q' "${snakeDir}")
  # \$ keeps a literal $ in the generated script; everything else expands now
  cat > DS-clean <<EOF
#!/bin/bash

# This is a script to clean up a run directory prior to a rerun
# Run this if you need to rerun part or all of the pipeline after
# setting the 'rerun_type' column in your config file
inConfig="\$1"
snakemake -s ${quotedDir}/ResetSnakefile --use-conda -j 1 --config samples="\${inConfig}"
EOF
  chmod a+x DS-clean
}
echo "Creating rerun prep script"
writeCleanScript
echo "Done"