-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathconsensus_indel.sh
executable file
·112 lines (95 loc) · 2.8 KB
/
consensus_indel.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/bin/bash
###
### generate consensus indel callset
###
# Print the command-line synopsis to stderr and terminate the script with
# exit status 1. Used for -h as well as for every argument-validation failure.
# The advertised -o default must match the initial value of `outfile`.
function usage {
    >&2 echo "usage: $0 -b [/path/to/broad.indel.vcf.gz] -d [dkfz] -m [smufin] -s [sanger] -o [outfile: defaults to consensus.indel.vcf]"
    >&2 echo " Generates consensus somatic indel VCFs for sample"
    >&2 echo " Input VCFs must be bgzip-ed and tabix-ed."
    exit 1
}
# Locations of helper scripts, model files and scratch space. Each one can be
# overridden through the matching USE_* environment variable.
readonly EXECUTABLE_PATH="${USE_EXECUTABLE_PATH:-/usr/local/bin}"
readonly MODEL_PATH="${USE_MODEL_PATH:-/usr/local/models}"
readonly TMPDIR="${USE_TMPDIR:-/tmp}"

# Output file name used when -o is not given.
outfile="consensus.indel.vcf"

# Per-caller input VCFs are frozen (readonly) as soon as they are parsed.
while getopts "b:d:m:s:o:h" opt; do
  case "${opt}" in
    h) usage ;;
    o) outfile="${OPTARG}" ;;
    b) readonly broadfile="${OPTARG}" ;;
    d) readonly dkfzfile="${OPTARG}" ;;
    m) readonly smufinfile="${OPTARG}" ;;
    s) readonly sangerfile="${OPTARG}" ;;
  esac
done
##
## Validate the command line before doing any work.
## Every failure prints a message to stderr and then calls usage, which exits.
##

# dkfz and sanger call sets are mandatory; broad and smufin are optional
if [[ -z "${dkfzfile}" || -z "${sangerfile}" ]]; then
  echo "required argument missing: need dkfz (-d) and sanger (-s) files" >&2
  usage
fi

# -o "" would leave us without an output file name
if [[ -z "${outfile}" ]]; then
  echo "Invalid empty output filename" >&2
  usage
fi

# the annotation databases are expected at this fixed location
if ! [[ -d "/dbs/annotation_databases" ]]; then
  echo "Missing directory containing needed annotations: /dbs/annotation_databases" >&2
  usage
fi

##
## Each supplied VCF must carry a .gz suffix and have a .tbi index next to it
## (a name-based check only; file contents are not inspected)
##
for vcf in "${smufinfile}" "${broadfile}" "${dkfzfile}" "${sangerfile}"; do
  # callers that were not supplied are skipped
  [[ -n "${vcf}" ]] || continue

  if ! [[ "${vcf}" == *.gz && -f "${vcf}.tbi" ]]; then
    echo "Input VCF files must be bgziped and tabixed." >&2
    usage
  fi
done
##
## Merge indel calls
##
readonly MERGED="${TMPDIR}/merged.vaf.$$.indel.vcf"
readonly ANNOTATED="${TMPDIR}/annotated.indel.$$.vcf"
"${EXECUTABLE_PATH}"/merge-one-tumour-indel.sh \
-b "${broadfile}" -d "${dkfzfile}" -m "${smufinfile}" -s "${sangerfile}" -o "$MERGED"
##
## Annotate with dbsnp, 1kgenomes, repeat_masker, and cosmic if provided
##
dbsnp_args=("${MERGED}.gz" "indel" "${ANNOTATED}")
"${EXECUTABLE_PATH}"/dbsnp_annotate_one.sh "${dbsnp_args[@]}"
rm -f "${MERGED}"
rm -f "${MERGED}.gz"
rm -f "${MERGED}.gz.tbi"
##
## Filter with consensus model
##
# Pick the stacked-logistic model that matches the callers actually present.
# The dkfz and sanger inputs are mandatory, so only broad and smufin vary:
#   broad and smufin both present -> all-four
#   broad present, smufin absent  -> no-smufin
#   broad absent                  -> no-broad
#
# Arguments:
#   $1 - path to the broad indel VCF (may be empty or not exist)
#   $2 - path to the smufin indel VCF (may be empty or not exist)
# Globals:
#   MODEL_PATH (read) - directory containing the .RData model files
# Outputs:
#   path of the selected model file on stdout
function select_model_file {
    local broad="$1"
    local smufin="$2"
    local variant

    if [[ -f "${broad}" && -f "${smufin}" ]]
    then
        variant="all-four"
    elif [[ -f "${broad}" ]]
    then
        # broad calls exist, so smufin is the caller that is missing
        variant="no-smufin"
    else
        # no broad calls available
        variant="no-broad"
    fi

    printf '%s\n' "${MODEL_PATH}/stacked-logistic-${variant}.RData"
}

readonly MODELFILE="$(select_model_file "${broadfile}" "${smufinfile}")"
# Scratch path handed to apply_model.sh; the name embeds the shell PID ($$).
readonly INTERMEDIATE="${TMPDIR}/intermediate.indel.$$.vcf"
# Threshold value passed to apply_model.sh as its last argument.
readonly MODEL_THRESHOLD=0.71

model_args=(
  "${MODELFILE}"
  "${ANNOTATED}.gz"
  "${INTERMEDIATE}"
  "${outfile}"
  "${MODEL_THRESHOLD}"
)
"${EXECUTABLE_PATH}/apply_model.sh" "${model_args[@]}"

# Remove the scratch files together with anything sharing their prefix.
# Kept as separate rm calls so the script's final status is that of the
# last removal.
for scratch in "${ANNOTATED}" "${INTERMEDIATE}"; do
  rm -f "${scratch}"*
done