qc.go
package main

import (
    "fmt"
    "log"
    "path/filepath"
    "regexp"
    "strconv"
    "strings"

    "github.com/liserjrqlxue/goUtil/fmtUtil"
    "github.com/liserjrqlxue/goUtil/osUtil"
    "github.com/liserjrqlxue/goUtil/simpleUtil"
    "github.com/liserjrqlxue/goUtil/textUtil"
)
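
// loadFilterStat aggregates the comma-separated filter statistic files: read counts are
// summed, GC/Q20/Q30 percentages are weighted by read count, and the resulting averages
// plus the low-quality read ratio are written into quality.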
func loadFilterStat(filterStat string, quality map[string]string) {
    var db = make(map[string]float64)
    filters := strings.Split(filterStat, ",")
    for _, filter := range filters {
        var fDb = simpleUtil.HandleError(textUtil.File2Map(filter, "\t", false)).(map[string]string)
        var numberOfReads = simpleUtil.HandleError(strconv.ParseFloat(fDb["Number of Reads:"], 32)).(float64)
        var GCfq1 = simpleUtil.HandleError(strconv.ParseFloat(fDb["GC(%) of fq1:"], 32)).(float64)
        var GCfq2 = simpleUtil.HandleError(strconv.ParseFloat(fDb["GC(%) of fq2:"], 32)).(float64)
        var Q20fq1 = simpleUtil.HandleError(strconv.ParseFloat(fDb["Q20(%) of fq1:"], 32)).(float64)
        var Q20fq2 = simpleUtil.HandleError(strconv.ParseFloat(fDb["Q20(%) of fq2:"], 32)).(float64)
        var Q30fq1 = simpleUtil.HandleError(strconv.ParseFloat(fDb["Q30(%) of fq1:"], 32)).(float64)
        var Q30fq2 = simpleUtil.HandleError(strconv.ParseFloat(fDb["Q30(%) of fq2:"], 32)).(float64)
        fDb["Discard Reads related to low qual:"] = strings.TrimSpace(fDb["Discard Reads related to low qual:"])
        var lowQualReads = 0.0
        if fDb["Discard Reads related to low qual:"] != "" {
            lowQualReads = simpleUtil.HandleError(strconv.ParseFloat(fDb["Discard Reads related to low qual:"], 32)).(float64)
        }
        db["numberOfReads"] += numberOfReads
        db["lowQualReads"] += lowQualReads
        db["GC"] += (GCfq1 + GCfq2) / 2 * numberOfReads
        db["Q20"] += (Q20fq1 + Q20fq2) / 2 * numberOfReads
        db["Q30"] += (Q30fq1 + Q30fq2) / 2 * numberOfReads
    }
    quality["Q20 碱基的比例"] = strconv.FormatFloat(db["Q20"]/db["numberOfReads"], 'f', 2, 32) + "%"
    quality["Q30 碱基的比例"] = strconv.FormatFloat(db["Q30"]/db["numberOfReads"], 'f', 2, 32) + "%"
    quality["测序数据的 GC 含量"] = strconv.FormatFloat(db["GC"]/db["numberOfReads"], 'f', 2, 32) + "%"
    quality["低质量 reads 比例"] = strconv.FormatFloat(db["lowQualReads"]/db["numberOfReads"], 'f', 2, 32) + "%"
}
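
// updateQC derives variant QC ratios from stats (Tier1 share, LoF and Hom shares within
// Tier1, per-chromosome homozygous ratios) and per-type variant counts, storing them in quality.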
func updateQC(stats map[string]int, quality map[string]string) {
    quality["罕见变异占比(Tier1/总)"] = fmt.Sprintf("%0.2f%%", float64(stats["Tier1"])/float64(stats["Total"])*100)
    quality["罕见烈性变异占比 in tier1"] = fmt.Sprintf("%0.2f%%", float64(stats["Tier1LoF"])/float64(stats["Tier1"])*100)
    quality["罕见纯合变异占比 in tier1"] = fmt.Sprintf("%0.2f%%", float64(stats["Tier1Hom"])/float64(stats["Tier1"])*100)
    quality["纯合变异占比 in all"] = fmt.Sprintf("%0.2f%%", float64(stats["Hom"])/float64(stats["Total"])*100)
    for _, chr := range []string{
        "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
        "14", "15", "16", "17", "18", "19", "20", "21", "22", "X",
    } {
        quality["chr"+chr+"纯合变异占比"] = fmt.Sprintf("%0.2f%%", float64(stats["Hom:chr"+chr])/float64(stats["chr"+chr])*100)
    }
    quality["SNVs_all"] = strconv.Itoa(stats["snv"])
    quality["SNVs_tier1"] = strconv.Itoa(stats["Tier1snv"])
    quality["Small insertion(包含 dup)_all"] = strconv.Itoa(stats["ins"])
    quality["Small insertion(包含 dup)_tier1"] = strconv.Itoa(stats["Tier1ins"])
    quality["Small deletion_all"] = strconv.Itoa(stats["del"])
    quality["Small deletion_tier1"] = strconv.Itoa(stats["Tier1del"])
    quality["exon CNV_all"] = strconv.Itoa(stats["exonCNV"])
    quality["exon CNV_tier1"] = strconv.Itoa(stats["Tier1exonCNV"])
    quality["large CNV_all"] = strconv.Itoa(stats["largeCNV"])
    quality["large CNV_tier1"] = strconv.Itoa(stats["Tier1largeCNV"])
}

var isSharp = regexp.MustCompile(`^#`)
var isBamPath = regexp.MustCompile(`^## Files : (\S+)`)
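
// loadQC parses each comma-separated coverage report into the quality map with the same index,
// picks up the BAM path from "## Files :" header lines (or points to the bam_chr directory for WGS),
// and merges any kinship record matching the sample ID.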
func loadQC(files, kinship string, quality []map[string]string, isWGS bool) {
    var kinshipHash = make(map[string]map[string]string)
    if kinship != "" {
        kinshipHash, _ = textUtil.File2MapMap(kinship, "样品ID", "\t", nil)
    }
    sep := "\t"
    if isWGS {
        sep = ": "
    }
    for i, path := range strings.Split(files, ",") {
        for _, line := range textUtil.File2Array(path) {
            if isSharp.MatchString(line) {
                if m := isBamPath.FindStringSubmatch(line); m != nil {
                    if osUtil.FileExists(m[1]) {
                        quality[i]["bamPath"] = m[1]
                    }
                }
            } else {
                m := strings.Split(line, sep)
                if len(m) > 1 {
                    quality[i][strings.TrimSpace(m[0])] = strings.TrimSpace(m[1])
                }
            }
        }
        if isWGS {
            absPath, e := filepath.Abs(path)
            if e == nil {
                quality[i]["bamPath"] = filepath.Join(filepath.Dir(absPath), "..", "bam_chr")
            } else {
                log.Println(e, path)
                quality[i]["bamPath"] = filepath.Join(filepath.Dir(path), "..", "bam_chr")
            }
        }
        kinshipInfo, ok := kinshipHash[quality[i]["样本编号"]]
        if ok {
            for k, v := range kinshipInfo {
                quality[i][k] = v
            }
        }
    }
}
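
// parseQC drives per-sample QC collection: coverage reports (*qc), karyotype predictions
// (*karyotype), filter statistics (*filterStat), IM QC (*imQc) and MT QC (*mtQc); when
// *wesim is set, the selected qcColumn values are also written to qcFile.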
func parseQC() {
    var karyotypeMap = make(map[string]string)
    if *karyotype != "" {
        karyotypeMap, err = textUtil.Files2Map(*karyotype, "\t", true)
        simpleUtil.CheckErr(err)
    }
    // load coverage.report
    if *qc != "" {
        loadQC(*qc, *kinship, qualitys, *wgs)
        for _, quality := range qualitys {
            for k, v := range qualityKeyMap {
                quality[k] = quality[v]
            }
            var ok bool
            quality["核型预测"], ok = karyotypeMap[quality["样本编号"]]
            if !ok {
                quality["核型预测"] = "NA"
            }
            // WGS -> WES
            var rawDataGb, ok1 = quality["原始数据产出(Gb)"]
            var rawDataMb = quality["原始数据产出(Mb)"]
            if ok1 && rawDataMb == "" {
                var rawData, e = strconv.ParseFloat(rawDataGb, 64)
                if e == nil {
                    quality["原始数据产出(Mb)"] = fmt.Sprintf("%.2f", rawData*1000)
                }
            }
            if *wesim {
                var qcArray []string
                for _, key := range qcColumn {
                    qcArray = append(qcArray, quality[key])
                }
                fmtUtil.FprintStringArray(qcFile, qcArray, "\t")
            }
        }
        if *wesim {
            simpleUtil.CheckErr(qcFile.Close())
        }
        logTime("load coverage.report")
        if *filterStat != "" {
            loadFilterStat(*filterStat, qualitys[0])
        }
        if *imQc != "" {
            parseIMQC(*imQc, qualitys)
        }
        if *mtQc != "" {
            parseMTQC(*mtQc, qualitys)
        }
    }
}
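
// parseMTQC merges each comma-separated MT QC file (tab-separated key/value pairs) into the
// quality map of the sample at the same index.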
func parseMTQC(files string, qualitys []map[string]string) {
    for i, path := range strings.Split(files, ",") {
        var qcMap, e = textUtil.File2Map(path, "\t", false)
        simpleUtil.CheckErr(e)
        for k, v := range qcMap {
            qualitys[i][k] = v
        }
    }
}
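
// parseIMQC loads IM QC tables keyed by sampleID and, for samples with a matching 样本编号,
// overwrites the Q20/Q30/GC and low-quality read metrics.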
func parseIMQC(files string, qualitys []map[string]string) {
    var imqc = make(map[string]map[string]string)
    for _, path := range strings.Split(files, ",") {
        var qcMap, _ = textUtil.File2MapMap(path, "sampleID", "\t", nil)
        for s, m := range qcMap {
            imqc[s] = m
        }
    }
    for _, quality := range qualitys {
        var sampleID = quality["样本编号"]
        var qcMap, ok = imqc[sampleID]
        if ok {
            quality["Q20 碱基的比例"] = qcMap["Q20_clean"] + "%"
            quality["Q30 碱基的比例"] = qcMap["Q30_clean"] + "%"
            quality["测序数据的 GC 含量"] = qcMap["GC_clean"] + "%"
            quality["低质量 reads 比例"] = qcMap["lowQual"] + "%"
        }
    }
}
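
// parseList registers every sample in sampleList and appends an initial quality map
// holding its 样本编号 to qualitys.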
func parseList() {
    for _, sample := range sampleList {
        sampleMap[sample] = true
        var quality = make(map[string]string)
        quality["样本编号"] = sample
        qualitys = append(qualitys, quality)
    }
}

// qc2json formats quality according to etc/quality.json.txt and writes the result as
// indented JSON to output.
func qc2json(quality map[string]string, output string) (qualityJson map[string]string) {
    var qualityJsonInfo, _ = textUtil.File2MapMap(filepath.Join(etcPath, "quality.json.txt"), "name", "\t", nil)
    qualityJson = make(map[string]string)
    for k, m := range qualityJsonInfo {
        qualityJson[k] = quality[m["describe"]]
    }
    var targetRegionSize, e = strconv.ParseFloat(qualityJson["targetRegionSize"], 64)
    if e == nil {
        qualityJson["targetRegionSize"] = fmt.Sprintf("%.0f", targetRegionSize)
    }
    for _, s := range []string{
        "targetRegionCoverage",
        "averageDepthGt4X",
        "averageDepthGt10X",
        "averageDepthGt20X",
        "averageDepthGt30X",
        "mtTargetRegionGt2000X",
    } {
        if !strings.HasSuffix(qualityJson[s], "%") {
            qualityJson[s] += "%"
        }
    }
    writeBytes(jsonMarshalIndent(qualityJson, "", " "), output)
    return
}