// dbSNPReader.cs
using System;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;
namespace Personal_Genome_Explorer
{
class dbSNPReader
{
/** The stream that the dbSNP text is read from. */
private readonly StreamReader parentStream;

/**
 * A regex that matches text like this at the start of a line: rs8896
 * Group 1 captures the rs#. Shared by all readers so it is only compiled once.
 */
private static readonly Regex snpIdRegex = new Regex("^(rs\\d+)", RegexOptions.Compiled);

/** A regex that matches either alleles or orientation declarations:
 * alleles='C/T'
 * or:
 * CTG...orient=-
 * Group 2 captures the text between the quotes of an alleles declaration,
 * group 4 captures the + or - sign of an orientation declaration.
 * Shared by all readers so it is only compiled once.
 */
private static readonly Regex snpInfoRegex = new Regex("(alleles=\'([^\'.]*)\')|(CTG.*orient=([-+]))", RegexOptions.Compiled);

/**
 * Initialization constructor.
 * inParentStream: the stream to read dbSNP text from; must not be null.
 * Throws ArgumentNullException when inParentStream is null, so that the mistake is reported
 * here rather than as a NullReferenceException on the first read.
 */
public dbSNPReader(StreamReader inParentStream)
{
    if (inParentStream == null)
    {
        throw new ArgumentNullException("inParentStream");
    }

    parentStream = inParentStream;
}
/**
 * Processes SNP orientation info from the parent stream and updates the local SNP database.
 *
 * The stream is consumed as blocks separated by empty lines. A line from which an rs# can be
 * parsed starts a block; the lines that follow, up to the next empty line, describe that SNP.
 * Blocks for SNPs that aren't in the local database are skipped without being parsed.
 *
 * cancellationToken: tested before every heading line is read, so a cancellation request
 * stops processing at the next block boundary.
 */
public void ProcessSNPOrientationInfo(CancellationToken cancellationToken)
{
    // A single loop (rather than a nested "read until end of stream" loop) keeps the
    // cancellation test on the path of every block instead of only the first one.
    while (!cancellationToken.IsCancellationRequested && !parentStream.EndOfStream)
    {
        var snpLine = parentStream.ReadLine();

        // Parse the rs# for the SNP. Lines that yield no ID are not block headings and are ignored.
        string snpId = Utilities.GetSingleRegexMatch(snpLine, snpIdRegex, null);
        if (snpId == null)
        {
            continue;
        }

        // Use one normalized key for the existence check, the read and the write-back,
        // so the three operations can never disagree about which entry they refer to.
        string snpKey = snpId.ToLowerInvariant();

        SNPInfo snpInfo;
        if (!SNPDatabaseManager.localDatabase.snpToInfoMap.TryGetValue(snpKey, out snpInfo))
        {
            // Skip SNPs that aren't in the local database.
            SkipToEndOfBlock();
            continue;
        }

        // After reading a SNP heading, read the info lines that are associated with the SNP.
        var snpOrientationInfo = ReadOrientationInfoBlock();

        // Set the orientation of this SNP in the database.
        var snpOrientation = snpOrientationInfo.GetOrientation(snpInfo);
        Debug.Assert(snpOrientation == snpInfo.orientation || snpInfo.orientation == Orientation.Unknown);
        snpInfo.orientation = snpOrientation;

        // Store the modified info back, as the original code did
        // (presumably SNPInfo is a value type, so the local is a copy - TODO confirm).
        SNPDatabaseManager.localDatabase.snpToInfoMap[snpKey] = snpInfo;
    }
}

/** Consumes lines from the parent stream up to and including the next empty line, or up to the end of the stream. */
private void SkipToEndOfBlock()
{
    while (!parentStream.EndOfStream)
    {
        if (parentStream.ReadLine() == "")
        {
            break;
        }
    }
}

/**
 * Reads info lines up to and including the next empty line (or up to the end of the stream)
 * and collects the alleles and orientation declarations found in them.
 * Returns the collected info; members for which no declaration was found keep their default values.
 */
private SNPOrientationInfo ReadOrientationInfoBlock()
{
    var snpOrientationInfo = new SNPOrientationInfo();

    while (!parentStream.EndOfStream)
    {
        var infoLine = parentStream.ReadLine();
        if (infoLine == "")
        {
            break;
        }

        var regexMatch = snpInfoRegex.Match(infoLine);
        if (!regexMatch.Success)
        {
            // Lines that declare neither alleles nor an orientation carry nothing we need.
            continue;
        }

        if (regexMatch.Groups[2].Success)
        {
            // Parse the alleles of the SNP oriented to the refSNP.
            string alleles = regexMatch.Groups[2].Value;
            snpOrientationInfo.bHasAlleleA = alleles.Contains("A");
            snpOrientationInfo.bHasAlleleT = alleles.Contains("T");
            snpOrientationInfo.bHasAlleleC = alleles.Contains("C");
            snpOrientationInfo.bHasAlleleG = alleles.Contains("G");
        }
        else
        {
            // The regex has only two alternatives, so a match without alleles is an orientation declaration.
            // Parse the orientation of the refSNP cluster on the reference human genome build.
            Debug.Assert(regexMatch.Groups[4].Success);
            snpOrientationInfo.orientation = DNA.StringToOrientation(regexMatch.Groups[4].Value);
            Debug.Assert(snpOrientationInfo.orientation != Orientation.Unknown);
        }
    }

    return snpOrientationInfo;
}
/** The orientation of a genotype relative to the reference human genome. */
struct SNPOrientationInfo
{
    /** Flags recording which of the four bases count as valid alleles for this orientation. */
    public bool bHasAlleleA;
    public bool bHasAlleleT;
    public bool bHasAlleleC;
    public bool bHasAlleleG;

    /** The orientation that the allele flags above belong to. */
    public Orientation orientation;

    /**
     * Tests whether a genotype matches the alleles in this orientation.
     * A, T, C and G are looked up in the corresponding flag; any other value counts as a match.
     */
    private bool DoesGenotypeMatch(Genotype genotype)
    {
        if (genotype == Genotype.A) { return bHasAlleleA; }
        if (genotype == Genotype.T) { return bHasAlleleT; }
        if (genotype == Genotype.C) { return bHasAlleleC; }
        if (genotype == Genotype.G) { return bHasAlleleG; }
        return true;
    }

    /** Returns the opposite of this orientation: Plus and Minus swap, anything else gives Unknown. */
    private Orientation GetOppositeOrientation()
    {
        if (orientation == Orientation.Plus)
        {
            return Orientation.Minus;
        }
        if (orientation == Orientation.Minus)
        {
            return Orientation.Plus;
        }
        return Orientation.Unknown;
    }

    /**
     * Determines the orientation of the genotypes of a SNP.
     *
     * Every genotype of the SNP is tested twice against the valid alleles: once as given and
     * once complemented. The result is this orientation when only the as-given form fits for
     * all genotypes, the opposite orientation when only the complemented form fits for all
     * genotypes, and Unknown when both forms fit or neither does.
     */
    public Orientation GetOrientation(SNPInfo snpInfo)
    {
        bool allMatchAsGiven = true;
        bool allMatchComplemented = true;

        foreach (var genotypeInfo in snpInfo.genotypes)
        {
            DiploidGenotype asGiven = genotypeInfo.genotype;
            DiploidGenotype complemented = genotypeInfo.genotype.GetComplement();

            // A single allele outside the valid set rules the respective form out for the whole SNP.
            if (!(DoesGenotypeMatch(asGiven.a) && DoesGenotypeMatch(asGiven.b)))
            {
                allMatchAsGiven = false;
            }
            if (!(DoesGenotypeMatch(complemented.a) && DoesGenotypeMatch(complemented.b)))
            {
                allMatchComplemented = false;
            }
        }

        // Both forms fit, or neither does: the alleles can't tell the two orientations apart.
        if (allMatchAsGiven == allMatchComplemented)
        {
            return Orientation.Unknown;
        }

        // Exactly one form fits: as given means the same orientation, complemented means the opposite one.
        return allMatchAsGiven ? orientation : GetOppositeOrientation();
    }
}
}
}