-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbitscode.cpp
151 lines (138 loc) · 3.56 KB
/
bitscode.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#include "bitscode.h"
/*
 * uiReadLength must be < WORD_SIZE.
 * WORD_SIZE is 32 bp on a 32-bit machine and 64 bp on a 64-bit machine.
 * Each base is encoded into 2 bits: A -> 00, C -> 01, G -> 10 and T -> 11.
 * These two digits are stored in two separate words, to allow bitwise operations.
 * The first nucleotide is encoded as the last (least-significant) digit.
 */
/*
 * Pack an ACGT string into the two bit planes of *readsInBits.
 *
 * strRead     - characters to encode; indices 0 .. len-1 are read.
 * readsInBits - output; ub receives the high bit of each base code and
 *               lb the low bit (A=00, C=01, G=10, T=11).
 * len         - number of characters to encode.
 *
 * strRead[0] ends up in bit 0 of both words and strRead[k] in bit k.
 * Upper and lower case are accepted; any other character is encoded as A.
 */
void EncodeRead(const char * strRead, InBits * readsInBits, int len) {
    readsInBits->ub = 0;
    readsInBits->lb = 0;
    /* Walk from the last base to the first so the first base lands in bit 0. */
    for (int pos = len - 1; pos >= 0; --pos) {
        /* Open up bit 0 for the current base (a no-op while both words are 0). */
        readsInBits->ub <<= 1;
        readsInBits->lb <<= 1;
        switch (strRead[pos]) {
        case 'C':
        case 'c':
            readsInBits->lb |= 1;
            break;
        case 'G':
        case 'g':
            readsInBits->ub |= 1;
            break;
        case 'T':
        case 't':
            readsInBits->ub |= 1;
            readsInBits->lb |= 1;
            break;
        default:
            /* 'A', 'a' and every unrecognised character stay 00. */
            break;
        }
    }
}
/*
 * Print the lowest `len` bits of `word` to stdout as '0'/'1' characters,
 * least-significant bit first, with a newline before and after.
 */
void printWORD(WORD_SIZE word, SIZE_T len) {
    printf("\n");
    for (SIZE_T remaining = len; remaining > 0; remaining--) {
        if ((word & 0x01) == 0) {
            printf("0");
        } else {
            printf("1");
        }
        word >>= 1; /* bring the next bit down to position 0 */
    }
    printf("\n");
}
void DecodeRead(char * strReads, int readLen, const InBits * readsInBits) {
/* This function transfers binary format to ACGT string */
WORD_SIZE UpperBits = readsInBits->ub;
WORD_SIZE LowerBits = readsInBits->lb;
int strReadsl = 0;
for (int i = 0; i < readLen; i++) {
WORD_SIZE c = (UpperBits & 0x01) << 1 | (LowerBits & 0x01);
switch (c) {
case 0x00:
strReads[strReadsl++] = 'A';
break;
case 0x01:
strReads[strReadsl++] = 'C';
break;
case 0x02:
strReads[strReadsl++] = 'G';
break;
case 0x03:
strReads[strReadsl++] = 'T';
break;
default:
strReads[strReadsl++] = 'N';
break;
}
LowerBits >>= 1;
UpperBits >>= 1;
}
strReads[strReadsl] = 0;
}
/*
 * Decode *readsInBits into strRead (see DecodeRead) and then reverse the
 * first readLen characters in place, so the base stored in the lowest bit
 * ends up last. The terminating 0 written by DecodeRead is left where it is.
 */
void DecodeReadReverse(char * strRead, int readLen,
        const InBits * readsInBits) {
    DecodeRead(strRead, readLen, readsInBits);
    /* In-place reversal of strRead[0 .. readLen-1]. */
    int front = 0;
    int back = readLen - 1;
    while (front < back) {
        const char held = strRead[front];
        strRead[front] = strRead[back];
        strRead[back] = held;
        ++front;
        --back;
    }
}
/*
 * Return the number of 1 bits in `bits`.
 *
 * Each pass clears the lowest set bit (x & (x - 1)), so the loop runs once
 * per set bit rather than once per bit position.
 */
SIZE_T bitsSetCount(WORD_SIZE bits) {
    SIZE_T ones = 0;
    while (bits != 0) {
        bits &= bits - 1; /* drop the least-significant 1 bit */
        ++ones;
    }
    return ones;
}
/*
 * Map a 2-bit base code to its letter: 0 -> 'A', 1 -> 'C', 2 -> 'G',
 * 3 -> 'T'. Any other value yields 'A'.
 */
char getNT(const int & nt) {
    static const char letters[] = "ACGT";
    if (nt >= 0 && nt <= 3) {
        return letters[nt];
    }
    return 'A'; /* out-of-range codes fall back to 'A' */
}
/*
 * Count the base positions at which r1 and r2 differ, looking only at the
 * N lowest-order bit positions of the encoded reads (the original comment
 * describes these as "the last N bases").
 *
 * r1, r2 - encoded reads; a position is a mismatch when either the ub or
 *          the lb bit differs.
 * N      - number of positions to compare.
 *
 * Returns the number of mismatching positions. N == 0 returns 0, and
 * N >= wordSize compares the whole word. Both cases used to shift by an
 * amount outside [0, width), which is undefined behaviour.
 *
 * NOTE(review): assumes wordSize is the bit width of WORD_SIZE and that
 * SIZE_T is unsigned - confirm against bitscode.h.
 */
SIZE_T bitsStrNCompare(InBits r1, InBits r2, SIZE_T N) {
    if (N == 0) {
        return 0; /* nothing to compare; also avoids a full-width shift */
    }
    /* A set bit marks a position where the two reads disagree. */
    WORD_SIZE bits = (r1.ub ^ r2.ub) | (r1.lb ^ r2.lb);
    if (N < wordSize) {
        /* Push the positions above N off the top of the word. */
        bits <<= (wordSize - N);
    }
    return bitsSetCount(bits);
}
/*
 * Reverse the first `len` characters of strVal in place.
 * Nothing is changed when len is 1 or less.
 */
void Swap(char * strVal, int len) {
    int left = 0;
    int right = len - 1;
    /* Exchange the outermost pair and move inwards until the indices meet. */
    while (left < right) {
        const char held = strVal[right];
        strVal[right] = strVal[left];
        strVal[left] = held;
        ++left;
        --right;
    }
}
/*
 * Return the complementary base of nt (A<->T, C<->G), preserving case.
 * Any character other than a/c/g/t/A/C/G/T yields 'N'.
 */
char complimentBase(char nt) {
    /* partners[k] is the complement of bases[k]. */
    static const char bases[] = "acgtACGT";
    static const char partners[] = "tgcaTGCA";
    for (int k = 0; bases[k] != '\0'; k++) {
        if (bases[k] == nt) {
            return partners[k];
        }
    }
    return 'N';
}