forked from jgurtowski/ectools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cov.py
85 lines (67 loc) · 2.2 KB
/
cov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import operator
from itertools import repeat,count,compress
#increment coverage (l)
def fillc(l,s,e):
    '''Increment the coverage counters of l over the region [s, e).

    l is a list of per-position counts; it is modified in place and
    nothing is returned. If l is shorter than e it is first padded
    with zeros up to length e.
    s and e are the start (inclusive) and end (exclusive) of the
    region the coverage should be incremented in. A negative s is
    treated as 0; an empty region (s >= e) leaves the counts unchanged.
    '''
    missing = e - len(l)
    if missing > 0:
        l.extend([0] * missing)
    # Clamp the start: a negative index would not fail, it would silently
    # address the tail of the list and corrupt unrelated positions.
    for pos in range(max(s, 0), e):
        l[pos] += 1
def pairwise(iterable,func=operator.add):
    '''Yield func(current, previous) for every pair of adjacent elements.

    iterable may be any iterable; func is called with the later element
    first and the earlier element second (default: operator.add).
    An input with fewer than two elements yields nothing.
    '''
    it = iter(iterable)
    try:
        prev = next(it)
    except StopIteration:
        # Empty input: finish cleanly. Letting StopIteration escape from a
        # generator body is turned into RuntimeError on Python 3.7+.
        return
    for el in it:
        yield func(el,prev)
        prev = el
def accumulate_mod(iterable, func=operator.add):
    '''Yield a running accumulation of iterable that restarts at every 0.

    The first element is yielded unchanged. For each later element the
    running total becomes 0 when the element equals 0, otherwise
    func(total, element) (default: operator.add). One value is yielded
    per input element; an empty input yields nothing.
    '''
    it = iter(iterable)
    try:
        total = next(it)
    except StopIteration:
        # Empty input: finish cleanly. Letting StopIteration escape from a
        # generator body is turned into RuntimeError on Python 3.7+.
        return
    yield total
    for element in it:
        total = 0 if element == 0 else func(total, element)
        yield total
def getMarkedRanges(v):
    '''Takes a list v with wanted elements marked as 1
    all other elements are 0. This function returns
    the index ranges of these 1's

    The result is a list of (start, end) tuples, one per run of
    consecutive 1's, in the order the runs occur; both indexes are
    inclusive. An empty v, or one without any 1, gives an empty list.
    '''
    ranges = []
    run = 0
    # Pair every element with its successor; the appended 0 closes a run
    # that reaches the very end of v.
    for end, (cur, nxt) in enumerate(zip(v, v[1:] + [0])):
        # Running sum of the marked elements, restarted by every 0, so it
        # holds the number of 1's in the run that is currently open.
        run = 0 if cur == 0 else run + cur
        # A step down by exactly one (1 -> 0) means 'end' is the last
        # index of a run; the run length gives its first index.
        if nxt - cur == -1:
            ranges.append((end - run + 1, end))
    return ranges
def getCoverageFromNucAlignments(alignments):
    '''Gets coverage from nucmer alignments

    alignments is an iterable of records exposing the attributes
    sstart, send and slen. Each record contributes the pair
    (sstart-1, send-1); slen of the first record is used as the
    subject length. The pairs are handed to getCoverageFromAlignments
    and its coverage list is returned.
    NOTE(review): the -1 presumably converts 1-based nucmer coordinates
    to 0 offset -- confirm against the alignment parser.

    Raises StopIteration when alignments is empty, since there is no
    record to take the subject length from.
    '''
    it = iter(alignments)
    first = next(it)
    def nucAGetter():
        yield (first.sstart-1, first.send-1)
        # Continue on the same iterator so the first record is not
        # visited a second time when alignments is a re-iterable sequence.
        for g in it:
            yield (g.sstart-1, g.send-1)
    return getCoverageFromAlignments( nucAGetter(), first.slen)
def getCoverageFromAlignments(align_tuples, subject_length):
    '''Build a per-position coverage list from alignment tuples.

    align_tuples is an iterable of two-element tuples holding the
    subject start and end positions (0 offset); a tuple whose start is
    greater than its end is put in ascending order before it is counted.
    subject_length is the initial number of positions, all starting at
    a coverage of 0.
    Returns the coverage list after every tuple was applied with fillc.
    '''
    coverage = [0] * subject_length
    for start, end in align_tuples:
        if start > end:
            start, end = end, start
        fillc(coverage, start, end)
    return coverage