forked from jgurtowski/ectools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
readlength_verror.py
37 lines (27 loc) · 923 Bytes
/
readlength_verror.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python
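##
# Scans blasr (m4) alignment output and reports, per read-length
# bucket (1000 bases wide), how much of the original read length is
# not covered by the alignments returned for each read.
#
# Prints one tab-separated line per bucket:
#   bucket index, fraction of read bases lost, number of reads
# The output is meant to be graphed by a separate tool.
##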
import sys
from itertools import starmap
from operator import div
from m4io import getAlignments, longestNonOverlapping
# Width, in bases, of each read-length bucket.
binsize = 1000


def accumulate_bins(read_alignments, bin_width=binsize):
    """Aggregate per-read alignment loss into read-length buckets.

    Args:
        read_alignments: iterable of ``(read, alignments)`` pairs, where
            ``alignments`` is a sequence of records exposing
            ``qseqlength``, ``qstart`` and ``qend``.
        bin_width: bucket width in bases; a read of length ``n`` falls
            into bucket ``n // bin_width``.

    Returns:
        dict mapping bucket index to a tuple
        ``(bases_lost_sum, read_length_sum, read_count)``. The two sums
        are floats so that the later ratio uses true division.
    """
    totals = {}
    for _read, alignments in read_alignments:
        # NOTE(review): whether getAlignments can yield a read with no
        # alignments is not visible here; skip instead of raising
        # IndexError on alignments[0].
        if not alignments:
            continue
        # The read length is taken from the first record, as before.
        read_len = alignments[0].qseqlength
        aligned = sum(a.qend - a.qstart for a in alignments)
        # Floor division keeps bucket indices integral regardless of
        # interpreter version.
        bucket = read_len // bin_width
        lost_sum, len_sum, count = totals.get(bucket, (0.0, 0.0, 0))
        totals[bucket] = (
            lost_sum + (read_len - aligned),
            len_sum + read_len,
            count + 1,
        )
    return totals


def summarize_bins(totals):
    """Turn bucket totals into ``(bucket, fraction_lost, read_count)`` rows.

    Rows are sorted by bucket index so the output order is deterministic.
    A bucket whose summed read length is zero reports a fraction of 0.0
    instead of raising ZeroDivisionError.
    """
    rows = []
    for bucket in sorted(totals):
        lost_sum, len_sum, count = totals[bucket]
        fraction = lost_sum / len_sum if len_sum else 0.0
        rows.append((bucket, fraction, count))
    return rows


def main(argv):
    """Read the m4 file named in ``argv[1]`` and print one line per bucket.

    Exits with the usage string unless exactly one argument is given.
    """
    if not len(argv) == 2:
        sys.exit("readlength_verror.py in.m4")
    totals = accumulate_bins(getAlignments(argv[1], longestNonOverlapping))
    for row in summarize_bins(totals):
        sys.stdout.write("\t".join(map(str, row)) + "\n")


if __name__ == "__main__":
    main(sys.argv)