-
Notifications
You must be signed in to change notification settings - Fork 0
/
make-gap-bed.awk
58 lines (56 loc) · 1.19 KB
/
make-gap-bed.awk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#### Description: Awk script to annotate gaps in fasta.
#### USAGE: awk [-v gap_size_threshold=<min_gap_length>] -f make-gap-bed.awk <path-to-fasta>
#### Options: gap_size_threshold - minimum length of a run of Ns/ns to report (inclusive). Default is 0 (every run of Ns or ns is annotated).
#### Input: fasta file
#### Output: bed-formatted stdout
#### Written by: Olga Dudchenko - olga.dudchenko@bcm.edu.
BEGIN{
# gap_size_threshold is intentionally left unset here: an unset awk variable
# compares as 0, so every run of N/n is reported by default.
# Pass a minimum gap length on the command line instead, e.g.
#   awk -v gap_size_threshold=500 -f make-gap-bed.awk <path-to-fasta>
# Uncommenting the assignment below hard-codes the threshold and overrides
# any value supplied with -v (BEGIN runs after -v assignments).
# gap_size_threshold=500
}
# Main rule: runs once per input line of the FASTA file.
#
# State carried between lines (all globals):
#   scaf_id  name of the current sequence (first word of the header, minus ">")
#   pos      number of bases of the current sequence consumed so far
#   start    1-based position of the first N/n of the open gap, 0 if none is open
#   counter  length of the open gap so far (a gap may span several lines)
#
# Each finished gap whose length is >= gap_size_threshold is printed as
#   scaf_id  start-1  start+counter-1
# i.e. a 0-based start and an exclusive (1-based) end, as in BED.
{
    # Tolerate CRLF files: a trailing carriage return is not a base and must
    # neither advance pos nor end up in scaf_id.
    sub(/\r$/, "")

    if ($0 ~ /^>/) {                    # header line: a new sequence begins
        close_gap()                     # report a gap that ran to the end of the previous sequence
        scaf_id = substr($1, 2)
        pos = 0
        next
    }

    # Walk the line run by run instead of base by base. match() finds the
    # leftmost-longest run of N/n, so whatever follows a run on the same line
    # starts with a real base. A blank line never enters the loop and therefore
    # neither moves pos nor interrupts an open gap.
    line = $0
    while (line != "") {
        if (match(line, /[Nn]+/)) {
            if (RSTART > 1)             # real bases precede this run: a gap carried over ends first
                close_gap()
            if (!start)
                start = pos + RSTART    # 1-based position of the first N of a new gap
            counter += RLENGTH
            consumed = RSTART + RLENGTH - 1
        } else {                        # only real bases remain on this line
            close_gap()
            consumed = length(line)
        }
        pos += consumed
        line = substr(line, consumed + 1)
    }
}

# Print the currently open gap if it is long enough, then mark no gap as open.
function close_gap() {
    if (start && counter >= gap_size_threshold)
        print scaf_id, start - 1, start + counter - 1   # bed start is 0-based and bed end is 1-based
    start = 0
    counter = 0
}
END {
    # The last sequence may finish inside a run of N/n, with no following
    # base or header to trigger the report, so flush that pending gap here.
    if (start) {
        if (counter >= gap_size_threshold) {
            bed_start = start - 1            # BED start is 0-based
            bed_end = bed_start + counter    # BED end is exclusive (1-based)
            print scaf_id, bed_start, bed_end
        }
    }
}