-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathhap_split.py
57 lines (46 loc) · 1.3 KB
/
hap_split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
## for splitting the haplotypes
## After AGPcorrect but prior to unloc assignment
import csv
import os

import pandas as pd
# Input AGP file, read from the current working directory.
corr = "corrected.agp"


def _read_agp(path):
    """Read a tab-separated AGP file, separating comment rows from data rows.

    Args:
        path: Path of the AGP file to read.

    Returns:
        A ``(header, rows)`` tuple. Both are lists of parsed rows, each row
        being a list of column strings. A row is treated as a header row
        when its first column contains ``#``.
    """
    header = []
    rows = []
    with open(path) as handle:
        for row in csv.reader(handle, delimiter='\t'):
            if not row:
                # csv.reader yields [] for a blank line; indexing row[0]
                # on it would raise IndexError, so skip it.
                continue
            if "#" in row[0]:
                header.append(row)
            else:
                rows.append(row)
    return header, rows


def _split_haplotypes(rows):
    """Partition AGP data rows into haplotype 1 and haplotype 2 rows.

    Rows that have a column exactly equal to ``proximity_ligation`` or
    ``Painted`` are routed by haplotype tag: a ``Hap_1`` / ``Hap_2`` column
    selects the haplotype and is remembered, and subsequent such rows
    without a tag inherit the most recently seen one. Rows of this kind
    that appear before any tag has been seen are dropped.

    All other rows are routed by their sixth column: ``H1`` as a substring
    sends the row to haplotype 1, otherwise ``H2`` sends it to haplotype 2,
    otherwise the row is dropped.
    NOTE(review): the sixth column is presumably the component/contig
    name carrying an H1/H2 marker - confirm against the upstream naming.

    Args:
        rows: Data rows as returned by ``_read_agp``.

    Returns:
        A ``(hap1_rows, hap2_rows)`` tuple of lists, in input order.

    Raises:
        IndexError: If an untagged row has fewer than six columns.
    """
    hap1_rows = []
    hap2_rows = []
    current_hap = ''
    for row in rows:
        if 'proximity_ligation' in row or 'Painted' in row:
            # An explicit tag switches the active haplotype; untagged rows
            # keep following whichever haplotype was tagged last.
            if 'Hap_1' in row:
                current_hap = 'Hap_1'
            elif 'Hap_2' in row:
                current_hap = 'Hap_2'
            if current_hap == 'Hap_1':
                hap1_rows.append(row)
            elif current_hap == 'Hap_2':
                hap2_rows.append(row)
        elif 'H1' in row[5]:
            hap1_rows.append(row)
        elif 'H2' in row[5]:
            hap2_rows.append(row)
    return hap1_rows, hap2_rows


def _write_agp(path, header, rows):
    """Write header rows followed by data rows as a tab-separated file.

    The parent directory of ``path`` is created if it does not exist.
    Lines are terminated with a bare ``\\n``.

    Args:
        path: Destination file path.
        header: Comment/header rows to write first.
        rows: Data rows to write after the header.
    """
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # newline='' disables the text layer's newline translation so the csv
    # writer alone controls line endings; lineterminator='\n' overrides the
    # csv default of '\r\n', which would otherwise produce CRLF files.
    with open(path, 'w', newline='') as handle:
        writer = csv.writer(handle, delimiter='\t', lineterminator='\n')
        writer.writerows(header)
        writer.writerows(rows)


# Read the input once, split it, and write one AGP per haplotype. Both
# outputs receive the same header rows.
header, agp_lines = _read_agp(corr)
H1_lines, H2_lines = _split_haplotypes(agp_lines)
_write_agp('Hap_1/hap.agp', header, H1_lines)
_write_agp('Hap_2/hap.agp', header, H2_lines)