-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path00-original2flat.py
59 lines (54 loc) · 2.04 KB
/
00-original2flat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
# Directory the script is started from; every data path is derived from it.
CWD = os.getcwd()
# Directory holding the original input files.
ODD = "/".join((CWD, "data", "00-original"))
# Directory receiving the flattened output files.
FDD = "/".join((CWD, "data", "01-flat"))
# (file name, shift) pairs. The shift is added to each 1-based line number
# before generate_flat_file applies its modulo-5 line selection.
FILES = [
    ("Supp-A-36630-HPRD-positive-interaction.txt", 0),
    ("Supp-B-36480-HPRD-negative-interaction.txt", 0),
    ("Supp-C-3899-HPRD-positive-interaction-below-0.25.txt", 0),
    ("Supp-D-4262-HPRD-negative-interaction-below-0.25.txt", 1),
    ("Supp-E-1882-interacting-0.5-non-interacting-0.5.txt", -1),
]
# Letters whose presence anywhere in a record causes the pair to be rejected
# (presumably non-standard amino-acid codes -- confirm against the data set).
_REJECTED_LETTERS = "BJOUXZ"
# Minimum length required of each member of a pair.
_MIN_MEMBER_LENGTH = 50


def _is_valid_record(record):
    """Return True when a "first,second" record passes every filter."""
    if any(letter in record for letter in _REJECTED_LETTERS):
        return False
    pairs = record.strip().split(',')
    return (len(pairs[0]) >= _MIN_MEMBER_LENGTH
            and len(pairs[1]) >= _MIN_MEMBER_LENGTH)


def generate_flat_file(INPUT_FILE, OUTPUT_FILE, shift):
    """Flatten a multi-line pair file into one "first,second" line per pair.

    Input lines are numbered from 1 and ``shift`` is added to each number.
    Among shifted positions >= 5, the line at position 2 (mod 5) is taken as
    the first member of a pair and the line at position 4 (mod 5) as the
    second; every other line is ignored. Both members are stripped of
    surrounding whitespace and joined with a comma.

    A pair is written only if the joined record contains none of the letters
    B, J, O, U, X, Z and both members are at least 50 characters long.
    Otherwise "skip index=<position // 5>" is printed and an empty line is
    written in place of the pair.

    Args:
        INPUT_FILE: Path of the file to read.
        OUTPUT_FILE: Path of the file to create or overwrite.
        shift: Integer offset added to the 1-based line number.

    Raises:
        ValueError: If a second-member line is reached before any
            first-member line (only possible with an unusual ``shift``).
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the placeholder newline is assumed to belong to the
    # "skip" branch -- confirm against the original file.
    # ``with`` guarantees both handles are closed even if processing fails.
    with open(INPUT_FILE) as input_file, open(OUTPUT_FILE, 'w') as output_file:
        first_member = None
        for index, line in enumerate(input_file, start=1):
            position = index + shift
            if position < 5:
                continue
            slot = position % 5
            if slot == 2:
                first_member = line.strip()
            elif slot == 4:
                if first_member is None:
                    raise ValueError(
                        "second member at line %d has no preceding first "
                        "member; check the shift value" % index
                    )
                output_line = first_member + ',' + line.strip() + '\n'
                if _is_valid_record(output_line):
                    output_file.write(output_line)
                else:
                    print("skip index="+str((index+shift)//5))
                    output_file.write('\n')
# Script entry point: flatten every file listed in FILES from ODD into FDD.
if __name__ == "__main__":
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the directory in between.
    os.makedirs(FDD, exist_ok=True)
    for FILE, SHIFT in FILES:
        print("Processing "+FILE+" ...")
        # Input and output share the same file name in different directories.
        generate_flat_file(ODD+"/"+FILE, FDD+"/"+FILE, SHIFT)
        print("Processed!")
    print("Finished!!!")