-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconvert_dataset.py
64 lines (52 loc) · 1.64 KB
/
convert_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""
Convert a whitespace-separated dataset file into a per-type layout.

Read the input file line by line, pull the relevant fields out of each
line, regroup them into one output line per (group, type) and append the
resulting lines to the output file.
"""

fileIn = 'datasetMarsden4-4WORDS-TEST.txt'
fileOut = 'datasetMarsden4-4WORDS-MULTI-TEST.txt'
num_types = 4
types = ['e', 't', 'a', 'o', 'i', 'n', 's', 'r' ,'h', 'l', 'd', 'c', 'u', 'm', 'f', 'p', 'g', 'w', 'y', 'b', 'v', 'k', 'x', 'j', 'q']


def _parse_records(text):
    """Return a ``[name, values]`` pair for every non-blank line of *text*.

    ``name`` is the first token with its last character removed (presumably
    a trailing separator such as ':' -- confirm against the input data).
    ``values`` is every token from index 5 onwards; tokens 1-4 are dropped.
    """
    records = []
    for raw_line in text.split('\n'):
        fields = raw_line.split()
        # Skip blank lines explicitly rather than assuming the text ends
        # with exactly one empty element: that assumption lost the last
        # record of a file without a trailing newline and crashed on any
        # other blank line.
        if not fields:
            continue
        records.append([fields[0][:-1], fields[5:]])
    return records


def _regroup(records, type_count, labels):
    """Fold each group of *type_count* records into one row per type.

    Every produced row is ``[name, label, x, y, x, y, ...]`` where ``name``
    comes from the first record of the group and the (x, y) pairs are taken
    from consecutive records of that group.

    Raises:
        ValueError: if a group has too few records or a record has too few
            values for the requested number of types.
    """
    rows = []
    for start in range(0, len(records), type_count):
        name, first_values = records[start]
        # The number of value pairs on the first record of the group decides
        # how many consecutive records are folded into each output row.
        step_count = len(first_values) // 2
        group = records[start:start + step_count]
        if len(group) < step_count:
            raise ValueError(
                'group starting at record {} ({!r}) needs {} records but '
                'only {} remain'.format(start, name, step_count, len(group))
            )
        for type_index in range(type_count):
            y_pos = type_index * 2
            x_pos = y_pos + 1
            row = [name, labels[type_index]]
            for offset, (_, values) in enumerate(group):
                if len(values) <= x_pos:
                    raise ValueError(
                        'record {} has {} values, expected at least '
                        '{}'.format(start + offset, len(values), x_pos + 1)
                    )
                #### THESE WERE ORIGINALLY GENERATED IN THE WRONG ORDER SO THIS FIXES IT
                row.append(values[x_pos])  # append x
                row.append(values[y_pos])  # append y
            rows.append(row)
    return rows


def convert_text(text, type_count=num_types, labels=types):
    """Convert the contents of an input file into the output lines.

    Args:
        text: full contents of the input file.
        type_count: number of types per record, and the number of records
            that make up one group.
        labels: label written in the second column; ``labels[k]`` is used
            for type index ``k``.

    Returns:
        A list of space-joined output lines without trailing newlines, one
        per (group, type) in input order.

    Raises:
        ValueError: if the input cannot be split into complete groups.
    """
    records = _parse_records(text)
    return [' '.join(row) for row in _regroup(records, type_count, labels)]


def main():
    """Read ``fileIn``, convert it and append the result to ``fileOut``."""
    with open(fileIn, 'r') as f:
        text = f.read()
    converted = convert_text(text)
    # The output is opened in append mode, so running the script again adds
    # to the existing file instead of replacing it.
    with open(fileOut, 'a') as f:
        f.writelines(line + '\n' for line in converted)


if __name__ == '__main__':
    main()