-
Notifications
You must be signed in to change notification settings - Fork 1
/
auto_data_annotate_helper.py
81 lines (71 loc) · 2.27 KB
/
auto_data_annotate_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
# Directory the script was started from; the annotation folder is looked up
# beneath it.
CWD = os.getcwd()
# Folder holding the annotation files. The trailing "" component makes
# os.path.join end the result with the platform's path separator, so the value
# still ends in a separator as before while also working where the separator
# is not a backslash.
ANNOTATION_DIR = os.path.join(CWD, "sentences_annotation_auto", "")
# Input file with the candidate lines to annotate.
ANNOTATION_TARGET = os.path.join(ANNOTATION_DIR, "annotation.txt")
# Output file that accepted lines are appended to.
ANNOTATION_RESULT = os.path.join(ANNOTATION_DIR, "annotation_fin.txt")
def split_sentence(line):
    """Return *line* without its last three space-separated fields.

    The value is converted to ``str`` and split from the right on single
    spaces, at most three times; the leftmost piece is returned. When the
    text contains fewer than three spaces, the result is simply everything
    before the first space (or the whole text if there is no space).
    """
    sentence, *_trailing_fields = str(line).rsplit(" ", maxsplit=3)
    return sentence
# Load every line of the annotation input file.
with open(ANNOTATION_TARGET, "r", encoding="utf8") as f:
    lines = f.readlines()
# If the file does not end with a newline, its final line is unterminated.
# After the reversal below that line comes first, and writing it out verbatim
# would glue it to the following record, so terminate it here.
if lines and not lines[-1].endswith("\n"):
    lines[-1] += "\n"
# Reverse in place so that index 0 is the last line of the file.
lines.reverse()
# Find the resume marker: the last line already present in the result file.
# A missing result file (e.g. on the very first run) is treated the same as an
# empty one, since the file is only opened for appending later on.
try:
    with open(ANNOTATION_RESULT, "r", encoding="utf8") as g:
        lines_resume = g.readlines()
except FileNotFoundError:
    lines_resume = []
# An empty string means "nothing annotated yet, start from the beginning".
last_line = lines_resume[-1] if lines_resume else ""
# Interactive annotation pass. Consecutive lines that share the same sentence
# part (as returned by split_sentence) are shown together as one numbered
# group; the operator answers with one character per line, and every line
# marked '1' is appended to the result file.
# buffering=1 selects line buffering for the output file.
with open(ANNOTATION_RESULT, "a", 1, encoding="utf8") as g:
    lines_len = len(lines)
    i = 0
    count = 0  # number of lines written to the result file during this run

    # Resume support: when the result file already has content, skip forward
    # to just past the first input line equal to its last entry.
    # NOTE: the position is not re-aligned to a group boundary, so the rest of
    # a partially answered group is presented again as its own group.
    if last_line != "":
        while i < lines_len and lines[i] != last_line:
            i += 1
        if i == lines_len:
            print("Resume point not found in the annotation file; nothing to do.")
        i += 1

    while i < lines_len:
        # Lines starting with "@@" are echoed and not offered for annotation.
        if lines[i].startswith("@@"):
            print(lines[i])
            i += 1
            continue

        # Show the current line and every directly following line with the
        # same sentence part; `inc` ends up as the size of that group.
        sentence = split_sentence(lines[i])
        print("1: " + lines[i])
        inc = 1
        while i + inc < lines_len and split_sentence(lines[i + inc]) == sentence:
            print(str(inc + 1) + ": " + lines[i + inc])
            inc += 1

        # Ask only after the whole group has been collected, so that a group
        # running to the end of the input is handled like any other.
        while True:
            inp = input("Input {} result: ".format(inc))
            if len(inp) == inc:
                break
            # "none" is a shorthand that rejects every line of a group with
            # more than four lines. (For a group of exactly four it already
            # passes the length check above and contains no '1'.)
            if inp == "none" and inc > 4:
                inp = "2" * inc
                break
            print("Input error.")

        # Any character other than '1' leaves the corresponding line out.
        for offset, mark in enumerate(inp):
            if mark == "1":
                g.write(lines[i + offset])
                count += 1
        i += inc