-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathevaluate.py
135 lines (109 loc) · 4.08 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
__license__ = """
Copyright (C) 2017 Guillaume Genthial
Modifications copyright (C) 2020 CEA LIST
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tfner.data_utils import CoNLLDataset
from tfner.ner_model import NERModel
from tfner.config import Config
import time, sys, argparse, re
from argparse import RawTextHelpFormatter
def align_data(data):
    """Build column-aligned strings from parallel token sequences.

    Adapted from Assignment 3 of CS224N.

    Each column is padded to the width of its longest token plus one
    separating space, so the i-th tokens of every sequence start at the
    same offset in their respective strings.

    Args:
        data: (dict) maps a label to a list of string tokens, e.g.
            data["x"] = ["I", "love", "you"]
            data["y"] = ["O", "O", "O"]
            The lists are expected to have the same length; if they do
            not, columns beyond the shortest list are dropped.

    Returns:
        data_aligned: (dict) same keys, each mapped to one padded string:
            data_aligned["x"] = "I love you "
            data_aligned["y"] = "O O    O   "
        An empty dict yields an empty dict.

    """
    # zip(*...) walks the sequences column by column; with no sequences at
    # all it yields nothing, so an empty dict simply produces no columns.
    spacings = [max(len(token) for token in column)
                for column in zip(*data.values())]

    # ljust(spacing + 1) pads the token to the column width and adds the
    # single separating space in one step.
    return {
        key: "".join(token.ljust(spacing + 1)
                     for token, spacing in zip(seq, spacings))
        for key, seq in data.items()
    }
def _split_punctuation(words):
    """Split words into alphanumeric runs and single other characters.

    Args:
        words: iterable of strings (whitespace-free words).

    Returns:
        list of tokens: every maximal run of letters/digits becomes one
        token and every other character becomes a token of its own, e.g.
        ["Paris,", "U.S."] -> ["Paris", ",", "U", ".", "S", "."]

    """
    tokens = []
    for word in words:
        run = []
        for char in word:
            if char.isalpha() or char.isdigit():
                run.append(char)
                continue
            # a non-alphanumeric character ends the current run and is
            # emitted as a standalone token
            if run:
                tokens.append("".join(run))
                run = []
            tokens.append(char)
        # flush the run left open at the end of the word
        if run:
            tokens.append("".join(run))
    return tokens


def interactive_shell(model):
    """Creates interactive shell to play with model

    Reads sentences from standard input until the user enters 'exit' or
    standard input is closed. Each sentence is tokenized (punctuation is
    separated from text), passed to ``model.predict`` and the tokens and
    predictions are logged as two aligned lines.

    Args:
        model: instance of NERModel

    """
    model.logger.info("""
This is an interactive mode.
To exit, enter 'exit'.
You can enter a sentence like
input> I love Paris""")

    while True:
        try:
            sentence = input("input> ")
        except EOFError:
            # stdin was closed (Ctrl-D, exhausted pipe): leave the shell
            # instead of crashing with a traceback
            break

        # split() without argument splits on any whitespace, so tabs and
        # repeated spaces never end up as tokens
        words_raw = sentence.split()
        if words_raw == ["exit"]:
            break

        words_raw_formatted = _split_punctuation(words_raw)
        if not words_raw_formatted:
            # blank line: nothing to tag, ask again
            # NOTE(review): assumes model.predict is not meant to be called
            # with an empty token list - confirm against NERModel
            continue

        preds = model.predict(words_raw_formatted)
        to_print = align_data({"input": words_raw_formatted, "output": preds})

        for seq in to_print.values():
            model.logger.info(seq)
def main(language="eng"):
    """Restore a trained NER model, evaluate it and start the shell.

    Steps, in order: build the configuration, build the model and restore
    its session from ``config.dir_model``, evaluate it on the dataset at
    ``config.filename_test``, then enter the interactive shell. The time
    taken by the configuration step and by the model build/restore step is
    printed to standard output.

    Args:
        language: language code forwarded to ``Config`` as ``lang``
            (default "eng").

    Exits the process with status 1 (after printing the error to stderr)
    if the configuration cannot be created.

    """
    # create instance of config; the timer starts before the call so the
    # reported duration actually covers the configuration step
    start_time = time.perf_counter()
    try:
        config = Config(lang=language)
    except Exception as e:
        # format the exception itself: "%s" % e.args raises TypeError as
        # soon as args does not hold exactly one item
        print("Exception: %s" % (e,), file=sys.stderr)
        sys.exit(1)
    print("--- Execution time : %s seconds ---" % (time.perf_counter() - start_time))

    # build model
    start_time = time.perf_counter()
    model = NERModel(config)
    model.build()
    model.restore_session(config.dir_model)
    print("--- Execution time : %s seconds ---" % (time.perf_counter() - start_time))

    # create dataset
    test = CoNLLDataset(config.filename_test, config.processing_word,
                        config.processing_tag, config.max_iter)

    # evaluate and interact
    model.evaluate(test)
    interactive_shell(model)
if __name__ == "__main__":
    # Command-line entry point: parse --lang and run the evaluation.
    parser = argparse.ArgumentParser(description='''NE recognizer''', formatter_class=RawTextHelpFormatter)
    parser.add_argument('--lang', required=False, default="eng", help="Specify the language between french as fr and english as eng")

    try:
        arguments = vars(parser.parse_args(args=sys.argv[1:]))
    except SystemExit as exit_request:
        # argparse reports both "-h/--help" and invalid arguments by raising
        # SystemExit. A successful exit (help requested) must go through
        # untouched, otherwise the help text is printed twice and the exit
        # status turns into a failure.
        if exit_request.code in (0, None):
            raise
        # invalid arguments: show the full help and exit with status 1
        parser.print_help()
        sys.exit(1)

    main(language=arguments["lang"])