-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathquestion_classification.py
144 lines (121 loc) · 5.66 KB
/
question_classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import os
from trie_tree import LexiconNER
class QuestionClassifier:
def __init__(self):
cur_dir = 'data'
entity_dicts = {}
# 特征词路径
self.city_names_path = os.path.join(cur_dir, 'mid_data/city_names.txt')
self.hero_names_path = os.path.join(cur_dir, 'mid_data/hero_names.txt')
self.hero_races_path = os.path.join(cur_dir, 'mid_data/hero_races.txt')
self.hero_release_dates_path = os.path.join(cur_dir, 'mid_data/hero_release_dates.txt')
self.hero_roles_path = os.path.join(cur_dir, 'mid_data/hero_roles.txt')
self.hero_titles_path = os.path.join(cur_dir, 'mid_data/hero_titles.txt')
self.rels_path = os.path.join(cur_dir, 'mid_data/rels.txt')
# 加载特征词
entity_dicts = {
"city_names": [i.strip() for i in open(self.city_names_path, encoding="utf-8") if i.strip()],
"hero_names": [i.strip() for i in open(self.hero_names_path, encoding="utf-8") if i.strip()],
"hero_races": [i.strip() for i in open(self.hero_races_path, encoding="utf-8") if i.strip()],
"hero_release_dates": [i.strip() for i in open(self.hero_release_dates_path, encoding="utf-8") if
i.strip()],
"hero_roles": [i.strip() for i in open(self.hero_roles_path, encoding="utf-8") if i.strip()],
"hero_titles": [i.strip() for i in open(self.hero_titles_path, encoding="utf-8") if i.strip()],
"rels": [i.strip() for i in open(self.rels_path, encoding="utf-8") if i.strip()]
}
self.lexicon_ner = LexiconNER(entity_dicts)
# 问句疑问词
self.hero_race_qwds = ["种族"]
self.hero_role_qwds = ["角色"]
self.hero_title_qwds = ["别称", "别名", "称号"]
self.hero_info_qwds = ["基本信息", '简介', '介绍', '信息']
self.city_qwds = ['区域', '城市']
self.env_qwds = ['风景', '景色', '建筑']
self.rel_qwds = entity_dicts['rels']
print('model init finished ......')
return
'''分类主函数'''
def classify(self, question):
data = {}
entities = self.lexicon_ner(question)
entity_dict = {}
# 收集问句当中所涉及到的实体类型
types = set()
for entity in entities:
types.add(entity['type'])
if entity['text'] not in entity_dict:
entity_dict[entity['text']] = [entity['type']]
else:
if entity['type'] not in entity_dict[entity['text']]:
entity_dict[entity['text']].append(entity['type'])
types = list(types)
data['args'] = entity_dict
uestion_type = 'others'
question_types = []
# 查询英雄的种族
if self.check_words(self.hero_race_qwds, question) and "hero_names" in types:
question_type = 'hero_race'
question_types.append(question_type)
# 查询英雄的角色
if self.check_words(self.hero_role_qwds, question) and "hero_names" in types:
question_type = 'hero_role'
question_types.append(question_type)
# 查询英雄的介绍
if self.check_words(self.hero_info_qwds, question):
if "hero_names" in types:
question_type = 'hero_info'
question_types.append(question_type)
elif "city_names" in types:
question_type = 'city_info'
question_types.append(question_type)
# 查询英雄的别称
if self.check_words(self.hero_title_qwds, question) and "hero_names" in types:
question_type = 'hero_title'
question_types.append(question_type)
# 查询区域的景色
if self.check_words(self.env_qwds, question) and "city_names" in types:
question_type = 'city_has_env'
question_types.append(question_type)
# 查询英雄所属的区域
if self.check_words(self.city_qwds, question):
if "hero_names" in types:
question_type = 'hero_belong_city'
question_types.append(question_type)
else:
# 查询某区域包含什么英雄
if "英雄" in question:
question_type = 'city_has_hero'
question_types.append(question_type)
# 查询某种关系的英雄
if self.check_words(self.rel_qwds, question):
if "rels" in types and "hero_names" in types:
# 既出现关系,又出现英雄实体
question_type = 'rel_hero'
question_types.append(question_type)
elif "rels" in types and "hero_names" not in types:
# 只出现关系,不出现英雄实体
question_type = 'rel_no_hero'
question_types.append(question_type)
# 将多个分类结果进行合并处理,组装成一个字典
data['question_types'] = question_types
return data
'''基于特征词进行分类'''
def check_words(self, wds, sent):
for wd in wds:
if wd in sent:
return True
return False
if __name__ == '__main__':
handler = QuestionClassifier()
"""
input an question:
question = "德玛西亚有什么景色吗"
{'args': {'德玛西亚': ['city_names']}, 'question_types': ['city_has_env']}
"""
# while 1:
# question = input('input an question:')
# data = handler.classify(question)
# print(data)
question = "盖伦的别称是什么"
question = "德玛西亚区域有哪些英雄"
print(handler.classify(question))