-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmercury_api.py
132 lines (112 loc) · 5.09 KB
/
mercury_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from requests import status_codes
import web
import json
import re
import requests
import spacy
import configparser
from web import webapi
from collections import defaultdict
import unicodedata
from web.webapi import BadRequest
# URL routing table: the single catch-all pattern sends every path to
# DestinationRequestHandler; the captured path is passed to the handler
# method as its positional argument.
urls = (
    '/(.*)', 'DestinationRequestHandler',
)

# Build the web.py application; handler class names in `urls` are resolved
# through this module's globals().
app = web.application(urls, globals())

# NOTE: the Content-Type header is intentionally NOT set here. web.header()
# appends to web.ctx.headers, which web.py creates afresh for each incoming
# request, so a module-level call either fails at import time (no request
# context exists yet) or is discarded before any response is sent. Response
# headers such as 'Content-Type: application/json' have to be set from inside
# a request handler.
class DestinationRequestHandler:
    """web.py handler that suggests destination names for itinerary titles.

    Each itinerary title is normalised with ``__clean_text`` and run through
    the ``ner`` pipe of a spaCy model using beam search; every entity span
    found in the beams is reported together with its accumulated score.
    """

    def POST(self, collection):
        """Score candidate destinations for each itinerary item in the body.

        The request body must be a JSON array of objects that each carry a
        ``title`` and a ``day`` key.

        Args:
            collection: URL path captured by the route pattern (unused).

        Returns:
            A JSON string encoding a list of objects with the keys ``day``,
            ``input`` (original title), ``temp_string`` (cleaned title) and
            ``recommended_destination`` (list of ``{"name", "score"}``).

        Raises:
            web.webapi.BadRequest: if the body is not valid JSON, an item is
                missing ``title``/``day``, or processing an item fails.
        """
        beam_width = 16
        beam_density = 0.0001

        # Reject malformed payloads with a 400 before paying for the model
        # load (JSONDecodeError and UnicodeDecodeError are ValueErrors).
        try:
            itinerary_object = json.loads(web.data())
        except (TypeError, ValueError) as err:
            raise webapi.badrequest() from err

        # NOTE(review): the model is read from disk on every request, which
        # is expensive; consider caching it once the reload semantics are
        # agreed on.
        nlp = spacy.load(self.__get_config())
        ner = nlp.get_pipe("ner")  # looked up once instead of per item/beam

        response_data = []
        try:
            for itinerary_item in itinerary_object:
                title = itinerary_item['title']
                input_string = self.__clean_text(title)
                response_data.append({
                    "day": itinerary_item['day'],
                    "input": title,
                    "temp_string": input_string,
                    "recommended_destination": self.__score_places(
                        nlp, ner, input_string, beam_width, beam_density
                    ),
                })
        except Exception as err:
            # Request boundary: any per-item failure is still reported to the
            # client as 400, but SystemExit/KeyboardInterrupt are no longer
            # swallowed and the original cause is kept for debugging.
            raise webapi.badrequest() from err

        # Headers live in the per-request context, so set the type here.
        web.header('Content-Type', 'application/json')
        # Serialise explicitly: returning the raw list would make web.py
        # emit its Python repr rather than JSON.
        return json.dumps(response_data)

    def __score_places(self, nlp, ner, text, beam_width, beam_density):
        """Return ``[{"name", "score"}, ...]`` for entity spans found in *text*.

        Scores of identical ``(start, end, label)`` spans are summed across
        all beam parses; spans whose total score is not positive are dropped.
        """
        doc = nlp(text)
        beams = ner.beam_parse(
            [doc], beam_width=beam_width, beam_density=beam_density
        )
        entity_scores = defaultdict(float)
        for beam in beams:
            for score, ents in ner.moves.get_beam_parses(beam):
                for start, end, label in ents:
                    entity_scores[(start, end, label)] += score
        return [
            # Span.text / float() keep the result JSON-serialisable.
            {"name": doc[start:end].text, "score": float(score)}
            for (start, end, _label), score in entity_scores.items()
            if score > 0
        ]

    def __get_config(self):
        """Return the model path configured in ``./configs/api_config.cfg``.

        The path is the concatenation of the ``folder`` and
        ``model_to_execute`` options of the ``model_path`` section.
        """
        config = configparser.ConfigParser()
        config.read('./configs/api_config.cfg')
        model_folder = config.get('model_path', 'folder')
        model = config.get('model_path', 'model_to_execute')
        return model_folder + model

    def __clean_text(self, input_string):
        """Normalise an itinerary title before it is passed to the NER model.

        Accented characters are reduced to their ASCII base letters, route
        separators (" - ", "/") become " to ", " via " becomes " through ",
        parenthesised remarks are removed and any remaining character that is
        not a letter, digit, space, newline or period is stripped.
        """
        # Decompose accents (NFKD) and drop everything outside ASCII.
        temp_string = (
            unicodedata.normalize('NFKD', input_string)
            .encode('ascii', 'ignore')
            .decode('utf-8')
        )
        # Order matters: separators are rewritten before the final filter
        # strips punctuation, and parentheses are removed in two passes so
        # that one level of nesting, e.g. "(a (b) c)", disappears entirely.
        substitutions = (
            (' - ', ' to '),
            (r"\([^()]*\)", ""),
            (' via ', ' through '),
            ('/', ' to '),
            (r"\([^()]*\)", ""),
            (r'[^a-zA-Z0-9 \n\.]', ''),
        )
        for pattern, replacement in substitutions:
            temp_string = re.sub(pattern, replacement, temp_string)
        return temp_string
"""only for test purpose"""
# def POST(self, collection):
# suggestionsPlaces = []
# beam_width = 16
# beam_density = 0.0001
# nlp = spacy.load(R".\training\model-best")
# response_data=[]
# for id in range(4000, 5800):
# print("PRODUCT ID : ", id)
# r = requests.get(url='https://www.bookmundi.com/ws/GetItineraryData?id=' + str(id))
# itinerary_object = r.json()
# if len(itinerary_object)>0:
# for itinerary_item in itinerary_object:
# input_string = self.__clean_text(itinerary_item['title'])
# day = itinerary_item['day']
# docs = nlp(input_string)
# beams = nlp.get_pipe("ner").beam_parse([docs], beam_width = beam_width, beam_density = beam_density)
# entity_scores = defaultdict(float)
# for beam in beams:
# for score, ents in nlp.get_pipe("ner").moves.get_beam_parses(beam):
# for start, end, label in ents:
# entity_scores[(start, end, label)] += score
# place = []
# for key in entity_scores:
# start, end, label = key
# score = entity_scores[key]*100
# output_string = docs[start:end]
# print(output_string)
# if(score>0):
# place.append({"name" : output_string, "score" : score})
# item = {
# "product_id" : id,
# "day" : day,
# "input" : itinerary_item['title'],
# "temp_string" : input_string,
# "recommended_destination" : place
# }
# response_data.append(item)
# return response_data
# Script entry point: run the web.py application only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    app.run()