-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
302 lines (214 loc) · 9.78 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
##############################################################################################################
# Filename: app.py
#
# Description: A Streamlit application to test the performance of our neural network models.
#
# Copyright © 2023 by Georgios Ioannou
##############################################################################################################
# Import libraries.
import joblib # To read our pretrained tokenizer and label encoder.
import nltk # Natural Language Processing.
import numpy as np # Data wrangling.
import re # Regular expression operations.
import streamlit as st # Streamlit.
from nltk.stem import (
WordNetLemmatizer,
) # Lemmatize using WordNet's built-in morphy function.
from nltk.stem import (
PorterStemmer,
) # Remove morphological affixes from words, leaving only the word stem.
from nltk.corpus import stopwords # Remove stopwords.
from nltk import word_tokenize # Tokenize.
from tensorflow.keras.models import load_model # To load the model.
from tensorflow.keras.preprocessing.sequence import (
pad_sequences,
) # Transforms a list of sequences into a 2D Numpy array.
# Download the NLTK data packages this app relies on: the stop-word lists,
# WordNet, and the Open Multilingual Wordnet 1.4.
for _nltk_resource in ("stopwords", "wordnet", "omw-1.4"):
    nltk.download(_nltk_resource)
##############################################################################################################
def _load_artifact(path):
    """Load one joblib-serialized object from ``path`` and close the file.

    ``joblib.load`` unpickles the file, so ``path`` must point at a trusted
    artifact shipped with the project.
    """
    # The context manager guarantees the handle is released even if loading
    # fails; passing a bare ``open(...)`` would leave it open.
    with open(path, "rb") as artifact_file:
        return joblib.load(artifact_file)


# Read our pretrained tokenizer.
tokenizer = _load_artifact("./tokenizer/tokenizer.pickle")
# Read our pretrained label encoder.
label = _load_artifact("./label_encoder/label_encoder.h5")
##############################################################################################################
# Maps each emotion label to the emoji shown next to the prediction.
emotions_emoji_dict = dict(
    anger="😠",
    fear="😨",
    joy="😂",
    love="😍",
    sadness="😞",
    surprise="😯",
)
##############################################################################################################
# English stop words held in a set so membership checks are cheap.
stop_words = {*stopwords.words("english")}
# Module-level WordNet lemmatizer instance.
lemmatizer = WordNetLemmatizer()
# Cleaner class is responsible for cleaning the documents using its pipeline function.
class Cleaner:
    """Clean raw text through a fixed sequence of normalization steps.

    Every method takes a string and returns a new string; ``pipeline`` chains
    steps 1-6 in order. The class keeps no instance state.
    """

    # Characters that remove_punctuation replaces with a space: ASCII
    # punctuation plus the Arabic comma and question mark. Written as a raw
    # string because the literal contains a backslash.
    _PUNCTUATION = r"""!"#$%&'()*+,،-./:;<=>؟?@[\]^_`{|}~"""
    _PUNCTUATION_RE = re.compile("[%s]" % re.escape(_PUNCTUATION))
    _WHITESPACE_RE = re.compile(r"\s+")
    _URL_RE = re.compile(r"https?://\S+|www\.\S+")

    # 1. Make a function that makes all text lowercase.
    def make_lowercase(self, input_string):
        """Lowercase each whitespace-separated token and rejoin with single spaces."""
        return " ".join(token.lower() for token in input_string.split())

    # 2. Make a function that removes all stopwords.
    def remove_stopwords(self, input_string):
        """Drop every token that appears in the module-level ``stop_words`` set."""
        kept = [token for token in str(input_string).split() if token not in stop_words]
        return " ".join(kept)

    # 3. Make a function that removes all numbers.
    def remove_numbers(self, input_string):
        """Delete every character for which ``str.isdigit`` is true."""
        return "".join(char for char in input_string if not char.isdigit())

    # 4. Make a function that removes all punctuation.
    def remove_punctuation(self, input_string):
        """Replace punctuation with spaces and normalize the whitespace.

        The Arabic semicolon is deleted outright rather than replaced by a
        space. The result has single spaces between tokens and no leading or
        trailing whitespace.
        """
        spaced = self._PUNCTUATION_RE.sub(" ", input_string)
        spaced = spaced.replace("؛", "")
        collapsed = self._WHITESPACE_RE.sub(" ", spaced)
        # split/join already trims both ends, so no extra strip() is needed.
        return " ".join(collapsed.split())

    # 5. Make a function that removes all urls.
    def remove_urls(self, input_string):
        """Delete ``http(s)://...`` and ``www....`` runs of non-whitespace characters."""
        return self._URL_RE.sub("", input_string)

    # 6. Make a function for lemmatization.
    def lemmatization(self, input_string):
        """Lemmatize each whitespace-separated token with WordNet."""
        wordnet_lemmatizer = WordNetLemmatizer()
        return " ".join(
            wordnet_lemmatizer.lemmatize(token) for token in input_string.split()
        )

    # 7. Make a function that breaks words into their stem words.
    def stem_words(self, input_string):
        """Tokenize the text and replace each token with its Porter stem.

        NOTE(review): ``word_tokenize`` presumably needs NLTK tokenizer data
        that has to be downloaded separately -- confirm before enabling this
        step in ``pipeline``.
        """
        porter = PorterStemmer()
        return " ".join(porter.stem(word) for word in word_tokenize(input_string))

    # 8. Make a pipeline function that applies all the text processing functions you just built.
    def pipeline(self, input_string):
        """Apply steps 1-6 in order and return the cleaned text.

        NOTE(review): ``remove_urls`` runs after ``remove_punctuation``, which
        has already turned ':', '/' and '.' into spaces, so the URL pattern
        can no longer match at that point. The order is left unchanged
        because the pretrained models were presumably trained on text cleaned
        exactly this way -- confirm before reordering.
        """
        input_string = self.make_lowercase(input_string)  # 1.
        input_string = self.remove_stopwords(input_string)  # 2.
        input_string = self.remove_numbers(input_string)  # 3.
        input_string = self.remove_punctuation(input_string)  # 4.
        input_string = self.remove_urls(input_string)  # 5.
        input_string = self.lemmatization(input_string)  # 6.
        # Step 7 (stem_words) is intentionally not applied.
        return input_string
##############################################################################################################
# Inference function for new user input.
def inference(user_input, model, selected_model):
    """Predict the emotion label for one piece of user text.

    Args:
        user_input: Raw text to classify.
        model: Loaded model object exposing ``predict``.
        selected_model: Name of the chosen model. ``"CNN"`` is fed the padded
            sequence twice (as a two-element list); any other value once.

    Returns:
        A tuple ``(output, probability)``: the first label decoded by the
        module-level ``label`` encoder from the arg-max of the model output,
        and the largest value found in the model output.
    """
    # Clean the text with the same pipeline object the app uses everywhere.
    cleaned_text = Cleaner().pipeline(user_input)

    # Turn the cleaned text into integer ids, then pad to length 256
    # (sequences longer than that are truncated from the front).
    sequences = tokenizer.texts_to_sequences([cleaned_text])
    padded = pad_sequences(sequences, maxlen=256, truncating="pre")

    # The CNN model receives the same padded batch as both of its inputs.
    model_inputs = [padded, padded] if selected_model == "CNN" else padded
    scores = model.predict(model_inputs)

    # Decode the highest-scoring class index back to its label.
    predicted_ids = np.argmax(scores, axis=-1)
    output = label.inverse_transform(predicted_ids)[0]

    # Highest score in the output, reported as the prediction's probability.
    probability = np.max(scores)
    return output, probability
##############################################################################################################
# Function to apply local CSS.
def local_css(file_name):
    """Inject the contents of a CSS file into the Streamlit page.

    Args:
        file_name: Path of the stylesheet to read.
    """
    # Explicit encoding so the stylesheet is decoded the same way on every
    # platform instead of depending on the locale default.
    with open(file_name, encoding="utf-8") as css_file:
        css = css_file.read()
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
##############################################################################################################
# Main function to create the Streamlit web application.
def main():
    """Build the Streamlit page, collect text and show the predicted emotion.

    Any exception raised while rendering or predicting is reported on the
    page through ``st.error`` instead of propagating.
    """
    # Dropdown option -> saved model file. The same mapping feeds the
    # selectbox, so the options and the paths cannot drift apart.
    model_paths = {
        "CNN+LSTM": "./neural_network_models/cnn_lstm_model.h5",
        "CNN": "./neural_network_models/cnn_model.h5",
        "BiLSTM": "./neural_network_models/bidirectional_lstm_model.h5",
    }

    try:
        st.set_page_config(page_title="TESSA | 😠😨😂😍😞😯")

        # Load CSS.
        local_css("styles/style.css")

        # Title.
        title = (
            '<h1 align="center" style="font-family: monospace; font-size: 2.1rem; margin-top: -6rem">\n'
            "Text Emotion System Sentiment Analysis</h1>"
        )
        st.markdown(title, unsafe_allow_html=True)

        # Subtitle.
        subtitle = (
            '<h2 align="center" style="font-family: monospace; font-size: 2.3rem; margin-top: -2rem">\n'
            "TESSA</h2>"
        )
        st.markdown(subtitle, unsafe_allow_html=True)

        # Image.
        st.image("./logo/logo.png")

        # Margin between the image and the form.
        st.markdown(
            '<p style="margin-top: 5rem; text-align: center;"></p>',
            unsafe_allow_html=True,
        )

        # Add a dropdown menu for model selection.
        selected_model = st.selectbox("Select Neural Network Model", list(model_paths))

        # Form to get user input.
        with st.form(key="my_form", border=True):
            st.markdown(
                "<style>label, .stTextInput { color: red; }</style>",
                unsafe_allow_html=True,
            )
            user_input = st.text_area("Type Here")
            submit_text = st.form_submit_button(label="Classify")

        if submit_text:
            if not user_input.strip():
                # Nothing to classify: skip model loading and inference.
                st.warning("Please enter some text to classify.")
            else:
                with st.spinner(text="Model Inference..."):
                    # Load the selected model.
                    model = load_model(model_paths[selected_model])

                    # Inference.
                    prediction, probability = inference(
                        user_input=user_input,
                        model=model,
                        selected_model=selected_model,
                    )

                    st.success("Prediction Found Below!")

                    # Fall back to no emoji if the label is not in the map.
                    emoji_icon = emotions_emoji_dict.get(prediction, "")

                    # Display selected neural network model.
                    st.markdown(
                        f'<p style="text-align: center;">Neural Network Model = {selected_model}</p>',
                        unsafe_allow_html=True,
                    )

                    # Display predicted emotion.
                    st.markdown(
                        f'<p style="text-align: center;">Predicted Emotion = {prediction.capitalize()}{emoji_icon}</p>',
                        unsafe_allow_html=True,
                    )

                    # Display confidence score (inference already returns
                    # the maximum of the model output).
                    st.markdown(
                        f'<p style="text-align: center;">Confidence = {probability}</p>',
                        unsafe_allow_html=True,
                    )

                    st.balloons()

        # GitHub repository of project.
        footer = (
            "\n"
            '<p align="center" style="font-family: monospace; color: #FAF9F6; font-size: 1rem;"><b> Check out our\n'
            '<a href="https://github.com/GeorgiosIoannouCoder/tessa" style="color: #FAF9F6;"> GitHub repository</a></b>\n'
            "</p>\n"
        )
        st.markdown(footer, unsafe_allow_html=True)
    except Exception as e:
        # Top-level boundary of the app: surface the failure on the page.
        st.error(e)
# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()