-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
93 lines (63 loc) · 2.43 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import streamlit as st
import requests as r
from os.path import dirname, join, realpath
import joblib
from langdetect import detect
# text preprocessing modules
from string import punctuation
import re # regular expression
# Page banner: app title, illustration, and a one-line description.
st.header("Gender-Based Violence Tweet Classification App")
st.image("images/Stop-Gender-based-Violence.png")
st.subheader(
    "\nA Data science app to classify tweets about GBV without using keywords.\n"
)

# Input form: a single text box for the tweet plus the button that submits it.
# `submit` is True only on the script run triggered by pressing the button.
my_form = st.form("tweets_form")
tweet = my_form.text_input(label="Input your tweet here")
submit = my_form.form_submit_button("make prediction")
# Load the pickled classifier, vectorizer and label encoder. Paths are
# resolved relative to this file so the app works from any working directory.
_APP_DIR = dirname(realpath(__file__))

with open(join(_APP_DIR, "models/tweets_model.pkl"), "rb") as model_file:
    model = joblib.load(model_file)

with open(join(_APP_DIR, "preprocessors/vectorizer.pkl"), "rb") as vectorizer_file:
    vectorizer = joblib.load(vectorizer_file)

with open(join(_APP_DIR, "preprocessors/labelEncoder.pkl"), "rb") as encoder_file:
    labelEncoder = joblib.load(encoder_file)
def text_cleaning(text):
    """Normalize a raw tweet into lower-cased, punctuation-free text.

    Steps, in order: replace every non-alphanumeric character with a space,
    replace ``'s`` and ``http...`` tokens, drop standalone numbers that are
    followed by whitespace, lower-case, and strip any remaining punctuation.

    Args:
        text: The raw tweet as a string.

    Returns:
        The cleaned text as a single string (not a list of words). Runs of
        spaces left by the substitutions are not collapsed.
    """
    # NOTE(review): this first substitution already removes apostrophes, ':'
    # and '/', so the possessive pattern below can no longer match and the URL
    # pattern only catches leftover fragments such as "https". The order is
    # kept unchanged on purpose - confirm against the preprocessing used when
    # the vectorizer was fitted before reordering.
    text = re.sub(r"[^A-Za-z0-9]", " ", text)
    text = re.sub(r"\'s", " ", text)
    text = re.sub(r"http\S+", " link ", text)
    text = re.sub(r"\b\d+(?:\.\d+)?\s+", "", text)  # remove numbers
    text = text.lower()

    # Remove punctuation characters. The previous implementation joined the
    # surviving characters with " ", which put a space between every single
    # character of the tweet ("stop" -> "s t o p").
    return text.translate(str.maketrans("", "", punctuation))
# Run the classifier once the form has been submitted.
if submit:
    # Imported here so this block is self-contained; langdetect is already a
    # dependency of this file.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        is_english = detect(tweet) == "en"
    except LangDetectException:
        # detect() raises when the text has no usable features (for example an
        # empty box, or only digits/punctuation). Treat that like non-English
        # input instead of crashing the app.
        is_english = False

    if is_english:
        # NOTE(review): text_cleaning() is defined in this file, but its result
        # was never passed to the vectorizer; the raw tweet is vectorized here
        # exactly as before. Confirm which preprocessing the vectorizer was
        # fitted with before switching the input.
        transformed_tweet = vectorizer.transform([tweet])

        # perform prediction
        prediction = model.predict(transformed_tweet)
        output = int(prediction[0])
        probas = model.predict_proba(transformed_tweet)
        # Index the single row explicitly: float() on a 1-element array slice
        # is deprecated in recent NumPy releases.
        # Assumes the encoded label equals its column index in `probas`
        # (true for label-encoded classes 0..n-1) - TODO confirm.
        probability = "{:.2f}".format(float(probas[0, output]))
        class_predicted = labelEncoder.inverse_transform([prediction[0]])

        # Display results of the NLP task
        st.header("Results")
        st.write("The tweet has {} content".format(class_predicted[0]))
        # The probability was previously computed but never displayed.
        st.write("Probability: {}".format(probability))
    else:
        st.write(
            " ⚠️ The tweet is not in English language.Please make sure the input is in English language"
        )
# Footer credit rendered as a Markdown link to the author's Twitter profile.
url = "https://twitter.com/Davis_McDavid"
st.write("Developed with ❤️ by [Davis David]({})".format(url))