-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsentiment-analysis.py
49 lines (40 loc) · 1.26 KB
/
sentiment-analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import matplotlib.pyplot as plt
from transformers import pipeline, AutoTokenizer
from load_texts import *
# Hugging Face checkpoint used for both the classifier and the tokenizer.
model = "lucas-leme/FinBERT-PT-BR"
# return_all_scores=True makes the pipeline return a score for every label of
# every text instead of only the best label; device=0 runs on CUDA device 0.
# NOTE(review): newer transformers releases deprecate return_all_scores in
# favour of top_k=None -- confirm against the installed version.
pipe = pipeline("text-classification", model=model, return_all_scores=True, device=0)
# Loaded from the same checkpoint; not used by the code visible in this file.
tokenizer = AutoTokenizer.from_pretrained(model)

print("Loading data...")
dfCorpus = return_data_frame()
corpus = dfCorpus.text.to_list()
print(len(corpus), "atas")

# Only the first `size` characters of each document are scored.
size = 1750 #characters
corpus_trunc = [txt[:size] for txt in corpus]
# A character cap does not bound the token count, so also let the tokenizer
# truncate to the model's maximum length instead of failing on a long input.
out = pipe(corpus_trunc, truncation=True)

positive_scores = []
neutral_scores = []
negative_scores = []

# Get sentiment scores.
# `out` has one entry per document; each entry is a list of dicts carrying a
# 'label' and a 'score'. Route every score to the list for its label.
scores_by_label = {
    'POSITIVE': positive_scores,
    'NEUTRAL': neutral_scores,
    'NEGATIVE': negative_scores,
}
for doc_scores in out:
    for entry in doc_scores:
        label = entry['label']
        if label not in scores_by_label:
            # Raise a real exception: raising a bare tuple is itself a
            # TypeError and would hide the offending label.
            raise ValueError(f"Error in label: {entry!r}")
        scores_by_label[label].append(entry['score'])

# One score column per sentiment class, aligned with the rows of dfCorpus.
dfCorpus['positive_scores'] = positive_scores
dfCorpus['neutral_scores'] = neutral_scores
dfCorpus['negative_scores'] = negative_scores
def df_plot(column):
    """Draw the values of ``dfCorpus[column]`` in a new figure and show it.

    Args:
        column: Name of the ``dfCorpus`` column to plot; also used as the
            figure title.
    """
    plt.figure()
    series = dfCorpus[column]
    series.plot()
    # Axis labels and title are independent of each other.
    plt.xlabel("meeting")
    plt.ylabel("score")
    plt.title(column)
    plt.show()
# Show one chart per sentiment class, in the same order as before.
for score_column in ('positive_scores', 'neutral_scores', 'negative_scores'):
    df_plot(score_column)