-
Notifications
You must be signed in to change notification settings - Fork 0
/
sentiment_analysis.R
133 lines (113 loc) · 4.08 KB
/
sentiment_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
library(tibble)
library(dplyr)
library(tidytext)
library(ggplot2)
library(wordcloud)
library(reshape2)
# tidy_plot_keyword ----
# One row per keyword token: `line` is the position of the record in
# `dataset$plot_keywords`, `word` is a single token from unnest_tokens().
#
# The earlier scratch assignments were dropped: each was overwritten before
# being used, and one of them called mutate() on a bare column vector, which
# has no mutate() method and aborted the script.
plot_keywords_df <- dataset$plot_keywords
tidy_plot_keyword <- tibble(
  # seq_along() follows the data instead of hard-coding 5043 rows.
  line = seq_along(plot_keywords_df),
  # as.character() so a factor column is handled as plain text.
  text = as.character(plot_keywords_df)
)
tidy_plot_keyword <- unnest_tokens(tidy_plot_keyword, word, text)
# Comparing the sentiment lexicons ----
# AFINN: match every keyword token against the lexicon, then add up the
# matched values per keyword line to get one net score per line.
afinn <- tidy_plot_keyword %>%
  inner_join(get_sentiments("afinn")) %>%
  group_by(index = line) %>%
  summarise(sentiment = sum(value)) %>%
  mutate(method = "AFINN")
# Bing, Loughran and NRC: net polarity (positive - negative matches) per line.

# Tokenise the genre text once and reuse it for all three lexicons.
genre_tokens <- tidygenres %>%
  unnest_tokens(word, text)

bing_and_nrc <- bind_rows(
  genre_tokens %>%
    inner_join(get_sentiments("bing")) %>%
    mutate(method = "Bing"),
  genre_tokens %>%
    inner_join(get_sentiments("loughran")) %>%
    mutate(method = "Loughran"),
  genre_tokens %>%
    inner_join(
      # Only the positive/negative labels of NRC are used here.
      get_sentiments("nrc") %>%
        filter(sentiment %in% c("positive", "negative"))
    ) %>%
    mutate(method = "NRC")
) %>%
  count(method, index = line, sentiment) %>%
  # tidyr is not attached by this script, so the reshape is namespace-qualified;
  # pivot_wider() replaces the superseded spread(). names_sort = TRUE keeps the
  # new columns in sorted order.
  tidyr::pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0,
    names_sort = TRUE
  ) %>%
  mutate(sentiment = positive - negative)
# Stack the per-method scores and draw one panel per method, each with its own
# y axis.
ggplot(
  bind_rows(afinn, bing_and_nrc),
  aes(x = index, y = sentiment, fill = method)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~method, scales = "free_y", ncol = 1)
# Label tallies per lexicon ----
# Each pipeline tokenises the genre text, matches the tokens against one
# lexicon and counts the matches per label.

# NRC: positive and negative matches only.
tidygenres %>%
  unnest_tokens(word, text) %>%
  inner_join(get_sentiments("nrc")) %>%
  filter(sentiment == "positive" | sentiment == "negative") %>%
  count(sentiment)

# AFINN: number of matches for each value.
tidygenres %>%
  unnest_tokens(word, text) %>%
  inner_join(get_sentiments("afinn")) %>%
  count(value)

# Bing: positive and negative matches.
tidygenres %>%
  unnest_tokens(word, text) %>%
  inner_join(get_sentiments("bing")) %>%
  filter(sentiment == "positive" | sentiment == "negative") %>%
  count(sentiment)

# Loughran: positive and negative matches only.
tidygenres %>%
  unnest_tokens(word, text) %>%
  inner_join(get_sentiments("loughran")) %>%
  filter(sentiment == "positive" | sentiment == "negative") %>%
  count(sentiment)
# Frequency of every (word, sentiment) pair matched in the NRC lexicon, most
# frequent first.
# NOTE(review): the variable is called bing_word_counts but it is built from
# the NRC lexicon; the name is kept because later code refers to it.
bing_word_counts <- tidygenres %>%
  unnest_tokens(word, text) %>%
  inner_join(get_sentiments("nrc")) %>%
  count(word, sentiment) %>%
  arrange(desc(n)) %>%
  ungroup()
# Focus words whose polarity label is the same in NRC and Bing.
# The join matches on both `word` and `sentiment`, so a pair survives only if
# it is present in bing_word_counts and in the Bing lexicon.
# (An earlier assignment to bing_nrc was removed: it was overwritten
# immediately without being used.)
bing_nrc <- bing_word_counts %>%
  filter(sentiment %in% c("positive", "negative")) %>%
  inner_join(get_sentiments("bing"), by = c("word", "sentiment")) %>%
  filter(word %in% c("love", "death", "murder")) %>%
  # wt = n carries the existing frequencies forward. A plain count() would
  # tally rows of the already-counted table and report 1 for every word.
  count(word, sentiment, wt = n, sort = TRUE)
# Bar chart of the highest-count words within each sentiment: one panel per
# sentiment, bars drawn horizontally and ordered by count.
#
# word_counts: data frame with columns `word`, `sentiment` and `n`.
# n_words:     number of top-ranked words kept per sentiment (ties are kept).
# Returns the ggplot object; at top level it is printed like any other value.
plot_top_sentiment_words <- function(word_counts, n_words = 10) {
  word_counts %>%
    group_by(sentiment) %>%
    # Rank explicitly by `n` instead of relying on top_n()'s implicit
    # "last column" default.
    top_n(n_words, n) %>%
    ungroup() %>%
    mutate(word = reorder(word, n)) %>%
    ggplot(aes(word, n, fill = sentiment)) +
    geom_col(show.legend = FALSE) +
    facet_wrap(~sentiment, scales = "free_y") +
    labs(y = "Contribution to sentiment", x = NULL) +
    coord_flip()
}

# Words on which both lexicons agree.
plot_top_sentiment_words(bing_nrc)

# All matched words, per sentiment.
plot_top_sentiment_words(bing_word_counts)
# Wordclouds ----
# Cloud of up to 100 words from the genre text after removing stop words,
# sized by how often each word occurs.
tidygenres %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words) %>%
  count(word) %>%
  # scale is c(largest, smallest). The previous c(0.5, 0.75) was reversed, so
  # the most frequent words were drawn smaller than the rare ones.
  with(wordcloud(word, n, max.words = 100, scale = c(0.75, 0.5)))
# Comparison cloud of the Bing-matched words: counts are reshaped into a
# word-by-sentiment matrix (0 where a word lacks a label) and plotted with one
# colour per sentiment column.
tidygenres %>%
  unnest_tokens(word, text) %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(formula = word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(max.words = 100, colors = c("gray20", "gray80"))