-
Notifications
You must be signed in to change notification settings - Fork 2
/
NLP.R
57 lines (32 loc) · 1.44 KB
/
NLP.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#NLP
#install.packages('tm',repos='http://cran.us.r-project.org')
#install.packages("tm",dependencies=TRUE)
#install.packages('twitteR',repos='http://cran.us.r-project.org')
#install.packages('wordcloud',repos='http://cran.us.r-project.org')
#install.packages('RColorBrewer',repos='http://cran.us.r-project.org')
library(twitteR)
library(tm)
library(wordcloud)
library(RColorBrewer)
# Twitter API credentials ----
# The connect call below reads four variables that this script does not
# assign itself. Define them with your own app's values before running:
#   ckey     <- Consumer Key (API Key)
#   skey     <- Consumer Secret (API Secret)
#   token    <- Access Token
#   secToken <- Access Token Secret

# Connect to Twitter ----
# Arguments are passed by name so each credential's role is explicit.
setup_twitter_oauth(
  consumer_key = ckey,
  consumer_secret = skey,
  access_token = token,
  access_secret = secToken
)
# Search ----
# Request up to 100 English-language tweets matching the term "soccer".
search_soccer <- searchTwitter("soccer", n = 100, lang = "en")
# Extract the text of each returned status. vapply() pins the result to a
# character vector, so an empty search yields character(0) rather than list().
soccer_text <- vapply(search_soccer, function(x) x$getText(), character(1))

# Cleaning the data ----
# Convert to ASCII. Without `sub`, iconv() returns NA for any element that
# contains a non-convertible byte, discarding the whole tweet; `sub = ""`
# drops only the offending bytes and keeps the rest of the text.
soccer_text <- iconv(soccer_text, from = "UTF-8", to = "ASCII", sub = "")
# Build a tm corpus with one document per tweet. The tm constructor is
# VectorSource() (capital V); R is case-sensitive, so `vectorSource` fails
# with "could not find function".
soccer_corpus <- Corpus(VectorSource(soccer_text))
# Term-document matrix ----
# Count term occurrences per document after stripping punctuation and
# numbers, lower-casing, and dropping English stop words plus the search
# term itself ("soccer") and the URL fragment "http".
term_doc_matrix <- TermDocumentMatrix(
  soccer_corpus,
  control = list(
    removePunctuation = TRUE,
    stopwords = c("soccer", "http", stopwords("english")),
    removeNumbers = TRUE,
    tolower = TRUE
  )
)
# Densify so that rowSums() can be applied (rows are terms, columns are
# documents).
term_doc_matrix <- as.matrix(term_doc_matrix)

# Word frequencies ----
# Total count per term across all documents, most frequent first.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
word_freq <- sort(rowSums(term_doc_matrix), decreasing = TRUE)
dm <- data.frame(word = names(word_freq), freq = word_freq)

# Word cloud ----
# Draw the cloud using the 8-colour "Dark2" palette, with random word
# ordering turned off.
word_cloud <- wordcloud(
  dm$word,
  dm$freq,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)