This repository has been archived by the owner on Sep 4, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathKelimeSayisi.R
62 lines (31 loc) · 1.74 KB
/
KelimeSayisi.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
library(tm)
library(tidyverse)
library(Rcpp)
# Stop-word list (prepositions and conjunctions); its `kelime` column holds
# the words that are removed from the corpus below.
edatbag <- read_csv("edatbag.csv")

# Build the corpus from the text column of `yazar` (a data frame created in
# the xmlToDataFrame.R script) and normalise it one step at a time:
# lower-case, strip punctuation, strip digits, drop the custom stop words,
# then drop the English stop words.
doc.corpus <- Corpus(VectorSource(yazar$text)) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(content_transformer(removePunctuation)) %>%
  tm_map(content_transformer(removeNumbers)) %>%
  tm_map(removeWords, edatbag$kelime) %>%
  # Add your own corpus-specific words to filter here if needed, e.g.:
  # tm_map(removeWords, c("he", "var", "sen", "gel", "amk")) %>%
  tm_map(removeWords, stopwords("english"))
# Simple helper for stripping leftover URL fragments: deletes every run of
# alphanumeric characters that starts with "http".
#   x: character vector of document text.
#   Returns `x` with the matching runs replaced by the empty string.
removeURL <- function(x) gsub('http[[:alnum:]]*', '', x)

# Custom (non-tm) functions are wrapped in content_transformer() so that
# tm_map() keeps each element a proper text document; passing the bare
# function can leave plain character vectors in a VCorpus, which
# TermDocumentMatrix() then rejects.
myCorpus <- tm_map(doc.corpus, content_transformer(removeURL))

# Collapse the runs of blanks left behind by the removals above.
myCorpus <- tm_map(myCorpus, stripWhitespace)
# Term-document matrix of the cleaned corpus.
tdm <- TermDocumentMatrix(myCorpus)

# Dense copy of the matrix; reuses `tdm` instead of building the
# term-document matrix a second time.
mtdm <- as.matrix(tdm)

# Total count of each term across all documents, most frequent first.
term_frequency <- sort(rowSums(mtdm), decreasing = TRUE)

# Print the 70 most used words together with their counts. head() returns
# only what exists, so a corpus with fewer than 70 terms is not padded
# with NA entries.
head(term_frequency, 70)

# Same information as a two-column data frame (word, freq) without row names.
text_data <- data.frame(word = names(term_frequency), freq = term_frequency)
text_data <- remove_rownames(text_data)
# Visualise the word frequencies as a word cloud.
# Fix the RNG seed first (the "roll of the dice") so that any random draws
# made while drawing the cloud are repeatable between runs.
set.seed(1234)
wordcloud::wordcloud(
  words = text_data$word,
  freq = text_data$freq,
  min.freq = 85,
  max.words = 30,
  random.order = FALSE,
  rot.per = 0.35,
  colors = RColorBrewer::brewer.pal(8, "Dark2")
)
# Take a subset (here by word frequency, but any other criterion would do)
# and plot it: keep the terms that occur at least 70 times.
w <- term_frequency[which(term_frequency >= 70)]
# las = 2 draws the axis labels perpendicular to the axis.
barplot(w, las = 2)