-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserver.R
98 lines (89 loc) · 2.91 KB
/
server.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
library(shiny)
options(shiny.trace = F) # change to T for trace
library(shinysky)
shinyServer(function(input, output) {
library(tm)
library(RWeka)
library(data.table)
library(SnowballC)
library(stringr)
# process.=cmpfun(process)
# classify.=cmpfun(classify)
# makeCorpus.=cmpfun(makeCorpus)
## FUNCTION DEFINITIONS ##
# Make Corpus and do transformations
# Build a tm corpus from `x` and normalise it.
#
# The transformations are applied in this order: lowercase, stem, strip
# punctuation. Whitespace stripping, stop-word removal and number removal are
# not applied.
#
# x: character vector; each element becomes one document.
# Returns the transformed corpus.
makeCorpus <- function(x) {
  transformations <- list(
    content_transformer(tolower),
    stemDocument,
    removePunctuation
  )
  Reduce(
    function(docs, transformation) tm_map(docs, transformation),
    transformations,
    Corpus(VectorSource(x))
  )
}
# Clean raw text and break it into sentence-like fragments.
#
# Steps, in order:
#   1. delete commas, runs of three or more dots, and the characters : ' | { }
#   2. split on . ? ! - ( ) and double quotes
#   3. trim leading/trailing whitespace of every fragment, turn "~", "/" and
#      "+" into spaces, and collapse a few split contractions
#      ("it s " -> "its ", "i don t" -> "i dont", ...)
#   4. drop fragments that are empty or a single character long
#
# x: character vector of raw text (NULL and "" are accepted).
# Returns a character vector of fragments; character(0) when nothing is left.
process <- function(x) {
  # Characters that are simply deleted.
  for (pattern in c(",", "\\.{3,}", "\\:", "\\'", "\\|", "\\{", "\\}")) {
    x <- gsub(pattern, "", x)
  }

  # Fragment boundaries. strsplit() returns list() for zero-length input and
  # unlist(list()) is NULL, which the next strsplit() rejects with
  # "non-character argument"; coercing after every split keeps x a character
  # vector even when the text has been reduced to nothing.
  boundaries <- c("[\\.]{1}", "\\?+", "\\!+", "\\-+", "\\(+", "\\)+", "\\\"")
  for (pattern in boundaries) {
    x <- as.character(unlist(strsplit(x, pattern)))
  }

  # Whitespace trimming happens before the separator replacements, so a
  # fragment that starts with "/" or "+" keeps a leading space.
  x <- gsub("^\\s+", "", x)
  x <- gsub("\\s+$", "", x)
  x <- gsub("\\s*~\\s*", " ", x)
  x <- gsub("\\/", " ", x)
  x <- gsub("\\+", " ", x)

  # Re-join contractions that arrive with a space where the apostrophe was.
  x <- gsub("it s ", "its ", x)
  x <- gsub("i m not", "im not", x)
  x <- gsub("i didn t", "i didnt", x)
  x <- gsub("i don t", "i dont", x)
  x <- gsub(" i m ", " im ", x)

  # Discard single-character and empty fragments.
  x <- x[which(nchar(x) != 1)]
  x[which(nchar(x) != 0)]
}
# Predict the next word for the text `x`.
#
# The input is cleaned with process() and makeCorpus(), its last two words are
# used as a prefix to search the `grams` column of the frequency table `afreq`,
# and the final word of the matching gram with the highest `counts` is
# returned. "the" is returned when nothing matches.
#
# x: character string typed by the user.
# Returns a single character string.
# NOTE(review): the `[order(-counts)]` lookup assumes `afreq` is a data.table
# with columns `grams` and `counts` -- confirm against the RDS file.
getPred <- function(x) {
  # Transform the input as the training set was (lowercase, stem, strip
  # punctuation etc.); non-ASCII characters become spaces.
  cleaned <- iconv(x, to = "ASCII", sub = " ")
  cleaned <- paste0(process(cleaned), collapse = " ")
  corpus <- makeCorpus(cleaned)
  text <- as.character(corpus[[1]][1])

  # Split into words; leading whitespace would otherwise yield an empty first
  # token that ends up inside the search prefix.
  words <- unlist(strsplit(text, "\\s+"))
  words <- words[nzchar(words)]

  # Last two words of the sentence (fewer when the input is shorter).
  history <- paste(tail(words, 2), collapse = " ")
  histstring <- str_replace_all(history, "[[:punct:]]", "?")

  # Candidate grams that start with the history, most frequent first.
  Tfreq <- afreq
  Tpred <- data.table(
    Tfreq[grep(paste0("^", histstring, " "), Tfreq$grams), ][order(-counts)]
  )

  # The prediction is the last word of the top candidate.
  pred <- unlist(strsplit(Tpred[1]$grams, "\\s+"))
  pred <- pred[length(pred)]

  # No candidate (NA) or an empty gram (zero-length result): fall back.
  if (length(pred) == 0L || is.na(pred)) {
    pred <- "the"
  }
  pred
}
# Trigrams: frequency table that getPred() reads as `afreq`.
afreq <- readRDS("ALL.no4counts.RDS")

# Byte-compile the predictor before wiring it to the output.
library(compiler)
getPred. <- cmpfun(getPred)

# OUTPUT PREDICTION #
output$prediction <- renderText({
  predicted <- getPred.(input$text)
  as.character(predicted)
})
})