-
Notifications
You must be signed in to change notification settings - Fork 2
/
assignment-4.R
66 lines (49 loc) · 2.42 KB
/
assignment-4.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Divert both regular output and the message stream (messages, warnings,
# errors) to a single log file, so a non-interactive run leaves a transcript.
# NOTE(review): `append = TRUE` is passed together with a connection object
# rather than a file name -- confirm whether the log is actually appended to
# or overwritten on each run.
con <- file("assignment-4.log")
sink(file = con, append = TRUE, type = "output")
sink(file = con, append = TRUE, type = "message")
library(caret)
library(mlbench)
library(e1071)
# Fix the RNG seed so the random train/test split below (and any later
# resampling) is reproducible from run to run.
set.seed(1234567890)

# Load the data set. The ID column is used as row names, so it does not
# appear among the data columns.
mydata <- read.table("data.csv", sep = ",", header = TRUE, row.names = "ID")

# Fail early with a clear message if the target column is missing, instead of
# silently building an empty response further down.
stopifnot("IS_DEFAULT" %in% names(mydata))

# Sanity checks written to the log: any missing values, first rows,
# dimensions and per-column summaries.
any(is.na(mydata))
head(mydata)
dim(mydata)
summary(mydata)

# Random 80/20 split into training and test rows.
# seq_len() is safe for zero-row input (1:0 would yield c(1, 0)), and floor()
# makes the truncation of a fractional sample size explicit.
trainIndex <- sample(seq_len(nrow(mydata)), floor(0.8 * nrow(mydata)))
train <- mydata[trainIndex, ]
# Select the complement by positive index: mydata[-trainIndex, ] would return
# zero rows (not all rows) whenever trainIndex is empty.
test <- mydata[setdiff(seq_len(nrow(mydata)), trainIndex), ]

# Response for caret: IS_DEFAULT recoded as a two-level factor. The levels are
# pinned so their order does not depend on which classes happen to be present
# in the training sample.
Y <- factor(
  ifelse(train$IS_DEFAULT == "1", "yes", "no"),
  levels = c("no", "yes")
)
############################################# MODELS ####################################
############ BOOSTING ###############################################
# Gradient boosting ("gbm") on predictor columns 1:23 of the training rows.
# Resampling: 10-fold "repeatedcv" (no `repeats` given, so caret's default
# applies), with class probabilities and per-iteration progress logging.
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  classProbs = TRUE,
  verboseIter = TRUE
)
# Predictors are centred and scaled; candidate models are compared on
# accuracy, with tuneLength controlling the size of the tuning grid.
fit <- train(
  x = train[, 1:23],
  y = Y,
  method = "gbm",
  preProcess = c("center", "scale"),
  metric = "Accuracy",
  trControl = control,
  tuneLength = 2
)
print(fit)
##################################### RANDOM FOREST ###############################################
# Random forest ("rf") on the same 23 predictors, with the same resampling
# setup as above: 10-fold "repeatedcv", class probabilities, verbose progress.
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  classProbs = TRUE,
  verboseIter = TRUE
)
# Note: this overwrites the `fit` object of the previous section.
fit <- train(
  x = train[, 1:23],
  y = Y,
  method = "rf",
  preProcess = c("center", "scale"),
  metric = "Accuracy",
  trControl = control,
  tuneLength = 2
)
print(fit)
##################################### BAGGING ###############################################
# Bagged trees ("treebag") on the 23 predictors; the result is stored
# separately in `fit_bag`, so it does not overwrite `fit`.
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  classProbs = TRUE,
  verboseIter = TRUE
)
# NOTE(review): "treebag" may expose no tunable parameters, in which case
# tuneLength = 10 has no effect -- confirm.
fit_bag <- train(
  x = train[, 1:23],
  y = Y,
  method = "treebag",
  preProcess = c("center", "scale"),
  metric = "Accuracy",
  trControl = control,
  tuneLength = 10
)
print(fit_bag)
##################################### KNN ###############################################
# k-nearest neighbours ("knn") on the 23 predictors. Unlike the sections
# above, resampling here uses only 3 folds, while the tuning grid is larger
# (tuneLength = 20).
control <- trainControl(
  method = "repeatedcv",
  number = 3,
  classProbs = TRUE,
  verboseIter = TRUE
)
# Centring and scaling put all predictors on a comparable scale before
# distances are computed.
fit <- train(
  x = train[, 1:23],
  y = Y,
  method = "knn",
  preProcess = c("center", "scale"),
  metric = "Accuracy",
  trControl = control,
  tuneLength = 20
)
print(fit)
##################################### Logistic Regression ###############################################
# Logistic regression ("glm") using only predictor columns 6:23 (the first
# five columns are left out), without any pre-processing.
# Resampling: plain 10-fold CV; resampled performance values are not kept
# (returnResamp = 'none'), class probabilities are enabled.
objControl <- trainControl(
  method = 'cv',
  number = 10,
  returnResamp = 'none',
  verboseIter = TRUE,
  classProbs = TRUE
)
fit <- train(
  x = train[, 6:23],
  y = Y,
  method = "glm",
  metric = "Accuracy",
  trControl = objControl
)
# Show the resampling summary of the fitted model.
print(fit)
################### TEST-SET EVALUATION ##################
# `fit` is the most recently trained model (the logistic regression above).
# There is deliberately no plot(fit) here: plot() on a caret `train` object
# shows performance across tuning-parameter values, and a plain glm has none,
# so that call aborted the script with an error.

# Class probabilities for the held-out rows, one column per level of Y
# ("no", "yes"). type = "prob" is required: the default prediction type is a
# factor of class labels, which can be neither rounded nor indexed by column.
p <- predict(fit, test, type = "prob")

# Debug print: integer codes behind the 0/1-recoded *training* labels.
pred <- factor(ifelse(train$IS_DEFAULT == "1", "1", "0"))
head(unclass(pred))

# Turn P(default = "yes") into a hard 0/1 label by rounding. The column is
# selected by name so the result does not depend on the column order.
p <- round(p[["yes"]])

# Observed outcome for the test rows; selected by name, matching the column
# the training response was built from.
y <- test[["IS_DEFAULT"]]

# Confusion table: rows = predicted label, columns = observed label.
table(p, y)
#########################

# Release the diversions set up at the top of the script so console output
# and messages are restored, then close the log connection. The message sink
# must be reset first: a connection still in use as a sink cannot be closed.
sink(type = "message")
sink()
close(con)