# Longleymodel.R
# Multicollinearity treatment and linear regression on the Longley data set.
# Longley's Economic Regression data ----
# Data gathering and preparation.

# Install mlbench only when it is missing, so re-running the script does not
# reinstall the package from the network every time.
# NOTE(review): nothing else in this script appears to use mlbench; it is kept
# attached only to preserve the original behaviour. Confirm before dropping.
if (!requireNamespace("mlbench", quietly = TRUE)) {
  install.packages("mlbench", repos = "http://cran.us.r-project.org")
}
library("mlbench")

# `longley` ships with R's built-in `datasets` package. The previous call
# passed `packages = "mlbench"`; data() has no such argument (the real one is
# `package`), so the value was taken as another data set name to look up.
data("longley", package = "datasets")

# Work on a copy named `ss`; the rest of the script refers to this object.
ss <- longley

# Structure and per-column summary of the data.
str(ss)
summary(ss)
# Outlier checking and treatment ----
# Author's note from summary(ss): no missing values.

# Collect, for every column of `ss`, the values that boxplot.stats() flags as
# lying beyond the whiskers. An empty numeric vector means nothing was flagged.
# This replaces seven copy-pasted per-column calls (GNP.deflator, GNP,
# Unemployed, Armed.Forces, Population, Year, Employed) with one pass.
outlier_values <- lapply(ss, function(column) boxplot.stats(column)$out)
outlier_values
# Author's finding: NO OUTLIER in any of the columns.

# Visual check of GNP.deflator.
boxplot(ss$GNP.deflator)

# The spreadsheet viewer needs an interactive session with a display; skip it
# when the script is run in batch mode so the run is not interrupted.
if (interactive()) {
  View(ss)
}
# Visualising correlation and multicollinearity ----

# ggpairs() comes from GGally and vif() from car. Install each one only when
# it is missing (with an explicit CRAN mirror so this also works in batch
# mode), then attach both before they are used.
for (pkg in c("GGally", "car")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "http://cran.us.r-project.org")
  }
}
library(GGally)
library(car)

# Pairs-plot matrix over the seven columns of the data.
ggpairs(data = ss, columns = 1:7)

# Run a linear regression on the whole data and derive the variance
# inflation factor (VIF) of every predictor.
modell <- lm(Employed ~ ., data = ss)
vif(modell)
# Train / test split ----
# 75% of the rows go to the training set; the remaining rows form the test set.
smp_size <- floor(0.75 * nrow(ss))

# Fixed seed so the same rows are drawn on every run.
set.seed(100)
train_ind <- sample.int(nrow(ss), size = smp_size)

train <- ss[train_ind, ]
test <- ss[-train_ind, ]
# Treating multicollinearity on the training data ----
# Each step refits the regression with one predictor fewer and derives the
# variance inflation factors (VIF) again.

# Step 0: every column other than Employed as a predictor.
linear_m <- lm(Employed ~ ., data = train)
vif(linear_m)

# Step 1: GNP removed.
linear_mod <- lm(
  Employed ~ GNP.deflator + Unemployed + Armed.Forces + Population + Year,
  data = train
)
vif(linear_mod)

# Step 2: Year removed.
linear_mod1 <- lm(
  Employed ~ GNP.deflator + Unemployed + Armed.Forces + Population,
  data = train
)
vif(linear_mod1)

# Step 3: GNP.deflator removed.
linear_mod2 <- lm(Employed ~ Unemployed + Armed.Forces + Population, data = train)
vif(linear_mod2)

# Step 4: Armed.Forces removed; this is the model used for prediction.
linear_mod3 <- lm(Employed ~ Unemployed + Population, data = train)

# Diagnostics of the final model.
vif(linear_mod3)
summary(linear_mod3)
# Predict Employed on the held-out test data ----
Employ_pred <- predict(linear_mod3, newdata = test)
head(Employ_pred)

# Pair every prediction with the observed value from the SAME (test) rows.
# The earlier version used train$Employed here: the training vector is longer
# than the prediction vector, and cbind() recycled the predictions to fit, so
# the accuracy figures compared unrelated rows.
actuals_preds <- data.frame(
  actuals = test$Employed,
  predicteds = Employ_pred
)

# Correlation between observed and predicted values.
correlation_accuracy <- cor(actuals_preds)
correlation_accuracy
head(actuals_preds)

# Evaluation of predictions ----
# Mean absolute percentage error, expressed as a fraction of the actual value.
mape <- mean(
  abs(actuals_preds$predicteds - actuals_preds$actuals) / actuals_preds$actuals,
  na.rm = TRUE
)
mape