forked from DoubleD1994/R_Tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
HandlingBigData.R
65 lines (51 loc) · 1.07 KB
/
HandlingBigData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Handling Big Data ----

# Directory that holds the tutorial data. It is kept in a single variable
# (rather than passed to setwd()) so that running this script does not change
# the working directory of the R session. Edit this path on another machine.
data_dir <- "/Users/daviddryburgh/Documents/R_Programming"
iris_csv <- file.path(data_dir, "Iris.csv")

# Fail early with a clear message if the input file is missing
if (!file.exists(iris_csv)) {
  stop("Cannot find input file: ", iris_csv, call. = FALSE)
}

# Load the ff package
library(ff)

# Read the CSV file as an ff data frame; read.csv does the actual parsing
irisff <- read.table.ffdf(
  file = iris_csv,
  FUN = "read.csv"
)

# Inspect the class
class(irisff)

# Inspect the column names
names(irisff)

# Inspect the first few rows
irisff[1:5, ]
# Attach biglm, which provides the biglm() linear-model fitter used below
library(biglm)

# Fit a simple linear regression of petal width on petal length,
# using the ff data frame read in earlier as the data source
model <- biglm(Petal.Width ~ Petal.Length, data = irisff)

# Show the fitted model's summary
summary(model)
# Materialise the two ff columns once (`[]` extracts their values) and reuse
# them for both the scatterplot and the regression line.
petal_length <- irisff$Petal.Length[]
petal_width <- irisff$Petal.Width[]

# Create a scatterplot
plot(
  x = petal_length,
  y = petal_width,
  main = "Iris Petal Length vs. Width",
  xlab = "Petal Length(cm)",
  ylab = "Petal Width (cm)"
)

# Get the fitted coefficients by name rather than by position inside the
# summary object's internal matrix; this also avoids computing the summary
# twice.
coefs <- coef(model)

# Get y-intercept from model
b <- coefs[["(Intercept)"]]

# Get slope from model
m <- coefs[["Petal.Length"]]

# Draw a regression line on plot
lines(
  x = petal_length,
  y = m * petal_length + b,
  col = "red",
  lwd = 3
)
# New observations to score with the fitted model.
# NOTE(review): Petal.Width is filled with zeros as a placeholder; presumably
# predict() for biglm objects needs the response column to be present in
# newdata -- confirm against the biglm documentation.
new_obs <- data.frame(
  Petal.Length = c(2, 5, 7),
  Petal.Width = rep(0, 3)
)

# Predict petal width for the new petal lengths
predict(model, newdata = new_obs)