-
Notifications
You must be signed in to change notification settings - Fork 197
/
Copy pathTurbidityCompareBBvsCoombs.r
298 lines (233 loc) · 8.76 KB
/
TurbidityCompareBBvsCoombs.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
# Compare Turbidity at several sites on French Creek.
# 2014-04-20 CJS ggplot, lsmeans etc
# French Creek was monitored for several months at two sites (Barclay Bridge and Coombs).
# At the synoptic times, several water quality variables were measured, including
# Turbidity in NTU.
#
# We will compare the Turbidity between Barclay Bridge and Coombs
#
# Lines starting with ##---part001b; or ##---part001e; bracket the source
# line for inclusion by LaTex and usually are not coded.
#
options(useFancyQuotes=FALSE) # renders summary output correctly
# Load the necessary libraries
library(doBy)
library(ggplot2)
library(gridExtra)
library(lsmeans)
library(plyr)
library(reshape2)
# Helper functions (the sf.* routines called later in this script)
# NOTE(review): presumably defined in this sourced file -- confirm
source('../../schwarz.functions.r')
# Use a wide console so that printed tables are not wrapped
options(width=200)
# Import the raw readings. Everything printed between sink(...) and sink()
# is written to the text file and, because split=TRUE, also to the console.
sink("TurbidityCompareBBvsCoombs-R-001.txt", split = TRUE)
##---part001b;
wq <- read.csv("TurbidityCompare.csv", header = TRUE)
# Retain only the first three columns of the file; the remaining
# columns are not used in this comparison
wq <- wq[, seq_len(3)]
print(wq)
##---part001e;
sink()
#*************************************************************************
# Analysis 1: work with the log(ratio) of the two readings
# logratio = log(BB / Coombs), computed separately for every row
sink("TurbidityCompareBBvsCoombs-R-003.txt", split = TRUE)
##---part003b;
wq$logratio <- with(wq, log(BB / Coombs))
wq[seq_len(6), ]
##---part003e;
sink()
# Get dot plot to see if any outliers etc
# The horizontal reference line at 0 corresponds to a ratio of 1,
# i.e. the same turbidity at both sites.
##---part004b;
plot004 <- ggplot(data=wq, aes(x="Difference", y=logratio))+
   # Jitter horizontally only: height=0 keeps every point at its actual
   # log(ratio) value instead of applying the default vertical jitter.
   geom_jitter(position=position_jitter(width=0.05, height=0))+
   geom_hline(yintercept=0)+
   #geom_boxplot(notch=TRUE, alpha=0.2)+
   ggtitle("Plot of log(ratio) of turbidity")+
   ylab("log(ratio) - BB/Coombs - turbidity")
plot004
##---part004e;
# Spell out 'filename' rather than relying on partial matching of 'file'
ggsave(plot=plot004, filename='TurbidityCompareBBvsCoombs-R-004.png')
# Dot plot using Base R graphics
# (no subtitle: a strip chart shows the individual points and has no
#  whiskers, so the former 'Whiskers extend to range of data' caption
#  did not describe this plot)
stripchart(wq$logratio,
   vertical=TRUE, method="jitter", jitter=.1,
   main="Log ratio (BB/Coombs) NTU Units",
   xlab='', ylab='Log ratio (BB/Coombs)')
# Do the formal test (one-sample t-test on the log(ratio) values)
# and find the se by "hand"
sink("TurbidityCompareBBvsCoombs-R-005.txt", split=TRUE)
##---part005b;
result <- t.test(wq$logratio)
names(result)
# With the default null value of 0, t = estimate/se, so se = estimate/t
result$se.meanlogratio <- result$estimate / result$statistic
# Convert back to ratio on the anti-log scale
result$ratio <- exp(result$estimate)
# The se on the ratio scale is taken as (se of the mean log-ratio) * ratio
result$se.ratio<- result$se.meanlogratio * result$ratio
print(result)
cat("Estimated mean log-ratio is ", result$estimate," (SE ", result$se.meanlogratio, ")\n")
cat("Estimate median ratio (BB/Coombs) is", result$ratio," (SE ", result$se.ratio, ")\n")
##---part005e;
sink()
##---part006b;
# Add the estimated mean (open triangle) and its confidence interval
# (error bar) to the dot plot
plot006 <- plot004 +
   annotate("point", x="Difference", y=result$estimate, size=6, shape=2)+
   annotate("errorbar", x="Difference",
            ymin=result$conf.int[1], ymax=result$conf.int[2], width=0.2)+
   ylab("Log(ratio) with confidence interval")
plot006
##---part006e;
# Spell out 'filename' rather than relying on partial matching of 'file'
ggsave(plot=plot006, filename="TurbidityCompareBBvsCoombs-R-006.png")
# Base R graphics version of the same display
stripchart(wq$logratio,
           vertical = TRUE,
           method   = "jitter",
           jitter   = 0.1,
           add      = FALSE)
# dashed reference line at log(ratio) = 0
abline(h = 0, lty = 2)
# heavy dotted line at the estimated mean log(ratio)
abline(h = result$estimate, lty = 3, lwd = 3)
# vertical segment at x = 0.9 spanning the confidence interval
segments(x0 = 0.9, y0 = result$conf.int[1],
         x1 = 0.9, y1 = result$conf.int[2],
         lty = 2, lwd = 3)
#*************************************************************************
# Analysis 2: paired t-test on the two columns of readings
# Both columns are logged first, so the mean paired difference is again
# the mean log(ratio)
sink("TurbidityCompareBBvsCoombs-R-010.txt", split = TRUE)
##---part010b;
wq$logBB     <- log(wq$BB)
wq$logCoombs <- log(wq$Coombs)
result <- t.test(wq$logBB, wq$logCoombs, paired = TRUE)
names(result)
# se recovered from the test output as estimate / t-statistic
result$se.meanlogratio <- result[["estimate"]] / result[["statistic"]]
# Back-transform to the ratio (anti-log) scale
result$ratio    <- exp(result[["estimate"]])
result$se.ratio <- result[["se.meanlogratio"]] * result[["ratio"]]
print(result)
cat("Estimated mean log-ratio is ", result$estimate," (SE ", result$se.meanlogratio, ")\n")
cat("Estimate median ratio (BB/Coombs) is", result$ratio," (SE ", result$se.ratio, ")\n")
##---part010e;
sink()
#*************************************************************************
# Analyze as a linear model using aov() and lm()
# We first need to stack the BB and Coombs readings so that there is
# one row per (SampleTime, Site) combination
sink('TurbidityCompareBBvsCoombs-R-019.txt', split=TRUE)
##---part019b;
wq2 <- melt(wq,
            id.vars="SampleTime",
            measure.vars=c("BB","Coombs"),
            variable.name="Site",
            value.name="Turbidity")
wq2$logTurbidity <- log(wq2$Turbidity)
# Show a few rows from the top and from further down the stacked data
wq2[ c(1:3, 16:18),]
##---part019e;
sink()
# The treatment (Site) and blocking (SampleTime) variables must both be
# stored as factors before the model is fit
sink("TurbidityCompareBBvsCoombs-R-020.txt", split = TRUE)
##---part020b;
wq2[c("Site", "SampleTime")] <- lapply(wq2[c("Site", "SampleTime")], factor)
str(wq2)
##---part020e;
sink()

# Tabulate the number of readings for each SampleTime x Site combination
# to see whether every block is complete
sink("TurbidityCompareBBvsCoombs-R-checkcomplete.txt", split = TRUE)
##---partcheckcompleteb;
xtabs(~ SampleTime + Site, data = wq2)
##---partcheckcompletee;
sink()
sink("TurbidityCompareBBvsCoombs-R-021.txt", split = TRUE)
##---part021b;
# Per-site summary statistics of logTurbidity.
# No se is reported here because the design is not a CRD.
report <- ddply(wq2, "Site", sf.simple.summary, variable = "logTurbidity")
report
##---part021e;
sink()

# The same kind of summary via the doBy package (n, mean and sd by Site).
# Again no se, because the design is not a CRD.
report <- summaryBy(logTurbidity ~ Site, data = wq2, FUN = c(length, mean, sd))
report
# Fit the linear model, then print the ANOVA table to test for effects.
#   - both explanatory variables must be FACTORS in R
#   - the blocking variable (SampleTime) goes first in the model formula
sink("TurbidityCompareBBvsCoombs-R-022.txt", split = TRUE)
##---part022b;
result <- lm(logTurbidity ~ SampleTime + Site, data = wq2)
anova(result)
##---part022e;
sink()
##---partdiagb;
# Check the assumptions of the ANOVA model using autoplot and fortify
# as defined in my schwarz.functions
plotdiag <-sf.autoplot.lm(result, which=c(1:3,5), mfrow=c(2,2))
plotdiag
##---partdiage;
# Spell out 'filename' rather than relying on partial matching of 'file'
ggsave(plot=plotdiag, filename='TurbidityCompareBBvsCoombs-R-diag.png')
# Check the assumptions of the ANOVA model using Base R graphics
# Arrange the diagnostic plots in a 2 x 2 grid, then go back to one plot per page
layout(matrix(1:4, nrow=2))
plot(result)
layout(1)
sink("TurbidityCompareBBvsCoombs-R-lsmeansreport.txt", split = TRUE)
##---partlsmeansobjb;
# Build the lsmeans object for Site and report the basic estimates of the
# marginal means.
# NOTE(review): adjust='tukey' is supplied here even though this first report
# is labelled as unadjusted -- confirm it does not affect the summary below.
result.lsmo <- lsmeans(result, ~ Site, adjust = "tukey")
cat("Marginal means (not adjusted for simultaneous coverage)\n\n")
summary(result.lsmo, infer = TRUE)
##---partlsmeansobje;
sink()

# Same marginal means, now requesting an adjustment in the summary call;
# this report goes to the console only
cat("Marginal means (adjusted for simultaneous coverage)\n\n")
summary(result.lsmo, infer = TRUE, adjust = "tukey")
sink('TurbidityCompareBBvsCoombs-R-cldreport.txt', split=TRUE)
##---partcldreportb;
# Get the compact letter display (cld) from the lsmeans object
result.cld <- cld(result.lsmo)
result.cld
##---partcldreporte;
sink()
##---partcldplotsb;
# Make a bar plot of the cld display
plotcld <- sf.cld.plot.bar(result.cld, variable="Site")
plotcld <- plotcld +
        xlab("Site")+
        ylab("Mean logTurbidity (with 95% ci)")+
        ggtitle("Comparison of mean logTurbidity with cld")
plotcld
# Make a line graph of the cld display
plotcldb <- sf.cld.plot.line(result.cld, variable="Site")#, ciwidth=0.1)
plotcldb <- plotcldb +
        xlab("Site")+
        ylab("Mean logTurbidity (with 95% ci)")+
        ggtitle("Comparison of mean logTurbidity with cld")
plotcldb
##---partcldplotse;
# Spell out 'filename' rather than relying on partial matching of 'file'
ggsave(plot=plotcld, filename='TurbidityCompareBBvsCoombs-R-cldbar.png')
ggsave(plot=plotcldb, filename='TurbidityCompareBBvsCoombs-R-cldline.png')
# Pairwise differences between sites with NO adjustment for multiple
# comparisons (console only)
result.pairs <- pairs(result.lsmo, adjust = "none")
summary(result.pairs, infer = TRUE)

sink("TurbidityCompareBBvsCoombs-R-pairsreport.txt", split = TRUE)
##---partpairsb;
# Pairwise differences again, now with the Tukey adjustment for multiplicity
result.pairs <- pairs(result.lsmo, adjust = "tukey")
summary(result.pairs, infer = TRUE)
##---partpairse;
sink()
# Make a plot of the differences
##---partpairsplotb;
result.pairs.ci <- confint(result.pairs) # extract the ci values
result.pairs.ci
plotdiff <- ggplot(result.pairs.ci, aes(contrast, estimate, ymin = lower.CL, ymax = upper.CL)) +
     geom_point(size=4)+
     geom_linerange(size=1.5)+
     # Dashed reference line at a difference of 0. geom_hline states this
     # directly; the previous geom_abline call misspelled its 'intercept'
     # argument.
     geom_hline(yintercept=0, linetype=2)+
     ylab("Estimated diff and 95% ci")+
     xlab("Contrast")+
     ggtitle("Estimated pairwise differences and ci")
plotdiff
##---partpairsplote;
# Spell out 'filename' rather than relying on partial matching of 'file'
ggsave(plot=plotdiff, filename='TurbidityCompareBBvsCoombs-R-pairdiff.png')
# Alternative display of the same pairwise differences: convert to a glht
# object and use its default confint and plot methods
result.pairs.glht    <- as.glht(result.pairs)
result.pairs.glht.ci <- confint(result.pairs.glht)
result.pairs.glht.ci$confint   # the interval estimates
# Set the plot margins (left margin of 9 lines) while drawing, then restore
# the previous graphics settings
old.par <- par(mar = c(5, 9, 4, 2))
plot(result.pairs.glht)
par(old.par)