AK_SWE_NLR.R
#This model uses a nonlinear regression to predict snow water equivalent (SWE)
#The code used to generate and fit the model was written by Eric Sproles; the rest was written by Sam Neitlich
#Set working directory
setwd("~/Documents/GPHY484_491/491Proj") #Change to your wd
#Install and load packages (installs only need to be run once)
install.packages('tidyverse')
install.packages('caret')
install.packages('hydroGOF')
install.packages('zeallot')
install.packages('raster')
library(tidyverse)
library(caret)
library(hydroGOF)
library(ggplot2)
library(stringr)
library(raster)
library(zeallot)
## Read in files from SNOTEL sites to develop parameters with a subset of the sites
BASE1 <- read.csv(file.choose()) #Read in nlr_inputs_base.csv here
#Define functions based on the equations in Leibowitz et al.
S_EXP <- function(PRISM_T,S1,S2) (exp(-(PRISM_T + S1)/S2))
C_SNOW <- function(PRISM_T,S1,S2) (1 - (1/(1 + S_EXP(PRISM_T,S1,S2) )))
M_EXP <- function(PRISM_T,M1,M2) (exp(-(PRISM_T + M1)/M2))
C_MELT <- function(PRISM_T,M1,M2) (1/(1 + M_EXP(PRISM_T,M1,M2)))
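#In this formulation, C_SNOW is a reverse sigmoid of temperature (near 1 when cold, near 0 when warm)
#giving the fraction of surplus precipitation that accumulates as snow, while C_MELT is a forward
#sigmoid giving the fraction of stored SWE that melts, so (1 - C_MELT) is the fraction retained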
#Fit a reverse sigmoid nonlinear regression to the data
baseswe.mod <- nls(SNOTEL_SWE ~ SURPLUS * C_SNOW(PRISM_T,S1,S2)
+ PREV_SWE * (1 - C_MELT(PRISM_T,M1,M2)),
data=BASE1,start=list(S1 = -4, S2 = 2, M1 = -3, M2 = 1),trace = TRUE)
#S1 = -4, S2 = 5, M1 = -10, M2 = 0.1 are the original starting parameters used in the Bristol Bay model
#I updated them to S1 = -4, S2 = 2, M1 = -3, M2 = 1 for this model, but good starting values are location-dependent
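#A quick, optional sketch for eyeballing candidate starting values before running nls():
#plot the two sigmoids over a plausible temperature range (the range and values here are illustrative)
temps <- seq(-20, 20, by = 0.5)
plot(temps, C_SNOW(temps, S1 = -4, S2 = 2), type = "l", col = "blue", ylim = c(0, 1),
     xlab = "PRISM temperature (C)", ylab = "Fraction")
lines(temps, C_MELT(temps, M1 = -3, M2 = 1), col = "red")
legend("right", legend = c("C_SNOW", "C_MELT"), col = c("blue", "red"), lty = 1)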
#Summarize
summary(baseswe.mod)
#This shows the parameter estimates that were optimized for this model
#Predict SWE to plot--note that we're predicting on the same dataset we trained the model on here
BASE1['Predicted_SWE'] = predict(baseswe.mod)
#Generate linear model of predicted vs. observed SWE and summarize model
lmod <- lm(Predicted_SWE ~ SNOTEL_SWE, data = BASE1)
summary(lmod)
#Plot the linear model in greater detail and fit regression line
ggplot(BASE1, aes(SNOTEL_SWE, Predicted_SWE))+
geom_point()+
labs(title="Predicted vs. Observed SWE", y= "Predicted SWE (mm)", x = "Observed SWE (mm)")+
geom_smooth(method="lm")+
theme_classic()+
annotate("text", x=c(73,27,50), y=c(215,200,180),
label=c("Pred. SWE = 0.883 (Obs. SWE) + 1.731",
expression(R^"2": 0.9698),
"Adjusted p-value: 2.2e-16"))
## This is the same as above but uses all of the SNOTEL sites in the model
COMB1 <-read.csv(file.choose()) #Read in file containing all sites (nlr_inputs_comb.csv)
#Define functions
S_EXP <- function(PRISM_T,S1,S2) (exp(-(PRISM_T + S1)/S2))
C_SNOW <- function(PRISM_T,S1,S2) (1 - (1/(1 + S_EXP(PRISM_T,S1,S2) )))
M_EXP <- function(PRISM_T,M1,M2) (exp(-(PRISM_T + M1)/M2))
C_MELT <- function(PRISM_T,M1,M2) (1/(1 + M_EXP(PRISM_T,M1,M2)))
#Fit regression
combswe.mod <- nls(SNOTEL_SWE ~ SURPLUS * C_SNOW(PRISM_T,S1,S2)
+ PREV_SWE * (1 - C_MELT(PRISM_T,M1,M2)),
data=COMB1,start=list(S1 = -4, S2 = 2, M1 = -3, M2 = 1),trace = TRUE)
#Summarize model and use to predict outputs
summary(combswe.mod)
COMB1['Predicted_SWE'] = predict(combswe.mod)
#Fit linear model and display with ggplot2 -- same as above (note: the annotation values below were carried over from the BASE1 plot and may need updating for this fit)
lmod <- lm(Predicted_SWE ~ SNOTEL_SWE, data = COMB1)
summary(lmod)
ggplot(COMB1, aes(SNOTEL_SWE, Predicted_SWE))+
geom_point()+
labs(title="Predicted vs. Observed SWE", y= "Predicted SWE (mm)", x = "Observed SWE (mm)")+
geom_smooth(method="lm")+
theme_classic()+
annotate("text", x=c(73,27,50), y=c(215,200,180),
label=c("Pred. SWE = 0.883 (Obs. SWE) + 1.731",
expression(R^"2": 0.9698),
"Adjusted p-value: 2.2e-16"))
#Create a random sample of the data and split into separate training and testing datasets
#This will provide a more robust analysis of model performance than what we did above.
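#(Optional) Setting a seed first makes the random split reproducible; the value 42 is arbitrary
set.seed(42)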
random_sample <- createDataPartition(COMB1$SNOTEL_SWE, p = 0.8, list = FALSE)
#Define training and testing datasets
training_dataset <- COMB1[random_sample, ]
testing_dataset <- COMB1[-random_sample, ]
#Fit the model, same as above
S_EXP <- function(PRISM_T,S1,S2) (exp(-(PRISM_T + S1)/S2))
C_SNOW <- function(PRISM_T,S1,S2) (1 - (1/(1 + S_EXP(PRISM_T,S1,S2) )))
M_EXP <- function(PRISM_T,M1,M2) (exp(-(PRISM_T + M1)/M2))
C_MELT <- function(PRISM_T,M1,M2) (1/(1 + M_EXP(PRISM_T,M1,M2)))
#Use training dataset as input this time
training.mod <- nls(SNOTEL_SWE ~ SURPLUS * C_SNOW(PRISM_T,S1,S2)
+ PREV_SWE * (1 - C_MELT(PRISM_T,M1,M2)),
data=training_dataset,start=list(S1 = -4, S2 = 2, M1 = -3, M2 = 1),trace = TRUE)
#Predict values and calculate four statistics
predictions <- predict(training.mod, testing_dataset)
data.frame(R2 = R2(predictions, testing_dataset$SNOTEL_SWE),
           RMSE = RMSE(predictions, testing_dataset$SNOTEL_SWE),
           MAE = MAE(predictions, testing_dataset$SNOTEL_SWE),
           NSE = NSE(predictions, testing_dataset$SNOTEL_SWE))
#A high R2 and NSE with low RMSE and MAE relative to observed SWE indicate the model generalizes well to the held-out data
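#(Optional) hydroGOF's gof() reports a fuller suite of goodness-of-fit statistics in one call
gof(sim = predictions, obs = testing_dataset$SNOTEL_SWE)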
#-------------------------------------------------
coefficients <- summary(combswe.mod)$coefficients #Extract the coefficient matrix from the model summary
c(S1, S2, M1, M2) %<-% coefficients[c(1, 2, 3, 4)] #Unpack the four parameter estimates (the first column of the matrix) into variables
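#With the estimates unpacked, SWE can also be computed straight from the equations
#(this duplicates predict(combswe.mod); shown only to illustrate using the unpacked parameters)
manual_swe <- COMB1$SURPLUS * C_SNOW(COMB1$PRISM_T, S1, S2) +
  COMB1$PREV_SWE * (1 - C_MELT(COMB1$PRISM_T, M1, M2))
head(manual_swe)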
#Import the zip file that we downloaded in Python here
zipF <- file.choose() #File should be in your Downloads folder
#Unzip the file to a directory (the working directory is preferable)
outDir <- "/Users/samneitlich/Documents/GPHY484_491/491Proj/" #Define the folder the zip file should be unzipped to (change to your path)
unzip(zipF, exdir = outDir)
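#(Optional) Confirm the monthly input files landed where expected before looping over them
list.files(outDir, pattern = "nlr_input.*\\.csv$")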
months <- c(10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9) #List months in water-year order (October first)
for (i in months) { #Iterate through every month
#Put in a leading 0 and determine the file name to import for that month
s <- str_pad(i, 2, pad = "0")
fname = paste("nlr_input", s, ".csv", sep = "")
#Read the csv and save to dataframe
input <- read.csv(file = fname, header = TRUE)
#nrow(input)
#Set the previous SWE for October to 0; otherwise, use the predicted SWE from the previous month
#This is why we sorted the months in order of the water year
if (i==10) {
input['PREV_SWE'] <- 0
} else {
input['PREV_SWE'] <- toRaster['SWE']
}
#Create a new dataframe with only the values used in prediction; this is necessary for the predict function to work
new <- data.frame(SURPLUS=c(input$SURPLUS),PRISM_T=c(input$PRISM_T), PREV_SWE=c(input$PREV_SWE))
#Predict the SWE for each location in the watershed
pred <- predict(baseswe.mod, newdata = new)
#Save predictions to column of dataframe
input['PredictedSWE'] <- pred
#Change all of the predicted SWE values that are below 0 to 0
input$PredictedSWE <- ifelse(input$PredictedSWE >0, input$PredictedSWE, 0)
#Use the same logic to change any values above 300 to 300, as this is roughly the maximum SWE we would expect based on the SNOTEL site data
input$PredictedSWE <- ifelse(input$PredictedSWE <300, input$PredictedSWE, 300)
#Create a new dataframe with only lat, lon, and predicted SWE
toRaster <- data.frame(X=c(input$X),Y=c(input$Y), SWE=c(input$PredictedSWE))
rst <- rasterFromXYZ(toRaster, crs = 4269) #Create raster from dataframe, crs = NAD 83
rst #Print a summary of the raster to the console
title = paste("Distribution of SWE values, month = ", i, sep="") #Set title
#Plot a histogram of values in the raster
hist(rst, main=title,
xlab = 'Predicted SWE (mm)',
ylab = 'Count',
col= "blue",
maxpixels=22000000)
plot(rst, main=title) #Plot the raster
}
#Now we can see histograms and rasters of the SWE values in this watershed for every month
#These files can easily be exported or saved.
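#A minimal export sketch (the filename is illustrative): writeRaster() saves the most recent raster (September) as a GeoTIFF
writeRaster(rst, filename = "predicted_swe_month09.tif", format = "GTiff", overwrite = TRUE)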
#Some of the lower values are getting overpredicted, and I keep capping them at 300.
#While this keeps the error from propagating too far, it's not a perfect solution.
#That said, the model performs pretty well throughout most of the watershed.