# Script with all the functions needed in the PMIP4 benchmarking project.
# It must be called by init.R
# Created by Laia Comas-Bru in October 2020
# last modified: February 2021
# This script includes interpolate_spline_conserve_mean.R by Kamolphat Atsawawaranunt
#### function to suppress automatic output from cat(). Use it as: y <- quiet(FUNCTION) ####
quiet <- function(x) {
sink(tempfile())
on.exit(sink())
invisible(force(x))
}
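# Example (not run): suppress console output from a chatty call
# res <- quiet(print("loading..."))  # nothing is printed; res still holds the return value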
#### function for converting gregorian days to julian calendar ####
### FROM: https://rdrr.io/rforge/ncdf.tools/src/R/convertDateNcdf2R.R ###
convertDateNcdf2R = function(
##title<< Convert netCDF time vector to POSIXct R date object
  time.source ##<< numeric vector or netCDF connection: either a number of time units since
  ## the origin or a netCDF file connection. In the latter case, the time
  ## vector is extracted from the netCDF file. This file, and especially the
  ## time variable, has to follow the CF netCDF conventions.
, units = 'days' ##<< character string: units of the time source. If the source
## is a netCDF file, this value is ignored and is read from that file.
, origin = as.POSIXct('1800-01-01', tz = 'UTC') ##<< POSIXct object:
## Origin or day/hour zero of the time source. If the source
## is a netCDF file, this value is ignored and is read from that file.
, time.format = c('%Y-%m-%d', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M', '%Y-%m-%d %Z %H:%M', '%Y-%m-%d %Z %H:%M:%S')
)
##description<< This function converts a time vector from a netCDF file or a vector of Julian days (or seconds, minutes, hours)
## since a specified origin into a POSIXct R vector.
{
  close.file = FALSE
  if (inherits(time.source, 'character')) {
    if (file.exists(time.source)) {
      time.source = open.nc(time.source)
      close.file = TRUE # remember to close the connection opened here
    } else {
      stop(paste('File ', time.source, ' does not exist!', sep = ''))
    }
  }
  if (inherits(time.source, 'NetCDF')) {
attget.result <- try({
units.file <- infoNcdfAtts(time.source, 'time')[, 'value'][infoNcdfAtts(time.source, 'time')[, 'name'] == 'units']
origin.char <- sub('^.*since ', '', units.file)
units <- sub(' since.*', '', units.file)
}, silent = TRUE)
for (formatT in time.format) {
origin <- strptime(origin.char, format = formatT, tz = 'UTC')
if (!is.na(origin))
break
}
if (is.na(origin))
stop('Not possible to determine origin. Wrong format supplied?')
date.vec <- as.numeric(var.get.nc(time.source, 'time'))
} else {
if (!is.numeric(time.source))
stop('time.source needs to be numeric if not a netCDF file connection!')
date.vec <- time.source
}
if (!is.element(units, c('seconds', 'minutes', 'hours', 'days')))
stop(paste('Unit ', units, ' is not implemented.', sep = ''))
multiplicator <- switch(units, days = 60 * 60 * 24, hours = 60 * 60, minutes = 60, seconds = 1)
time.out <- origin + date.vec * multiplicator
if (origin < as.POSIXct('1582-10-30', tz = 'UTC'))
time.out <- time.out + 10 * 24 * 60 * 60
if (close.file)
close.nc(time.source)
##value<<
## POSIXct vector: time vector in native R format
return(time.out)
}
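# Example (not run): convert a numeric time vector by hand; a netCDF connection
# (RNetCDF) can be passed instead, in which case units and origin are read from
# the file's 'time' variable.
# convertDateNcdf2R(c(0, 31, 59), units = 'days',
#                   origin = as.POSIXct('1950-01-01', tz = 'UTC'))
# # -> "1950-01-01 UTC" "1950-02-01 UTC" "1950-03-01 UTC"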
#### Read in shape file for plotting ####
options("rgdal_show_exportToProj4_warnings" = "none") # mute PROJ4 export warnings from rgdal
library(rgdal)
quiet(shapefile <- readOGR(dsn=paste(getwd(), "/input_coast_shapefile/ne_110m_land/ne_110m_land.shp", sep=""), layer = 'ne_110m_land')) #laptop
shapefile_df_360 <- fortify(shapefile)
# Arrange longitudes to run from 0 to 360 instead of -180 to 180
index <- shapefile_df_360$long < 0
shapefile_df_360$long[index] <- shapefile_df_360$long[index] + 360
rm(index)
rm(shapefile)
quiet(shapefile <- readOGR(dsn=paste(getwd(), "/input_coast_shapefile/ne_110m_land_360/ne_110m_land.shp", sep=""), layer = 'ne_110m_land')) #laptop
shapefile_df_180 <- fortify(shapefile)
rm(shapefile)
mapWorld_grey <- geom_polygon(data= shapefile_df_180, aes(x = long, y = lat, group = group), fill = 'grey87', colour = 'grey22', size = 0.05, alpha=0.7)
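# Example (not run): the grey land layer can be added to any ggplot, e.g.
# ggplot() + mapWorld_grey + coord_fixed(xlim = c(-180, 180), ylim = c(-90, 90))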
#### function for plotting maps ####
plot_mtco_eg_disc <- function(mat_withlatlon, cols, brkpnt, title_name, varunits, shapefile_df){
mat <- mat_withlatlon
  startpnt <- brkpnt[-length(brkpnt)] # all but the last breakpoint
  endpnt <- brkpnt[-1]                # all but the first breakpoint
  brk_lab <- paste(signif(startpnt, digits = 3), signif(endpnt, digits = 3), sep = ' to ')
mapWorld <- geom_polygon(data = shapefile_df, aes(x = long, y = lat, group = group), fill = NA, colour = 'black', size = 0.05)
dt <- melt(mat)
colnames(dt) <- c('lon', 'lat', 'val')
dt <- subset(dt, !is.na(val))
dt$breaks <- cut(dt$val, breaks = brkpnt,
labels = brk_lab)
mp <- ggplot() +
geom_tile(data = dt, aes(x=lon, y=lat, colour = breaks, fill = breaks)) +
coord_fixed(xlim = c(-180, 180), ylim = c(-90,90), expand = F) +
theme_bw() +
xlab('Longitude [deg]') +
ylab('Latitude [deg]') +
scale_fill_manual(name = varunits,
breaks = brk_lab[brk_lab %in% sort(unique(dt$breaks))],
values = cols[brk_lab %in% sort(unique(dt$breaks))]) +
scale_colour_manual(name = varunits,
breaks = brk_lab[brk_lab %in% sort(unique(dt$breaks))],
values = cols[brk_lab %in% sort(unique(dt$breaks))]) +
theme(legend.position = 'left',
legend.key.width = unit(0.5, 'cm')
) + ggtitle(paste(title_name, ' min/max = ', round(min(mat, na.rm = T), 2), "/", round(max(mat, na.rm = T), 2), sep = '')) + mapWorld
return(mp)
}
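# Example (not run): a minimal sketch with a synthetic 2.5-degree field; the
# matrix dimnames become the lon/lat coordinates when melt() reshapes it.
# The colour palette and break choices below are illustrative only.
# lon <- seq(-178.75, 178.75, by = 2.5); lat <- seq(-88.75, 88.75, by = 2.5)
# mat <- matrix(rnorm(length(lon) * length(lat), sd = 10),
#               nrow = length(lon), dimnames = list(lon, lat))
# brk <- seq(-30, 30, by = 10) # 7 breakpoints -> 6 bins -> 6 colours
# plot_mtco_eg_disc(mat, cols = rev(RColorBrewer::brewer.pal(6, 'RdBu')),
#                   brkpnt = brk, title_name = 'demo', varunits = 'degC',
#                   shapefile_df = shapefile_df_180)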
#### Spline interpolation with conservation of mean ####
# interpolate_spline_conserve_mean.R
# Author: Kamolphat Atsawawaranunt
# This file contains a snippet of code which I have used for mean-preserving
# spline interpolation. The user may alter this in whichever way they want to
# suit their own need.
#
# The idea of how this was done was taken from:
# https://stats.stackexchange.com/questions/59418/interpolation-of-influenza-data-that-conserves-weekly-mean
# which follows methods described in:
# Harzallah, A., 1995. The interpolation of data series using a constrained iterating technique. Monthly weather review, 123(7), pp.2251-2254.
# DOI/URL: https://doi.org/10.1175/1520-0493(1995)123%3C2251:TIODSU%3E2.0.CO;2
# The function follows an iterative process:
# 1. Interpolate the data (in this case, using spline interpolation)
# 2. Calculate the mean of each of the time periods defined
# 3. Calculate the differences between the means of the time periods (from 2.) and the target means (the original data, or the residuals from the previous iteration)
# 4. If the differences between the means (step 3.) are more than the threshold value:
#    i. store the interpolated values.
#    ii. store the residuals, and interpolate the residuals (go back to step 1., and repeat)
# 5. If the differences between the means (step 3.) are less than the threshold value, or the maximum number of iterations is reached:
#    i. store the interpolated values
#    ii. sum up the interpolated values, and return these values.
#
# The function described at https://stats.stackexchange.com/questions/59418/interpolation-of-influenza-data-that-conserves-weekly-mean
# follows the same logic, but only works with equally spaced data (i.e. weekly)
# and was therefore edited.
# Define functions
interpol_spline_cons_mean <- function(y_points, month_len, max_iter, tol){
# Interpolate monthly timeseries data to daily.
#
# Args:
  # y_points: iterable (list or array), mean value at each timestep
  # month_len: iterable (list or array), number of days that each timestep represents
  # max_iter: integer, maximum number of iterations if convergence is never met
  # tol: numeric, the tolerance threshold for indicating convergence
  #
  # Returns:
  # An array of interpolated values with length equal to the total sum of month_len
#
#
# Example:
# y_points = c(12, 13, 14, 29, 32, 35, 33, 24, 18, 10, 8, 7) # data
# month_len = c(31, 28 ,31, 30, 31, 30, 31, 31, 30, 31, 30, 31) # month length the data represents
# y_interpolated = interpol_spline_cons_mean(y_points, month_len, 100, 0.01) # interpolate
  # unlist(lapply(unname(split(y_interpolated, rep(1:length(month_len), month_len))), mean)) # check that the means match the input within the tolerance
#
  # Check that month_len and y_points have the same length
  if (length(month_len) != length(y_points)){
    stop('Lengths of month_len and y_points are not the same.')
  } else {
x_points <- (cumsum(month_len) + 1) - ((month_len + 1)/2)
y_points_mat <- matrix(NA, ncol = length(y_points), nrow = max_iter+1)
# input starting value as x_points
y_points_mat[1,] <- y_points
# create cumulative number of days from start from month_len
max_x <- max(cumsum(month_len))
# create an array of the interpolated points
y_points_interp_mat <- matrix(NA, ncol = max_x, nrow = max_iter)
x_points_out <- 1:max_x
for (i in 1:max_iter){
splineoutpt <- spline(x_points, y_points_mat[i,], method = 'fmm', xout = x_points_out)
y_points_interp_mat[i,] <- splineoutpt$y
new_mean <- unlist(lapply(unname(split(splineoutpt$y, rep(1:length(month_len), month_len))), mean))
resid <- y_points_mat[i,] - new_mean
      if (max(abs(resid)) < tol){ # absolute residuals, so large negative misfits also block convergence
        cat("Converged after", i, "iterations with tolerance of", tol, "\n")
        break
      }
      y_points_mat[i + 1, ] <- resid
}
return(colSums(y_points_interp_mat, na.rm = T))
}
}
#### function opposite to "%in%" to be used in filters ####
`%notin%` <- Negate(`%in%`)
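# Example (not run): c(1, 2, 3) %notin% c(2, 4)  # -> TRUE FALSE TRUE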
##### create function to grid datasets ####
# input:
# - data file with lat and lon
# - grid data file with cells lat/lon ranges as columns
# output:
# - cbind of input grid with count(n) column and averaged observations
obs_data_to_grid <- function(grid, data) {
  for (n in 1:dim(grid)[1]) {
    # observations falling inside grid cell n
    newx <- data %>% filter(lat >= grid$lat_min[n] & lat < grid$lat_max[n] &
                              lon >= grid$lon_min[n] & lon < grid$lon_max[n])
    if (dim(newx)[1] == 0) {
      grid$count_n[n] <- NA
    } else {
      grid$count_n[n] <- dim(newx)[1] # how many data points per gridcell?
    }
    # average the observation columns (assumed to sit in columns 3 to 8)
    if (n == 1) {
      x_temp <- newx[, 3:8] %>% summarise_if(is.numeric, mean, na.rm = T)
    } else {
      x_temp[n, ] <- newx[, 3:8] %>% summarise_if(is.numeric, mean, na.rm = T)
    }
  }
  return(cbind(grid, x_temp))
}
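# Example (not run): a minimal sketch with a made-up two-cell grid. The toy
# data frame pads with six value columns because the function averages
# columns 3:8 by position.
# grid <- data.frame(lat_min = c(0, 10), lat_max = c(10, 20),
#                    lon_min = c(0, 0),  lon_max = c(10, 10))
# data <- data.frame(lat = c(2, 5, 15), lon = c(3, 7, 4),
#                    matrix(rnorm(18), nrow = 3,
#                           dimnames = list(NULL, paste0('v', 1:6))))
# obs_data_to_grid(grid, data) # cell 1 averages two points, cell 2 one point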
##### define function to find the closest lat/lon values #####
FindGridCoords <- function (myLat, myLon, data){
# data has at least two columns $lat and $lon
  outCoords <- data[which(data$lat == myLat & data$lon == myLon), ]
return (outCoords)
}
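# Example (not run): exact-match lookup; model_grid is a placeholder for any
# data frame carrying $lat and $lon columns.
# FindGridCoords(myLat = -88.75, myLon = 1.25, data = model_grid)
# # returns the matching row(s) of model_grid (zero rows if no exact match)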
##### functions to trim model names #####
trim_mode_name = function (x) { # strips a fixed 5-character suffix from a model name
  name_trim <- substr(x, 1, nchar(x) - 5)
  return (name_trim)
}
my_name_trim = function (x) { # used in DM3; strips a fixed 17-character suffix
  name_trim <- substr(x, 1, nchar(x) - 17)
  return (name_trim)
}
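# Example (not run): the 5-character suffix below is illustrative; the actual
# suffix length matches the project's file-naming scheme.
# trim_mode_name("CESM2_lgm1")  # -> "CESM2"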
##### matrixplot function for the score plotting (DM4 script)#####
matrixplot <- function (st) {
  # note: reads the data frame `data` from the calling environment
  ramp = c("darkolivegreen4", "palegreen1", "lemonchiffon1", "sandybrown", "white")
  bp <- ggplot(data = data %>% filter(step == st), aes(x = model, y = var, fill = factor(z_val))) +
geom_tile(width=1, height = 0.97, color="black", show.legend = T) +
scale_fill_manual(name = element_blank(),
labels = c("25% better than mean", "better than mean",
"worse than mean but better than random", "worse than random", "no significant improvement"),
limits = factor(c(1,2,3,4,5)),
values = ramp,
drop = F) +
guides(fill=guide_legend(nrow=3, byrow=F, x.intersp = 3, label.hjust =0, label.vjust = 0.5)) +
geom_text(aes(label=paste(val,"\n [",min,"-",max,"]",sep=""),
color = "black"), size=4) +
guides(color = FALSE) +
scale_color_manual(values =c("black")) +
theme_bw()+
theme(axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0.40, hjust=0.5,size=12,face="bold"),
axis.text.y = element_text(angle = 0, vjust = 0.25, hjust=0.5,size=12,face="bold"),
legend.position="top",
legend.text = element_text(size=13),
plot.caption = element_text(size=12, vjust = 2)) +
labs(caption = paste ("Step:", st,sep=""))
return(bp)
}
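# Example (not run): a sketch with made-up scores. matrixplot() expects the
# global data frame `data` to carry columns step, model, var, z_val, val,
# min and max; the names and values below are illustrative only.
# data <- expand.grid(model = c("modelA", "modelB"),
#                     var = c("MAT", "MTCO"), step = "12k")
# data$z_val <- sample(1:5, nrow(data), replace = TRUE)
# data$val <- round(runif(nrow(data)), 2)
# data$min <- data$val - 0.1; data$max <- data$val + 0.1
# matrixplot("12k")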
##### function to convert moisture from alpha to MI units #####
# MI = MAP / equilibrium evapotranspiration
# α = actual / equilibrium evapotranspiration
# Actual evapotranspiration is given by the Budyko formulation:
#   Ea = Eq * [1 + m - (1 + m^ω)^(1/ω)]   with ω = 3
# so α = 1 + m - (1 + m^3)^(1/3)
# Writing z = (1 + m^3)^(1/3), we have α = 1 + m - z, and differentiating gives
#   dm/dα = z^2 / (z^2 - m^2)
# A change in α then converts to MI units as: Δm = Δα * dm/dα
delta_alpha_to_MI <- function (mi, alpha) {
  z <- (1 + mi^3)^(1/3)
  MI <- alpha * (z^2 / (z^2 - mi^2)) # Δm = Δα * dm/dα
  # MI <- alpha * (z/(mi-z))^2 # incorrect (Sandy's)
  return (round(MI, 3))
}
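# Example (not run): convert an α anomaly of 0.1 to MI units at mi = 1
# delta_alpha_to_MI(mi = 1, alpha = 0.1)
# # z = 2^(1/3) ~ 1.26, dm/dα = z^2/(z^2 - 1) ~ 2.70, so result ~ 0.27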