-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path_troubleShootingTmp.Rmd
190 lines (149 loc) · 9.05 KB
/
_troubleShootingTmp.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
---
title: "Optical Model no AOD"
author: "Simon Topp"
date: "6/7/2019"
output: html_document
editor_options:
  chunk_output_type: console
---
```{r setup, include=FALSE}
library(tidyverse)
library(feather)
library(googledrive)
library(viridis)
library(knitr)
library(sf)
library(rgdal)
library(maps)
library(magrittr)
library(mlbench)
library(caret)
library(randomForest)
library(doParallel)
library(furrr)
library(lubridate)
library(groupdata2)
library(CAST)
library(mapview)
library(onehot)
library(Metrics)
library(kableExtra)
library(ggpmisc)
library(stlplus)
library(Hmisc)
library(xgboost)
library(scales)
# Global chunk defaults: error, warning, message and echo are all switched off.
knitr::opts_chunk$set(
  error = FALSE,
  warning = FALSE,
  message = FALSE,
  echo = FALSE
)
```
```{r}
## Table of variables for manuscript
# Comma-separated list of model feature names (kept as a single string).
features <- "NR, BG, dWL, inStreamCat, PctCarbResidCat, Precip8110Cat, Tmean8110Cat, RunoffCat, ClayCat, SandCat, OmCat, PermCat, RckdepCat, WtDepCat, BFICat, ElevCat, AOD, KffactCat, CatAreaSqKm, PctAg2006Slp10Cat, NPDESDensCat, HydrlCondCat, PctImp2006Cat, pctUrban2006, pctForest2006, PctCrop2006Cat, pctWetland2006, WetIndexCat, areasqkm, meandused"

# One row per variable: display label and its description.
# Written row-wise so each label sits next to its own description; the row
# order is unchanged because later styling addresses rows by position.
# Fixes relative to the earlier version: 'Catchement' -> 'Catchment',
# 'NPDE' -> 'NPDES', and the Kffactor description no longer contains a raw
# line break inside the string.
# NOTE(review): the second sentence of the Kffactor description looks like it
# is missing its subject -- confirm the wording against the source metadata.
varTable <- tribble(
  ~Variable, ~Description,
  'Lake Area',
  'Lake area from NHD v2 (sq. km)',
  'Lake Depth',
  'Mean lake depth from NHD v2 (m)',
  'NIR/Red',
  'Band Ratio',
  'Blue/Green',
  'Band Ratio',
  'Forel-Ule Index',
  'Dominant Color Wavelength as defined by Wang et al. (2015)',
  'Network Status',
  'Binary on/off network status for lake based on NHD flowlines',
  'Percent Carbonate',
  'Percent of catchment area classified as lithology type: carbonate residual material',
  'Mean Precipitation',
  'PRISM climate data - 30-year normal mean precipitation (mm): Annual period: 1981-2010 within the catchment',
  'Mean Temperature',
  'PRISM climate data - 30-year normal mean temperature (C°): Annual period: 1981-2010 within the catchment',
  'Runoff',
  'Mean runoff (mm) within catchment',
  'Percent Clay',
  'Mean % clay content of soils (STATSGO) within catchment',
  'Percent Sand',
  'Mean % sand content of soils (STATSGO) within catchment',
  'Percent Organic Matter',
  'Mean organic matter content (% by weight) of soils (STATSGO) within catchment',
  'Soil Permeability',
  'Mean permeability (cm/hour) of soils (STATSGO) within catchment',
  'Bedrock Depth',
  'Mean depth (cm) to bedrock of soils (STATSGO) within catchment',
  'Water Table Depth',
  'Mean seasonal water table depth (cm) of soils (STATSGO) within catchment',
  'Base Flow Index',
  'Base flow is the component of streamflow that can be attributed to ground-water discharge into streams. The BFI is the ratio of base flow to total flow, expressed as a percentage, within catchment',
  'Elevation',
  'Mean catchment elevation (m)',
  'Atmospheric Optical Depth',
  'Atmospheric Optical Depth over observation pulled from MERRA 2 reanalysis data',
  'Kffactor',
  'Mean of STATSGO Kffactor raster within catchment. The Universal Soil Loss Equation (USLE) and represents a relative index of susceptibility of bare, cultivated soil to particle detachment and transport by rainfall',
  'Catchment Area',
  'Area of local catchment (square km)',
  'Percent Agriculture',
  '% of catchment area classified as ag land cover (NLCD 2006 classes 81-82) occurring on slopes > 10%',
  'NPDES Density',
  'Density of permitted NPDES (National Pollutant Discharge Elimination System) sites within catchment (sites/square km)',
  'Hydraulic Conductivity',
  'Mean lithological hydraulic conductivity (micrometers per second) content in surface or near surface geology within catchment',
  'Percent Impervious Surface',
  'Mean imperviousness of anthropogenic surfaces within catchment (NLCD 2006)',
  'Percent Urban',
  'Percent of catchment classified as either medium or high density urban (NLCD 2006)',
  'Percent Forest',
  'Percent of catchment classified as either deciduous, coniferous, or mixed forest (NLCD 2006)',
  'Percent Crop',
  'Percent of catchment area classified as crop land use (NLCD 2006 class 82)',
  'Percent Wetland',
  'Percent of catchment classified as woody or herbaceous wetland landcover (NLCD 2006)',
  'Topographic Wetness Index',
  'Mean Composite Topographic Index (CTI)[Wetness Index] within catchment'
)
# Style the variable table and save it as an image.
# Column 1 gets a right-hand border; the listed rows are set in bold.
varTable %>%
  kable() %>% # alternative tried earlier: kable(., 'latex', booktabs = TRUE)
  kable_styling(bootstrap_options = c("striped", "condensed")) %>%
  column_spec(1, border_right = TRUE) %>%
  row_spec(c(2, 3, 4, 5, 9, 10, 27, 29, 30, 16, 20), bold = TRUE) %>%
  save_kable("figures/FigTable.png", height = 9)
## Sample-size summary: lake counts per sample before and after QA/QC.
## The result stored in `sampTable` is the styled kable, not the raw tibble.
sampTable <- tibble(
  Sample = c(
    "2012 National Lake Assessment",
    "Large Random Sample",
    "Lakes Over 10 sq km",
    "Total Unique Lakes",
    "Total Lakes with >25 years of Observations"
  ),
  Initial.Count = c(1038, 18000, 1170, 19964, 8897),
  Post.QAQC.count = c(1029, 13362, 1105, 14971, 8897)
) %>%
  kable(
    col.names = c("Sample", "Initial Count", "Post QA/QC Count"),
    format.args = list(big.mark = ","),
    align = "c"
  ) %>%
  kable_styling()
```
```{r}
## Compare predictions from full-lake pulls against centre-point pulls.
## `lake.join` and `iteration` are expected to exist already in the session.

# Lakes in lake.join with areasqkm above 10
over10NLA <- filter(lake.join, areasqkm > 10)

# Stack the centre-point and full-lake prediction files for this iteration.
# Full-lake rows are labelled explicitly; the stack is then restricted to the
# COMIDs of the large lakes selected above.
pullComp <- bind_rows(
  read_feather(paste0("out/TS_Preds/NLA2012_cntr_", iteration, ".feather")),
  read_feather(paste0("out/TS_Preds/NLA2012_FullLake_", iteration, ".feather")) %>%
    mutate(lakeSamp = "NLA2012_FullLake")
) %>%
  filter(COMID %in% over10NLA$COMID)
# Keep COMID / system.index pairs that occur more than once in the stacked
# data, i.e. (per the original note) where predictions exist for both pulls.
counts <- pullComp %>%
  group_by(COMID, system.index) %>%
  summarise(count = n()) %>%
  filter(count > 1) %>%
  mutate(uID = paste0(COMID, system.index))

# One row per paired observation, one column of predicted values per pull type.
check <- pullComp %>%
  mutate(uID = paste0(COMID, system.index)) %>%
  filter(uID %in% counts$uID) %>%
  select(uID, value, lakeSamp) %>%
  spread(lakeSamp, value)

# Scatter of full-lake vs centre-point predictions with a 1:1 reference line
# and the adjusted R^2 of a linear fit printed at the top of the panel.
ggplot(check, aes(x = NLA2012_cntr, y = NLA2012_FullLake)) +
  geom_point() +
  geom_abline(color = "red") +
  labs(title = "Full Lake vs Centerpoint Predictions \nfor lakes over 10 sq km in the NLA2012 sample\n red line is 1:1") +
  stat_poly_eq(
    aes(label = paste(stat(adj.rr.label))),
    formula = y ~ x,
    parse = TRUE,
    label.y = Inf,
    vjust = 1.3
  )
## Median predicted value per lake, year, region and pull type, using only
## months 5 through 9. 2019 is excluded because its observations are incomplete.
summ.meds <- pullComp %>%
  filter(month %in% 5:9, year < 2019) %>%
  group_by(COMID, year, region, lakeSamp) %>%
  summarise(secchi.med = median(value))

## Number of median rows available per lake, joined to the lake attributes.
## NOTE: the "> 10 years of observations" screen is currently commented out,
## so no lakes are dropped at this step.
counts <- summ.meds %>%
  group_by(COMID) %>%
  summarise(count = n()) %>%
  # filter(count > 10) %>%
  left_join(lake.join)

#### Wide layout: one column of medians per year
summ.meds.wide <- summ.meds %>%
  filter(COMID %in% counts$COMID) %>%
  spread(year, secchi.med)

## Row counts per region, as a sanity check
summ.meds.wide %>%
  group_by(region) %>%
  summarise(count = n())
###### Bootstrap the yearly summer medians (1000 replicates)
## For every region / pull-type group, run boot() on the wide table and reduce
## the result with boot.summary(). `boot`, `boot.med` and `boot.summary` are
## expected to be defined elsewhere in the session.
bootstrapped.ts <- summ.meds.wide %>%
  group_by(region, lakeSamp) %>%
  nest() %>%
  mutate(
    boot.means = purrr::map(data, function(d) boot(d, boot.med, R = 1000)),
    boots.summ = purrr::map(boot.means, boot.summary)
  ) %>%
  select(-boot.means, -data) %>%
  unnest(boots.summ)

## Yearly bootstrap mean per region, one line per pull type.
bootstrapped.ts %>%
  filter(year < 2019) %>%
  ggplot(aes(x = year, y = mean, color = lakeSamp)) +
  # geom_errorbar(aes(ymin = mean - se, ymax = mean + se), color = 'grey60', alpha = .4) +
  geom_line() +
  theme_bw() +
  geom_point() +
  facet_wrap(~region, scales = "free", ncol = 2)
```
```{r}
# Scratch read of the Sentinel-2 descending-orbit shapefile; `check` is
# overwritten again a few lines below.
check <- st_read("/Users/simontopp/Downloads/Sentinel-2_Descending_Orbits_shape_file/Sentinel-2_Descending_Orbit_Shape_File.shp")

# List the layers in the NHDPlus geodatabase, then load the waterbody layer.
st_layers("/Users/simontopp/Downloads/NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_Lower48.gdb")
check <- st_read(
  "/Users/simontopp/Downloads/NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_Lower48.gdb",
  layer = "NHDWaterbody"
)

# Turn depth, volume and area into text rounded to 5 decimal places.
check2 <- mutate_at(
  check,
  vars(MeanDepth, LakeVolume, LakeArea),
  function(v) format(round(v, 5), nsmall = 5)
)

# Replace the -99999999 elevation value with -9999.
check2$ELEVATION <- replace(
  check2$ELEVATION,
  check2$ELEVATION == -99999999,
  -9999
)

# Write the cleaned waterbodies out as a shapefile.
st_write(check2, "in/NHD/NHDPlusV2_NHDWaterbodies.shp")
# Re-read the waterbody shapefile. The file name is spelled exactly as it was
# written ('NHDPlusV2', not 'NHDPLusV2') so the read also works on
# case-sensitive filesystems.
nhd <- st_read('in/NHD/NHDPlusV2_NHDWaterbodies.shp')

# Keep lakes whose area exceeds 62500.
# The area column was converted to formatted text with format() before the
# shapefile was written, so it is coerced back to numeric here; comparing the
# raw column against 62500 would be a string comparison.
# as.character() first makes the coercion safe for character, factor and
# numeric columns alike.
# NOTE(review): `LakeAre` is presumably the shortened shapefile field name for
# LakeArea -- confirm against names(nhd).
swotViz <- nhd %>% filter(as.numeric(as.character(LakeAre)) > 62500)

# Scratch: bounding box of the first lake and three of its corners
# (corner 1 = elements 1,2; corner 2 = elements 1,4; corner 3 = elements 3,2).
test <- swotViz[1, ]
check <- test %>% st_bbox(crs = 4269)
points <- st_sfc(
  st_point(c(check[1], check[2])),
  st_point(c(check[1], check[4])),
  st_point(c(check[3], check[2])),
  crs = 4269
)
# Width and height of a geometry's bounding box.
#
# i: a geometry accepted by st_bbox().
# Returns an unnamed list: element 1 is the distance between the corners built
#   from bbox elements (1,2) and (3,2), element 2 the distance between the
#   corners built from (1,2) and (1,4); both are rounded to one decimal.
# The corner points are always tagged with EPSG:4269.
calcDim <- function(i) {
  box <- st_bbox(i, crs = 4269)
  corners <- st_sfc(
    st_point(c(box[1], box[2])),
    st_point(c(box[1], box[4])),
    st_point(c(box[3], box[2])),
    crs = 4269
  )
  box_width <- round(st_distance(corners[1], corners[3])[1], 1)
  box_height <- round(st_distance(corners[1], corners[2])[1], 1)
  list(box_width, box_height)
}
# Quick check of calcDim() on a single lake.
calcDim(swotViz$geometry[3])

# Compute bounding-box dimensions for every lake in parallel.
# `multisession` replaces the deprecated `multiprocess` strategy of the future
# package.
plan(multisession)
test <- swotViz %>%
  mutate(
    dims = furrr::future_map(geometry, calcDim, .progress = TRUE),
    width = purrr::map_dbl(dims, 1),  # first element returned by calcDim()
    height = purrr::map_dbl(dims, 2)  # second element returned by calcDim()
  ) %>%
  select(-dims)
# Return to sequential evaluation once the parallel step is done.
plan(sequential)

# Keep only lakes whose bounding box exceeds 300 in both dimensions.
swotViz <- test %>% filter(width > 300 & height > 300)
# Scratch checks on the three corner points stored in `points`.
# The first call previously referenced an undefined object `point`.
st_distance(points[1], points[2])[1]
st_distance(points[1], points[3])[1]
# Does sf treat a bare point built from the bbox values as long/lat?
st_is_longlat(st_point(c(check[1], check[2]), dim = 'XY'))
```