-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path04_run-report.Rmd
161 lines (137 loc) · 4.71 KB
/
04_run-report.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
---
title: "Run Report"
output: html_document
date: '2022-12-19'
---
```{r setup, include=FALSE}
# Do not echo chunk source code in the rendered report.
knitr::opts_chunk$set(echo = FALSE)
library(tidyverse)
library(here)
# Root directory that the later chunks search recursively for run output.
# NOTE(review): relative path, so presumably resolved against the knit
# working directory -- confirm.
base_dir <- "MANDIFORE_runs"
```
# Logfile summary
This table lists every ensemble member for each site in the run and checks its logfile for ED2 errors and `PEcAn.ED2::model2netcdf.ED2()` errors. Ensembles that did not complete the full run but show no errors were most likely killed when the HPC walltime was reached.
```{r}
# Locate every logfile below base_dir (searched recursively).
logs <- list.files(
  base_dir,
  "logfile.txt",
  recursive = TRUE,
  full.names = TRUE
)

# Keep only the last 50 lines of each logfile.
log_ends <- map(logs, \(log_path) tail(read_lines(log_path), 50))
# TRUE for logs whose tail contains the "MODEL FINISHED" marker.
finished <- map_lgl(
  log_ends,
  \(lines) any(str_detect(lines, "MODEL FINISHED"))
)

# TRUE for logs whose tail contains either of the two error markers.
ed_error <- map_lgl(log_ends, \(lines) {
  has_run_error <- any(str_detect(lines, "ERROR IN MODEL RUN"))
  has_fatal_error <- any(str_detect(lines, "FATAL ERROR"))
  has_run_error | has_fatal_error
})
# Extract the error reason reported in the tail of each log.
# Yields one string per log: the text of every line that follows
# "---> Reason:" and/or mentions SIGABRT, or NA when no such line exists.
ed_error_reason <- log_ends |>
  map_chr(\(x) {
    is_reason <-
      str_detect(x, "(?<=---> Reason:).+") | str_detect(x, "SIGABRT")
    out <- x[is_reason] |>
      str_remove("---> Reason:") |>
      str_trim()
    if (length(out) == 0) {
      return(NA_character_)
    }
    # map_chr() needs exactly one string per log, but a tail can hold several
    # matching lines (e.g. a reason plus a SIGABRT line); collapse them so the
    # report does not fail on such logs.
    str_c(unique(out), collapse = "; ")
  })
# TRUE for logs whose tail reports a model2netcdf.ED2 error.
model2netcdf_error <- map_lgl(
  log_ends,
  \(lines) any(str_detect(lines, "ERROR IN model2netcdf.ED2"))
)
```
```{r}
# Build the report table: one row per logfile, tagged with the site and
# ensemble number parsed from its path. Within each site, finished runs
# are listed first.
log_report <- tibble(
  path = logs,
  finished,
  ed_error,
  ed_error_reason,
  model2netcdf_error
)
log_report <- mutate(
  log_report,
  ensemble = as.numeric(str_extract(path, "(?<=ENS-)\\d+")),
  site = str_extract(path, "MANDIFORE-(PNW|SEUS)-\\d+")
)
log_report <- select(log_report, site, ensemble, everything())
log_report <- arrange(log_report, site, desc(finished), ensemble)
```
```{r}
# Summarise the date range covered by each ensemble member, taken from the
# year-month in its analysis-E-*.h5 file names, then attach the logfile checks.
ensemble_report <-
  tibble(path = list.files(base_dir, "analysis-E-.+\\.h5", recursive = TRUE)) |>
  mutate(
    ensemble = path |> str_extract("(?<=ENS-)\\d+") |> as.numeric(),
    site = path |> str_extract("MANDIFORE-(PNW|SEUS)-\\d+"),
    # The year-month follows "E-" in the file name.
    date = path |>
      basename() |>
      str_extract("(?<=E-)\\d{4}-\\d{2}") |>
      lubridate::ym()
  ) |>
  group_by(site, ensemble) |>
  summarize(
    date_start = min(date),
    date_end = max(date),
    # Drop the grouping so the joined table is not left grouped by site and
    # no regrouping message leaks into the rendered report.
    .groups = "drop"
  ) |>
  # A full join keeps ensembles that appear on only one side. The keys are
  # spelled out so the join cannot silently pick up other shared columns and
  # does not print a "Joining with" message into the report.
  full_join(log_report, by = c("site", "ensemble")) |>
  select(-path)

reactable::reactable(ensemble_report, filterable = TRUE)
```
# Ensemble data summary
Get the samples.Rdata for every site run and combine into a dataframe.
```{r}
# Find the samples.Rdata file of every site run. The pattern is anchored so
# that other files whose names merely end in "samples.Rdata" preceded by more
# text (e.g. ensemble.samples.<id>.Rdata) are not picked up.
samples_rdata <- list.files(
  base_dir,
  "^samples\\.Rdata$",
  recursive = TRUE,
  full.names = TRUE
)
sitenames <- samples_rdata |> str_extract("MANDIFORE-(PNW|SEUS)-\\d+")

# Combine the ensemble samples of all sites into one data frame with one row
# per site, PFT and ensemble member.
ensemble_inputs <-
  samples_rdata |>
  set_names(sitenames) |>
  map_dfr(\(Rdata) {
    # load() places the file's objects (including `ensemble.samples`) in this
    # function's environment only.
    load(Rdata)
    # Drop the "env" element with a logical mask: `x[-which(cond)]` returns
    # an empty list when nothing matches, which would discard every PFT.
    ensemble.samples[names(ensemble.samples) != "env"] |>
      # seq_len() stays correct for a zero-row table, unlike 1:n().
      map(\(.x) mutate(.x, ensemble = seq_len(n()))) |>
      bind_rows(.id = "pft_name")
  }, .id = "site") |>
  select(site, ensemble, pft_name, everything())

reactable::reactable(ensemble_inputs, filterable = TRUE)
```
1) load samples.Rdata and create a table for each ensemble member with 1 column per parameter per pft
Join this with the ensemble report and the actual data summary, then model whether variables go extreme (as a binary response) as a function of the inputs.
The data are in `ensemble.samples.NOENSEMBLEID.Rdata`, inside the object `ens.samples`.
2) plot distributions for traits in `trait.samples` (in samples.Rdata). Look for anything extreme or weird
# Posterior summary
A work-in-progress. Open and visualize posterior distributions with the `distributional` and `ggdist` packages.
```{r}
library(distributional)
library(ggdist)
library(dplyr)
library(glue)
# Find every post.distns.Rdata file below base_dir.
posterior_rdata <- list.files(
  base_dir,
  "post\\.distns\\.Rdata",
  recursive = TRUE,
  full.names = TRUE
)

# Stack the `post.distns` table of every file into one tibble. Row names are
# kept in `var` and the originating file path in `path`.
dists_raw <- map_dfr(
  set_names(posterior_rdata),
  \(Rdata) {
    load(Rdata)
    as_tibble(post.distns, rownames = "var")
  },
  .id = "path"
)
# Average the distribution parameters per variable and PFT, then build
# distribution specifications for the distributional and ggdist packages.
dists <-
  dists_raw |>
  mutate(
    site = str_extract(path, "MANDIFORE-(PNW|SEUS)-\\d+"),
    # Take the whole directory name that follows "pft/". The previous class
    # `[[a-zA-z\\.]]+` had an `A-z` range typo and cut names off at the first
    # digit or hyphen.
    pft_name = str_extract(path, "(?<=pft/)[^/]+")
  ) |>
  select(-path) |>
  group_by(var, pft_name) |>
  summarize(
    distn = unique(distn),
    across(starts_with("param"), mean),
    # Return an ungrouped table so later steps are not silently grouped.
    .groups = "drop"
  ) |>
  # Build strings such as "norm(0, 1)" for parse_dist().
  mutate(dist_string = glue("{distn}({parama}, {paramb})")) |>
  parse_dist(dist_string)
# Split the variables into batches of at most five and draw one faceted
# figure per batch (PFTs in rows, variables in columns).
vars <- unique(dists$var)
varslist <- split(vars, ceiling(seq_along(vars) / 5))

map(varslist, function(var_batch) {
  batch_dists <- filter(dists, var %in% var_batch)
  ggplot(batch_dists) +
    stat_slabinterval(
      aes(dist = .dist, args = .args),
      orientation = "y",
      size = 1
    ) +
    facet_grid(pft_name ~ var, scales = "free")
})
```