diff --git a/CHANGES.md b/CHANGES.md index 08ce7f5..651d5fe 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,59 @@ ## Change log +### Version 1.0.1 + +Updates (mostly) required to run the OSPAR 2024 CEMP assessment. + +#### Data import + +For OSPAR- and HELCOM-style assessments, data from Germany are now matched to stations by name for 2023 onwards. This applies to biota, sediment and water. Note that for HELCOM, biota data from Germany are already matched by name for all years. + +#### Uncertainty processing + +harsat 1.0.0 replaced implausibly large relative uncertainties ($>=$ 100%) with imputed values. However, implausibly small relative uncertainties were not changed. The code now also replaces relative uncertainties $<=$ 1% with imputed values. + +The defaults can be changed using `control$relative_uncertainty` in `read_data`. To replicate the defaults in harsat 1.0.0, set `control$relative_uncertainty = c(0, 100)`. To keep all uncertainties, regardless of how ridiculous they are, set `control$relative_uncertainty = c(0, Inf)`. + +Two minor bug fixes: + +* relative uncertainties were being filtered for all distributional types, but this is only a reliable procedure for determinands with `distribution == "lognormal"`; the checks are now only applied to lognormal data +* some biological effect data with distributions other than normal or lognormal were being incorrectly deleted; this has now been corrected + +The oddity files have been updated to show: + +* implausible_uncertainties_reported.csv - all reported uncertainties that are replaced by imputed values +* missing_uncertainties.csv - all uncertainties (normal or lognormal data) that are not reported and can't be imputed +* implausible_uncertainties_calculated.csv - all uncertainties that are calculated during the data processing (e.g. during normalisation) that are implausible and are set to missing + +#### Uncertainty coefficients + +The function `ctsm_uncrt_workup` and related supporting functions are used in OSPAR assessments to update the fixed and proportional standard deviations which are subsequently used to impute missing uncertainties. These functions were ignored during the initial development of harsat and are now harsat compatible. + +#### Biological effect assessments + +Imposex assessments are now fully reproducible, with seeds for random number generation provided in the calls to `ctsm.VDS.cl` and `assess_imposex`. + +Assessment functions for negative binomial data have been added. Negative binomial data include MNC - the number of micronucleated cells. + +#### Reporting + +`report_assessment` generates default file names. These are based on the series identifier with additional station information. It is now possible to override this behaviour for a single report by providing a different file name using the `output_file` argument. + +#### Reference tables + +* new values added to the method_extraction table + +#### Minor bug fixes + +* correct behaviour of argument `return_early` in `create_timeseries` +* pass `info` component of the harsat object to `determinand.link.sum`, `determinand.link.replace`, and `determinand.link.imposex` +* ensure early return from `ctsm_convert_basis` when there is nothing to convert (avoids issues e.g.
when all the data are biological effects) +* ensure SURVT (in pargroup B-BIO) is recognised as a biological effect in `ctsm_get_datatype` (SURVT is the only determinand in this pargroup that isn't an auxiliary variable) +* pass `good_status` to assessment functions for data with distributions other than normal and lognormal +* trap pathological case in estimation of `prtrend`; see #436 +* ensure `ctsm_OHAT_legends` uses the symbology as specified in `write_summary_table` + + ### Version 1.0.0 - Initial public release diff --git a/DESCRIPTION b/DESCRIPTION index 427ade9..2ff5613 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: harsat Title: Harmonized Regional Seas Assessment Tool -Version: 1.0.0 +Version: 1.0.1 Authors@R: c( person(given = "Arctic Monitoring and Assessment Programme (AMAP)", email = "amap@amap.no", role = c("cph", "fnd", "aut")), person(given = "Helsinki Commission (HELCOM)", email = "secretariat@helcom.fi", role = c("cph", "fnd", "aut")), @@ -22,26 +22,28 @@ RoxygenNote: 7.2.3 VignetteBuilder: knitr, rmarkdown Depends: R (>= 4.2.1.0) Imports: + digest, + dplyr, flexsurv, + lattice, lme4, + lubridate, + magrittr, + MASS, mgcv, + mvtnorm, numDeriv, optimx, parallel, pbapply, - MASS, - mvtnorm, - dplyr, - magrittr, - lattice, - lubridate, readr, + readxl, + sf, stringr, + survival, + TeachingDemos, tibble, - tidyr, - sf, - digest, - readxl + tidyr Suggests: knitr, rmarkdown, diff --git a/R/assessment_functions.R b/R/assessment_functions.R index ffa10ce..e9597c8 100644 --- a/R/assessment_functions.R +++ b/R/assessment_functions.R @@ -836,6 +836,7 @@ assess_lmm <- function( AC = AC, recent.years = recent.years, determinand = determinand, + good_status = good.status, max.year = max.year, recent.trend = recent.trend, nYearFull = nYearFull, @@ -978,9 +979,9 @@ assess_lmm <- function( contrast.whole <- ctsm.lmm.contrast(fit, start = min(data$year), end = max(data$year)) row.names(contrast.whole) <- "whole" - start.year <- max(max.year - recent.trend + 1, min(data$year)) - if (sum(unique(data$year) >= start.year - 0.5) >= 5) { - contrast.recent <- ctsm.lmm.contrast(fit, start = start.year, end = max(data$year)) + start_recent <- max(max.year - recent.trend + 1, min(data$year)) + if (sum(unique(data$year) >= start_recent - 0.5) >= 5) { + contrast.recent <- ctsm.lmm.contrast(fit, start = start_recent, end = max(data$year)) row.names(contrast.recent) <- "recent" contrast.whole <- rbind(contrast.whole, contrast.recent) } @@ -1065,15 +1066,44 @@ assess_lmm <- function( if (output$method %in% c("linear", "smooth")) { - # for linear trend and recent trend, use pltrend (from likelihood ratio test) if - # method = "linear", because a better test - # really need to go into profile likelihood territory here! 
+ # pltrend + # method = "linear" use p_linear (from likelihood ratio test) + # method = "smooth" use p from the Wald test in contrasts + # for linear model, likelihood ratio test is a better test (fewer + # approximations) than the Wald test + # for smooth model, would be better to go into profile likelihood + # territory (future enhancement) + + # prtrend + # same approach; however p_linear could be misleading when the years at + # the end of the time series are all censored values and a flat model is + # fitted; the estimate of rtrend is shrunk to reflect this, but p_linear + # might be misleadingly significant; something to think about in the + # future + # however, there is a pathological case when all the fitted values in the + # recent period have the same value; rtrend is zero, and yet can still be + # significant based on p_linear even though there are no data to support + # this; in this case use p from the Wald test (which is unity) + + if (output$method == "linear") { + pltrend <- p_linear + } else { + pltrend <- output$contrasts["whole", "p"] + } - pltrend <- if (output$method == "linear") p_linear else with(output$contrasts["whole", ], p) ltrend <- with(output$contrasts["whole", ], estimate / (end - start)) if ("recent" %in% row.names(output$contrasts)) { - prtrend <- if (output$method == "linear") p_linear else with(output$contrasts["recent", ], p) + + if ( + output$method == "linear" & + max(data$year[data$censoring %in% ""]) > start_recent + ) { + prtrend <- p_linear + } else { + prtrend <- output$contrasts["recent", "p"] + } + rtrend <- with(output$contrasts["recent", ], estimate / (end - start)) } } @@ -1292,8 +1322,17 @@ ctsm.lmm.contrast <- function(ctsm.ob, start, end) { wk <- t(wk) %*% ctsm.ob$Xpred[pos, ] se.contrast <- sqrt(wk %*% ctsm.ob$vcov %*% t(wk)) - t.stat <- contrast / se.contrast - p.contrast <- 1 - pf(t.stat^2, 1, ctsm.ob$dfResid) + # catch pathological case where contrast = 0 and se.contrast = 0 + # this can happen if all the data between start and end are censored, so + # a 'flat' model is fitted + + if (dplyr::near(contrast, 0L) & dplyr::near(se.contrast, 0L)) { + p.contrast <- 1 + } else { + t.stat <- contrast / se.contrast + p.contrast <- 1 - pf(t.stat^2, 1, ctsm.ob$dfResid) + } + data.frame(start, end, estimate = contrast, se = se.contrast, p = p.contrast) } @@ -1642,8 +1681,8 @@ ctsm_dyear <- function( # Other distributions ---- assess_survival <- function( - data, annualIndex, AC, recent.years, determinand, max.year, recent.trend, - nYearFull, firstYearFull) { + data, annualIndex, AC, recent.years, determinand, good_status, max.year, + recent.trend, nYearFull, firstYearFull) { # silence non-standard evaluation warnings .data <- est <- lcl <- ucl <- p <- se <- NULL @@ -1741,11 +1780,6 @@ assess_survival <- function( data$year_adj <- data$year - min(recent.years) - # establish other info - - good_status <- ctsm_get_info(info$determinand, determinand, "good_status") - - # type of fit depends on number of years: # nYear <= 2 none # nYear <= 4 mean @@ -1755,7 +1789,7 @@ assess_survival <- function( # have only currently coded for mean and linear - look at ctsm.anyyear.lmm for # extensions to smoothers - if (determinand %in% c("NRR", "SURVT") & nYear >= 7) { + if (determinand %in% c("NRR", "SURVT") & nYear >= 8) { stop("time series too long: need to include code for smoothers") } @@ -1786,7 +1820,7 @@ assess_survival <- function( # mean model fits$mean <- flexsurv::flexsurvreg( - Surv(time, time2, type = "interval2") ~ 1, + survival::Surv(time, time2, 
type = "interval2") ~ 1, dist = surv_dist, data = data ) @@ -2210,8 +2244,8 @@ assess_survival_refvalue <- function( assess_beta <- function( - data, annualIndex, AC, recent.years, determinand, max.year, recent.trend, - nYearFull, firstYearFull) { + data, annualIndex, AC, recent.years, determinand, good_status, max.year, + recent.trend, nYearFull, firstYearFull) { # silence non-standard evaluation warnings info <- weight <- NULL @@ -2258,11 +2292,7 @@ assess_beta <- function( data$year_fac <- factor(data$year) - # establish other info - - good_status <- ctsm_get_info(info$determinand, determinand, "good_status") - - + # type of fit depends on number of years: # nYear <= 2 none # nYear <= 4 mean @@ -2610,12 +2640,12 @@ assess_beta <- function( assess_negativebinomial <- function( - data, annualIndex, AC, recent.years, determinand, max.year, recent.trend, - nYearFull, firstYearFull) { - + data, annualIndex, AC, recent.years, determinand, good_status, max.year, + recent.trend, nYearFull, firstYearFull) { + # silence non-standard evaluation warnings info <- weight <- NULL - + # over-dispersed count data (perhaps very low over-dispersed values from a # binomial distribution, such an MNC) @@ -2631,11 +2661,26 @@ assess_negativebinomial <- function( output <- list(data = data) - # set up offset - e.g. for MNC these are the number of individuals - # specified in MNc-QC-NR + # check all values are valid counts + # response currently expressed as numbers per 1000 cells + + data$response <- data$response * data[["MNC-QC-NR"]] / 1000 - if (!("offset" %in% names(data))) { - data$offset <- 1 + if (!(all(data$response >= 0) & + isTRUE(all.equal(data$response, as.integer(data$response))))) { + stop("invalid values for negative binomial distribution data") + } + + + # set up offset + # for MNC these are the number of cells specified in MNC-QC-NR (but note that + # the offset is then log transformed in the call to gam - this should be + # rationalised) + + if ("offset" %in% names(data)) { + data$offset <- log(data$offset / 1000) + } else { + data$offset <- 0 } @@ -2648,10 +2693,6 @@ assess_negativebinomial <- function( data$year_fac <- factor(data$year) - # establish other info - - good_status <- ctsm_get_info(info$determinand, determinand, "good_status") - # type of fit depends on number of years: # nYear <= 2 none @@ -2662,7 +2703,7 @@ assess_negativebinomial <- function( # have only currently coded for mean and linear - look at ctsm.anyyear.lmm for # extensions to smoothers - if (nYear >= 3) { + if (nYear >= 7) { stop("time series too long: need to include code for smoothers") } @@ -2681,9 +2722,9 @@ assess_negativebinomial <- function( fits$mean <- mgcv::gam( response ~ 1 + s(year_fac, bs = "re"), - weights = weight, - data = data, - family = "betar", + data = data, + offset = data$offset, + family = "nb", method = "ML" ) @@ -2788,7 +2829,7 @@ assess_negativebinomial <- function( } - # get estimated change in logit value over whole time series and in the + # get estimated change in log value over whole time series and in the # most recent # e.g. 
twenty years of monitoring (truncate when data missing # and only compute if at least five years in that period) # NB p value from contrast is NOT the same as from likelihood ratio test even @@ -2825,8 +2866,8 @@ assess_negativebinomial <- function( output$reference.values <- lapply(AC, function(i) { ctsm.lmm.refvalue( output, - yearID = max(data$year), - refvalue = qlogis(i / 100), + year = max(data$year), + refvalue = log(i), lower.tail = switch(good_status, low = TRUE, high = FALSE) ) }) @@ -2916,14 +2957,20 @@ assess_negativebinomial <- function( }) else { meanLY <- tail(output$pred$fit, 1) - meanLY <- 100 * plogis(meanLY) + meanLY <- exp(meanLY) clLY <- switch( good_status, low = tail(output$pred$ci.upper, 1), high = tail(output$pred$ci.lower, 1) ) - clLY <- 100 * plogis(clLY) + clLY <- exp(clLY) } + + # turn trends into 'percentage trends' + + ltrend <- ltrend * 100 + rtrend <- rtrend * 100 + }) if (!is.null(AC)) { @@ -2949,7 +2996,7 @@ assess_negativebinomial <- function( else if (rtrend >= 0) bigYear else { - wk <- (qlogis(value / 100) - qlogis(meanLY / 100)) / rtrend + wk <- (exp(value) - exp(meanLY)) / rtrend wk <- round(wk + maxYear) min(wk, bigYear) } @@ -2963,7 +3010,7 @@ assess_negativebinomial <- function( else if (rtrend <= 0) bigYear else { - wk <- (qlogis(value / 100) - qlogis(meanLY / 100)) / rtrend + wk <- (exp(value) - exp(meanLY)) / rtrend wk <- round(wk + maxYear) min(wk, bigYear) } @@ -2996,5 +3043,3 @@ assess_negativebinomial <- function( rownames(output$summary) <- NULL output } - - diff --git a/R/import_check_functions.R b/R/import_check_functions.R index 3f2187b..8b2214a 100644 --- a/R/import_check_functions.R +++ b/R/import_check_functions.R @@ -8,7 +8,6 @@ ctsm_check_variable <- function(data, var_id, info) { return(data) } - # augment data with four variables: # ok says whether original value is ok and should be retained # ok.delete says whether original value is valid but is not to be used in the @@ -59,7 +58,7 @@ ctsm_check_variable <- function(data, var_id, info) { stop( "Not all cases considered when checking '", var_id, "': see '", outfile_name, "'\n", - "You might need to contact the HARSAT development team to fix this.", + " You might need to contact the HARSAT development team to fix this." ) } diff --git a/R/import_functions.R b/R/import_functions.R index 31608b3..12faa89 100644 --- a/R/import_functions.R +++ b/R/import_functions.R @@ -19,9 +19,8 @@ library(readxl) #' @param data_dir The directory where the data files can be found (sometimes #' supplied using 'file.path'). Defaults to "."; i.e. the working directory. #' @param data_format A string specifying whether the data were extracted from -#' the ICES webservice ("ICES" - the default) or are in the simplified format -#' designed for other data sources ("external"). The value "ICES_old" is -#' deprecated. +#' the ICES webservice (`"ICES"` - the default) or are in the simplified +#' format designed for other data sources (`"external"`). #' @param info_files A list of files specifying reference tables which override #' the defaults. See examples. #' @param info_dir The directory where the reference tables can be found @@ -65,13 +64,20 @@ library(readxl) #' `retain == FALSE` are deleted later in `tidy_data` #' * `stations` #' -#' ## Control parameters +#' @details #' -#' Many aspects of the assessment process can be controlled through the -#' parameters stored in `info$control`. 
This is a list populated with default -#' values which can then be overwritten, if required, using the `control` -#' argument. +#' ## Control parameters #' +#' Many aspects of the assessment process can be controlled through the +#' parameters stored in `info$control`. This is a list populated with default +#' values which can then be overwritten, if required, using the `control` +#' argument. +#' +#' ## External data +#' +#' If `data_format = "external"`, a simplified data and station file can +#' be supplied. See `vignette("external-file-format")` for details. +#' #' @export read_data <- function( compartment = c("biota", "sediment", "water"), @@ -285,6 +291,11 @@ control_default <- function(purpose, compartment) { # use_stage is a logical which determines whether, for biota, stage is used # to populate subseries + # relative_uncertainties is a 2-vector giving the range of acceptable + # relative uncertainties for log-normally distributed data; the default is + # to accept relative uncertainties greater than (but not equal) to 1% and + # less than (but not equal to) 100% + region <- list() region$id <- switch( @@ -315,6 +326,8 @@ control_default <- function(purpose, compartment) { ) use_stage <- FALSE + + relative_uncertainty <- c(1, 100) add_stations <- switch( purpose, @@ -366,7 +379,8 @@ control_default <- function(purpose, compartment) { region = region, add_stations = add_stations, bivalve_spawning_season = bivalve_spawning_season, - use_stage = use_stage + use_stage = use_stage, + relative_uncertainty = relative_uncertainty ) } @@ -410,6 +424,17 @@ control_modify <- function(control_default, control) { ) } } + + + if (length(control$relative_uncertainty) != 2L || + control$relative_uncertainty[1] < 0 || + control$relative_uncertainty[1] > control$relative_uncertainty[2]) { + stop( + "error in control argument: invalid range of acceptable relative ", + "uncertainties", + call. 
= FALSE + ) + } control } @@ -1290,25 +1315,28 @@ add_stations <- function(data, stations, info){ .id = .id | (.data$country == "France") | (.data$country == "Spain" & .data$.year > 2004) | - (.data$country == "The Netherlands" & .data$.year > 2006) + (.data$country == "The Netherlands" & .data$.year > 2006) | + (.data$country == "Germany" & .data$.year > 2022) ), sediment = dplyr::mutate( x, .id = .id | (.data$country == "France" & .data$.year > 2008) | (.data$country == "Spain" & .data$.year > 2004) | - (.data$country == "The Netherlands" & .data$.year > 2006) + (.data$country == "The Netherlands" & .data$.year > 2006) | + (.data$country == "Germany" & .data$.year > 2022) ), water = dplyr::mutate( x, .id = .id | (.data$country == "France") | (.data$country == "Spain" & .data$.year > 2004) | - (.data$country == "The Netherlands" & .data$.year > 2006) + (.data$country == "The Netherlands" & .data$.year > 2006) | + (.data$country == "Germany" & .data$.year > 2022) ), ) - # and Germany currently only matches by name for HELCOM biota + # and Germany always matches by name for HELCOM biota if (info$compartment == "biota" && info$purpose == "HELCOM") { x <- dplyr::mutate(x, .id = .id | (.data$country %in% "Germany")) @@ -2244,7 +2272,7 @@ create_timeseries <- function( ) } - + # normalisation can either be a logical (TRUE uses default normalisation function) # or a function @@ -2415,7 +2443,13 @@ create_timeseries <- function( data$pargroup <- ctsm_get_info(info$determinand, data$determinand, "pargroup") } + # NB distribution will be missing for auxiliary data + data$distribution <- ctsm_get_info( + info$determinand, data$determinand, "distribution", na_action = "output_ok" + ) + + # drop samples which only have auxiliary data ok <- with(data, sample %in% sample[group != "Auxiliary"]) @@ -2470,62 +2504,27 @@ create_timeseries <- function( # ensure censoring, limit of detection and limit of quantification are consistent - data <- ctsm_check_censoring(data, info, print_code_warnings) + data <- check_censoring(data, info, print_code_warnings) - # convert uncertainty into standard deviations, and remove any associated variables - - data <- ctsm_check( - data, - !is.na(uncertainty) & uncertainty <= 0, - action = "make.NA", - message = "Non-positive uncertainties", - file_name = "non_positive_uncertainties", - missing_id = "uncertainty", - info = info - ) - - data <- dplyr::mutate( - data, - uncertainty_sd = dplyr::case_when( - unit_uncertainty %in% "U2" ~ uncertainty / 2, - unit_uncertainty %in% "%" ~ value * uncertainty / 100, - TRUE ~ uncertainty - ), - uncertainty_rel = 100 * (uncertainty_sd / value) - ) + # ensure uncertainties are plausible + + data <- check_uncertainty(data, info, type = "reported") - wk_id <- match("unit_uncertainty", names(data)) - wk_n <- ncol(data) - data <- data[c( - names(data)[1:wk_id], - "uncertainty_sd", "uncertainty_rel", - names(data)[(wk_id+1):(wk_n-2)])] - - ctsm_check( - data, - !is.na(uncertainty) & uncertainty_rel >= 100, - action = "warning", - message = "Large uncertainties", - file_name = "large uncertainties", - info = info - ) - - # delete data with large relative uncertainties + # convert all uncertainties to unit SD + data <- dplyr::mutate( data, - uncertainty_sd = dplyr::if_else( - .data$uncertainty_rel < 100, - .data$uncertainty_sd, - NA_real_ - ), - uncertainty = .data$uncertainty_sd, - unit_uncertainty = NULL, - uncertainty_sd = NULL, - uncertainty_rel = NULL + uncertainty = dplyr::case_when( + .data$unit_uncertainty %in% "U2" ~ .data$uncertainty / 
2, + .data$unit_uncertainty %in% "%" ~ .data$value * .data$uncertainty / 100, + .default = .data$uncertainty + ), + unit_uncertainty = "SD" ) - + + # sort out determinands where several determinands represent the same variable of interest # three types of behaviour: replace, sum and bespoke @@ -2540,7 +2539,7 @@ create_timeseries <- function( bespoke = get(paste("determinand.link", i, sep = "."), mode = "function") ) - args = list(data = data, keep = i, drop = wk$det) + args = list(data = data, info = info, keep = i, drop = wk$det) if ("weights" %in% names(wk)) { args = c(args, list(weights = wk$weights)) } @@ -2568,8 +2567,6 @@ create_timeseries <- function( cat("\nCreating time series data\n") - data <- data[setdiff(names(data), c("qalink", "alabo"))] - # create new.unit and concentration columns comprising the details from the # determinand file in the information folder, required to get correct unit details @@ -2600,7 +2597,7 @@ create_timeseries <- function( # missing values for correction if (info$compartment == "biota") { - data <- ctsm.imposex.check.femalepop(data) + data <- ctsm.imposex.check.femalepop(data, info) } @@ -2610,14 +2607,9 @@ create_timeseries <- function( if (return_early) { - out = c( + out <- c( out, - ctsm.import.value( - data, - station_dictionary, - info$compartment, - info$purpose, - print_code_warnings) + output_timeseries(data, station_dictionary, info, extra = "alabo") ) return(out) @@ -2640,7 +2632,19 @@ create_timeseries <- function( } + # check that all normal and lognormal data have uncertainties + + data <- ctsm_check( + data, + distribution %in% c("normal", "lognormal") & !is.na(concentration) & + is.na(uncertainty), + action = "delete", + message = "Missing uncertainties which cannot be imputed", + file_name = "missing_uncertainties", + info = info + ) + # filter contaminant data to remove bivalve and gastropod records in the # spawning season when they are elevated / more variable @@ -2697,20 +2701,25 @@ create_timeseries <- function( data <- normalise(data, station_dictionary, info, normalise.control) } - - # remove concentrations where: - # uncertainty is missing - # uncertainty cv is > 100% - # ensure uncertainty and censoring are missing when concentration is missing + # check whether implausible uncertainties have been calculated during the + # data processing (e.g. 
during normalisation) + # if so - make concentration, uncertainty and censoring missing - ok <- !is.na(data$concentration) & !is.na(data$uncertainty) - ok <- ok & (data$uncertainty <= data$concentration) + data <- check_uncertainty(data, info, type = "calculated") + + + # final check to ensure all normal and lognormal data have an uncertainty + + notok <- data$distribution %in% c("normal", "lognormal") & + !is.na(data$concentration) & is.na(data$uncertainty) + + if (any(notok)) { + stop( + "uncertainties missing where they should be present: \n", + "contact HARSAT development team") + } - data$concentration[!ok] <- NA_real_ - data$uncertainty[!ok] <- NA_real_ - data$censoring[!ok] <- NA_character_ - # drop groups of data at stations with no data in recent years cat(" Dropping groups of compounds / stations with no data between", @@ -2725,7 +2734,7 @@ create_timeseries <- function( out <- c( out, - ctsm_import_value(data, station_dictionary, info) + output_timeseries(data, station_dictionary, info) ) out @@ -2841,7 +2850,7 @@ ctsm_check <- function( } -ctsm_import_value <- function(data, station_dictionary, info) { +output_timeseries <- function(data, station_dictionary, info, extra = NULL) { # silence non-standard evaluation warnings .data <- .group <- seriesID <- NULL @@ -2875,6 +2884,10 @@ ctsm_import_value <- function(data, station_dictionary, info) { "limit_detection", "limit_quantification", "uncertainty" ) + if (!is.null(extra)) { + id <- c(id, extra) + } + auxiliary <- ctsm_get_auxiliary(data$determinand, info) auxiliary_id <- paste0( rep(auxiliary, each = 5), @@ -3244,7 +3257,7 @@ ctsm_check_determinands <- function(info, data, determinands, control = NULL) { -determinand.link.check <- function(data, keep, drop, printDuplicates = TRUE, ...) { +determinand.link.check <- function(data, info, keep, drop, printDuplicates = TRUE, ...) { # check whether any drop and keep are both submitted for the same sample and # matrix and, if so, delete drop - note that ctsm_check doesn't do the @@ -3271,7 +3284,8 @@ determinand.link.check <- function(data, keep, drop, printDuplicates = TRUE, ... keep, "and", dropTxt, "submitted in same sample - deleting", dropTxt, "data" ), - file_name = paste("determinand_link", keep, sep = "_"), + file_name = paste("determinand_link", keep, sep = "_"), + info = info, ... ) } @@ -3280,7 +3294,7 @@ determinand.link.check <- function(data, keep, drop, printDuplicates = TRUE, ... } -determinand.link.replace <- function(data, keep, drop, ...) { +determinand.link.replace <- function(data, info, keep, drop, ...) { # core function for relabelling determinand 'drop' as determinand 'keep' # most of the work is checking that there aren't data submitted as both for the same @@ -3295,7 +3309,7 @@ determinand.link.replace <- function(data, keep, drop, ...) { # check for samples with both drop and keep and, if they exist, delete drop - data <- determinand.link.check(data, keep, drop, ...) + data <- determinand.link.check(data, info, keep, drop, ...) # relabel the levels so that drop becomes keep @@ -3307,7 +3321,7 @@ determinand.link.replace <- function(data, keep, drop, ...) { } -determinand.link.imposex <- function(data, keep, drop, ...) { +determinand.link.imposex <- function(data, info, keep, drop, ...) { stopifnot(length(keep) == 1, length(drop) == 1) @@ -3338,6 +3352,7 @@ determinand.link.imposex <- function(data, keep, drop, ...) 
{ action = "warning", message = paste("inconsistent", keep, "and", drop, "submitted in same year"), file_name = paste("determinand_link", keep, sep = "_"), + info = info, ... ) @@ -3363,7 +3378,7 @@ determinand.link.imposex <- function(data, keep, drop, ...) { determinand.link.VDS <- determinand.link.IMPS <- determinand.link.INTS <- determinand.link.imposex -determinand.link.BBKF <- function(data, keep, drop, ...) { +determinand.link.BBKF <- function(data, info, keep, drop, ...) { stopifnot( identical(keep, "BBKF"), @@ -3372,30 +3387,36 @@ determinand.link.BBKF <- function(data, keep, drop, ...) { # first sum samples with both BBF and BKF - data <- determinand.link.sum(data, "BBKF", c("BBF", "BKF")) + data <- determinand.link.sum(data, info, "BBKF", c("BBF", "BKF")) # now sum samples with both BBJF and BKF to give BBJKF - data <- determinand.link.sum(data, "BBJKF", c("BBJF", "BKF")) + data <- determinand.link.sum(data, info, "BBJKF", c("BBJF", "BKF")) # now replace BBJKF with BBKF - data <- determinand.link.replace(data, "BBKF", "BBJKF") + data <- determinand.link.replace(data, info, "BBKF", "BBJKF") data } -assign("determinand.link.LIPIDWT%", function(data, keep, drop, ...) { +assign("determinand.link.LIPIDWT%", function(data, info, keep, drop, ...) { stopifnot(identical(keep, "LIPIDWT%"), identical(sort(drop), c("EXLIP%", "FATWT%"))) # if multiple values present, choose FATWT%, then LIPIDWT%, then EXLIP% (from Foppe) - data <- determinand.link.check(data, keep = "LIPIDWT%", drop = "EXLIP%", printDuplicates = FALSE, ...) - data <- determinand.link.check(data, keep = "FATWT%", drop = "EXLIP%", printDuplicates = FALSE, ...) - data <- determinand.link.check(data, keep = "FATWT%", drop = "LIPIDWT%", printDuplicates = FALSE, ...) + data <- determinand.link.check( + data, info, keep = "LIPIDWT%", drop = "EXLIP%", printDuplicates = FALSE, ... + ) + data <- determinand.link.check( + data, info, keep = "FATWT%", drop = "EXLIP%", printDuplicates = FALSE, ... + ) + data <- determinand.link.check( + data, info, keep = "FATWT%", drop = "LIPIDWT%", printDuplicates = FALSE, ... + ) if (!any(data$determinand %in% drop)) return(data) @@ -3410,7 +3431,7 @@ assign("determinand.link.LIPIDWT%", function(data, keep, drop, ...) { }) -determinand.link.sum <- function(data, keep, drop, ...) { +determinand.link.sum <- function(data, info, keep, drop, ...) { stopifnot(length(keep) == 1, length(drop) > 1) @@ -3531,7 +3552,7 @@ determinand.link.sum <- function(data, keep, drop, ...) 
{ -determinand.link.TEQDFP <- function(data, keep, drop, weights) { +determinand.link.TEQDFP <- function(data, info, keep, drop, weights) { stopifnot(length(keep) == 1, length(drop) > 1) @@ -3648,7 +3669,7 @@ determinand.link.TEQDFP <- function(data, keep, drop, weights) { } -ctsm_check_censoring <- function(data, info, print_code_warnings) { +check_censoring <- function(data, info, print_code_warnings) { # silence non-standard evaluation warnings value <- limit_detection <- limit_quantification <- NULL @@ -3792,6 +3813,93 @@ ctsm_check_censoring <- function(data, info, print_code_warnings) { } +check_uncertainty <- function(data, info, type = c("reported", "calculated")) { + + # import_functions.r + + # uncertainties must be non-negative for all data + # uncertainties must be strictly positive for normal or lognormal data + # relative uncertainties must be within specified range (1, 100) default for + # lognormal data + + # type = reported is used for submitted data + # type = calculated is used to check whether implausible uncertainties have + # been created in e.g. the normalisation process + + type <- match.arg(type) + + + # calculate relative uncertainties for lognormal data + # use value for reported data and concentration for calculated data + + id <- switch(type, reported = "value", calculated = "concentration") + + data <- dplyr::mutate( + data, + .ok = .data$distribution %in% "lognormal", + relative_uncertainty = dplyr::case_when( + .ok & .data$unit_uncertainty %in% "SD" ~ + 100 * .data$uncertainty / .data[[id]], + .ok & .data$unit_uncertainty %in% "U2" ~ + 100 * .data$uncertainty / (2 * .data[[id]]), + .ok & .data$unit_uncertainty %in% "%" ~ .data$uncertainty, + .default = NA_real_ + ), + .ok = NULL + ) + + data <- dplyr::mutate( + data, + reason = dplyr::case_when( + .data$uncertainty < 0 ~ "negative", + .data$distribution %in% c("normal", "lognormal") & + .data$uncertainty == 0 ~ "zero", + .data$distribution %in% "lognormal" & + .data$relative_uncertainty <= info$relative_uncertainty[1] ~ "small", + .data$distribution %in% "lognormal" & + .data$relative_uncertainty >= info$relative_uncertainty[2] ~ "large", + .default = "ok" + ) + ) + + data <- dplyr::relocate( + data, + "relative_uncertainty", + .after = "unit_uncertainty" + ) + + data <- dplyr::relocate(data, "reason") + + if (type == "reported") { + message <- "Implausible uncertainties reported with data" + file_name <- "implausible_uncertainties_reported" + missing_id <- "uncertainty" + } + + if (type == "calculated") { + message <- "Implausible uncertainties calculated in data processing" + file_name <- "implausible_uncertainties_calculated" + missing_id <- c("concentration", "uncertainty", "censoring") + } + + data <- ctsm_check( + data, + reason != "ok", + action = "make.NA", + message = message, + file_name = file_name, + missing_id = missing_id, + info = info + ) + + data$reason <- NULL + data$relative_uncertainty <- NULL + + data +} + + + check_subseries <- function(data, info) { # import_functions.R @@ -4460,7 +4568,7 @@ normalise_sediment_OSPAR <- function(data, station_dictionary, info, control) { data } -#' Normalises sediment concentrations, HELCOM vwersion +#' Normalises sediment concentrations, HELCOM version #' #' @param data the data object #' @param station_dictionary the station dictionary diff --git a/R/imposex_clm.R b/R/imposex_clm.R index 2fda0b4..f9f4c47 100644 --- a/R/imposex_clm.R +++ b/R/imposex_clm.R @@ -203,7 +203,8 @@ imposex.clm.predict <- function(clmFit, theta, data) { imposex_assess_clm <- 
function( - data, theta, annualIndex, species, recent.trend = 20, max.year) { + data, theta, annualIndex, species, recent.trend = 20, max.year, + seed = NULL) { # silence non-standard evaluation warnings dfResid <- twiceLogLik <- pFixed <- NULL @@ -211,6 +212,13 @@ imposex_assess_clm <- function( output <- list() summary <- list() + + # set seed for random number generations (used to obtain confidence limits on + # fitted trend) + + set.seed(seed) + + # decide whether there are sufficient years to model data # appropriate type of fit depends on total number of years and # number of years with intermediate values (i.e between 0 and max(VDS)) diff --git a/R/imposex_functions.R b/R/imposex_functions.R index 6b1dbd8..8f0e0ea 100644 --- a/R/imposex_functions.R +++ b/R/imposex_functions.R @@ -240,9 +240,14 @@ assess_imposex <- function( VDS = pmin(.data$concentration, theta$K), VDS = factor(.data$VDS, levels = 0:theta$K) ) - + + # create seed for random number generation based on combination of + # station_code and species + + seed <- TeachingDemos::char2seed(paste0(station_code, species)) + assessment <- imposex_assess_clm( - data, theta, annualIndex, species, recent.trend, max.year + data, theta, annualIndex, species, recent.trend, max.year, seed ) } else { diff --git a/R/information_functions.R b/R/information_functions.R index 2c184a5..f135ed3 100644 --- a/R/information_functions.R +++ b/R/information_functions.R @@ -710,6 +710,7 @@ ctsm_get_datatype <- function(determinand, info, abbr = FALSE){ startsWith(pargroup, "OC-") ~ "contaminant", pargroup %in% c("I-MET", "I-RNC") ~ "contaminant", pargroup %in% c("B-MBA", "B-TOX", "B-END") ~ "effect", + determinand %in% "SURVT" ~ "effect", pargroup %in% c("B-GRS", "B-HST") ~ "disease", TRUE ~ "auxiliary" ) @@ -2081,6 +2082,10 @@ ctsm_convert_basis <- function( # set up working data frame + if (all(exclude)) { + return(conc) + } + data <- data.frame( conc, from, to, drywt, drywt_censoring, lipidwt, lipidwt_censoring, exclude ) diff --git a/R/proportional_odds_functions.R b/R/proportional_odds_functions.R index 76534b4..7e8822e 100644 --- a/R/proportional_odds_functions.R +++ b/R/proportional_odds_functions.R @@ -175,6 +175,8 @@ ctsm.VDS.cl <- function(fit, nsim = 1000) { indexID <- setdiff(names(fit$par), cutsID) + set.seed(fit$seed) + data <- MASS::mvrnorm(nsim, fit$par, fit$vcov) data.cuts <- data[, cutsID, drop = FALSE] diff --git a/R/reporting_functions.R b/R/reporting_functions.R index ca35827..f181e30 100644 --- a/R/reporting_functions.R +++ b/R/reporting_functions.R @@ -316,14 +316,11 @@ ctsm.web.AC <- function(assessment_ob, classification) { drop = TRUE ) - # identity all AC that are relevant to the overall assessment - # more AC might be included in the assessment for each timeseries - this - # is a legacy issue that needs to be resolved - has arisen in looking - # at both environmental and health criteria + # identity all AC that are relevant + + AC_id <- names(classification[["below"]]) + stopifnot(AC_id %in% assessment_ob$info$AC) - AC_id <- assessment_ob$info$AC - stopifnot(AC_id %in% names(classification[["below"]])) - # loop over determinands out <- sapply(assessment_id, USE.NAMES = TRUE, simplify = FALSE, FUN = function(id) { @@ -840,6 +837,9 @@ ctsm_collapse_AC <- function(x, type = c("real", "character")) { #' @param output_dir The output directory for the assessment plots (possibly #' supplied using 'file.path'). The default is the working directory. The #' output directory must already exist. 
+#' @param output_file An alternative file name to override the default. This is +#' currently only implemented for a single report. If not supplied, the .html +#' extension will be added. #' @param max_report The maximum number of reports that will be generated. #' Defaults to 100. Each report is about 1MB in size and takes a few seconds #' to run, so this prevents a ridiculous number of reports being created. @@ -854,6 +854,7 @@ report_assessment <- function( assessment_obj, subset = NULL, output_dir = ".", + output_file = NULL, max_report = 100L) { # reporting_functions.R @@ -867,6 +868,14 @@ report_assessment <- function( ) } + if (!is.null(output_file) & length(output_file) > 1) { + stop( + "\n`output_file` can currently only be a single character string for", + " renaming a single\nreport.", + call. = FALSE + ) + } + info <- assessment_obj$info timeSeries <- assessment_obj$timeSeries @@ -924,31 +933,50 @@ report_assessment <- function( } + # if output_file supplied, ensure there is only one series + + if (!is.null(output_file) & n_series > 1) { + stop( + "\n`output_file` can currently only be used to rename a single report", + " and ", n_series, " reports have\nbeen requested", + call. = FALSE + ) + } + + # report on each time series lapply(series_id, function(id) { - # get file name from id, and add country and station name - # for easier identification - - series <- timeSeries[id, ] - - output_id <- sub( - series$station_code, - paste(series$station_code, series$country, series$station_name), - id, - fixed=TRUE - ) + # get file name + # if not supplied, use id and add country and station name for easier + # identification - # get rid of any slashes that might have crept in - - output_id <- gsub(" / ", " ", output_id, fixed = TRUE) - output_id <- gsub("/", " ", output_id, fixed = TRUE) - - output_id <- gsub(" \ ", " ", output_id, fixed = TRUE) - output_id <- gsub("\\", " ", output_id, fixed = TRUE) - + if (!is.null(output_file)) { + + output_id = output_file + + } else { + series <- timeSeries[id, ] + + output_id <- sub( + series$station_code, + paste(series$station_code, series$country, series$station_name), + id, + fixed=TRUE + ) + + # get rid of any slashes that might have crept in + + output_id <- gsub(" / ", " ", output_id, fixed = TRUE) + output_id <- gsub("/", " ", output_id, fixed = TRUE) + + output_id <- gsub(" \ ", " ", output_id, fixed = TRUE) + output_id <- gsub("\\", " ", output_id, fixed = TRUE) + + } + package_dir = system.file(package = "harsat") template_dir = file.path(package_dir, "markdown") report_file <- file.path(template_dir, "report_assessment.Rmd") @@ -973,17 +1001,16 @@ report_assessment <- function( #' @export ctsm_OHAT_legends <- function( - assessments, determinandGroups, regionalGroups = NULL, distanceGroups = NULL, path) { + assessments, determinandGroups, determinands, symbology, + regionalGroups = NULL, distanceGroups = NULL, path) { # silence non-standard evaluation warnings info <- NULL out <- sapply(names(assessments), simplify = FALSE, USE.NAMES = TRUE, FUN = function(media) { - assessment.ob <- assessments[[media]] - assessment <- assessment.ob$assessment - classColour <- assessment.ob$classColour - determinands <- assessment.ob$determinands + assessment <- assessments[[media]] + classColour <- symbology[[media]] regionalGroups <- regionalGroups[[media]] distanceGroups <- distanceGroups[[media]] @@ -994,16 +1021,16 @@ ctsm_OHAT_legends <- function( compartment <- assessment$info$compartment group <- ctsm_get_info( - info$determinand,
determinands, "group", compartment, sep = "_" + assessment$info$determinand, determinands, "group", compartment, sep = "_" ) web_group <- factor( group, levels = determinandGroups$levels, labels = determinandGroups$labels ) - web_group <- wk_group[, drop = TRUE] + web_group <- web_group[, drop = TRUE] - goodStatus <- ctsm_get_info(info$determinand, determinands, "good_status") + goodStatus <- ctsm_get_info(assessment$info$determinand, determinands, "good_status") goodStatus <- as.character(goodStatus) legendName <- apply(legends, 1, function(i) paste(colnames(legends)[i], collapse = " ")) diff --git a/R/uncertainty_functions.R b/R/uncertainty_functions.R index 34be926..6b947e4 100644 --- a/R/uncertainty_functions.R +++ b/R/uncertainty_functions.R @@ -1,32 +1,30 @@ #' @export -ctsm_uncrt_workup <- function(clean_data) { +ctsm_uncrt_workup <- function(harsat_obj) { # silence non-standard evaluation warnings - determinands <- qaID <- uncertainty <- concentration <- NULL - + .data <- NULL + # turn 'clean' data into uncertainty data # read in data - data <- clean_data$data - stations <- clean_data$stations - compartment <- clean_data$info$compartment + data <- harsat_obj$data + stations <- harsat_obj$stations + info <- harsat_obj$info - rm(clean_data) + rm(harsat_obj) # link to country - data$country <- stations[as.character(data$station), "country"] + data <- dplyr::left_join( + data, + stations[c("station_code", "country")], + by = "station_code" + ) - # get alabo and remove missing alabo - - data <- within(data, { - alabo <- sapply(strsplit(as.character(qaID), "_"), "[", 3) - alabo[alabo %in% "NA"] <- NA - alabo <- factor(alabo) - }) + # remove data with no analytical laboratory information data <- data[!is.na(data$alabo), ] @@ -38,23 +36,25 @@ ctsm_uncrt_workup <- function(clean_data) { id_aux <- c( "", ".uncertainty", ".censoring", ".limit_detection", ".limit_quantification" ) + - id <- intersect( - c("country", "alabo", "year", "sample", "group", "determinand", - "concentration", "uncertainty", - "censoring", "limit_detection", "limit_quantification", - paste0("AL", id_aux), - paste0("LI", id_aux), - paste0("CORG", id_aux), - paste0("LOIGN", id_aux)), - names(data) + id <- c( + "country", "alabo", "year", "sample", "group", "determinand", + "concentration", "uncertainty", + "censoring", "limit_detection", "limit_quantification", + paste0("AL", id_aux), + paste0("LI", id_aux), + paste0("CORG", id_aux), + paste0("LOIGN", id_aux) ) - data <- data[id] + + data <- dplyr::select(data, any_of(id)) + # sort out AL and CORG etc for sediment - if (compartment == "sediment") { + if (info$compartment == "sediment") { id <- c("country", "alabo", "year", "group", "sample", "determinand") @@ -115,52 +115,61 @@ ctsm_uncrt_workup <- function(clean_data) { # restrict to 'log-normally' distributed responses - - ok <- with(data, { - dist <- ctsm_get_info( - "determinand", determinand, "distribution", na_action = "output_ok" + # keep explicit mention of CORG and LOIGN just in case + + data <- dplyr::mutate( + data, + distribution = ctsm_get_info( + info$determinand, + .data$determinand, + "distribution", + na_action = "output_ok" ) - dist %in% "lognormal" | determinand %in% c("CORG", "LOIGN") - }) - - data <- data[ok, ] + ) + data <- dplyr::filter( + data, + .data$distribution %in% "lognormal" | .data$determinand %in% c("CORG", "LOIGN") + ) + # order groups and determinands within group - det_list <- determinands[[stringr::str_to_title(compartment)]] - - data <- within(data, { - group <- 
factor(as.character(group), levels = c(names(det_list), "auxiliary")) - determinand <- factor( - as.character(determinand), - levels = c(unlist(det_list), "AL", "LI", "CORG", "LOIGN")) - }) + # det_list <- determinands[[stringr::str_to_title(compartment)]] + # + # data <- within(data, { + # group <- factor(as.character(group), levels = c(names(det_list), "auxiliary")) + # determinand <- factor( + # as.character(determinand), + # levels = c(unlist(det_list), "AL", "LI", "CORG", "LOIGN")) + # }) # calculate relative uncertainty - data <- within(data, relative_u <- 100 * uncertainty / concentration) - - data <- droplevels(data) + data <- dplyr::mutate( + data, + relative_u = 100 * .data$uncertainty / .data$concentration + ) - list(compartment = compartment, data = data) + list(compartment = info$compartment, data = data) } + #' @export ctsm_uncrt_estimate <- function(data) { # silence non-standard evaluation warnings - .data <- n <- relative_u <- sd_variable <- sd_constant <- NULL + .data <- NULL # initialise output with total number of values by determinand options(dplyr.summarise.inform = FALSE) on.exit(options(dplyr.summarise.inform = NULL)) - out <- data %>% - dplyr::group_by(.data$determinand) %>% - dplyr::summarise(n_values = n()) + out <- data |> + dplyr::group_by(.data$determinand) |> + dplyr::summarise(n_values = dplyr::n()) # remove duplicate combinations of concentration and uncertainty (and associated censoring variables) @@ -177,9 +186,12 @@ ctsm_uncrt_estimate <- function(data) { # get number of 'unique values - out_unique <- data %>% - dplyr::group_by(.data$determinand) %>% - dplyr::summarise(n_unique = n(), n_alabo = dplyr::n_distinct(.data$alabo)) + out_unique <- data |> + dplyr::group_by(.data$determinand) |> + dplyr::summarise( + n_unique = dplyr::n(), + n_alabo = dplyr::n_distinct(.data$alabo) + ) out <- dplyr::left_join(out, out_unique, by = "determinand") @@ -193,16 +205,16 @@ ctsm_uncrt_estimate <- function(data) { # relative error # median relative_u for values above the detection level by alabo - out_relative <- data %>% - dplyr::filter(.data$censoring == "") %>% - dplyr::group_by(.data$determinand, .data$alabo) %>% + out_relative <- data |> + dplyr::filter(.data$censoring == "") |> + dplyr::group_by(.data$determinand, .data$alabo) |> dplyr::summarise(sd_variable = median(.data$relative_u) / 100) # now the median value across alabos - out_relative <- out_relative %>% - dplyr::group_by(.data$determinand) %>% - dplyr::summarise(sd_variable = median(sd_variable)) + out_relative <- out_relative |> + dplyr::group_by(.data$determinand) |> + dplyr::summarise(sd_variable = median(.data$sd_variable)) out <- dplyr::left_join(out, out_relative, by = "determinand") @@ -211,52 +223,53 @@ ctsm_uncrt_estimate <- function(data) { # median limit_detection for values with censoring == D, Q or "" by alabo # don't use "<" because we can't trust any of the limit values - out_constant <- data %>% - dplyr::filter(.data$censoring %in% c("D", "Q", "")) %>% - tidyr::drop_na(.data$limit_detection) %>% - dplyr::group_by(.data$determinand, .data$alabo) %>% + out_constant <- data |> + dplyr::filter(.data$censoring %in% c("D", "Q", "")) |> + tidyr::drop_na(.data$limit_detection) |> + dplyr::group_by(.data$determinand, .data$alabo) |> dplyr::summarise(sd_constant = median(.data$limit_detection) / 3) # now the median value across alabos - out_constant <- out_constant %>% - dplyr::group_by(.data$determinand) %>% - dplyr::summarise(sd_constant = median(sd_constant)) + out_constant <- out_constant |> + 
dplyr::group_by(.data$determinand) |> + dplyr::summarise(sd_constant = median(.data$sd_constant)) out <- dplyr::left_join(out, out_constant, by = "determinand") # tidy up - out <- out %>% - as.data.frame() %>% - column_to_rownames("determinand") %>% + out <- out |> + as.data.frame() |> + tibble::column_to_rownames("determinand") |> round(6) out } #' @export -ctsm_uncrt_plot_estimates <- function(uncrt_obj, old_estimates, group_id) { +ctsm_uncrt_plot_estimates <- function(uncrt_obj, group_id) { - id <- with(uncrt_obj$data, group %in% group_id) - data <- uncrt_obj$data[id, ] + data <- dplyr::filter(uncrt_obj$data, .data$group %in% group_id) - data <- data[with(data, order(determinand, concentration)), ] + data <- dplyr::arrange(data, .data$determinand, .data$concentration) + + data <- dplyr::filter(data, .data$relative_u >= 1 & .data$relative_u <= 100) - ok <- with(data, relative_u >= 1 & relative_u <= 100) - data <- data[ok, ] - new <- uncrt_obj$estimates[c("sd_constant", "sd_variable")] names(new) <- c("sdC", "sdV") - var_id <- paste(uncrt_obj$compartment, c("sd_constant", "sd_variable"), sep= ".") - old <- old_estimates[var_id] + var_id <- paste0(uncrt_obj$compartment, c("_sd_constant", "_sd_variable")) + old <- uncrt_obj$old_estimates[var_id] names(old) <- c("sdC", "sdV") xyplot( relative_u ~ concentration | determinand, data = data, aspect = 1, - scales = list(alternating = FALSE, x = list(log = TRUE, relation = "free", equispaced = FALSE)), + scales = list( + alternating = FALSE, + x = list(log = TRUE, relation = "free", equispaced = FALSE) + ), as.table = TRUE, panel = function(x, y, subscripts) { data <- data[subscripts, ] diff --git a/example_HELCOM.r b/example_HELCOM.r index 1fa69eb..0239f1a 100644 --- a/example_HELCOM.r +++ b/example_HELCOM.r @@ -154,10 +154,12 @@ write_summary_table( levels = c("Metals", "Organotins", "Organofluorines"), labels = c("Metals", "Organotins", "Organofluorines") ), - classColour = list( - below = c("EQS" = "green"), - above = c("EQS" = "red"), - none = "black" + symbology = list( + colour = list( + below = c("EQS" = "green"), + above = c("EQS" = "red"), + none = "black" + ) ), collapse_AC = list(EAC = "EQS"), output_dir = file.path("output", "example_HELCOM") @@ -239,10 +241,12 @@ write_summary_table( "Organobromines", "Organobromines" ) ), - classColour = list( - below = c("EQS" = "green"), - above = c("EQS" = "red"), - none = "black" + symbology = list( + colour = list( + below = c("EQS" = "green"), + above = c("EQS" = "red"), + none = "black" + ) ), collapse_AC = list(EAC = "EQS"), output_dir = file.path("output", "example_HELCOM") @@ -356,10 +360,12 @@ write_summary_table( "PCBs and dioxins", "PCBs and dioxins" ) ), - classColour = list( - below = c("BAC" = "green", "EAC" = "green", "EQS" = "green", "MPC" = "green"), - above = c("BAC" = "red", "EAC" = "red", "EQS" = "red", "MPC" = "red"), - none = "black" + symbology = list( + colour = list( + below = c("BAC" = "green", "EAC" = "green", "EQS" = "green", "MPC" = "green"), + above = c("BAC" = "red", "EAC" = "red", "EQS" = "red", "MPC" = "red"), + none = "black" + ) ), collapse_AC = list(EAC = c("EAC", "EQS", "MPC")), output_dir = file.path("output", "example_HELCOM") diff --git a/example_OSPAR.r b/example_OSPAR.r index 4ddadea..0709554 100644 --- a/example_OSPAR.r +++ b/example_OSPAR.r @@ -75,7 +75,6 @@ report_assessment( - # Sediment ---- sediment_data <- read_data( @@ -142,22 +141,24 @@ write_summary_table( "Polychlorinated biphenyls", "Dioxins", "Organochlorines (other)" ) ), - classColour = 
list( - below = c( - "BAC" = "blue", - "ERL" = "green", - "EAC" = "green", - "EQS" = "green", - "FEQG" = "green" - ), - above = c( - "BAC" = "orange", - "ERL" = "red", - "EAC" = "red", - "EQS" = "red", - "FEQG" = "red" - ), - none = "black" + symbology = list( + colour = list( + below = c( + "BAC" = "blue", + "ERL" = "green", + "EAC" = "green", + "EQS" = "green", + "FEQG" = "green" + ), + above = c( + "BAC" = "orange", + "ERL" = "red", + "EAC" = "red", + "EQS" = "red", + "FEQG" = "red" + ), + none = "black" + ) ), collapse_AC = list(BAC = "BAC", EAC = c("EAC", "ERL", "EQS", "FEQG")), output_dir = file.path("output", "example_OSPAR") @@ -187,6 +188,7 @@ biota_data <- read_data( biota_data <- tidy_data(biota_data) + biota_timeseries <- create_timeseries( biota_data, determinands.control = list( @@ -244,7 +246,7 @@ biota_assessment <- run_assessment( biota_assessment <- update_assessment( biota_assessment, - subset = !determinand %in% wk_metals, + subset = !determinand %in% wk_metals, parallel = TRUE ) @@ -260,9 +262,6 @@ biota_assessment <- update_assessment( check_assessment(biota_assessment) - - - # environmental summary wk_groups <- list( diff --git a/example_external_data.r b/example_external_data.r index 7d97c1f..3d741ac 100644 --- a/example_external_data.r +++ b/example_external_data.r @@ -129,3 +129,4 @@ plot_assessment( output_dir = file.path("output", "graphics"), file_format = "pdf" ) + diff --git a/inst/information/method_extraction.csv b/inst/information/method_extraction.csv index 421b28c..f82032f 100644 --- a/inst/information/method_extraction.csv +++ b/inst/information/method_extraction.csv @@ -10,6 +10,7 @@ AM-AQR,"APDC-complexation, MIBK-extraction, Aqua regia digestion",nn AM-HF-C,"APDC-complexation, MIBK-extraction, HF/HNO3 digestion", ANT,Acetonenitrile (legacy data text),nn AQR,Aqua regia extraction HNO3:HCL = 1:3,Pw +ASE-DCM-HX,Accelerated Solvent Extraction with dichloromethane and hexane,nn BRB,Bromate/Bromide solvent,nn CD,Cadmium reduction,nn CDS,Cadmium reduction and Sulfanilamid and N-1-Naphtylethylendiamindihydrochlorid,nn @@ -19,6 +20,7 @@ CTC,Extraction with carbon tetrachloride,nn DBC,"DBCDTC-Complexation, Chloroform-Methanol-Extraction",nn DCM,Dichloromethane,nn DET,Diethyl ether, +dSPE-Q,QuEChERS buffered acetonitrile (MeCN) extraction with salting out with MgSO4 and cleanup on dispersive solid-phase column,nn dSPE-QEN,QuEChERS following EN 15662,nn ETA,Ethylacetate,nn ETH,Ethanol,nn @@ -52,7 +54,7 @@ KPX-BA,Potassium-peroxodisulphate and boric acid (K2S2O8-H3BO3), LMF-A,Lithium metaborate fusion LiBO2 followed by dissolution in acid,Tot LMF-A-L,Lithium metaborate fusion LiBO2 followed by dissolution in acid (Lanthanoides),Tot MDCM,Methanol and dichloromethane,nn -METH,Methanol, +METH,Methanol,nn MGN-MGO,Ashing in presence of magnesiumnitrate and magnesiumoxide, MHCL,Methanol and HCL,nn MHX,Methanol/hexane mixture in acetic acid environment,nn @@ -72,6 +74,7 @@ SFE,Supercritical Fluid Extraction,nn SMD,Smedes extraction (cyclohexane/isopropanol),nn SOX,Soxhlet method,nn SPE-DCM,Solid phase extraction with dichloromethane, +SPE-MNA-SPE,Solid phase extraction with methanol and NaOH and Solid phase extraction,nn TCF,"Extraction with 1,1,2-Trichlortrifluorethan",nn TOL,Toluene,nn TOT,Total extraction method - report in METOA,nn diff --git a/man/normalise_sediment_HELCOM.Rd b/man/normalise_sediment_HELCOM.Rd index 84c9f8e..cb0921b 100644 --- a/man/normalise_sediment_HELCOM.Rd +++ b/man/normalise_sediment_HELCOM.Rd @@ -2,7 +2,7 @@ % Please edit documentation in 
R/import_functions.R \name{normalise_sediment_HELCOM} \alias{normalise_sediment_HELCOM} -\title{Normalises sediment concentrations, HELCOM vwersion} +\title{Normalises sediment concentrations, HELCOM version} \usage{ normalise_sediment_HELCOM(data, station_dictionary, info, control) } @@ -16,5 +16,5 @@ normalise_sediment_HELCOM(data, station_dictionary, info, control) \item{control}{control values} } \description{ -Normalises sediment concentrations, HELCOM vwersion +Normalises sediment concentrations, HELCOM version } diff --git a/man/read_data.Rd b/man/read_data.Rd index fd57158..e406751 100644 --- a/man/read_data.Rd +++ b/man/read_data.Rd @@ -33,9 +33,8 @@ read_data( supplied using 'file.path'). Defaults to "."; i.e. the working directory.} \item{data_format}{A string specifying whether the data were extracted from -the ICES webservice ("ICES" - the default) or are in the simplified format -designed for other data sources ("external"). The value "ICES_old" is -deprecated.} +the ICES webservice (\code{"ICES"} - the default) or are in the simplified +format designed for other data sources (\code{"external"}).} \item{info_files}{A list of files specifying reference tables which override the defaults. See examples.} @@ -89,6 +88,14 @@ will be \code{FALSE} if the vflag entry is \code{"S"} or suspect. Records for wh } \item \code{stations} } +} +\description{ +Reads in contaminant and effects data, the station dictionary and various +reference tables. For data from the ICES webservice, it matches data to +stations in the station dictionary. It also allows the user to set control +parameters that dictate the assessment process. +} +\details{ \subsection{Control parameters}{ Many aspects of the assessment process can be controlled through the @@ -96,10 +103,10 @@ parameters stored in \code{info$control}. This is a list populated with default values which can then be overwritten, if required, using the \code{control} argument. } + +\subsection{External data}{ + +If \code{data_format = "external"}, a simplified data and station file can +be supplied. See \code{vignette("external-file-format")} for details. } -\description{ -Reads in contaminant and effects data, the station dictionary and various -reference tables. For data from the ICES webservice, it matches data to -stations in the station dictionary. It also allows the user to set control -parameters that dictate the assessment process. } } diff --git a/man/report_assessment.Rd b/man/report_assessment.Rd index 41a2311..508cab6 100644 --- a/man/report_assessment.Rd +++ b/man/report_assessment.Rd @@ -8,6 +8,7 @@ report_assessment( assessment_obj, subset = NULL, output_dir = ".", + output_file = NULL, max_report = 100L ) } @@ -23,6 +24,10 @@ assessment_obj; use 'series' to identify individual timeseries.} supplied using 'file.path'). The default is the working directory. The output directory must already exist.} +\item{output_file}{An alternative file name to override the default. This is +currently only implemented for a single report. If not supplied, the .html +extension will be added.} + \item{max_report}{The maximum number of reports that will be generated. Defaults to 100.
Each report is about 1MB in size and takes a few seconds to run, so this prevents a ridiculous number of reports being created.} diff --git a/vignettes/example_HELCOM.Rmd b/vignettes/example_HELCOM.Rmd index 4aa2d95..ad21f80 100644 --- a/vignettes/example_HELCOM.Rmd +++ b/vignettes/example_HELCOM.Rmd @@ -47,7 +47,7 @@ in a directory `data`, and information files in a directory `information`, but you can use any directory for these. ```r -working.directory <- '/Users/stuart/git/HARSAT' +working.directory <- 'C:/Users/robfr/Documents/HARSAT/HARSAT' ``` # Water assessment @@ -92,23 +92,23 @@ water_data <- read_data( info_dir = file.path(working.directory, "information", "HELCOM_2023"), extraction = "2023/08/23" ) -#> Found in path determinand.csv /Users/stuart/git/HARSAT/information/HELCOM_2023/determinand.csv -#> Found in path species.csv /Users/stuart/git/HARSAT/information/HELCOM_2023/species.csv -#> Found in path thresholds_water.csv /Users/stuart/git/HARSAT/information/HELCOM_2023/thresholds_water.csv -#> Found in package method_extraction.csv /Users/stuart/git/HARSAT/inst/information/method_extraction.csv -#> Found in package pivot_values.csv /Users/stuart/git/HARSAT/inst/information/pivot_values.csv -#> Found in package matrix.csv /Users/stuart/git/HARSAT/inst/information/matrix.csv -#> Found in package imposex.csv /Users/stuart/git/HARSAT/inst/information/imposex.csv -#> MD5 digest for: '/Users/stuart/git/HARSAT/information/HELCOM_2023/determinand.csv': 'ed6c2f076d852976e23eab797bd16164' -#> MD5 digest for: '/Users/stuart/git/HARSAT/inst/information/matrix.csv': '9ba2731a7d90accddac659025835a6e4' -#> MD5 digest for: '/Users/stuart/git/HARSAT/information/HELCOM_2023/thresholds_water.csv': '599000609710f7a53450f16fef814d1c' +#> Found in path determinand.csv C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\determinand.csv +#> Found in path species.csv C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\species.csv +#> Found in path thresholds_water.csv C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\thresholds_water.csv +#> Found in package method_extraction.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/method_extraction.csv +#> Found in package pivot_values.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/pivot_values.csv +#> Found in package matrix.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/matrix.csv +#> Found in package imposex.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/imposex.csv +#> MD5 digest for: 'C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\determinand.csv': '4b48cbec9c71380f4b464779e643cab2' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/matrix.csv': '4b4fb3814bb84cfbf9b37f7b59d45eb9' +#> MD5 digest for: 'C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\thresholds_water.csv': '7e9487630022c11b0c3dd6d553a9955b' #> Reading station dictionary from: -#> '/Users/stuart/git/HARSAT/data/example_HELCOM/stations.txt' -#> MD5 digest for: '/Users/stuart/git/HARSAT/data/example_HELCOM/stations.txt': 'ada1ffa58215843e8e4d5f4d74f5e21e' +#> 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_HELCOM/stations.txt' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_HELCOM/stations.txt': 'd229a1c984d507537840e73080f3773c' #> #> Reading contaminant and effects data from: -#> '/Users/stuart/git/HARSAT/data/example_HELCOM/water.txt' -#> MD5 digest for: '/Users/stuart/git/HARSAT/data/example_HELCOM/water.txt': 
'd229a93b6e1c8b37008d375365488db4' +#> 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_HELCOM/water.txt' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_HELCOM/water.txt': 'b18b0556f6f78378c6f0a77682f51988' #> #> Matching data with station dictionary #> - restricting to stations in these convention areas: HELCOM @@ -335,25 +335,25 @@ sediment_data <- read_data( info_dir = file.path(working.directory, "information", "HELCOM_2023"), extraction = "2023/08/23" ) -#> Found in path determinand.csv /Users/stuart/git/HARSAT/information/HELCOM_2023/determinand.csv -#> Found in path species.csv /Users/stuart/git/HARSAT/information/HELCOM_2023/species.csv -#> Found in path thresholds_sediment.csv /Users/stuart/git/HARSAT/information/HELCOM_2023/thresholds_sediment.csv -#> Found in package method_extraction.csv /Users/stuart/git/HARSAT/inst/information/method_extraction.csv -#> Found in package pivot_values.csv /Users/stuart/git/HARSAT/inst/information/pivot_values.csv -#> Found in package matrix.csv /Users/stuart/git/HARSAT/inst/information/matrix.csv -#> Found in package imposex.csv /Users/stuart/git/HARSAT/inst/information/imposex.csv -#> MD5 digest for: '/Users/stuart/git/HARSAT/information/HELCOM_2023/determinand.csv': 'ed6c2f076d852976e23eab797bd16164' -#> MD5 digest for: '/Users/stuart/git/HARSAT/inst/information/matrix.csv': '9ba2731a7d90accddac659025835a6e4' -#> MD5 digest for: '/Users/stuart/git/HARSAT/inst/information/method_extraction.csv': 'b3c891f17b9b35774114edaa2f58b6cc' -#> MD5 digest for: '/Users/stuart/git/HARSAT/inst/information/pivot_values.csv': '372ad2d2ef807cec64ce1a7bd1967158' -#> MD5 digest for: '/Users/stuart/git/HARSAT/information/HELCOM_2023/thresholds_sediment.csv': '52456c255f587a539177d5fa0fbb7cf1' +#> Found in path determinand.csv C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\determinand.csv +#> Found in path species.csv C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\species.csv +#> Found in path thresholds_sediment.csv C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\thresholds_sediment.csv +#> Found in package method_extraction.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/method_extraction.csv +#> Found in package pivot_values.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/pivot_values.csv +#> Found in package matrix.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/matrix.csv +#> Found in package imposex.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/imposex.csv +#> MD5 digest for: 'C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\determinand.csv': '4b48cbec9c71380f4b464779e643cab2' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/matrix.csv': '4b4fb3814bb84cfbf9b37f7b59d45eb9' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/method_extraction.csv': '28e38bdd0b9e735643c60026dcda8a78' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/pivot_values.csv': '23ca1799017bfea360d586b1a70bffd4' +#> MD5 digest for: 'C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\thresholds_sediment.csv': '41c686160bc8e0877477239eec0f0b1b' #> Reading station dictionary from: -#> '/Users/stuart/git/HARSAT/data/example_HELCOM/stations.txt' -#> MD5 digest for: '/Users/stuart/git/HARSAT/data/example_HELCOM/stations.txt': 'ada1ffa58215843e8e4d5f4d74f5e21e' +#> 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_HELCOM/stations.txt' +#> MD5 digest for: 
'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_HELCOM/stations.txt': 'd229a1c984d507537840e73080f3773c' #> #> Reading contaminant and effects data from: -#> '/Users/stuart/git/HARSAT/data/example_HELCOM/sediment.txt' -#> MD5 digest for: '/Users/stuart/git/HARSAT/data/example_HELCOM/sediment.txt': '0cd5fa2f4a07a2750a56a24b2fe887bf' +#> 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_HELCOM/sediment.txt' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_HELCOM/sediment.txt': 'a5635836e9a69f3dd8be42d5078cad6b' #> #> Matching data with station dictionary #> - restricting to stations in these convention areas: HELCOM @@ -442,8 +442,7 @@ sediment_timeseries <- create_timeseries( #> Limit of quantification less than limit of detection: see limits_inconsistent.csv #> Censoring codes D and Q inconsistent with respective limits: see censoring_codes_inconsistent.csv #> Detection limit higher than data: see detection_limit_high.csv -#> Non-positive uncertainties: see non_positive_uncertainties.csv -#> Large uncertainties: see large_uncertainties.csv +#> Implausible uncertainties reported with data: see implausible_uncertainties_reported.csv #> Data submitted as BDE28, BDE47, BDE99, BD100, BD153, BD154 summed to give #> SBDE6 #> 61 of 124 samples lost due to incomplete submissions @@ -458,6 +457,7 @@ sediment_timeseries <- create_timeseries( #> Normalising metals to AL using pivot values #> Normalising organics to 5% CORG #> Removing sediment data where normaliser is a less than +#> Implausible uncertainties calculated in data processing: see implausible_uncertainties_calculated.csv #> Dropping groups of compounds / stations with no data between 2015 and 2020 ``` @@ -530,25 +530,25 @@ biota_data <- read_data( info_dir = file.path(working.directory, "information", "HELCOM_2023"), extraction = "2023/08/23" ) -#> Found in path determinand.csv /Users/stuart/git/HARSAT/information/HELCOM_2023/determinand.csv -#> Found in path species.csv /Users/stuart/git/HARSAT/information/HELCOM_2023/species.csv -#> Found in path thresholds_biota.csv /Users/stuart/git/HARSAT/information/HELCOM_2023/thresholds_biota.csv -#> Found in package method_extraction.csv /Users/stuart/git/HARSAT/inst/information/method_extraction.csv -#> Found in package pivot_values.csv /Users/stuart/git/HARSAT/inst/information/pivot_values.csv -#> Found in package matrix.csv /Users/stuart/git/HARSAT/inst/information/matrix.csv -#> Found in package imposex.csv /Users/stuart/git/HARSAT/inst/information/imposex.csv -#> MD5 digest for: '/Users/stuart/git/HARSAT/information/HELCOM_2023/determinand.csv': 'ed6c2f076d852976e23eab797bd16164' -#> MD5 digest for: '/Users/stuart/git/HARSAT/inst/information/matrix.csv': '9ba2731a7d90accddac659025835a6e4' -#> MD5 digest for: '/Users/stuart/git/HARSAT/information/HELCOM_2023/species.csv': '895d4f259f2a1ee8d9c7ec52210f584c' -#> MD5 digest for: '/Users/stuart/git/HARSAT/inst/information/imposex.csv': '7e42cb57944b9d79216ad25c12ccada5' -#> MD5 digest for: '/Users/stuart/git/HARSAT/information/HELCOM_2023/thresholds_biota.csv': '1798bfbfb15104c2cbf8ff00ccf13abd' +#> Found in path determinand.csv C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\determinand.csv +#> Found in path species.csv C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\species.csv +#> Found in path thresholds_biota.csv C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\thresholds_biota.csv +#> Found in package method_extraction.csv 
C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/method_extraction.csv +#> Found in package pivot_values.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/pivot_values.csv +#> Found in package matrix.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/matrix.csv +#> Found in package imposex.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/imposex.csv +#> MD5 digest for: 'C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\determinand.csv': '4b48cbec9c71380f4b464779e643cab2' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/matrix.csv': '4b4fb3814bb84cfbf9b37f7b59d45eb9' +#> MD5 digest for: 'C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\species.csv': '769328e51065226809c91944b6d8fe79' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/imposex.csv': 'b602a882d4783085c896bcf130c8f848' +#> MD5 digest for: 'C:\Users\robfr\Documents\HARSAT\HARSAT\information\HELCOM_2023\thresholds_biota.csv': '9af82cd9730c0b135edd4a003724e8a6' #> Reading station dictionary from: -#> '/Users/stuart/git/HARSAT/data/example_HELCOM/stations.txt' -#> MD5 digest for: '/Users/stuart/git/HARSAT/data/example_HELCOM/stations.txt': 'ada1ffa58215843e8e4d5f4d74f5e21e' +#> 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_HELCOM/stations.txt' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_HELCOM/stations.txt': 'd229a1c984d507537840e73080f3773c' #> #> Reading contaminant and effects data from: -#> '/Users/stuart/git/HARSAT/data/example_HELCOM/biota.txt' -#> MD5 digest for: '/Users/stuart/git/HARSAT/data/example_HELCOM/biota.txt': 'a986e6899ecd6c9fbc0bd7854b452c9a' +#> 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_HELCOM/biota.txt' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_HELCOM/biota.txt': '0a1a33c4e668e63c97a6d50cdc644d22' #> #> Matching data with station dictionary #> - restricting to stations in these convention areas: HELCOM @@ -661,8 +661,7 @@ biota_timeseries <- create_timeseries( #> Non-positive quantification limits: see non_positive_quant_limits.csv #> Censoring codes D and Q inconsistent with respective limits: see censoring_codes_inconsistent.csv #> Detection limit higher than data: see detection_limit_high.csv -#> Non-positive uncertainties: see non_positive_uncertainties.csv -#> Large uncertainties: see large_uncertainties.csv +#> Implausible uncertainties reported with data: see implausible_uncertainties_reported.csv #> Data submitted as BDE28, BDE47, BDE99, BD100, BD153, BD154 summed to give #> SBDE6 #> 257 of 497 samples lost due to incomplete submissions @@ -710,7 +709,6 @@ errors. Dealing with non-converged timeseries is a topic for a future vignette. 
check_assessment(biota_assessment) #> The following assessment models have not converged: #> 2299 PYR1OH Limanda limanda BI HPLC-FD -#> 5844 CD Clupea harengus LI biota_assessment <- update_assessment( biota_assessment, @@ -721,8 +719,7 @@ biota_assessment <- update_assessment( #> assessing series: station_code 2299; determinand PYR1OH; species Limanda limanda; matrix BI; method_analysis HPLC-FD; unit ng/ml check_assessment(biota_assessment) -#> The following assessment models have not converged: -#> 5844 CD Clupea harengus LI +#> All assessment models have converged ``` diff --git a/vignettes/example_OSPAR.Rmd b/vignettes/example_OSPAR.Rmd index aa8dc62..7e4f8bd 100644 --- a/vignettes/example_OSPAR.Rmd +++ b/vignettes/example_OSPAR.Rmd @@ -40,7 +40,7 @@ in a directory `data`, and information files in a directory `information`, but you can use any directory for these. ```r -working.directory <- '/Users/stuart/git/HARSAT' +working.directory <- 'C:/Users/robfr/Documents/HARSAT/HARSAT' ``` # Water assessment @@ -60,23 +60,23 @@ water_data <- read_data( info_dir = file.path(working.directory, "information", "OSPAR_2022"), extraction = "2023/08/23" ) -#> Found in path determinand.csv /Users/stuart/git/HARSAT/information/OSPAR_2022/determinand.csv -#> Found in path species.csv /Users/stuart/git/HARSAT/information/OSPAR_2022/species.csv -#> Found in path thresholds_water.csv /Users/stuart/git/HARSAT/information/OSPAR_2022/thresholds_water.csv -#> Found in package method_extraction.csv /Users/stuart/git/HARSAT/inst/information/method_extraction.csv -#> Found in package pivot_values.csv /Users/stuart/git/HARSAT/inst/information/pivot_values.csv -#> Found in package matrix.csv /Users/stuart/git/HARSAT/inst/information/matrix.csv -#> Found in package imposex.csv /Users/stuart/git/HARSAT/inst/information/imposex.csv -#> MD5 digest for: '/Users/stuart/git/HARSAT/information/OSPAR_2022/determinand.csv': '912a86ca3efdc719e405a7632e2b89ce' -#> MD5 digest for: '/Users/stuart/git/HARSAT/inst/information/matrix.csv': '9ba2731a7d90accddac659025835a6e4' -#> MD5 digest for: '/Users/stuart/git/HARSAT/information/OSPAR_2022/thresholds_water.csv': '2b165f406bb440297435ea3f46eb3612' +#> Found in path determinand.csv C:\Users\robfr\Documents\HARSAT\HARSAT\information\OSPAR_2022\determinand.csv +#> Found in path species.csv C:\Users\robfr\Documents\HARSAT\HARSAT\information\OSPAR_2022\species.csv +#> Found in path thresholds_water.csv C:\Users\robfr\Documents\HARSAT\HARSAT\information\OSPAR_2022\thresholds_water.csv +#> Found in package method_extraction.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/method_extraction.csv +#> Found in package pivot_values.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/pivot_values.csv +#> Found in package matrix.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/matrix.csv +#> Found in package imposex.csv C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/imposex.csv +#> MD5 digest for: 'C:\Users\robfr\Documents\HARSAT\HARSAT\information\OSPAR_2022\determinand.csv': '6b36346446c0ac04a52b3f1347829f6b' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/inst/information/matrix.csv': '4b4fb3814bb84cfbf9b37f7b59d45eb9' +#> MD5 digest for: 'C:\Users\robfr\Documents\HARSAT\HARSAT\information\OSPAR_2022\thresholds_water.csv': '615ef96f716ef1d43c01ab67f383c881' #> Reading station dictionary from: -#> '/Users/stuart/git/HARSAT/data/example_OSPAR/stations.txt' -#> MD5 digest for: '/Users/stuart/git/HARSAT/data/example_OSPAR/stations.txt': 
'057984ad2a1885bc5d15a41ee3b34471' +#> 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_OSPAR/stations.txt' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_OSPAR/stations.txt': '58b9e90f314e89f637c60558c06755f4' #> #> Reading contaminant and effects data from: -#> '/Users/stuart/git/HARSAT/data/example_OSPAR/water.txt' -#> MD5 digest for: '/Users/stuart/git/HARSAT/data/example_OSPAR/water.txt': '0ccaec75c5fd7e875c730467d58fdb26' +#> 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_OSPAR/water.txt' +#> MD5 digest for: 'C:/Users/robfr/Documents/HARSAT/HARSAT/data/example_OSPAR/water.txt': '13d63b6161b671165b215b58f5e22469' #> #> Matching data with station dictionary #> - restricting to stations in these convention areas: OSPAR @@ -93,8 +93,7 @@ water_data <- read_data( water_data <- tidy_data(water_data) #> -#> Oddities will be written to 'oddities/water' with previous oddities backed up to -#> 'oddities/water_backup' +#> Oddities will be written to 'oddities/water' #> #> Dropping 411 records from data flagged for deletion. Possible reasons are: #> - vflag = suspect @@ -136,6 +135,7 @@ water_timeseries <- create_timeseries( #> Limit of quantification less than limit of detection: see limits_inconsistent.csv #> Censoring codes D and Q inconsistent with respective limits: see censoring_codes_inconsistent.csv #> Detection limit higher than data: see detection_limit_high.csv +#> Implausible uncertainties reported with data: see implausible_uncertainties_reported.csv #> Data submitted as CHRTR relabelled as CHR #> Data submitted as BBF, BKF summed to give BBKF #> 1 of 71 samples lost due to incomplete submissions @@ -322,7 +322,7 @@ Now let's look at the `water_summary.csv` file: