diff --git a/NAMESPACE b/NAMESPACE index a5fb497e0..5b895ceb9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,7 +20,6 @@ export(CombineSpecs) export(FilterByFlags) export(FilterByLatestSnapshotDate) export(Flag) -export(Flag_Fisher) export(Flag_NormalApprox) export(Flag_Poisson) export(GetStrFunctionIfNamespaced) @@ -37,7 +36,6 @@ export(MakeParamLabelsList) export(MakeStudyInfo) export(MakeWideGroups) export(MakeWorkflowList) -export(Make_Timeline) export(ParseThreshold) export(RenderRmd) export(Report_FlagOverTime) diff --git a/R/Flag.R b/R/Flag.R index 0360907ea..7281733c1 100644 --- a/R/Flag.R +++ b/R/Flag.R @@ -29,76 +29,103 @@ #' #' @param dfAnalyzed `data.frame` where flags should be added. #' @param strColumn `character` Name of the column to use for thresholding. Default: `"Score"` -#' @param vThreshold `numeric` Vector of 2 numeric values representing lower and upper threshold values. All -#' values in `strColumn` are compared to `vThreshold` using strict comparisons. Values less than -#' the lower threshold or greater than the upper threshold are flagged. Values equal to the -#' threshold values are set to 0 (i.e., not flagged). If NA is provided for either threshold value, -#' it is ignored and no values are flagged based on the threshold. NA and NaN values in `strColumn` -#' are given NA flag values. -#' @param strValueColumn `character` Name of the column to use for sign of `Flag.` If the value for -#' that row is higher than the median of `strValueColumn`, then `Flag` is set to 1. Similarly, if -#' the value for that row is lower than the median of `strValueColumn`, then Flag is set to -1. -#' -#' @return `data.frame` with one row per site with columns: `GroupID`, `TotalCount`, `Metric`, `Score`, `Flag` +#' @param vThreshold `numeric` Vector of numeric values representing threshold values. Default is `c(-3,-2,2,3)` which is typical for z-scores. +#' @param vFlag `numeric` Vector of flag values. 
There must be one more item in `vFlag` than in `vThreshold` - that is `length(vThreshold)+1 == length(vFlag)`. Default is `c(-2,-1,0,1,2)`, which is typical for z-scores. +#' @param vFlagOrder `numeric` Vector of ordered flag values. Output data.frame will be sorted based on flag column using the order provided. NULL (or values that don't match vFlag) will leave the data unsorted. Must have identical values to vFlag. Default is `c(2,-2,1,-1,0)` which puts largest z-score outliers first in the data set. #' -#' @examples -#' dfTransformed <- Transform_Count(analyticsInput, strCountCol = "Numerator") +#' @return `data.frame` dfAnalyzed is returned with an additional `Flag` column. #' -#' dfAnalyzed <- Analyze_Identity(dfTransformed) +#' @examples #' -#' dfFlagged <- Flag(dfAnalyzed, vThreshold = c(0.001, 0.01)) +#' dfTransformed <- Transform_Rate(analyticsInput) +#' dfAnalyzed <- Analyze_NormalApprox(dfTransformed) +#' dfFlagged <- Flag(dfAnalyzed) #' #' @export Flag <- function( dfAnalyzed, strColumn = "Score", - vThreshold = NULL, - strValueColumn = NULL + vThreshold = c(-3,-2,2,3), + vFlag = c(-2,-1,0,1,2), + vFlagOrder = c(2,-2,1,-1,0) ) { + stop_if(cnd = !is.data.frame(dfAnalyzed), message = "dfAnalyzed is not a data frame") stop_if(cnd = !is.character(strColumn), message = "strColumn is not character") stop_if(cnd = !is.numeric(vThreshold), message = "vThreshold is not numeric") - stop_if(cnd = length(vThreshold) != 2, message = "vThreshold must be length of 2") + stop_if(cnd = !all(vThreshold == sort(vThreshold)), message = "vThreshold is not in ascending order") stop_if(cnd = is.null(vThreshold), message = "vThreshold cannot be NULL") stop_if(cnd = length(strColumn) != 1, message = "strColumn must be length of 1") stop_if(cnd = !(strColumn %in% names(dfAnalyzed)), message = "strColumn not found in dfAnalyzed") - stop_if(cnd = !("GroupID" %in% names(dfAnalyzed)), message = "GroupID not found in dfAnalyzed") + stop_if(cnd = !is.numeric(vFlag), message = "vFlag 
must be numeric") + stop_if(cnd = length(vFlag) != length(vThreshold)+1, message = "Improper number of Flag values provided") + stop_if(cnd = !is.numeric(vFlagOrder) & !is.null(vFlagOrder), message = "vFlagOrder must be numeric or NULL") - if (all(!is.na(vThreshold))) { - stop_if(cnd = vThreshold[2] <= vThreshold[1], "vThreshold must contain a minimum and maximum value (i.e., vThreshold = c(1, 2))") - } + dfFlagged <- dfAnalyzed - # Flag values outside the specified threshold. - dfFlagged <- dfAnalyzed %>% - mutate( - Flag = case_when( - !is.na(vThreshold[1]) & (.data[[strColumn]] < vThreshold[1]) ~ -1, - !is.na(vThreshold[2]) & (.data[[strColumn]] > vThreshold[2]) ~ 1, - !is.na(.data[[strColumn]]) & !is.nan(.data[[strColumn]]) ~ 0 + # generate flag values for dfAnalyzed[strColumn] based on vThreshold and vFlag + dfFlagged$Flag <- cut( + dfFlagged[[strColumn]], + breaks = c(-Inf, vThreshold, Inf), + labels = vFlag, + right = FALSE + ) %>% as.character() %>% as.numeric() #Parse from factor to numeric + + # Apply custom sort order using vFlagOrder + if(!is.null(vFlagOrder)){ + #all values in vFlag should be included in vFlagOrder + if(identical(sort(vFlag), sort(vFlagOrder))){ + dfFlagged <- dfFlagged %>% arrange(match(.data$Flag, vFlagOrder)) + LogMessage( + level = "info", + message = "Sorted dfFlagged using custom Flag order: {vFlagOrder}.", + cli_detail = "alert_info" ) + } else { + LogMessage( + level = "info", + message = "Mismatch in vFlagOrder and vFlag values. Aborting Sort and returning unsorted data.", + cli_detail = "alert_info" ) + } + } - # If strValueColumn is supplied, it can only affect sign of Flag (1 or -1). - if (!is.null(strValueColumn)) { - stop_if(cnd = !(strValueColumn %in% names(dfAnalyzed)), message = "strValueColumn not found in dfAnalyzed") + return(dfFlagged) +} + +#' Flag_NormalApprox +#' +#' @description +#' `r lifecycle::badge("stable")` +#' +#' Alias for `Flag()` +#' +#' @param dfAnalyzed `data.frame` where flags should be added. 
+#' @param strColumn `character` Name of the column to use for thresholding. Default: `"Score"` +#' @param vThreshold `numeric` Vector of numeric values representing threshold values. Default is `c(-3,-2,2,3)` which is typical for z-scores. +#' @param vFlag `numeric` Vector of flag values. There must be one more item in `vFlag` than in `vThreshold` - that is `length(vThreshold)+1 == length(vFlag)`. Default is `c(-2,-1,0,1,2)`, which is typical for z-scores. +#' @param vFlagOrder `numeric` Vector of ordered flag values. Output data.frame will be sorted based on flag column using the order provided. NULL (or values that don't match vFlag) will leave the data unsorted. Must have identical values to vFlag. Default is `c(2,-2,1,-1,0)` which puts largest z-score outliers first in the data set. +#' +#' +#' @export - nMedian <- dfFlagged %>% - pull(strValueColumn) %>% - stats::median(na.rm = TRUE) +Flag_NormalApprox <- Flag - dfFlagged <- dfFlagged %>% - mutate( - Flag = case_when( - Flag != 0 & .data[[strValueColumn]] >= nMedian ~ 1, - Flag != 0 & .data[[strValueColumn]] < nMedian ~ -1, - TRUE ~ Flag - ) - ) - } - dfFlagged <- dfFlagged %>% - arrange(match(.data$Flag, c(1, -1, 0))) +#' Flag_Poisson +#' +#' @description +#' `r lifecycle::badge("stable")` +#' +#' Alias for `Flag()` +#' +#' @param dfAnalyzed `data.frame` where flags should be added. +#' @param strColumn `character` Name of the column to use for thresholding. Default: `"Score"` +#' @param vThreshold `numeric` Vector of numeric values representing threshold values. Default is `c(-3,-2,2,3)` which is typical for z-scores. +#' @param vFlag `numeric` Vector of flag values. There must be one more item in `vFlag` than in `vThreshold` - that is `length(vThreshold)+1 == length(vFlag)`. Default is `c(-2,-1,0,1,2)`, which is typical for z-scores. +#' @param vFlagOrder `numeric` Vector of ordered flag values. Output data.frame will be sorted based on flag column using the order provided. 
NULL (or values that don't match vFlag) will leave the data unsorted. Must have identical values to vFlag. Default is `c(2,-2,1,-1,0)` which puts largest z-score outliers first in the data set. +#' +#' @export - return(dfFlagged) -} +Flag_Poisson <- Flag diff --git a/R/Flag_Fisher.R b/R/Flag_Fisher.R deleted file mode 100644 index 70288d842..000000000 --- a/R/Flag_Fisher.R +++ /dev/null @@ -1,68 +0,0 @@ -#' Flag_Fisher -#' -#' @description -#' `r lifecycle::badge("stable")` -#' -#' Add columns flagging sites that represent possible statistical outliers when the Fisher's Exact Test is used. -#' -#' @details -#' This function flags sites based on the Fisher's Exact Test result as part of the GSM data model (see `vignette("DataModel")`). -#' -#' @section Data Specification: -#' \code{Flag_Fisher} is designed to support the input data (`dfAnalyzed`) generated from the \code{Analyze_Fisher} function. At a minimum, the input must define a `dfAnalyzed` data frame with `Score`, `Prop`, and `Prop_Other` variables included and a `vThreshold`. These inputs will be used to identify possible statistical outliers in a new `Flag` column by comparing `Score`, `Prop`, and `Prop_Other` values to the specified thresholds. -#' -#' The following columns are considered required: -#' - `GroupID` - Group ID; default is `SiteID` -#' - `GroupLevel` - Group Type -#' - `Score` - P-value calculated from the rates of exposure provided to `Analyze_Fisher()` -#' - `Prop` - Proportion of events of interest over days of exposure -#' - `Prop_Other` - Cumulative proportion of events of interest over days of exposure -#' -#' @param dfAnalyzed data.frame where flags should be added. -#' @param vThreshold Vector of 2 numeric values representing lower and upper p-value thresholds. -#' -#' @return `data.frame` with one row per site with columns: `GroupID`, `Numerator`, `Denominator`, `Metric`, `Score`, `PredictedCount`, and `Flag`. 
-#' -#' @examples -#' dfTransformed <- Transform_Rate(analyticsInput) -#' -#' dfAnalyzed <- Analyze_Poisson(dfTransformed) -#' -#' dfFlagged <- Flag(dfAnalyzed, vThreshold = c(-5, 5)) -#' -#' @export - -Flag_Fisher <- function( - dfAnalyzed, - vThreshold = NULL -) { - stop_if(cnd = !is.data.frame(dfAnalyzed), message = "dfAnalyzed is not a data frame") - stop_if(cnd = !is.numeric(vThreshold), message = "vThreshold is not numeric") - stop_if(cnd = length(vThreshold) != 2, message = "vThreshold must be length of 2") - stop_if(cnd = is.null(vThreshold), message = "vThreshold cannot be NULL") - stop_if(cnd = !("GroupID" %in% names(dfAnalyzed)), message = "GroupID not found in dfAnalyzed") - - if (all(!is.na(vThreshold))) { - stop_if(cnd = vThreshold[2] <= vThreshold[1], "vThreshold must contain a minimum and maximum value (i.e., vThreshold = c(1, 2))") - } - - dfFlagged <- dfAnalyzed %>% - mutate( - Flag = case_when( - # score < lower threshold and metric < overall metric sans current group - (.data$Score < vThreshold[1]) & (.data$Prop < .data$Prop_Other) ~ -2, - # score < lower threshold and metric >= overall metric sans current group - (.data$Score < vThreshold[1]) & (.data$Prop >= .data$Prop_Other) ~ 2, - # score < upper threshold and metric < overall metric sans current group - (.data$Score < vThreshold[2]) & (.data$Prop < .data$Prop_Other) ~ -1, - # score < upper threshold and metric >= overall metric sans current group - (.data$Score < vThreshold[2]) & (.data$Prop >= .data$Prop_Other) ~ 1, - !is.na(.data$Score) & !is.nan(.data$Score) ~ 0 - ) - ) - - dfFlagged <- dfFlagged %>% - arrange(match(.data$Flag, c(2, -2, 1, -1, 0))) - - return(dfFlagged) -} diff --git a/R/Flag_NormalApprox.R b/R/Flag_NormalApprox.R deleted file mode 100644 index 5014c14b9..000000000 --- a/R/Flag_NormalApprox.R +++ /dev/null @@ -1,85 +0,0 @@ -#' Flag_NormalApprox -#' -#' @description -#' `r lifecycle::badge("stable")` -#' -#' Add columns flagging sites that represent possible statistical 
outliers. -#' -#' @details -#' This function flags sites based on the funnel plot with normal approximation analysis result as part of -#' the GSM data model (see `vignette("DataModel")`). -#' -#' @section Data Specification: -#' \code{Flag_NormalApprox} is designed to support the input data (`dfAnalyzed`) from \code{Analyze_NormalApprox} function. -#' At a minimum, the input data must have a `GroupID` column and a column of numeric values (identified -#' by the `strColumn` parameter) that will be compared to the specified thresholds (`vThreshold`) to -#' calculate a new `Flag` column. -#' In short, the following columns are considered: -#' - `GroupID` - Group ID (required) -#' - `GroupLevel` - Group Type -#' - `strColumn` - A column to use for Thresholding (required) -#' - `strValueColumn` - A column to be used for the sign of the flag (optional) -#' -#' @param dfAnalyzed `data.frame` where flags should be added. -#' @param vThreshold `vector` of 4 numeric values representing lower and upper threshold values. All -#' values in `strColumn` are compared to `vThreshold` using strict comparisons. Values less than the lower threshold or greater than the upper threshold are flagged as -1 and 1 respectively. Values equal to the threshold values are set to 0 (i.e. not flagged). If NA is provided for either threshold value it is ignored, and no values are flagged based on the threshold. NA and NaN values in `strColumn` are given NA flag values. 
-#' -#' @return `data.frame` with "Flag" column added -#' -#' @examples -#' dfTransformed <- Transform_Rate(analyticsInput) -#' -#' # Binary -#' dfAnalyzed <- Analyze_NormalApprox(dfTransformed, strType = "binary") -#' dfFlagged <- Flag_NormalApprox(dfAnalyzed, vThreshold = c(-3, -2, 2, 3)) -#' -#' # Rate -#' dfAnalyzed <- Analyze_NormalApprox(dfTransformed, strType = "rate") -#' dfFlagged <- Flag_NormalApprox(dfAnalyzed, vThreshold = c(-3, -2, 2, 3)) -#' -#' @export - -Flag_NormalApprox <- function( - dfAnalyzed, - vThreshold = NULL -) { - stop_if(cnd = !is.data.frame(dfAnalyzed), message = "dfAnalyzed is not a data frame") - stop_if(cnd = !is.numeric(vThreshold), message = "vThreshold is not numeric") - stop_if(cnd = !(length(vThreshold) == 4), message = "vThreshold must be length of 4") - stop_if(cnd = is.null(vThreshold), message = "vThreshold cannot be NULL") - - - if (all(!is.na(vThreshold))) { - stop_if( - cnd = vThreshold[2] <= vThreshold[1], - message = "vThreshold must contain cutoff for moderate/high risks in two directions (i.e., vThreshold = c(-3, -2, 2, 3))" - ) - stop_if( - cnd = vThreshold[3] <= vThreshold[2], - message = "vThreshold must contain cutoff for moderate/high risks in two directions (i.e., vThreshold = c(-3, -2, 2, 3))" - ) - stop_if( - cnd = vThreshold[4] <= vThreshold[3], - message = "vThreshold must contain cutoff for moderate/high risks in two directions (i.e., vThreshold = c(-3, -2, 2, 3))" - ) - } - - - # Flag values outside the specified threshold. 
- dfFlagged <- dfAnalyzed %>% - mutate( - Flag = case_when( - (.data$Score < vThreshold[1]) ~ -2, - (.data$Score < vThreshold[2]) ~ -1, - (.data$Score < vThreshold[3]) ~ 0, - (.data$Score < vThreshold[4]) ~ 1, - (.data$Score >= vThreshold[4]) ~ 2 - ) - ) - - dfFlagged <- dfFlagged %>% - arrange(match(.data$Flag, c(2, -2, 1, -1, 0))) - - - return(dfFlagged) -} diff --git a/R/Flag_Poisson.R b/R/Flag_Poisson.R deleted file mode 100644 index 9f0caf06e..000000000 --- a/R/Flag_Poisson.R +++ /dev/null @@ -1,75 +0,0 @@ -#' Flag_Poisson -#' -#' @description -#' `r lifecycle::badge("stable")` -#' -#' Add columns flagging sites that represent possible statistical outliers when the Poisson statistical method is used. -#' -#' @details -#' This function flags sites based on the Poisson analysis result as part of `vignette("DataModel")`. -#' -#' @section Data Specification: -#' \code{Flag_Poisson} is designed to support the input data (`dfAnalyzed`) generated from the \code{Analyze_Poisson} function. At a minimum, the input must define a `dfAnalyzed` data frame with a `Score` variable included and a `vThreshold`. These inputs will be used to identify possible statistical outliers in a new `Flag` column by comparing `Score` values to the specified thresholds. -#' -#' The following columns are considered required: -#' - `GroupID` - Group ID; default is `SiteID` -#' - `GroupLevel` - Group Type -#' - `Score` - Site residuals calculated from the rates of exposure provided to `Analyze_Poisson()` -#' -#' @param dfAnalyzed data.frame where flags should be added. -#' @param vThreshold Vector of 4 numeric values representing lower and upper threshold values. All values in the `Score` column are compared to `vThreshold` using strict comparisons. Values less than the lower thresholds or greater than the upper thresholds are flagged. Values equal to the threshold values are set to 0 (i.e., not flagged). 
If NA is provided for either threshold value, it is ignored and no values are flagged based on the thresholds. NA and NaN values in `Score` are given NA flag values. -#' -#' @return `data.frame` with one row per site with columns: `GroupID`, `Numerator`, `Denominator`, `Metric`, `Score`, `PredictedCount`, `Flag` -#' -#' @examples -#' dfTransformed <- Transform_Rate(analyticsInput) -#' -#' dfAnalyzed <- Analyze_Poisson(dfTransformed) -#' -#' dfFlagged <- Flag_Poisson(dfAnalyzed, vThreshold = c(-7, -5, 5, 7)) -#' -#' @export - -Flag_Poisson <- function( - dfAnalyzed, - vThreshold = NULL -) { - stop_if(cnd = !is.data.frame(dfAnalyzed), message = "dfAnalyzed is not a data frame") - stop_if(cnd = !is.numeric(vThreshold), message = "vThreshold is not numeric") - stop_if(cnd = !(length(vThreshold) == 4), message = "vThreshold must be length of 4") - stop_if(cnd = is.null(vThreshold), message = "vThreshold cannot be NULL") - - - if (all(!is.na(vThreshold))) { - stop_if( - cnd = vThreshold[2] <= vThreshold[1], - message = "vThreshold must contain cutoff for moderate/high risks in two directions (i.e., vThreshold = c(-7, -5, 5, 7))" - ) - stop_if( - cnd = vThreshold[3] <= vThreshold[2], - message = "vThreshold must contain cutoff for moderate/high risks in two directions (i.e., vThreshold = = c(-7, -5, 5, 7))" - ) - stop_if( - cnd = vThreshold[4] <= vThreshold[3], - message = "vThreshold must contain cutoff for moderate/high risks in two directions (i.e., vThreshold = = c(-7, -5, 5, 7))" - ) - } - - # Flag values outside the specified threshold. 
- dfFlagged <- dfAnalyzed %>% - mutate( - Flag = case_when( - (.data$Score < vThreshold[1]) ~ -2, - (.data$Score < vThreshold[2]) ~ -1, - (.data$Score < vThreshold[3]) ~ 0, - (.data$Score < vThreshold[4]) ~ 1, - (.data$Score >= vThreshold[4]) ~ 2 - ) - ) - - dfFlagged <- dfFlagged %>% - arrange(match(.data$Flag, c(2, -2, 1, -1, 0))) - - - return(dfFlagged) -} diff --git a/R/Report_Timeline.R b/R/Report_Timeline.R deleted file mode 100644 index d09097c44..000000000 --- a/R/Report_Timeline.R +++ /dev/null @@ -1,164 +0,0 @@ -#' Create interactive timeline for study data -#' -#' @description -#' `r lifecycle::badge("stable")` -#' -#' @param status_study the table containing study data of interest -#' @param longitudinal optional argument for supplying longitudinal snapshot information -#' @param n_breaks the number of breaks to include in the x-axis. NOTE pretty breaks are used to assign break locations and may give more or less that the number of breaks specified. -#' @param date_format The format to apply to the x-axis labels -#' @param bInteractive logical argument to make plot interactive or not. -#' -#' @return `plot` interactive timeline plot. -#' -#' @examples -#' if (require("gggenes") && require("ggiraph")) { -#' Make_Timeline(status_study = clindata::ctms_study) -#' } -#' -#' @export - -Make_Timeline <- function(status_study, longitudinal = NULL, n_breaks = 10, date_format = "%b\n%Y", bInteractive = TRUE) { - history <- length(longitudinal) > 0 - - if (history) { - snapshots <- longitudinal$rpt_study_details %>% - select(date = "gsm_analysis_date") %>% - mutate( - .before = date, - activity = "Snapshot" - ) %>% - as_tibble() - } - - d <- status_study %>% - { - if (history) select(., -c("snapshot_date", "gsm_analysis_date")) else . 
- } %>% - mutate(across( - everything(), - ~ as.Date(as.character(.), tz = "UTC", format = "%Y-%m-%d") - )) %>% - select_if(!is.na(.)) - - if (ncol(d) > 1) { - d <- d %>% - tidyr::pivot_longer(everything(), names_to = "activity", values_to = "date") %>% - { - if (history) bind_rows(., snapshots) else . - } %>% - mutate( - "date" = as.Date(.data$date), - "estimate" = grepl("est", .data$activity), - "disp" = case_when( - grepl("\n", date_format) & .data$estimate ~ 3.75, - grepl("\n", date_format) & - !.data$estimate ~ -3.75, - !grepl("\n", date_format) & - .data$estimate ~ 2.7, - !grepl("\n", date_format) & - !.data$estimate ~ -2.7 - ), - label = case_when( - grepl("fpfv", .data$activity) ~ "First Patient First Visit", - grepl("lpfv", .data$activity) ~ "Last Patient First Visit", - grepl("lplv", .data$activity) ~ "Last Patient Last Visit", - TRUE ~ .data$activity - ) - ) - - # Define x axis breaks - breaks <- pretty(seq(min(d$date), max(d$date), by = "day"), n_breaks) - - # Define blank plot theme - empty <- function() { - (theme( - plot.background = element_rect(fill = "white"), - panel.background = element_rect(fill = "white"), - legend.position = c(0.5, 0.35), - legend.title = element_text(vjust = 2.5), - legend.key = element_rect(fill = "white"), - legend.box = "horizontal", - legend.text = element_text(hjust = 0, size = 8), - legend.margin = margin( - t = 1, - r = 1, - b = 1, - l = 1, - unit = "mm" - ), - legend.spacing.y = unit(0, "cm"), - axis.title = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank(), - axis.line = element_blank(), - axis.ticks.x = element_blank(), - axis.text.x = element_blank() - )) - } - - # Generate Plot - a <- ggplot(d, aes(.data$date, .data$disp)) + - scale_x_date( - date_labels = date_format, - limits = c( - min(d$date) - (as.numeric(max(d$date) - min(d$date)) * .1), - max(d$date) + (as.numeric(max(d$date) - min(d$date)) * .1) - ), - expand = c(.15, -.15) - ) + - gggenes::geom_gene_arrow( - aes( - xmin = 
min(date) - (as.numeric(max(date) - min(date)) * .1), - xmax = max(date) + (as.numeric(max(date) - min(date)) * .1), - y = 0 - ), - color = "dodgerblue", - arrowhead_height = unit(15, "mm"), - arrow_body_height = unit(ifelse(grepl("\n", date_format), 10, 7), "mm") - ) + - { - if (bInteractive) { - ggiraph::geom_point_interactive( - aes( - color = .data$label, - shape = .data$estimate, - data_id = .data$date, - tooltip = paste0(.data$label, "\n", .data$date) - ), - size = 2 - ) - } else { - geom_point( - aes( - color = .data$label, - shape = .data$estimate - ) - ) - } - } + - scale_shape_manual(values = c(19, 1), labels = c("Actual", "Estimated")) + - annotate(geom = "text", x = breaks, y = 0, label = format(breaks, format = date_format), size = 3, fontface = 2) + - expand_limits(y = c(5, -25)) + - guides( - alpha = "none", - shape = guide_legend(title.position = "top"), - color = guide_legend( - ncol = 3, - title.position = "top" - ) - ) + - labs( - shape = "Fill/Empty", - color = "Legend" - ) + - empty() - - return(ggiraph::girafe(ggobj = a, canvas_id = "timeline")) - } else { - LogMessage( - level = "warn", - message = "Could not detect any columns in date format." - ) - } -} diff --git a/R/Summarize.R b/R/Summarize.R index 1b31ca3fa..5ae12afff 100644 --- a/R/Summarize.R +++ b/R/Summarize.R @@ -24,12 +24,10 @@ #' when associated with a workflow. 
#' #' @examples +#' #' dfTransformed <- Transform_Rate(analyticsInput) -#' -#' dfAnalyzed <- Analyze_Poisson(dfTransformed) -#' -#' dfFlagged <- Flag(dfAnalyzed, vThreshold = c(-5, 5)) -#' +#' dfAnalyzed <- Analyze_NormalApprox(dfTransformed) +#' dfFlagged <- Flag(dfAnalyzed) #' dfSummary <- Summarize(dfFlagged) #' #' @export diff --git a/R/util-MakeBounds.R b/R/util-MakeBounds.R index b8fdca230..724e230d5 100644 --- a/R/util-MakeBounds.R +++ b/R/util-MakeBounds.R @@ -65,6 +65,9 @@ MakeBounds <- function( as.list() vThreshold <- ParseThreshold(strThreshold = lMetric$Threshold) if (!is.null(lMetric$AnalysisType) && + tolower(unique(lMetric$AnalysisType)) %in% c("identity")) { + dfBounds <- NULL + } else if (!is.null(lMetric$AnalysisType) && tolower(unique(lMetric$AnalysisType)) %in% c("poisson")) { dfBounds <- Analyze_Poisson_PredictBounds( dfResult, diff --git a/_pkgdown.yml b/_pkgdown.yml index 22decd723..df94e2e21 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -44,7 +44,6 @@ reference: - starts_with("Widget_") - Analyze_NormalApprox_PredictBounds - Analyze_Poisson_PredictBounds - - Make_Timeline - title: Analyze desc: Conduct statistical analysis based on input data @@ -61,7 +60,6 @@ reference: - Transform_Count - Transform_Rate - Flag - - Flag_Fisher - Flag_NormalApprox - Flag_Poisson - Summarize diff --git a/inst/examples/pk/PK.yaml b/inst/examples/pk/PK.yaml new file mode 100644 index 000000000..c4df7e050 --- /dev/null +++ b/inst/examples/pk/PK.yaml @@ -0,0 +1,23 @@ +meta: + Type: Mapped + ID: PK + Description: PK Data Mapping + Priority: 1 +spec: + Raw_PK: + subjid: + required: true + type: character + pktpt: + required: true + type: character + pkperf: + required: true + type: character +steps: + - output: Mapped_PK + name: = + params: + lhs: Mapped_PK + rhs: Raw_PK + diff --git a/inst/examples/pk/kri0013.yaml b/inst/examples/pk/kri0013.yaml new file mode 100644 index 000000000..bf5f98b5a --- /dev/null +++ b/inst/examples/pk/kri0013.yaml @@ -0,0 +1,80 @@ 
+meta: + Type: Analysis + ID: kri0013a + GroupLevel: Site + Abbreviation: PKi + Metric: PK Rate - Identity + Numerator: PK Measures Completed + Denominator: PK Measures Expected + Model: Identity + Score: Percent Complete + AnalysisType: identity + Threshold: 0.85, 1.0 + nMinDenominator: 10 +spec: + Mapped_SUBJ: + subjid: + required: true + type: character + invid: + required: true + type: character + Mapped_PK: + subjid: + required: true + type: character + pktpt: + required: true + type: character + pkperf: + required: true + type: character +steps: + - output: vThreshold + name: ParseThreshold + params: + strThreshold: Threshold + - output: Temp_PK_Complete + name: RunQuery + params: + df: Mapped_PK + strQuery: "SELECT * FROM df WHERE pkperf = 'Yes'" + - output: Analysis_Input + name: Input_Rate + params: + dfSubjects: Mapped_SUBJ + dfNumerator: Temp_PK_Complete + dfDenominator: Mapped_PK + strSubjectCol: subjid + strGroupCol: invid + strGroupLevel: GroupLevel + strNumeratorMethod: Count + strDenominatorMethod: Count + - output: Analysis_Transformed + name: Transform_Rate + params: + dfInput: Analysis_Input + - output: Analysis_Analyzed + name: Analyze_Identity + params: + dfTransformed: Analysis_Transformed + - output: Analysis_Flagged + name: Flag + params: + dfAnalyzed: Analysis_Analyzed + vThreshold: vThreshold + - output: Analysis_Summary + name: Summarize + params: + dfFlagged: Analysis_Flagged + nMinDenominator: nMinDenominator + - output: lAnalysis + name: list + params: + ID: ID + Analysis_Input: Analysis_Input + Analysis_Transformed: Analysis_Transformed + Analysis_Analyzed: Analysis_Analyzed + Analysis_Flagged: Analysis_Flagged + Analysis_Summary: Analysis_Summary + diff --git a/inst/examples/pk/pk_kri.R b/inst/examples/pk/pk_kri.R new file mode 100644 index 000000000..2d9392b9b --- /dev/null +++ b/inst/examples/pk/pk_kri.R @@ -0,0 +1,75 @@ +library(tidyr) +set.seed(1234) + +# Step 0 - Simulate PK data for clindata participants +mapped_pk <- 
tidyr::crossing( + subjid = unique(clindata::rawplus_dm$subjid), + pktpt = c( + "Cycle 1 Day 1", + "Cycle 1 Day 1", + "Cycle 1 Day 1", + "Cycle 1 Day 15", + "Cycle 1 Day 2", + "Cycle 1 Day 4", + "Cycle 1 Day 8" + ) +) +mapped_pk$pkperf <- sample(c("Yes","No"), prob=c(0.95,0.05),nrow(mapped_pk), replace = TRUE) + +pk_data <-list( + Mapped_SUBJ= clindata::rawplus_dm, + Mapped_PK= mapped_pk +) + +# Example 1 - Standard KRI with normal approximation +pk_kri <- gsm::RunWorkflows( + lWorkflow = MakeWorkflowList(strName = "kri0013.yaml"), + lData = pk_data +) +Widget_BarChart(dfResults = pk_kri$Analysis_kri0013$Analysis_Summary) + +# Example 2 - KRI with custom flagging based on 90% threshold + +pk_kri_alt <- gsm::RunWorkflows( + lWorkflow = MakeWorkflowList(strName = "kri0013a.yaml"), + lData = pk_data +) +Widget_BarChart(dfResults = pk_kri_alt$Analysis_kri0013a$Analysis_Summary) + + +# Example 3 - Run Reports with other KRIs +lSource <- list( + Source_SUBJ = clindata::rawplus_dm, + Source_AE = clindata::rawplus_ae, + Source_PD = clindata::ctms_protdev, + Source_LB = clindata::rawplus_lb, + Source_STUDCOMP = clindata::rawplus_studcomp, + Source_SDRGCOMP = clindata::rawplus_sdrgcomp %>% dplyr::filter(.data$phase == 'Blinded Study Drug Completion'), + Source_DATACHG = clindata::edc_data_points, + Source_DATAENT = clindata::edc_data_pages, + Source_QUERY = clindata::edc_queries, + Source_ENROLL = clindata::rawplus_enroll, + Source_SITE = clindata::ctms_site, + Source_STUDY = clindata::ctms_study, + Source_PK = pk_data$Mapped_PK # only new line +) + +# Step 0 - Data Ingestion - standardize tables/columns names +mappings_wf <- MakeWorkflowList(strPath = "workflow/1_mappings") +mappings_spec <- CombineSpecs(mappings_wf) +lRaw <- Ingest(lSource, mappings_spec) + +# Step 1 - Create Mapped Data Layer - filter, aggregate and join raw data to create mapped data layer +mapped <- RunWorkflows(mappings_wf, lRaw) + +# Step 2 - Create Metrics - calculate metrics using mapped data 
+metrics_wf <- MakeWorkflowList(strPath = "workflow/2_metrics") +analyzed <- RunWorkflows(metrics_wf, mapped) + +# Step 3 - Create Reporting Layer - create reports using metrics data +reporting_wf <- MakeWorkflowList(strPath = "workflow/3_reporting") +reporting <- RunWorkflows(reporting_wf, c(mapped, list(lAnalyzed = analyzed, lWorkflows = metrics_wf))) + +# Step 4 - Create KRI Report - create KRI report using reporting data +module_wf <- MakeWorkflowList(strPath = "workflow/4_modules") +lReports <- RunWorkflows(module_wf, reporting) diff --git a/inst/workflow/2_metrics/cou0001.yaml b/inst/workflow/2_metrics/cou0001.yaml index ed5967d6d..7db8bf417 100644 --- a/inst/workflow/2_metrics/cou0001.yaml +++ b/inst/workflow/2_metrics/cou0001.yaml @@ -49,7 +49,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/cou0002.yaml b/inst/workflow/2_metrics/cou0002.yaml index c42887df0..c5e025e31 100644 --- a/inst/workflow/2_metrics/cou0002.yaml +++ b/inst/workflow/2_metrics/cou0002.yaml @@ -56,7 +56,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/cou0003.yaml b/inst/workflow/2_metrics/cou0003.yaml index decacb221..abfe3e9e8 100644 --- a/inst/workflow/2_metrics/cou0003.yaml +++ b/inst/workflow/2_metrics/cou0003.yaml @@ -56,7 +56,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/cou0004.yaml b/inst/workflow/2_metrics/cou0004.yaml index 34157c30b..713691b59 100644 --- a/inst/workflow/2_metrics/cou0004.yaml +++ 
b/inst/workflow/2_metrics/cou0004.yaml @@ -56,7 +56,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/cou0005.yaml b/inst/workflow/2_metrics/cou0005.yaml index 44f192b79..f76f614ac 100644 --- a/inst/workflow/2_metrics/cou0005.yaml +++ b/inst/workflow/2_metrics/cou0005.yaml @@ -58,7 +58,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/cou0006.yaml b/inst/workflow/2_metrics/cou0006.yaml index c1bfd402d..dc2469c00 100644 --- a/inst/workflow/2_metrics/cou0006.yaml +++ b/inst/workflow/2_metrics/cou0006.yaml @@ -53,7 +53,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/cou0007.yaml b/inst/workflow/2_metrics/cou0007.yaml index df6864e33..0e15096e7 100644 --- a/inst/workflow/2_metrics/cou0007.yaml +++ b/inst/workflow/2_metrics/cou0007.yaml @@ -55,7 +55,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/cou0008.yaml b/inst/workflow/2_metrics/cou0008.yaml index 99a054f2f..b3714e9a9 100644 --- a/inst/workflow/2_metrics/cou0008.yaml +++ b/inst/workflow/2_metrics/cou0008.yaml @@ -56,7 +56,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/cou0009.yaml 
b/inst/workflow/2_metrics/cou0009.yaml index bdd889591..ef43e81da 100644 --- a/inst/workflow/2_metrics/cou0009.yaml +++ b/inst/workflow/2_metrics/cou0009.yaml @@ -60,7 +60,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/cou0010.yaml b/inst/workflow/2_metrics/cou0010.yaml index 4a7ac421c..c6015dd13 100644 --- a/inst/workflow/2_metrics/cou0010.yaml +++ b/inst/workflow/2_metrics/cou0010.yaml @@ -53,7 +53,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/cou0011.yaml b/inst/workflow/2_metrics/cou0011.yaml index eb9b42233..8731057b4 100644 --- a/inst/workflow/2_metrics/cou0011.yaml +++ b/inst/workflow/2_metrics/cou0011.yaml @@ -53,7 +53,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/cou0012.yaml b/inst/workflow/2_metrics/cou0012.yaml index ba9417505..52c29cc05 100644 --- a/inst/workflow/2_metrics/cou0012.yaml +++ b/inst/workflow/2_metrics/cou0012.yaml @@ -50,7 +50,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/kri0001.yaml b/inst/workflow/2_metrics/kri0001.yaml index 3e80d6b15..0ab663581 100644 --- a/inst/workflow/2_metrics/kri0001.yaml +++ b/inst/workflow/2_metrics/kri0001.yaml @@ -49,7 +49,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: 
dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/kri0002.yaml b/inst/workflow/2_metrics/kri0002.yaml index e45071924..5457245c5 100644 --- a/inst/workflow/2_metrics/kri0002.yaml +++ b/inst/workflow/2_metrics/kri0002.yaml @@ -56,7 +56,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/kri0003.yaml b/inst/workflow/2_metrics/kri0003.yaml index ea55acd87..0fba8849f 100644 --- a/inst/workflow/2_metrics/kri0003.yaml +++ b/inst/workflow/2_metrics/kri0003.yaml @@ -56,7 +56,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/kri0004.yaml b/inst/workflow/2_metrics/kri0004.yaml index 6aac77c83..e287d6f8b 100644 --- a/inst/workflow/2_metrics/kri0004.yaml +++ b/inst/workflow/2_metrics/kri0004.yaml @@ -56,7 +56,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/kri0005.yaml b/inst/workflow/2_metrics/kri0005.yaml index 59ce5a66c..f20e4f7b2 100644 --- a/inst/workflow/2_metrics/kri0005.yaml +++ b/inst/workflow/2_metrics/kri0005.yaml @@ -58,7 +58,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/kri0006.yaml b/inst/workflow/2_metrics/kri0006.yaml index 04c5ebb87..bc9de9408 100644 --- a/inst/workflow/2_metrics/kri0006.yaml +++ b/inst/workflow/2_metrics/kri0006.yaml @@ -53,7 +53,7 @@ steps: dfTransformed: Analysis_Transformed 
strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/kri0007.yaml b/inst/workflow/2_metrics/kri0007.yaml index dc7d87376..0b2bf5929 100644 --- a/inst/workflow/2_metrics/kri0007.yaml +++ b/inst/workflow/2_metrics/kri0007.yaml @@ -55,7 +55,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/kri0008.yaml b/inst/workflow/2_metrics/kri0008.yaml index 5f1f89688..e7ac2267b 100644 --- a/inst/workflow/2_metrics/kri0008.yaml +++ b/inst/workflow/2_metrics/kri0008.yaml @@ -56,7 +56,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/kri0009.yaml b/inst/workflow/2_metrics/kri0009.yaml index b2eddca2c..17e7f2aa7 100644 --- a/inst/workflow/2_metrics/kri0009.yaml +++ b/inst/workflow/2_metrics/kri0009.yaml @@ -60,7 +60,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/kri0010.yaml b/inst/workflow/2_metrics/kri0010.yaml index 42c9909fe..9aaa1d286 100644 --- a/inst/workflow/2_metrics/kri0010.yaml +++ b/inst/workflow/2_metrics/kri0010.yaml @@ -53,7 +53,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/kri0011.yaml b/inst/workflow/2_metrics/kri0011.yaml index a7877db8a..03aef2d93 100644 --- a/inst/workflow/2_metrics/kri0011.yaml +++ 
b/inst/workflow/2_metrics/kri0011.yaml @@ -53,7 +53,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/inst/workflow/2_metrics/kri0012.yaml b/inst/workflow/2_metrics/kri0012.yaml index 6f5b93d27..4dc457b06 100644 --- a/inst/workflow/2_metrics/kri0012.yaml +++ b/inst/workflow/2_metrics/kri0012.yaml @@ -50,7 +50,7 @@ steps: dfTransformed: Analysis_Transformed strType: AnalysisType - output: Analysis_Flagged - name: Flag_NormalApprox + name: Flag params: dfAnalyzed: Analysis_Analyzed vThreshold: vThreshold diff --git a/man/Flag.Rd b/man/Flag.Rd index 1a07c18bd..333b43c7b 100644 --- a/man/Flag.Rd +++ b/man/Flag.Rd @@ -4,26 +4,27 @@ \alias{Flag} \title{Flag} \usage{ -Flag(dfAnalyzed, strColumn = "Score", vThreshold = NULL, strValueColumn = NULL) +Flag( + dfAnalyzed, + strColumn = "Score", + vThreshold = c(-3, -2, 2, 3), + vFlag = c(-2, -1, 0, 1, 2), + vFlagOrder = c(2, -2, 1, -1, 0) +) } \arguments{ \item{dfAnalyzed}{\code{data.frame} where flags should be added.} \item{strColumn}{\code{character} Name of the column to use for thresholding. Default: \code{"Score"}} -\item{vThreshold}{\code{numeric} Vector of 2 numeric values representing lower and upper threshold values. All -values in \code{strColumn} are compared to \code{vThreshold} using strict comparisons. Values less than -the lower threshold or greater than the upper threshold are flagged. Values equal to the -threshold values are set to 0 (i.e., not flagged). If NA is provided for either threshold value, -it is ignored and no values are flagged based on the threshold. NA and NaN values in \code{strColumn} -are given NA flag values.} +\item{vThreshold}{\code{numeric} Vector of numeric values representing threshold values. 
Default is \code{c(-3,-2,2,3)} which is typical for z-scores.} -\item{strValueColumn}{\code{character} Name of the column to use for sign of \code{Flag.} If the value for -that row is higher than the median of \code{strValueColumn}, then \code{Flag} is set to 1. Similarly, if -the value for that row is lower than the median of \code{strValueColumn}, then Flag is set to -1.} +\item{vFlag}{\code{numeric} Vector of flag values. There must be one more item in Flag than thresholds - that is \code{length(vThreshold)+1 == length(vFlag)}. Default is \code{c(-2,-1,0,1,2)}, which is typical for z-scores.} + +\item{vFlagOrder}{\code{numeric} Vector of ordered flag values. Output data.frame will be sorted based on flag column using the order provided. NULL (or values that don't match vFlag) will leave the data unsorted. Must have identical values to vFlag. Default is \code{c(2,-2,1,-1,0)} which puts largest z-score outliers first in the data set.} } \value{ -\code{data.frame} with one row per site with columns: \code{GroupID}, \code{TotalCount}, \code{Metric}, \code{Score}, \code{Flag} +\code{data.frame} dfAnalyzed is returned with an additional \code{Flag} column. 
} \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} @@ -59,10 +60,9 @@ The following column is considered optional: } \examples{ -dfTransformed <- Transform_Count(analyticsInput, strCountCol = "Numerator") - -dfAnalyzed <- Analyze_Identity(dfTransformed) -dfFlagged <- Flag(dfAnalyzed, vThreshold = c(0.001, 0.01)) +dfTransformed <- Transform_Rate(analyticsInput) +dfAnalyzed <- Analyze_NormalApprox(dfTransformed) +dfFlagged <- Flag(dfAnalyzed) } diff --git a/man/Flag_Fisher.Rd b/man/Flag_Fisher.Rd deleted file mode 100644 index 7d1d2aa7a..000000000 --- a/man/Flag_Fisher.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Flag_Fisher.R -\name{Flag_Fisher} -\alias{Flag_Fisher} -\title{Flag_Fisher} -\usage{ -Flag_Fisher(dfAnalyzed, vThreshold = NULL) -} -\arguments{ -\item{dfAnalyzed}{data.frame where flags should be added.} - -\item{vThreshold}{Vector of 2 numeric values representing lower and upper p-value thresholds.} -} -\value{ -\code{data.frame} with one row per site with columns: \code{GroupID}, \code{Numerator}, \code{Denominator}, \code{Metric}, \code{Score}, \code{PredictedCount}, and \code{Flag}. -} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} - -Add columns flagging sites that represent possible statistical outliers when the Fisher's Exact Test is used. -} -\details{ -This function flags sites based on the Fisher's Exact Test result as part of the GSM data model (see \code{vignette("DataModel")}). -} -\section{Data Specification}{ - -\code{Flag_Fisher} is designed to support the input data (\code{dfAnalyzed}) generated from the \code{Analyze_Fisher} function. 
At a minimum, the input must define a \code{dfAnalyzed} data frame with \code{Score}, \code{Prop}, and \code{Prop_Other} variables included and a \code{vThreshold}. These inputs will be used to identify possible statistical outliers in a new \code{Flag} column by comparing \code{Score}, \code{Prop}, and \code{Prop_Other} values to the specified thresholds. - -The following columns are considered required: -\itemize{ -\item \code{GroupID} - Group ID; default is \code{SiteID} -\item \code{GroupLevel} - Group Type -\item \code{Score} - P-value calculated from the rates of exposure provided to \code{Analyze_Fisher()} -\item \code{Prop} - Proportion of events of interest over days of exposure -\item \code{Prop_Other} - Cumulative proportion of events of interest over days of exposure -} -} - -\examples{ -dfTransformed <- Transform_Rate(analyticsInput) - -dfAnalyzed <- Analyze_Poisson(dfTransformed) - -dfFlagged <- Flag(dfAnalyzed, vThreshold = c(-5, 5)) - -} diff --git a/man/Flag_NormalApprox.Rd b/man/Flag_NormalApprox.Rd index f541850a8..da76b70b6 100644 --- a/man/Flag_NormalApprox.Rd +++ b/man/Flag_NormalApprox.Rd @@ -1,53 +1,32 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Flag_NormalApprox.R +% Please edit documentation in R/Flag.R \name{Flag_NormalApprox} \alias{Flag_NormalApprox} \title{Flag_NormalApprox} \usage{ -Flag_NormalApprox(dfAnalyzed, vThreshold = NULL) +Flag_NormalApprox( + dfAnalyzed, + strColumn = "Score", + vThreshold = c(-3, -2, 2, 3), + vFlag = c(-2, -1, 0, 1, 2), + vFlagOrder = c(2, -2, 1, -1, 0) +) } \arguments{ \item{dfAnalyzed}{\code{data.frame} where flags should be added.} -\item{vThreshold}{\code{vector} of 4 numeric values representing lower and upper threshold values. All -values in \code{strColumn} are compared to \code{vThreshold} using strict comparisons. Values less than the lower threshold or greater than the upper threshold are flagged as -1 and 1 respectively. 
Values equal to the threshold values are set to 0 (i.e. not flagged). If NA is provided for either threshold value it is ignored, and no values are flagged based on the threshold. NA and NaN values in \code{strColumn} are given NA flag values.} -} -\value{ -\code{data.frame} with "Flag" column added +\item{strColumn}{\code{character} Name of the column to use for thresholding. Default: \code{"Score"}} + +\item{vThreshold}{\code{numeric} Vector of numeric values representing threshold values. Default is \code{c(-3,-2,2,3)} which is typical for z-scores.} + +\item{vFlag}{\code{numeric} Vector of flag values. There must be one more item in Flag than thresholds - that is \code{length(vThreshold)+1 == length(vFlag)}. Default is \code{c(-2,-1,0,1,2)}, which is typical for z-scores.} + +\item{vFlagOrder}{\code{numeric} Vector of ordered flag values. Output data.frame will be sorted based on flag column using the order provided. NULL (or values that don't match vFlag) will leave the data unsorted. Must have identical values to vFlag. Default is \code{c(2,-2,1,-1,0)} which puts largest z-score outliers first in the data set.} } \description{ +#' @description \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} - -Add columns flagging sites that represent possible statistical outliers. } \details{ -This function flags sites based on the funnel plot with normal approximation analysis result as part of -the GSM data model (see \code{vignette("DataModel")}). -} -\section{Data Specification}{ - -\code{Flag_NormalApprox} is designed to support the input data (\code{dfAnalyzed}) from \code{Analyze_NormalApprox} function. -At a minimum, the input data must have a \code{GroupID} column and a column of numeric values (identified -by the \code{strColumn} parameter) that will be compared to the specified thresholds (\code{vThreshold}) to -calculate a new \code{Flag} column. 
-In short, the following columns are considered: -\itemize{ -\item \code{GroupID} - Group ID (required) -\item \code{GroupLevel} - Group Type -\item \code{strColumn} - A column to use for Thresholding (required) -\item \code{strValueColumn} - A column to be used for the sign of the flag (optional) -} -} - -\examples{ -dfTransformed <- Transform_Rate(analyticsInput) - -# Binary -dfAnalyzed <- Analyze_NormalApprox(dfTransformed, strType = "binary") -dfFlagged <- Flag_NormalApprox(dfAnalyzed, vThreshold = c(-3, -2, 2, 3)) - -# Rate -dfAnalyzed <- Analyze_NormalApprox(dfTransformed, strType = "rate") -dfFlagged <- Flag_NormalApprox(dfAnalyzed, vThreshold = c(-3, -2, 2, 3)) - +Alias for \code{Flag()} } diff --git a/man/Flag_Poisson.Rd b/man/Flag_Poisson.Rd index 88fef1f3f..4021394d5 100644 --- a/man/Flag_Poisson.Rd +++ b/man/Flag_Poisson.Rd @@ -1,44 +1,32 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Flag_Poisson.R +% Please edit documentation in R/Flag.R \name{Flag_Poisson} \alias{Flag_Poisson} \title{Flag_Poisson} \usage{ -Flag_Poisson(dfAnalyzed, vThreshold = NULL) +Flag_Poisson( + dfAnalyzed, + strColumn = "Score", + vThreshold = c(-3, -2, 2, 3), + vFlag = c(-2, -1, 0, 1, 2), + vFlagOrder = c(2, -2, 1, -1, 0) +) } \arguments{ -\item{dfAnalyzed}{data.frame where flags should be added.} +\item{dfAnalyzed}{\code{data.frame} where flags should be added.} -\item{vThreshold}{Vector of 4 numeric values representing lower and upper threshold values. All values in the \code{Score} column are compared to \code{vThreshold} using strict comparisons. Values less than the lower thresholds or greater than the upper thresholds are flagged. Values equal to the threshold values are set to 0 (i.e., not flagged). If NA is provided for either threshold value, it is ignored and no values are flagged based on the thresholds. 
NA and NaN values in \code{Score} are given NA flag values.} -} -\value{ -\code{data.frame} with one row per site with columns: \code{GroupID}, \code{Numerator}, \code{Denominator}, \code{Metric}, \code{Score}, \code{PredictedCount}, \code{Flag} -} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} +\item{strColumn}{\code{character} Name of the column to use for thresholding. Default: \code{"Score"}} -Add columns flagging sites that represent possible statistical outliers when the Poisson statistical method is used. -} -\details{ -This function flags sites based on the Poisson analysis result as part of \code{vignette("DataModel")}. -} -\section{Data Specification}{ +\item{vThreshold}{\code{numeric} Vector of numeric values representing threshold values. Default is \code{c(-3,-2,2,3)} which is typical for z-scores.} -\code{Flag_Poisson} is designed to support the input data (\code{dfAnalyzed}) generated from the \code{Analyze_Poisson} function. At a minimum, the input must define a \code{dfAnalyzed} data frame with a \code{Score} variable included and a \code{vThreshold}. These inputs will be used to identify possible statistical outliers in a new \code{Flag} column by comparing \code{Score} values to the specified thresholds. +\item{vFlag}{\code{numeric} Vector of flag values. There must be one more item in Flag than thresholds - that is \code{length(vThreshold)+1 == length(vFlag)}. Default is \code{c(-2,-1,0,1,2)}, which is typical for z-scores.} -The following columns are considered required: -\itemize{ -\item \code{GroupID} - Group ID; default is \code{SiteID} -\item \code{GroupLevel} - Group Type -\item \code{Score} - Site residuals calculated from the rates of exposure provided to \code{Analyze_Poisson()} +\item{vFlagOrder}{\code{numeric} Vector of ordered flag values. 
Output data.frame will be sorted based on flag column using the order provided. NULL (or values that don't match vFlag) will leave the data unsorted. Must have identical values to vFlag. Default is \code{c(2,-2,1,-1,0)} which puts largest z-score outliers first in the data set.} } +\description{ +#' @description +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} } - -\examples{ -dfTransformed <- Transform_Rate(analyticsInput) - -dfAnalyzed <- Analyze_Poisson(dfTransformed) - -dfFlagged <- Flag_Poisson(dfAnalyzed, vThreshold = c(-7, -5, 5, 7)) - +\details{ +Alias for \code{Flag()} } diff --git a/man/Make_Timeline.Rd b/man/Make_Timeline.Rd deleted file mode 100644 index 3a47daf84..000000000 --- a/man/Make_Timeline.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Report_Timeline.R -\name{Make_Timeline} -\alias{Make_Timeline} -\title{Create interactive timeline for study data} -\usage{ -Make_Timeline( - status_study, - longitudinal = NULL, - n_breaks = 10, - date_format = "\%b\\n\%Y", - bInteractive = TRUE -) -} -\arguments{ -\item{status_study}{the table containing study data of interest} - -\item{longitudinal}{optional argument for supplying longitudinal snapshot information} - -\item{n_breaks}{the number of breaks to include in the x-axis. NOTE pretty breaks are used to assign break locations and may give more or less that the number of breaks specified.} - -\item{date_format}{The format to apply to the x-axis labels} - -\item{bInteractive}{logical argument to make plot interactive or not.} -} -\value{ -\code{plot} interactive timeline plot. 
-} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} -} -\examples{ -if (require("gggenes") && require("ggiraph")) { - Make_Timeline(status_study = clindata::ctms_study) -} - -} diff --git a/man/Summarize.Rd b/man/Summarize.Rd index 69bf81e85..7c23cad6a 100644 --- a/man/Summarize.Rd +++ b/man/Summarize.Rd @@ -36,12 +36,10 @@ Create a concise summary of assessment results that is easy to aggregate across } \examples{ -dfTransformed <- Transform_Rate(analyticsInput) - -dfAnalyzed <- Analyze_Poisson(dfTransformed) - -dfFlagged <- Flag(dfAnalyzed, vThreshold = c(-5, 5)) +dfTransformed <- Transform_Rate(analyticsInput) +dfAnalyzed <- Analyze_NormalApprox(dfTransformed) +dfFlagged <- Flag(dfAnalyzed) dfSummary <- Summarize(dfFlagged) } diff --git a/man/Widget_FlagOverTime.Rd b/man/Widget_FlagOverTime.Rd index 2d80210f6..d8dca8e67 100644 --- a/man/Widget_FlagOverTime.Rd +++ b/man/Widget_FlagOverTime.Rd @@ -57,4 +57,5 @@ Widget_FlagOverTime( dfMetrics = reportingMetrics, bExcludeEver = TRUE ) + } diff --git a/tests/testthat/_snaps/util-Logger.md b/tests/testthat/_snaps/util-Logger.md new file mode 100644 index 000000000..ec1079020 --- /dev/null +++ b/tests/testthat/_snaps/util-Logger.md @@ -0,0 +1,40 @@ +# Use cli style messages via logger + + Code + LogMessage(level = "info", message = "cli style info", cli_detail = "h1") + Message + + -- cli style info -------------------------------------------------------------- + +--- + + Code + LogMessage(level = "info", message = "cli style info", cli_detail = "h2") + Message + + -- cli style info -- + + +--- + + Code + LogMessage(level = "info", message = "cli style info", cli_detail = "h3") + Message + + -- cli style info + +--- + + Code + LogMessage(level = "info", message = "cli style info", cli_detail = "alert_success") + Message + v cli style info + +--- + + Code + tryCatch(LogMessage(level = "warn", message = 
"cli style warn")) + Condition + Warning: + cli style warn + diff --git a/tests/testthat/_snaps/util-MakeWorkflowList.md b/tests/testthat/_snaps/util-MakeWorkflowList.md index 1280ef6be..31110b930 100644 --- a/tests/testthat/_snaps/util-MakeWorkflowList.md +++ b/tests/testthat/_snaps/util-MakeWorkflowList.md @@ -223,7 +223,7 @@ [1] "Analysis_Flagged" $cou0001[[5]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $cou0001[[5]]$params $cou0001[[5]]$params$dfAnalyzed @@ -380,7 +380,7 @@ [1] "Analysis_Flagged" $cou0002[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $cou0002[[6]]$params $cou0002[[6]]$params$dfAnalyzed @@ -537,7 +537,7 @@ [1] "Analysis_Flagged" $cou0003[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $cou0003[[6]]$params $cou0003[[6]]$params$dfAnalyzed @@ -694,7 +694,7 @@ [1] "Analysis_Flagged" $cou0004[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $cou0004[[6]]$params $cou0004[[6]]$params$dfAnalyzed @@ -864,7 +864,7 @@ [1] "Analysis_Flagged" $cou0005[[7]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $cou0005[[7]]$params $cou0005[[7]]$params$dfAnalyzed @@ -1018,7 +1018,7 @@ [1] "Analysis_Flagged" $cou0006[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $cou0006[[6]]$params $cou0006[[6]]$params$dfAnalyzed @@ -1172,7 +1172,7 @@ [1] "Analysis_Flagged" $cou0007[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $cou0007[[6]]$params $cou0007[[6]]$params$dfAnalyzed @@ -1326,7 +1326,7 @@ [1] "Analysis_Flagged" $cou0008[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $cou0008[[6]]$params $cou0008[[6]]$params$dfAnalyzed @@ -1496,7 +1496,7 @@ [1] "Analysis_Flagged" $cou0009[[7]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $cou0009[[7]]$params $cou0009[[7]]$params$dfAnalyzed @@ -1650,7 +1650,7 @@ [1] "Analysis_Flagged" $cou0010[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $cou0010[[6]]$params $cou0010[[6]]$params$dfAnalyzed @@ -1804,7 +1804,7 @@ [1] "Analysis_Flagged" $cou0011[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $cou0011[[6]]$params 
$cou0011[[6]]$params$dfAnalyzed @@ -1958,7 +1958,7 @@ [1] "Analysis_Flagged" $cou0012[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $cou0012[[6]]$params $cou0012[[6]]$params$dfAnalyzed @@ -2099,7 +2099,7 @@ [1] "Analysis_Flagged" $kri0001[[5]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $kri0001[[5]]$params $kri0001[[5]]$params$dfAnalyzed @@ -2256,7 +2256,7 @@ [1] "Analysis_Flagged" $kri0002[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $kri0002[[6]]$params $kri0002[[6]]$params$dfAnalyzed @@ -2413,7 +2413,7 @@ [1] "Analysis_Flagged" $kri0003[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $kri0003[[6]]$params $kri0003[[6]]$params$dfAnalyzed @@ -2570,7 +2570,7 @@ [1] "Analysis_Flagged" $kri0004[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $kri0004[[6]]$params $kri0004[[6]]$params$dfAnalyzed @@ -2740,7 +2740,7 @@ [1] "Analysis_Flagged" $kri0005[[7]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $kri0005[[7]]$params $kri0005[[7]]$params$dfAnalyzed @@ -2894,7 +2894,7 @@ [1] "Analysis_Flagged" $kri0006[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $kri0006[[6]]$params $kri0006[[6]]$params$dfAnalyzed @@ -3048,7 +3048,7 @@ [1] "Analysis_Flagged" $kri0007[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $kri0007[[6]]$params $kri0007[[6]]$params$dfAnalyzed @@ -3202,7 +3202,7 @@ [1] "Analysis_Flagged" $kri0008[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $kri0008[[6]]$params $kri0008[[6]]$params$dfAnalyzed @@ -3372,7 +3372,7 @@ [1] "Analysis_Flagged" $kri0009[[7]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $kri0009[[7]]$params $kri0009[[7]]$params$dfAnalyzed @@ -3526,7 +3526,7 @@ [1] "Analysis_Flagged" $kri0010[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $kri0010[[6]]$params $kri0010[[6]]$params$dfAnalyzed @@ -3680,7 +3680,7 @@ [1] "Analysis_Flagged" $kri0011[[6]]$name - [1] "Flag_NormalApprox" + [1] "Flag" $kri0011[[6]]$params $kri0011[[6]]$params$dfAnalyzed @@ -3834,7 +3834,7 @@ [1] "Analysis_Flagged" $kri0012[[6]]$name - [1] "Flag_NormalApprox" + [1] 
"Flag" $kri0012[[6]]$params $kri0012[[6]]$params$dfAnalyzed diff --git a/tests/testthat/test-Flag.R b/tests/testthat/test-Flag.R new file mode 100644 index 000000000..e777e5838 --- /dev/null +++ b/tests/testthat/test-Flag.R @@ -0,0 +1,83 @@ +test_that("Flag function works correctly with z-score data", { + dfAnalyzed <- data.frame( + GroupID = 1:12, + Score = c(-4, -3.1,-3,-2.9, -2.1, -2,-1.9, 0, 2, 2.9, 3, 3.1) + ) + + #unsorted + dfFlagged <- Flag(dfAnalyzed, vFlagOrder = NULL) + expect_equal(dfFlagged$Flag, c(-2, -2, -1, -1, -1, 0, 0, 0, 1, 1, 2, 2)) + + # sorted + dfFlagged <- Flag(dfAnalyzed) + expect_equal(dfFlagged$Flag, c(2, 2, -2, -2, 1, 1, -1, -1, -1, 0, 0, 0)) + + # Test with custom thresholds and flags + dfFlagged <- Flag(dfAnalyzed, vThreshold = c(-2,2), vFlag = c(-1,0,1), vFlagOrder = NULL) + expect_equal(dfFlagged$Flag, c(-1, -1, -1, -1, -1, 0, 0, 0, 1, 1, 1, 1)) + + # Test Alias + dfFlagged <- Flag_NormalApprox(dfAnalyzed, vFlagOrder=NULL) + expect_equal(dfFlagged$Flag, c(-2, -2, -1, -1, -1, 0, 0, 0, 1, 1, 2, 2)) +}) + +test_that("Flag function works correctly with rate data", { + # Test with rate data + dfAnalyzed_Rate <- data.frame( + GroupID = 1:9, + Score = c(0.1, 0.2, 0.5, 0.6, 0.8, 0.85, 0.86, 0.9, 0.99) + ) + dfFlagged_Rate <- Flag_NormalApprox(dfAnalyzed_Rate, vFlag=c(2,1,0), vThreshold = c(0.85, 0.9)) + expect_equal(dfFlagged_Rate$Flag, c(2,2,2,2,2,1,1,0,0)) + + dfFlagged_Rate <- Flag_NormalApprox(dfAnalyzed_Rate, vFlag=c(2,1,0), vThreshold = c(0.85, 0.9), vFlagOrder=c(0,1,2)) + expect_equal(dfFlagged_Rate$Flag, c(0,0,1,1,2,2,2,2,2)) + +}) + + +test_that("Flag function works correctly with poisson data", { + # Test with Poisson Data + dfAnalyzedCustom <- tibble::tribble( + ~GroupID, ~Numerator, ~Denominator, ~Metric, ~Score, ~PredictedCount, + "166", 5L, 857L, 0.0058343057176196, -11, 5.12722560489132, + "76", 2L, 13L, 0.153846153846154, -6, 2.00753825876477, + "86", 5L, 678L, 0.00737463126843658, 6, 4.86523613634436, + "80", 5L, 678L, 
0.00737463126843658, 11, 4.86523613634436 + ) + + dfFlagged <- Flag_Poisson(dfAnalyzedCustom, vThreshold = c(-10, -5, 5, 10)) + expect_equal(dfFlagged$Flag, c(2, -2,1, -1)) + expect_equal(dfFlagged$GroupID, c("80","166", "86", "76")) +}) + + +test_that("Flag function works correctly with NA data", { + dfAnalyzed_NA <- data.frame( + GroupID = 1:7, + Score = c(-4, -1, 0, NA, 2, 5, NA) + ) + dfFlagged_NA <- Flag(dfAnalyzed_NA, vFlagOrder = NULL) + expect_equal(dfFlagged_NA$Flag, c(-2, 0, 0, NA, 1, 2, NA)) +}) + +test_that("errors working as expected", { + dfAnalyzed <- data.frame( + GroupID = 1:12, + Score = c(-4, -3.1,-3,-2.9, -2.1, -2,-1.9, 0, 2, 2.9, 3, 3.1) + ) + # Test with missing strColumn + expect_error(Flag(dfAnalyzed, strColumn = "MissingColumn"), "strColumn not found in dfAnalyzed") + + # Test with improper number of flag values + expect_error(Flag(dfAnalyzed, vThreshold = c(-2, 0, 2), vFlag = c(1,2,3)), "Improper number of Flag values provided") + + # Test with non-numeric vThreshold + expect_error(Flag(dfAnalyzed, vThreshold = c("a", "b", "c")), "vThreshold is not numeric") + + # Test with non-character strColumn + expect_error(Flag(dfAnalyzed, strColumn = 123), "strColumn is not character") + + # Test with non-data frame dfAnalyzed + expect_error(Flag(list(SiteID = 1:10, Score = c(-4, -3, -2.5, -2, -1, 0, 1, 2, 2.5, 3))), "dfAnalyzed is not a data frame") +}) diff --git a/tests/testthat/test-Flag_NormalApprox.R b/tests/testthat/test-Flag_NormalApprox.R deleted file mode 100644 index 6e10960bd..000000000 --- a/tests/testthat/test-Flag_NormalApprox.R +++ /dev/null @@ -1,42 +0,0 @@ -test_that("output is created as expected", { - dfAnalyzed <- Transform_Rate(analyticsInput) %>% quiet_Analyze_NormalApprox() - dfFlagged <- Flag_NormalApprox(dfAnalyzed, vThreshold = c(-3, -2, 2, 3)) - expect_true(is.data.frame(dfFlagged)) - expect_equal(sort(unique(dfAnalyzed$GroupID)), sort(dfFlagged$GroupID)) - expect_true(all(names(dfAnalyzed) %in% names(dfFlagged))) - 
expect_equal(names(dfFlagged), c("GroupID", "GroupLevel", "Numerator", "Denominator", "Metric", "OverallMetric", "Factor", "Score", "Flag")) - expect_equal(length(unique(dfAnalyzed$GroupID)), length(unique(dfFlagged$GroupID))) - expect_equal(length(unique(dfAnalyzed$GroupID)), nrow(dfFlagged)) -}) - -test_that("incorrect inputs throw errors", { - dfAnalyzed <- Transform_Rate(analyticsInput) %>% quiet_Analyze_NormalApprox() - expect_error(Flag_NormalApprox(list(), vThreshold = c(-3, -2, 2, 3)), "dfAnalyzed is not a data frame") - expect_error(Flag_NormalApprox(dfAnalyzed, "1", "2")) - expect_error(Flag_NormalApprox(dfAnalyzed, vThreshold = c("-3", "-2", "2", "3")), "vThreshold is not numeric") - expect_error(Flag_NormalApprox(dfAnalyzed, vThreshold = c(-1, 0, 1)), "vThreshold must be length of 4") - expect_error(Flag_NormalApprox(dfAnalyzed, vThreshold = NULL)) - expect_error(Flag_NormalApprox(dfAnalyzed %>% select(-c(GroupID)))) - expect_error(Flag_NormalApprox(dfAnalyzed, vThreshold = c(4, 3, 2, 1))) -}) - -test_that("flagging works correctly", { - dfAnalyzedCustom <- tibble::tribble( - ~GroupID, ~Numerator, ~Denominator, ~Metric, ~OverallMetric, ~Factor, ~Score, - "139", 0, 2, 0, 0.08, 0.910, -0.437, - "109", 0, 1, 0, 0.08, 0.910, -0.309, - "43", 1, 2, 0.5, 0.08, 0.910, 2.295, - "127", 1, 1, 1, 0.08, 0.910, 3.554 - ) - - expect_silent({ - dfFlagged <- Flag_NormalApprox(dfAnalyzedCustom, vThreshold = c(-3, -2, 2, 3)) - }) - expect_equal(dfFlagged$Flag, c(2, 1, 0, 0)) -}) - -test_that("yaml workflow produces same table as R function", { - source(test_path("testdata", "create_double_data.R"), local = TRUE) - expect_equal(dfFlagged$Flag, lResults$Analysis_kri0001$Analysis_Flagged$Flag) - expect_equal(dim(dfFlagged), dim(lResults$Analysis_kri0001$Analysis_Flagged)) -}) diff --git a/tests/testthat/test-Report_Timeline.R b/tests/testthat/test-Report_Timeline.R deleted file mode 100644 index 57aaa2eb8..000000000 --- a/tests/testthat/test-Report_Timeline.R +++ /dev/null 
@@ -1,10 +0,0 @@ -test_that("Make_Timeline function works as expected", { - status_study <- clindata::ctms_study - - # map ctms data ----------------------------------------------------------- - # Test the function - plot <- Make_Timeline(status_study, bInteractive = F) - - expect_true(is.list(plot)) - expect_true(plot$x$uid == "timeline") -}) diff --git a/tests/testthat/test_Flag.R b/tests/testthat/test_Flag.R deleted file mode 100644 index 07cdad80f..000000000 --- a/tests/testthat/test_Flag.R +++ /dev/null @@ -1,58 +0,0 @@ -dfAnalyzed <- tibble::tibble( - GroupID = c("123", "45", "67", "101"), - TotalCount = c(1, 1, 2, 2), - Metric = c(1, 1, 2, 2), - Score = c(1, 1, 2, 2) -) - -# output is created as expected ------------------------------------------- -test_that("output is created as expected", { - dfFlagged <- Flag(dfAnalyzed, vThreshold = c(-1, 1)) - expect_true(is.data.frame(dfFlagged)) - expect_equal(sort(unique(dfAnalyzed$GroupID)), sort(dfFlagged$GroupID)) - expect_true(all(names(dfAnalyzed) %in% names(dfFlagged))) - expect_equal(names(dfFlagged), c("GroupID", "TotalCount", "Metric", "Score", "Flag")) - expect_equal(length(unique(dfAnalyzed$GroupID)), length(unique(dfFlagged$GroupID))) - expect_equal(length(unique(dfAnalyzed$GroupID)), nrow(dfFlagged)) -}) - -# incorrect inputs throw errors ------------------------------------------- -test_that("incorrect inputs throw errors", { - expect_error(Flag(list(), -1, 1)) - expect_error(Flag("Hi", -1, 1)) - expect_error(Flag(dfAnalyzed, "1", "2")) - expect_error(Flag(dfAnalyzed, vThreshold = c(NA, 1), strColumn = 1.0, strValueColumn = "Estimate")) - expect_error(Flag(dfAnalyzed, vThreshold = "1", strValueColumn = "Estimate")) - expect_error(Flag(dfAnalyzed, vThreshold = 0.5, strValueColumn = "Estimate")) - expect_error(Flag(dfAnalyzed, vThreshold = c(NA, 1), strColumn = "PValue1", strValueColumn = "Estimate")) - expect_error(Flag(dfAnalyzed, vThreshold = c(NA, 1), strValueColumn = "Mean")) - 
expect_error(Flag(dfAnalyzed, vThreshold = NULL)) - expect_error(Flag(dfAnalyzed, strColumn = c("Score", "GroupID"))) - expect_error(Flag(dfAnalyzed %>% select(-c(GroupID)))) - expect_error(Flag(dfAnalyzed, vThreshold = c(1, -1))) -}) - - -# custom tests ------------------------------------------------------------ -test_that("strValueColumn paramter works as intended", { - dfFlagged <- Flag(dfAnalyzed, vThreshold = c(-1, 1), strValueColumn = "TotalCount") - expect_equal(dfFlagged$Flag[1], 1) - dfFlagged <- Flag(dfAnalyzed, vThreshold = c(-1, 1), strValueColumn = NULL) - expect_equal(dfFlagged$Flag[1], 1) -}) - -test_that("vThreshold parameter works as intended", { - sim1 <- Flag(data.frame(GroupID = seq(1:100), vals = seq(1:100)), strColumn = "vals", vThreshold = c(10, NA)) - expect_equal(sim1$Flag, c(rep(-1, 9), rep(0, 91))) - sim2 <- Flag(data.frame(GroupID = seq(1:100), vals = seq(1:100)), strColumn = "vals", vThreshold = c(NA, 91)) - expect_equal(sim2$Flag, c(rep(1, 9), rep(0, 91))) - sim3 <- Flag(data.frame(GroupID = seq(1:100), vals = seq(1:100)), strColumn = "vals", vThreshold = c(2, 91)) - expect_equal(sim3$Flag, c(rep(1, 9), -1, rep(0, 90))) - sim4 <- Flag(data.frame(GroupID = seq(1:201), vals = seq(from = -100, to = 100)), strColumn = "vals", vThreshold = c(-91, 91)) - expect_equal(sim4$Flag, c(rep(1, 9), rep(-1, 9), rep(0, 183))) -}) - -test_that("NA values in strColumn result in NA in Flag column", { - NAsim <- Flag(data.frame(GroupID = seq(1:100), vals = c(seq(1:90), rep(NA, 10))), strColumn = "vals", vThreshold = c(10, NA)) - expect_equal(NAsim$Flag, c(rep(-1, 9), rep(0, 81), rep(NA, 10))) -}) diff --git a/tests/testthat/test_Flag_Fisher.R b/tests/testthat/test_Flag_Fisher.R deleted file mode 100644 index b5e426556..000000000 --- a/tests/testthat/test_Flag_Fisher.R +++ /dev/null @@ -1,32 +0,0 @@ -dfAnalyzed <- Transform_Rate(analyticsInput) %>% Analyze_Fisher() -test_that("output is created as expected", { - dfFlagged <- Flag_Fisher(dfAnalyzed, 
vThreshold = c(-.05, .05)) - expect_true(is.data.frame(dfFlagged)) - expect_equal(sort(unique(dfAnalyzed$GroupID)), sort(dfFlagged$GroupID)) - expect_true(all(names(dfAnalyzed) %in% names(dfFlagged))) - expect_equal(names(dfFlagged), c("GroupID", "Numerator", "Numerator_Other", "Denominator", "Denominator_Other", "Prop", "Prop_Other", "Metric", "Estimate", "Score", "Flag")) - expect_equal(length(unique(dfAnalyzed$GroupID)), length(unique(dfFlagged$GroupID))) - expect_equal(length(unique(dfAnalyzed$GroupID)), nrow(dfFlagged)) -}) - -test_that("incorrect inputs throw errors", { - expect_error(Flag_Fisher(list(), vThreshold = c(-2, 2)), "dfAnalyzed is not a data frame") - expect_error(Flag_Fisher(dfAnalyzed, "1", "2")) - expect_error(Flag_Fisher(dfAnalyzed, vThreshold = c("-2", "2")), "vThreshold is not numeric") - expect_error(Flag_Fisher(dfAnalyzed, vThreshold = c(-1, 0, 1)), "vThreshold must be length of 2") - expect_error(Flag_Fisher(dfAnalyzed, vThreshold = NULL)) - expect_error(Flag_Fisher(dfAnalyzed %>% select(-c(GroupID)))) - expect_error(Flag_Fisher(dfAnalyzed, vThreshold = c(4, 3))) -}) - -test_that("flagging works correctly", { - dfAnalyzedCustom <- tibble::tribble( - ~GroupID, ~Numerator, ~Numerator_Other, ~Denominator, ~Denominator_Other, ~Prop, ~Prop_Other, ~Metric, ~Estimate, ~Score, - "100", 77, 31697, 1667, 1130403, 0.04619076, 0.02804044, 0.04619076, 1.6786274, 3.938196e-05, - "101", 52, 31722, 2622, 1129448, 0.01983219, 0.02808629, 0.01983219, 0.7001692, 9.084693e-03, - "102", 62, 31712, 2685, 1129385, 0.02309125, 0.02807900, 0.02309125, 0.8181680, 1.279212e-01 - ) - - expect_silent(dfFlagged <- Flag_Fisher(dfAnalyzedCustom, vThreshold = c(-.05, .05))) - expect_equal(dfFlagged$Flag, c(1, -1, 0)) -}) diff --git a/tests/testthat/test_Flag_Poisson.R b/tests/testthat/test_Flag_Poisson.R deleted file mode 100644 index 8e463e357..000000000 --- a/tests/testthat/test_Flag_Poisson.R +++ /dev/null @@ -1,33 +0,0 @@ -dfAnalyzed <- Transform_Rate(analyticsInput) 
%>% Analyze_Poisson() - -test_that("output is created as expected", { - dfFlagged <- Flag_Poisson(dfAnalyzed, vThreshold = c(-.05, -.005, .005, .05)) - expect_true(is.data.frame(dfFlagged)) - expect_equal(sort(unique(dfAnalyzed$GroupID)), sort(dfFlagged$GroupID)) - expect_true(all(names(dfAnalyzed) %in% names(dfFlagged))) - expect_equal(names(dfFlagged), c("GroupID", "GroupLevel", "Numerator", "Denominator", "Metric", "Score", "PredictedCount", "Flag")) - expect_equal(length(unique(dfAnalyzed$GroupID)), length(unique(dfFlagged$GroupID))) - expect_equal(length(unique(dfAnalyzed$GroupID)), nrow(dfFlagged)) -}) - -test_that("incorrect inputs throw errors", { - expect_error(Flag_Poisson(list(), vThreshold = c(-2, -1, 1, 2)), "dfAnalyzed is not a data frame") - expect_error(Flag_Poisson(dfAnalyzed, "1", "2")) - expect_error(Flag_Poisson(dfAnalyzed, vThreshold = c("-2", "-1", "1", "2")), "vThreshold is not numeric") - expect_error(Flag_Poisson(dfAnalyzed, vThreshold = c(-1, 1)), "vThreshold must be length of 4") - expect_error(Flag_Poisson(dfAnalyzed, vThreshold = NULL)) - expect_error(Flag_Poisson(dfAnalyzed, vThreshold = c(4, 3, 2, 1))) -}) - -test_that("flagging works correctly", { - dfAnalyzedCustom <- tibble::tribble( - ~GroupID, ~Numerator, ~Denominator, ~Metric, ~Score, ~PredictedCount, - "166", 5L, 857L, 0.0058343057176196, -11, 5.12722560489132, - "76", 2L, 13L, 0.153846153846154, -6, 2.00753825876477, - "86", 5L, 678L, 0.00737463126843658, 6, 4.86523613634436, - "80", 5L, 678L, 0.00737463126843658, 11, 4.86523613634436 - ) - - expect_silent(dfFlagged <- Flag_Poisson(dfAnalyzedCustom, vThreshold = c(-10, -5, 5, 10))) - expect_equal(dfFlagged$Flag, c(2, -2, 1, -1)) -})