-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add funnel plot methods #812
Changes from 18 commits
5e7c957
596c73c
b90a940
9cdf85a
38e5c78
ce6a7ba
7a2ca90
d5d707d
e0b69ae
d62d4cb
a4301f5
fbdc015
f38e69b
f2b0738
b6e9a11
88ddf0a
4c313a4
221b959
6db137a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -44,7 +44,7 @@ | |||||
#' | ||||||
#' @examples | ||||||
#' dfInput <- AE_Map_Raw() | ||||||
#' ae_assessment_poisson <- AE_Assess(dfInput) | ||||||
#' ae_assessment_funnel <- AE_Assess(dfInput) | ||||||
#' | ||||||
#' @importFrom cli cli_alert_success cli_alert_warning cli_h2 cli_text | ||||||
#' @importFrom yaml read_yaml | ||||||
|
@@ -56,15 +56,15 @@ | |||||
AE_Assess <- function( | ||||||
dfInput, | ||||||
vThreshold = NULL, | ||||||
strMethod = "poisson", | ||||||
strMethod = "funnel", | ||||||
lMapping = yaml::read_yaml(system.file("mappings", "AE_Assess.yaml", package = "gsm")), | ||||||
strGroup = "Site", | ||||||
bQuiet = TRUE | ||||||
) { | ||||||
|
||||||
# data checking ----------------------------------------------------------- | ||||||
stopifnot( | ||||||
"strMethod is not 'poisson' or 'identity'" = strMethod %in% c("poisson", "identity"), | ||||||
"strMethod is not 'funnel', 'poisson' or 'identity'" = strMethod %in% c("funnel", "poisson", "identity"), | ||||||
"strMethod must be length 1" = length(strMethod) == 1, | ||||||
"strGroup must be one of: Site, Study, Country, or CustomGroup" = strGroup %in% c("Site", "Study", "Country", "CustomGroup"), | ||||||
"bQuiet must be logical" = is.logical(bQuiet) | ||||||
|
@@ -82,12 +82,14 @@ AE_Assess <- function( | |||||
# set thresholds and flagging parameters ---------------------------------- | ||||||
if (is.null(vThreshold)) { | ||||||
vThreshold <- switch(strMethod, | ||||||
funnel = c(-3, -2, 2, 3), | ||||||
poisson = c(-7, -5, 5, 7), | ||||||
identity = c(0.00006, 0.01) | ||||||
) | ||||||
} | ||||||
|
||||||
strValueColumnVal <- switch(strMethod, | ||||||
funnel = NULL, | ||||||
poisson = NULL, | ||||||
identity = "Score" | ||||||
) | ||||||
|
@@ -116,7 +118,10 @@ AE_Assess <- function( | |||||
if (!bQuiet) cli::cli_alert_success("{.fn Transform_Rate} returned output with {nrow(lData$dfTransformed)} rows.") | ||||||
|
||||||
# dfAnalyzed -------------------------------------------------------------- | ||||||
if (strMethod == "poisson") { | ||||||
if (strMethod == "funnel") { | ||||||
lData$dfAnalyzed <- gsm::Analyze_Rate(lData$dfTransformed, bQuiet = bQuiet) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
lData$dfBounds <- gsm::Analyze_Rate_PredictBounds(lData$dfTransformed, vThreshold = vThreshold, bQuiet = bQuiet) | ||||||
} else if (strMethod == "poisson") { | ||||||
lData$dfAnalyzed <- gsm::Analyze_Poisson(lData$dfTransformed, bQuiet = bQuiet) | ||||||
lData$dfBounds <- gsm::Analyze_Poisson_PredictBounds(lData$dfTransformed, vThreshold = vThreshold, bQuiet = bQuiet) | ||||||
} else if (strMethod == "identity") { | ||||||
|
@@ -127,13 +132,16 @@ AE_Assess <- function( | |||||
if (!bQuiet) cli::cli_alert_success("{.fn {strAnalyzeFunction}} returned output with {nrow(lData$dfAnalyzed)} rows.") | ||||||
|
||||||
# dfFlagged --------------------------------------------------------------- | ||||||
if (strMethod == "poisson") { | ||||||
if (strMethod == "funnel") { | ||||||
lData$dfFlagged <- gsm::Flag_Funnel(lData$dfAnalyzed, vThreshold = vThreshold) | ||||||
} else if (strMethod == "poisson") { | ||||||
lData$dfFlagged <- gsm::Flag_Poisson(lData$dfAnalyzed, vThreshold = vThreshold) | ||||||
} else if (strMethod == "identity") { | ||||||
lData$dfFlagged <- gsm::Flag(lData$dfAnalyzed, vThreshold = vThreshold, strValueColumn = strValueColumnVal) | ||||||
} | ||||||
|
||||||
flag_function_name <- switch(strMethod, | ||||||
funnel = "Flag_Funnel", | ||||||
identity = "Flag", | ||||||
poisson = "Flag_Poisson" | ||||||
) | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,7 +69,8 @@ AE_Map_Adam <- function( | |
dfInput <- dfs$dfADSL %>% | ||
mutate( | ||
SubjectID = .data[[lMapping$dfADSL$strIDCol]], | ||
Exposure = as.numeric(.data[[lMapping$dfADSL$strEndCol]] - .data[[lMapping$dfADSL$strStartCol]]) + 1) %>% | ||
Exposure = as.numeric(.data[[lMapping$dfADSL$strEndCol]] - .data[[lMapping$dfADSL$strStartCol]]) + 1 | ||
) %>% | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. seems accident enter press (probably not a change needed?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just an automated change @gwu05 - I ran the code formatter prior to my review. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh i see it makes sense |
||
rowwise() %>% | ||
mutate( | ||
Count = sum(dfs$dfADAE[[lMapping$dfADAE$strIDCol]] == .data$SubjectID), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#' Funnel Plot Analysis with Normal Approximation for Binary Outcomes | ||
#' | ||
#' @details | ||
#' Creates analysis results data for percentage data using funnel plot method with normal approximation. | ||
#' | ||
#' | ||
#' @section Statistical Methods: | ||
#' This function applies funnel plots using asymptotic limits based on normal approximation of binomial distribution for | ||
#' the binary outcome with the sample sizes of the sites to assess data quality and safety. | ||
#' | ||
#' @section Data Specification: | ||
#' The input data (`dfTransformed`) for Analyze_Binary is typically created using \code{\link{Transform_Rate}} and should be one record per site with required columns for: | ||
#' - `GroupID` - Site ID | ||
#' - `Numerator` - Total number of participants at site with event of interest | ||
#' - `Denominator` - Total number of participants at site | ||
#' - `Metric` - Proportion of participants at site with event of interest | ||
#' | ||
#' @param dfTransformed `data.frame` in format produced by \code{\link{Transform_Rate}} | ||
#' @param bQuiet `logical` Suppress warning messages? Default: `TRUE` | ||
#' | ||
#' @return `data.frame` with one row per site with columns: GroupID, Numerator, Denominator, Metric, OverallMetric, Factor, Score. | ||
#' | ||
#' @examples | ||
#' dfInput <- Disp_Map_Raw() | ||
#' | ||
#' dfTransformed <- Transform_Rate( | ||
#' dfInput, | ||
#' strGroupCol = "SiteID", | ||
#' strNumeratorCol = "Count", | ||
#' strDenominatorCol = "Total" | ||
#' ) | ||
#' | ||
#' dfAnalyzed <- Analyze_Binary(dfTransformed) | ||
#' | ||
#' @import dplyr | ||
#' | ||
#' @export | ||
|
||
Analyze_Binary <- function( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would |
||
dfTransformed, | ||
bQuiet = TRUE | ||
) { | ||
stopifnot( | ||
"dfTransformed is not a data.frame" = is.data.frame(dfTransformed), | ||
"One or more of these columns not found: GroupID, Denominator, Numerator, Metric" = | ||
all(c("GroupID", "Denominator", "Numerator", "Metric") %in% names(dfTransformed)), | ||
"NA value(s) found in GroupID" = all(!is.na(dfTransformed[["GroupID"]])) | ||
) | ||
|
||
dfAnalyzed <- dfTransformed %>% | ||
mutate( | ||
vMu = sum(.data$Numerator) / sum(.data$Denominator), | ||
z_0 = (.data$Metric - .data$vMu) / | ||
sqrt(.data$vMu * (1 - .data$vMu) / .data$Denominator), | ||
phi = mean(.data$z_0^2), | ||
z_i = (.data$Metric - .data$vMu) / | ||
sqrt(.data$phi * .data$vMu * (1 - .data$vMu) / .data$Denominator) | ||
Comment on lines
+52
to
+57
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Very clean and easy to follow. Will be good to explain this (and link to references) in the proposed stat vignette. |
||
) %>% | ||
select( | ||
"GroupID", | ||
"Numerator", | ||
"Denominator", | ||
jwildfire marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"Metric", | ||
OverallMetric = "vMu", | ||
Factor = "phi", | ||
Score = "z_i" | ||
jwildfire marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) %>% | ||
arrange(.data$Score) | ||
|
||
if (!bQuiet) { | ||
cli::cli_text("{.var Score} column created from normal approxiamtion of the binomial distribution") | ||
} | ||
|
||
return(dfAnalyzed) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#' Funnel Plot Analysis for Binary Outcomes - Predicted Boundaries | ||
#' | ||
#' @details | ||
#' Applies funnel plot analysis to site-level data and then calculates predicted percentages and upper | ||
#' and lower bounds across the full range of sample sizes. | ||
#' | ||
#' @section Statistical Methods: | ||
#' This function applies funnel plot analysis to site-level data and then calculates predicted percentages | ||
#' and upper and lower bounds (funnels) across the full range of sample sizes. | ||
#' | ||
#' @section Data Specification: | ||
#' The input data (`dfTransformed`) for Analyze_Binary_PredictBounds is typically created using | ||
#' \code{\link{Transform_Rate}} and should be one record per site with columns for: | ||
#' - `GroupID` - Site ID | ||
#' - `Numerator` - Total number of participants at site with event of interest | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the predictbounds, I'm seeing the output Numerator is actually the Metric, and we need to calculate out the actual numerator. I think may be ideal to have both generated from the dfBounds, so have both Numerator, Denominator, and Metric. This applies to Rate Predict Bounds as well. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry for the error and inconsistency. They are updated. |
||
#' - `Denominator` - Total number of participants at site | ||
#' - `Metric` - Proportion of participants at site with event of interest | ||
#' | ||
#' @param dfTransformed `data.frame` in format produced by | ||
#' \code{\link{Transform_Rate}}. Must include GroupID, Numerator, Denominator and Metric | ||
#' @param vThreshold `numeric` upper and lower boundaries in residual space. Should be identical to | ||
#' the thresholds used in AE_Assess(). | ||
#' @param bQuiet `logical` Suppress warning messages? Default: `TRUE` | ||
#' | ||
#' @return `data.frame` containing predicted boundary values with upper and lower bounds across the | ||
#' range of observed values. | ||
#' | ||
#' @examples | ||
#' dfInput <- Disp_Map_Raw() | ||
#' | ||
#' dfTransformed <- Transform_Rate( | ||
#' dfInput, | ||
#' strGroupCol = "SiteID", | ||
#' strNumeratorCol = "Count", | ||
#' strDenominatorCol = "Total" | ||
#' ) | ||
#' | ||
#' dfAnalyzed <- Analyze_Binary(dfTransformed) | ||
#' | ||
#' dfBounds <- Analyze_Binary_PredictBounds(dfTransformed, c(-3, -2, 2, 3)) | ||
#' | ||
#' @import dplyr | ||
#' @importFrom tidyr expand_grid | ||
#' | ||
#' @export | ||
|
||
Analyze_Binary_PredictBounds <- function(dfTransformed, vThreshold = c(-3, -2, 2, 3), bQuiet = TRUE) { | ||
if (is.null(vThreshold)) { | ||
vThreshold <- c(-3, -2, 2, 3) | ||
cli::cli_alert("vThreshold was not provided. Setting default threshold to c(-3, -2, 2, 3)") | ||
} | ||
|
||
# add a 0 threshold to calculate estimate without an offset | ||
vThreshold <- unique(c(vThreshold, 0)) | ||
|
||
# Calculate expected event count and predicted bounds across range of total exposure. | ||
vRange <- seq( | ||
min(dfTransformed$Denominator) - 0.05, | ||
max(dfTransformed$Denominator) + 0.05, | ||
by = 0.05 | ||
) | ||
|
||
dfBounds <- tidyr::expand_grid(Threshold = vThreshold, Denominator = vRange) %>% | ||
mutate( | ||
LogDenominator = log(.data$Denominator), | ||
# Calculate expected event percentage at sample size. | ||
vMu = sum(dfTransformed$Numerator) / sum(dfTransformed$Denominator), | ||
phi = mean(((dfTransformed$Metric - sum(dfTransformed$Numerator) / sum(dfTransformed$Denominator)) / | ||
sqrt(sum(dfTransformed$Numerator) / sum(dfTransformed$Denominator) * (1 - sum(dfTransformed$Numerator) / sum(dfTransformed$Denominator)) / dfTransformed$Denominator)) ^ 2), | ||
# Calculate lower and upper bounds of expected event percentage given specified threshold. | ||
Metric = .data$vMu + .data$Threshold * sqrt(.data$phi * .data$vMu * (1 - .data$vMu) / .data$Denominator), | ||
Numerator = .data$Metric * .data$Denominator | ||
) %>% | ||
# Only positive percentages are meaningful bounds | ||
filter(.data$Numerator >= 0) %>% | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think dropping negative values is fine (and creates better visualizations) per our previous discussion, but might be worth considering whether there are edge cases where this will create unexpected results (like entire boundary lines being missing). |
||
select( | ||
"Threshold", | ||
"Denominator", | ||
"LogDenominator", | ||
"Numerator", | ||
"Metric" | ||
) | ||
|
||
return(dfBounds) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#' Funnel Plot Analysis with Normal Approximation for Rate Outcomes | ||
#' | ||
#' @details | ||
#' Creates analysis results data for rate data using funnel plot method with normal approximation. | ||
#' | ||
#' | ||
#' @section Statistical Methods: | ||
#' This function applies funnel plots using asymptotic limits based on normal approximation of Poisson distribution for | ||
#' the rate outcome versus the total exposure of the sites under investigation to assess data quality and safety. | ||
#' | ||
#' @section Data Specification: | ||
#' The input data (`dfTransformed`) for Analyze_Rate is typically created using \code{\link{Transform_Rate}} and should be one record per site with required columns for: | ||
#' - `GroupID` - Site ID | ||
#' - `Numerator` - Total number of events of interest at site | ||
#' - `Denominator` - Total number of days of exposure at site | ||
#' - `Metric` - Rate of events at site (Numerator / Denominator) | ||
#' | ||
#' @param dfTransformed `data.frame` in format produced by \code{\link{Transform_Rate}} | ||
#' @param bQuiet `logical` Suppress warning messages? Default: `TRUE` | ||
#' | ||
#' @return `data.frame` with one row per site with columns: GroupID, Numerator, Denominator, Metric, OverallMetric, Factor, Score. | ||
#' | ||
#' @examples | ||
#' dfInput <- AE_Map_Raw() %>% na.omit() | ||
#' | ||
#' dfTransformed <- Transform_Rate( | ||
#' dfInput, | ||
#' strGroupCol = "SiteID", | ||
#' strNumeratorCol = "Count", | ||
#' strDenominatorCol = "Exposure" | ||
#' ) | ||
#' | ||
#' dfAnalyzed <- Analyze_Rate(dfTransformed) | ||
#' | ||
#' @import dplyr | ||
#' | ||
#' @export | ||
|
||
Analyze_Rate <- function( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
dfTransformed, | ||
bQuiet = TRUE | ||
) { | ||
stopifnot( | ||
"dfTransformed is not a data.frame" = is.data.frame(dfTransformed), | ||
"One or more of these columns not found: GroupID, Denominator, Numerator, Metric" = | ||
all(c("GroupID", "Denominator", "Numerator", "Metric") %in% names(dfTransformed)), | ||
"NA value(s) found in GroupID" = all(!is.na(dfTransformed[["GroupID"]])) | ||
) | ||
|
||
dfAnalyzed <- dfTransformed %>% | ||
mutate( | ||
vMu = sum(.data$Numerator) / sum(.data$Denominator), | ||
z_0 = (.data$Metric - .data$vMu) / | ||
sqrt(.data$vMu / .data$Denominator), | ||
phi = mean(.data$z_0^2), | ||
z_i = (.data$Metric - .data$vMu) / | ||
sqrt(.data$phi * .data$vMu / .data$Denominator) | ||
) %>% | ||
select( | ||
"GroupID", | ||
"Numerator", | ||
"Denominator", | ||
"Metric", | ||
OverallMetric = "vMu", | ||
Factor = "phi", | ||
Score = "z_i" | ||
jwildfire marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) %>% | ||
arrange(.data$Score) | ||
|
||
if (!bQuiet) { | ||
cli::cli_text("{.var Score} column created from normal approxiamtion of the Poisson distribution") | ||
} | ||
|
||
return(dfAnalyzed) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Zhongkai-Wang @gwu05 - Can we call this "normalApprox" or just "normal" or something like that? A Funnel plot is really just a type of visualization, not a method for analysis.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(not going to comment on this repeatedly, but it will require a bunch of minor changes)