Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add funnel plot methods #812

Merged
merged 19 commits into from
Oct 27, 2022
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@ export("%>%")
export(AE_Assess)
export(AE_Map_Adam)
export(AE_Map_Raw)
export(Analyze_Binary)
export(Analyze_Binary_PredictBounds)
export(AnalyzeQTL)
export(Analyze_Fisher)
export(Analyze_Identity)
export(Analyze_Poisson)
export(Analyze_Poisson_PredictBounds)
export(Analyze_Rate)
export(Analyze_Rate_PredictBounds)
export(CheckInputs)
export(Consent_Assess)
export(Consent_Map_Raw)
Expand All @@ -19,6 +23,7 @@ export(FilterData)
export(FilterDomain)
export(Flag)
export(Flag_Fisher)
export(Flag_Funnel)
export(Flag_Poisson)
export(Get_Enrolled)
export(IE_Assess)
Expand Down
18 changes: 13 additions & 5 deletions R/AE_Assess.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
#'
#' @examples
#' dfInput <- AE_Map_Raw()
#' ae_assessment_poisson <- AE_Assess(dfInput)
#' ae_assessment_funnel <- AE_Assess(dfInput)
#'
#' @importFrom cli cli_alert_success cli_alert_warning cli_h2 cli_text
#' @importFrom yaml read_yaml
Expand All @@ -56,15 +56,15 @@
AE_Assess <- function(
dfInput,
vThreshold = NULL,
strMethod = "poisson",
strMethod = "funnel",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Zhongkai-Wang @gwu05 - Can we call this "normalApprox" or just "normal" or something like that? A Funnel plot is really just a type of visualization, not a method for analysis.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(not going to comment on this repeatedly, but it will require a bunch of minor changes)

lMapping = yaml::read_yaml(system.file("mappings", "AE_Assess.yaml", package = "gsm")),
strGroup = "Site",
bQuiet = TRUE
) {

# data checking -----------------------------------------------------------
stopifnot(
"strMethod is not 'poisson' or 'identity'" = strMethod %in% c("poisson", "identity"),
"strMethod is not 'funnel', 'poisson' or 'identity'" = strMethod %in% c("funnel", "poisson", "identity"),
"strMethod must be length 1" = length(strMethod) == 1,
"strGroup must be one of: Site, Study, Country, or CustomGroup" = strGroup %in% c("Site", "Study", "Country", "CustomGroup"),
"bQuiet must be logical" = is.logical(bQuiet)
Expand All @@ -82,12 +82,14 @@ AE_Assess <- function(
# set thresholds and flagging parameters ----------------------------------
if (is.null(vThreshold)) {
vThreshold <- switch(strMethod,
funnel = c(-3, -2, 2, 3),
poisson = c(-7, -5, 5, 7),
identity = c(0.00006, 0.01)
)
}

strValueColumnVal <- switch(strMethod,
funnel = NULL,
poisson = NULL,
identity = "Score"
)
Expand Down Expand Up @@ -116,7 +118,10 @@ AE_Assess <- function(
if (!bQuiet) cli::cli_alert_success("{.fn Transform_Rate} returned output with {nrow(lData$dfTransformed)} rows.")

# dfAnalyzed --------------------------------------------------------------
if (strMethod == "poisson") {
if (strMethod == "funnel") {
lData$dfAnalyzed <- gsm::Analyze_Rate(lData$dfTransformed, bQuiet = bQuiet)
Copy link
Contributor

@jwildfire jwildfire Oct 27, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
lData$dfAnalyzed <- gsm::Analyze_Rate(lData$dfTransformed, bQuiet = bQuiet)
lData$dfAnalyzed <- gsm::Analyze_NormalApprox(lData$dfTransformed, strType="rate", bQuiet = bQuiet)

lData$dfBounds <- gsm::Analyze_Rate_PredictBounds(lData$dfTransformed, vThreshold = vThreshold, bQuiet = bQuiet)
} else if (strMethod == "poisson") {
lData$dfAnalyzed <- gsm::Analyze_Poisson(lData$dfTransformed, bQuiet = bQuiet)
lData$dfBounds <- gsm::Analyze_Poisson_PredictBounds(lData$dfTransformed, vThreshold = vThreshold, bQuiet = bQuiet)
} else if (strMethod == "identity") {
Expand All @@ -127,13 +132,16 @@ AE_Assess <- function(
if (!bQuiet) cli::cli_alert_success("{.fn {strAnalyzeFunction}} returned output with {nrow(lData$dfAnalyzed)} rows.")

# dfFlagged ---------------------------------------------------------------
if (strMethod == "poisson") {
if (strMethod == "funnel") {
lData$dfFlagged <- gsm::Flag_Funnel(lData$dfAnalyzed, vThreshold = vThreshold)
} else if (strMethod == "poisson") {
lData$dfFlagged <- gsm::Flag_Poisson(lData$dfAnalyzed, vThreshold = vThreshold)
} else if (strMethod == "identity") {
lData$dfFlagged <- gsm::Flag(lData$dfAnalyzed, vThreshold = vThreshold, strValueColumn = strValueColumnVal)
}

flag_function_name <- switch(strMethod,
funnel = "Flag_Funnel",
identity = "Flag",
poisson = "Flag_Poisson"
)
Expand Down
3 changes: 2 additions & 1 deletion R/AE_Map_Adam.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ AE_Map_Adam <- function(
dfInput <- dfs$dfADSL %>%
mutate(
SubjectID = .data[[lMapping$dfADSL$strIDCol]],
Exposure = as.numeric(.data[[lMapping$dfADSL$strEndCol]] - .data[[lMapping$dfADSL$strStartCol]]) + 1) %>%
Exposure = as.numeric(.data[[lMapping$dfADSL$strEndCol]] - .data[[lMapping$dfADSL$strStartCol]]) + 1
) %>%
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems accident enter press (probably not a change needed?)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just an automated change @gwu05 - I ran the code formatter prior to my review.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh i see it makes sense

rowwise() %>%
mutate(
Count = sum(dfs$dfADAE[[lMapping$dfADAE$strIDCol]] == .data$SubjectID),
Expand Down
75 changes: 75 additions & 0 deletions R/Analyze_Binary.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#' Funnel Plot Analysis with Normal Approximation for Binary Outcomes
#'
#' @details
#' Creates analysis results data for percentage data using funnel plot method with normal approximation.
#'
#'
#' @section Statistical Methods:
#' This function applies funnel plots using asymptotic limits based on normal approximation of binomial distribution for
#' the binary outcome with the sample sizes of the sites to assess data quality and safety.
#'
#' @section Data Specification:
#' The input data (`dfTransformed`) for Analyze_Binary is typically created using \code{\link{Transform_Rate}} and should be one record per site with required columns for:
#' - `GroupID` - Site ID
#' - `Numerator` - Total number of participants at site with event of interest
#' - `Denominator` - Total number of participants at site
#' - `Metric` - Proportion of participants at site with event of interest
#'
#' @param dfTransformed `data.frame` in format produced by \code{\link{Transform_Rate}}
#' @param bQuiet `logical` Suppress warning messages? Default: `TRUE`
#'
#' @return `data.frame` with one row per site with columns: GroupID, Numerator, Denominator, Metric, OverallMetric, Factor, Score.
#'
#' @examples
#' dfInput <- Disp_Map_Raw()
#'
#' dfTransformed <- Transform_Rate(
#' dfInput,
#' strGroupCol = "SiteID",
#' strNumeratorCol = "Count",
#' strDenominatorCol = "Total"
#' )
#'
#' dfAnalyzed <- Analyze_Binary(dfTransformed)
#'
#' @import dplyr
#'
#' @export

Analyze_Binary <- function(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would Analyze_Normal_Binary or something similar be more descriptive?

dfTransformed,
bQuiet = TRUE
) {
stopifnot(
"dfTransformed is not a data.frame" = is.data.frame(dfTransformed),
"One or more of these columns not found: GroupID, Denominator, Numerator, Metric" =
all(c("GroupID", "Denominator", "Numerator", "Metric") %in% names(dfTransformed)),
"NA value(s) found in GroupID" = all(!is.na(dfTransformed[["GroupID"]]))
)

dfAnalyzed <- dfTransformed %>%
mutate(
vMu = sum(.data$Numerator) / sum(.data$Denominator),
z_0 = (.data$Metric - .data$vMu) /
sqrt(.data$vMu * (1 - .data$vMu) / .data$Denominator),
phi = mean(.data$z_0^2),
z_i = (.data$Metric - .data$vMu) /
sqrt(.data$phi * .data$vMu * (1 - .data$vMu) / .data$Denominator)
Comment on lines +52 to +57
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very clean and easy to follow. Will be good to explain this (and link to references) in the proposed stat vignette.

) %>%
select(
"GroupID",
"Numerator",
"Denominator",
jwildfire marked this conversation as resolved.
Show resolved Hide resolved
"Metric",
OverallMetric = "vMu",
Factor = "phi",
Score = "z_i"
jwildfire marked this conversation as resolved.
Show resolved Hide resolved
) %>%
arrange(.data$Score)

if (!bQuiet) {
cli::cli_text("{.var Score} column created from normal approxiamtion of the binomial distribution")
}

return(dfAnalyzed)
}
84 changes: 84 additions & 0 deletions R/Analyze_Binary_PredictBounds.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#' Funnel Plot Analysis for Binary Outcomes - Predicted Boundaries
#'
#' @details
#' Applies funnel plot analysis to site-level data and then calculates predicted percentages and upper
#' and lower bounds across the full range of sample sizes.
#'
#' @section Statistical Methods:
#' This function applies funnel plot analysis to site-level data and then calculates predicted percentages
#' and upper and lower bounds (funnels) across the full range of sample sizes.
#'
#' @section Data Specification:
#' The input data (`dfTransformed`) for Analyze_Binary_PredictBounds is typically created using
#' \code{\link{Transform_Rate}} and should be one record per site with columns for:
#' - `GroupID` - Site ID
#' - `Numerator` - Total number of participants at site with event of interest
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the predictbounds, I'm seeing the output Numerator is actually the Metric, and we need to calculate out the actual numerator. I think may be ideal to have both generated from the dfBounds, so have both Numerator, Denominator, and Metric. This applies to Rate Predict Bounds as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the error and inconsistency. They are updated.

#' - `Denominator` - Total number of participants at site
#' - `Metric` - Proportion of participants at site with event of interest
#'
#' @param dfTransformed `data.frame` in format produced by
#' \code{\link{Transform_Rate}}. Must include GroupID, Numerator, Denominator and Metric.
#' @param vThreshold `numeric` upper and lower boundaries in residual space. Should be identical to
#' the thresholds used in AE_Assess().
#' @param bQuiet `logical` Suppress warning messages? Default: `TRUE`
#'
#' @return `data.frame` containing predicted boundary values with upper and lower bounds across the
#' range of observed values.
#'
#' @examples
#' dfInput <- Disp_Map_Raw()
#'
#' dfTransformed <- Transform_Rate(
#' dfInput,
#' strGroupCol = "SiteID",
#' strNumeratorCol = "Count",
#' strDenominatorCol = "Total"
#' )
#'
#' dfAnalyzed <- Analyze_Binary(dfTransformed)
#'
#' dfBounds <- Analyze_Binary_PredictBounds(dfTransformed, c(-3, -2, 2, 3))
#'
#' @import dplyr
#' @importFrom tidyr expand_grid
#'
#' @export

Analyze_Binary_PredictBounds <- function(dfTransformed, vThreshold = c(-3, -2, 2, 3), bQuiet = TRUE) {
if (is.null(vThreshold)) {
vThreshold <- c(-3, -2, 2, 3)
cli::cli_alert("vThreshold was not provided. Setting default threshold to c(-3, -2, 2, 3)")
}

# add a 0 threshold to calculate the estimate without an offset
vThreshold <- unique(c(vThreshold, 0))

# Calculate expected event count and predicted bounds across range of total exposure.
vRange <- seq(
min(dfTransformed$Denominator) - 0.05,
max(dfTransformed$Denominator) + 0.05,
by = 0.05
)

dfBounds <- tidyr::expand_grid(Threshold = vThreshold, Denominator = vRange) %>%
mutate(
LogDenominator = log(.data$Denominator),
# Calculate expected event percentage at sample size.
vMu = sum(dfTransformed$Numerator) / sum(dfTransformed$Denominator),
phi = mean(((dfTransformed$Metric - sum(dfTransformed$Numerator) / sum(dfTransformed$Denominator)) /
sqrt(sum(dfTransformed$Numerator) / sum(dfTransformed$Denominator) * (1 - sum(dfTransformed$Numerator) / sum(dfTransformed$Denominator)) / dfTransformed$Denominator)) ^ 2),
# Calculate lower and upper bounds of expected event percentage given specified threshold.
Metric = .data$vMu + .data$Threshold * sqrt(.data$phi * .data$vMu * (1 - .data$vMu) / .data$Denominator),
Numerator = .data$Metric * .data$Denominator
) %>%
# Only positive percentages are meaningful bounds
filter(.data$Numerator >= 0) %>%
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think dropping negative values is fine (and creates better visualizations) per our previous discussion, but might be worth considering whether there are edge cases where this will create unexpected results (like entire boundary lines being missing).

select(
"Threshold",
"Denominator",
"LogDenominator",
"Numerator",
"Metric"
)

return(dfBounds)
}
75 changes: 75 additions & 0 deletions R/Analyze_Rate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#' Funnel Plot Analysis with Normal Approximation for Rate Outcomes
#'
#' @details
#' Creates analysis results data for rate data using funnel plot method with normal approximation.
#'
#'
#' @section Statistical Methods:
#' This function applies funnel plots using asymptotic limits based on normal approximation of Poisson distribution for
#' the rate outcome versus the total exposure of the sites under investigation to assess data quality and safety.
#'
#' @section Data Specification:
#' The input data (`dfTransformed`) for Analyze_Rate is typically created using \code{\link{Transform_Rate}} and should be one record per site with required columns for:
#' - `GroupID` - Site ID
#' - `Numerator` - Total number of events of interest at site
#' - `Denominator` - Total number of days of exposure at site
#' - `Metric` - Rate of events at site (Numerator / Denominator)
#'
#' @param dfTransformed `data.frame` in format produced by \code{\link{Transform_Rate}}
#' @param bQuiet `logical` Suppress warning messages? Default: `TRUE`
#'
#' @return `data.frame` with one row per site with columns: GroupID, Numerator, Denominator, Metric, OverallMetric, Factor, Score.
#'
#' @examples
#' dfInput <- AE_Map_Raw() %>% na.omit()
#'
#' dfTransformed <- Transform_Rate(
#' dfInput,
#' strGroupCol = "SiteID",
#' strNumeratorCol = "Count",
#' strDenominatorCol = "Exposure"
#' )
#'
#' dfAnalyzed <- Analyze_Rate(dfTransformed)
#'
#' @import dplyr
#'
#' @export

Analyze_Rate <- function(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Analyze_Normal_Rate?

dfTransformed,
bQuiet = TRUE
) {
stopifnot(
"dfTransformed is not a data.frame" = is.data.frame(dfTransformed),
"One or more of these columns not found: GroupID, Denominator, Numerator, Metric" =
all(c("GroupID", "Denominator", "Numerator", "Metric") %in% names(dfTransformed)),
"NA value(s) found in GroupID" = all(!is.na(dfTransformed[["GroupID"]]))
)

dfAnalyzed <- dfTransformed %>%
mutate(
vMu = sum(.data$Numerator) / sum(.data$Denominator),
z_0 = (.data$Metric - .data$vMu) /
sqrt(.data$vMu / .data$Denominator),
phi = mean(.data$z_0^2),
z_i = (.data$Metric - .data$vMu) /
sqrt(.data$phi * .data$vMu / .data$Denominator)
) %>%
select(
"GroupID",
"Numerator",
"Denominator",
"Metric",
OverallMetric = "vMu",
Factor = "phi",
Score = "z_i"
jwildfire marked this conversation as resolved.
Show resolved Hide resolved
) %>%
arrange(.data$Score)

if (!bQuiet) {
cli::cli_text("{.var Score} column created from normal approxiamtion of the Poisson distribution")
}

return(dfAnalyzed)
}
Loading