fix #454

Gilead-BioStats · May 19, 2022 · 4e7ed61 · 4e7ed61
1 parent 688f185
commit 4e7ed61
Show file tree

Hide file tree

Showing 11 changed files with 121 additions and 88 deletions.
diff --git a/R/AE_Assess.R b/R/AE_Assess.R
@@ -24,10 +24,10 @@
 #' @return `list` Assessment, a named list with:
 #' - each data frame in the data pipeline
 #'   - `dfInput`
-#'   - `dfTransformed`, returned by {gsm::Transform_EventCount()}
-#'   - `dfAnalyzed`, returned by {gsm::Analyze_Poisson} or {gsm::Analyze_Wilcoxon}
-#'   - `dfFlagged`, returned by {gsm::Flag()}
-#'   - `dfSummary`, returned by {gsm::Summarize()}
+#'   - `dfTransformed`, returned by [gsm::Transform_EventCount()]
+#'   - `dfAnalyzed`, returned by [gsm::Analyze_Poisson()] or [gsm::Analyze_Wilcoxon()]
+#'   - `dfFlagged`, returned by [gsm::Flag()]
+#'   - `dfSummary`, returned by [gsm::Summarize()]
 #' - assessment metadata
 #'   - `strFunctionName`
 #'   - `lParams`
@@ -79,7 +79,7 @@ AE_Assess <- function(
     lTags = lTags,
     dfInput = dfInput
   )
-browser()
+
   checks <- CheckInputs(
     context = "AE_Assess",
     dfs = list(dfInput = lAssess$dfInput),

diff --git a/R/AE_Map_Adam.R b/R/AE_Map_Adam.R
@@ -2,23 +2,23 @@
 #'
 #' @description
 #' Convert analysis adverse event (AE) data, by default ADaM data, to formatted input data to
-#' {gsm::AE_Assess()}.
+#' [gsm::AE_Assess()].
 #'
 #' @details
 #' `AE_Map_Adam` combines AE data with subject-level treatment exposure data to create formatted
-#' input data to {gsm::AE_Assess()}. This function creates an input dataset for the AE Assessment
-#' ({gsm::AE_Assess()}) by binding subject-level AE counts (derived from `dfADAE`) to subject-level
+#' input data to [gsm::AE_Assess()]. This function creates an input dataset for the AE Assessment
+#' ([gsm::AE_Assess()]) by binding subject-level AE counts (derived from `dfADAE`) to subject-level
 #' data (from `dfADSL`). Note that the function can generate data summaries for specific types of
 #' AEs by passing filtered AE data to `dfADAE`.
 #'
 #' @param dfs `list` Input data frames:
 #'  - `dfADAE`: `data.frame` Event-level data with one record per AE.
 #'  - `dfADSL`: `data.frame` Subject-level data with one record per subject.
 #' @param lMapping `list` Column metadata with structure `domain$key`, where `key` contains the name of the column.
-#' @param bReturnChecks `logical` Return input checks from {gsm::is_mapping_valid()}? Default: `FALSE`
+#' @param bReturnChecks `logical` Return input checks from [gsm::is_mapping_valid()]? Default: `FALSE`
 #' @param bQuiet `logical` Suppress warning messages? Default: `TRUE`
 #'
-#' @return `data.frame` Data frame with one record per subject, the input to {gsm::AE_Assess()}. If
+#' @return `data.frame` Data frame with one record per subject, the input to [gsm::AE_Assess()]. If
 #' `bReturnChecks` is `TRUE` `AE_Map_Adam` returns a named `list` with:
 #' - `df`: the data frame described above
 #' - `lChecks`: a named `list` of check results

diff --git a/R/AE_Map_Raw.R b/R/AE_Map_Raw.R
@@ -2,23 +2,23 @@
 #'
 #' @description
 #' Convert raw adverse event (AE) data, typically processed case report form data, to formatted
-#' input data to {gsm::AE_Assess()}.
+#' input data to [gsm::AE_Assess()].
 #'
 #' @details
 #' `AE_Map_Raw` combines AE data with subject-level treatment exposure data to create formatted
-#' input data to {gsm::AE_Assess()}. This function creates an input dataset for the AE Assessment
-#' ({gsm::AE_Assess()}) by binding subject-level AE counts (derived from `dfAE`) to subject-level
+#' input data to [gsm::AE_Assess()]. This function creates an input dataset for the AE Assessment
+#' ([gsm::AE_Assess()]) by binding subject-level AE counts (derived from `dfAE`) to subject-level
 #' data (from `dfSUBJ`). Note that the function can generate data summaries for specific types of
 #' AEs by passing filtered AE data to `dfAE`.
 #'
 #' @param dfs `list` Input data frames:
 #'  - `dfAE`: `data.frame` Event-level data with one record per AE.
 #'  - `dfSUBJ`: `data.frame` Subject-level data with one record per subject.
 #' @param lMapping `list` Column metadata with structure `domain$key`, where `key` contains the name of the column.
-#' @param bReturnChecks `logical` Return input checks from {gsm::is_mapping_valid()}? Default: `FALSE`
+#' @param bReturnChecks `logical` Return input checks from [gsm::is_mapping_valid()]? Default: `FALSE`
 #' @param bQuiet `logical` Suppress warning messages? Default: `TRUE`
 #'
-#' @return `data.frame` Data frame with one record per subject, the input to {gsm::AE_Assess()}. If
+#' @return `data.frame` Data frame with one record per subject, the input to [gsm::AE_Assess()]. If
 #' `bReturnChecks` is `TRUE` `AE_Map_Raw` returns a named `list` with:
 #' - `df`: the data frame described above
 #' - `lChecks`: a named `list` of check results

diff --git a/R/Analyze_Poisson_PredictBounds.R b/R/Analyze_Poisson_PredictBounds.R
@@ -63,22 +63,35 @@ Analyze_Poisson_PredictBounds <- function(dfTransformed, vThreshold = c(-5, 5))
     mutate(
       # expected event count
       vMu = as.numeric(exp(.data$LogExposure * cModel$coefficients[2] + cModel$coefficients[1])),
-      vLo = vThreshold[1]^2 - 2 * .data$vMu,
-      vHi = vThreshold[2]^2 - 2 * .data$vMu,
+      a = qchisq(0.95, 1), # used in Pearson calculation
 
-      # ?
+      # lower bound
+      vLo = vThreshold[1]^2 - 2 * .data$vMu,
       vWLo = vLo / (2 * exp(1) * .data$vMu),
+      PredictYLo = vLo / (2 * lamW::lambertWm1(.data$vWLo)), # Lambert W
+
+      CINormalLo = vMu - 1.96*sqrt(vMu / nrow(dfTransformed)), # Normal approximation
+      CIExactLo = qchisq(0.025, 2*vMu)/2, # Exact
+      CIPearsonLo = ( vMu + a / 2 ) - sqrt(a) * sqrt( vMu + a/4 ), # Pearson
+
+      #  upper bound
+      vHi = vThreshold[2]^2 - 2 * .data$vMu,
       vWHi = vHi / (2 * exp(1) * .data$vMu),
+      PredictYHi = vHi / (2 * lamW::lambertW0(.data$vWHi)), # Lambert W
 
-      # predict bounds
-      PredictYLo = vLo / (2 * lamW::lambertWm1(.data$vWLo)),
-      PredictYHigh = vHi / (2 * lamW::lambertW0(.data$vWHi)),
+      CINormalHi = vMu + 1.96*sqrt(vMu / nrow(dfTransformed)), # Normal approximation
+      CIExactHi = qchisq(0.975, 2*(vMu + 1))/2, # Exact
+      CIPearsonHi = ( vMu + a / 2 ) + sqrt(a) * sqrt( vMu + a/4 ), # Pearson
 
       # Set lower limit of predicted bounds to 0.
       LowerCount = if_else(is.nan(.data$PredictYLo), 0, .data$PredictYLo),
-      UpperCount = if_else(is.nan(.data$PredictYHigh), 0, .data$PredictYHigh)
-    ) %>%
-    select(.data$LogExposure, MeanCount = .data$vMu, .data$LowerCount, .data$UpperCount)
-browser()
-  return(dfBounds)
+      UpperCount = if_else(is.nan(.data$PredictYHi), 0, .data$PredictYHi)
+    )
+
+  return(
+    dfBounds %>%
+      select(
+        .data$LogExposure, MeanCount = .data$vMu, .data$LowerCount, .data$UpperCount
+      )
+  )
 }
diff --git a/R/Consent_Assess.R b/R/Consent_Assess.R
@@ -31,10 +31,10 @@
 #' @return `list` Assessment, a named list with:
 #' - each data frame in the data pipeline
 #'   - `dfInput`
-#'   - `dfTransformed`, returned by {gsm::Transform_EventCount()}
-#'   - `dfAnalyzed`, a copy of `dfTransformed` and input to `gsm::Flag`
-#'   - `dfFlagged`, returned by {gsm::Flag()}
-#'   - `dfSummary`, returned by {gsm::Summarize()}
+#'   - `dfTransformed`, returned by [gsm::Transform_EventCount()]
+#'   - `dfAnalyzed`, a copy of `dfTransformed` and input to [gsm::Flag()]
+#'   - `dfFlagged`, returned by [gsm::Flag()]
+#'   - `dfSummary`, returned by [gsm::Summarize()]
 #' - assessment metadata
 #'   - `strFunctionName`
 #'   - `lParams`

diff --git a/R/Consent_Map_Raw.R b/R/Consent_Map_Raw.R
@@ -5,7 +5,7 @@
 #'
 #' @details
 #' `Consent_Map_Raw` combines consent data with subject-level data to create formatted input data
-#' to {gsm::Consent_Assess()}. This function creates an input dataset for the Consent Assessment
+#' to [gsm::Consent_Assess()]. This function creates an input dataset for the Consent Assessment
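-#' (${Consent_Assess()} by binding subject-level counts of consent issues (derived from `dfCONSENT`) to
+#' ([gsm::Consent_Assess()]) by binding subject-level counts of consent issues (derived from `dfCONSENT`) to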
 #' subject-level data (from `dfSUBJ`). Note the function can generate data summaries for specific
 #' types of consent by customizing `lMapping$dfCONSENT`.
@@ -14,11 +14,11 @@
 #'  - `dfCONSENT`: `data.frame` Consent type-level data with one record per subject per consent type.
 #'  - `dfSUBJ`: `data.frame` Subject-level data with one record per subject.
 #' @param lMapping `list` Column metadata with structure `domain$key`, where `key` contains the name of the column.
-#' @param bReturnChecks `logical` Return input checks from {gsm::is_mapping_valid()}? Default: `FALSE`
+#' @param bReturnChecks `logical` Return input checks from [gsm::is_mapping_valid()]? Default: `FALSE`
 #' @param bQuiet `logical` Suppress warning messages? Default: `TRUE`
 #'
 #' @return `data.frame` Data frame with one record per subject, the input to
-#' {gsm::Consent_Assess()}. If `bReturnChecks` is `TRUE` `Consent_Map_Raw` returns a named `list`
+#' [gsm::Consent_Assess()]. If `bReturnChecks` is `TRUE` `Consent_Map_Raw` returns a named `list`
 #' with:
 #' - `df`: the data frame described above
 #' - `lChecks`: a named `list` of check results

diff --git a/R/Flag.R b/R/Flag.R
@@ -1,14 +1,19 @@
 #' Make data frame with flagged values
 #'
-#' Adds columns flagging sites that represent possible statistical outliers. Rows with PValue less than 0.05 are flagged by default.
+#' Adds columns flagging sites that represent possible statistical outliers. Rows with PValue less
+#' than 0.05 are flagged by default.
 #'
 #' @details
-#'
-#' This function provides a generalized framework for flagging sites as part of the GSM data pipeline (TODO add link to data vignette).
+#' This function provides a generalized framework for flagging sites as part of the GSM data
+#' pipeline (TODO add link to data vignette).
 #'
 #' @section Data Specification:
-#'
-#' \code{Flag} is designed to support the input data (` dfAnalyzed`) input data from many different \code{Analyze} functions. At a minimum, the input data must have a `SiteID` column and a column of numeric values (identified by the `strColumn` parameter) that will be compared to the specified thresholds (`vThreshold`) to calculate a new `Flag` column. Optionally, a second column of numeric values (identified by `strValueColumn`) can be specified to set the directionality of the `Flag`.
+#' \code{Flag} is designed to support input data (`dfAnalyzed`) from many different
+#' \code{Analyze} functions. At a minimum, the input data must have a `SiteID` column and a column
+#' of numeric values (identified by the `strColumn` parameter) that will be compared to the
+#' specified thresholds (`vThreshold`) to calculate a new `Flag` column. Optionally, a second column
+#' of numeric values (identified by `strValueColumn`) can be specified to set the directionality of
+#' the `Flag`.
 #'
 #' In short, the following columns are considered:
 #' - `SiteID` - Site ID (required)
@@ -17,26 +22,34 @@
 #'
 #' @param dfAnalyzed data.frame where flags should be added.
 #' @param strColumn Name of the column to use for thresholding.
-#' @param vThreshold Vector of 2 numeric values representing lower and upper threshold values. All values in strColumn are compared to vThreshold using strict comparisons. Values less than the lower threshold or greater than the upper threshold are flagged as -1 and 1 respectively. Values equal to the threshold values are set to 0 (i.e. not flagged). If NA is provided for either threshold value it is ignored, and no values are flagged based on the threshold. NA and NaN values in strColumn are given NA flag values.
-#' @param strValueColumn Optional, Name of the Column to use for sign of Flag. If value for that row is higher than median of strValueColumn then Flag = 1, if lower then Flag = -1.
+#' @param vThreshold Vector of 2 numeric values representing lower and upper threshold values. All
+#' values in strColumn are compared to vThreshold using strict comparisons. Values less than the lower threshold or greater than the upper threshold are flagged as -1 and 1 respectively. Values equal to the threshold values are set to 0 (i.e. not flagged). If NA is provided for either threshold value it is ignored, and no values are flagged based on the threshold. NA and NaN values in strColumn are given NA flag values.
+#' @param strValueColumn Optional, name of the column to use for the sign of Flag. For flagged rows,
+#' if the value is greater than or equal to the median of strValueColumn then Flag = 1, if lower then Flag = -1.
 #'
-#' @return input data frame with the columns added for "ThresholdLow","ThresholdHigh","ThresholdCol" and "Flag"
+#' @return input data frame with the columns added for "ThresholdLow","ThresholdHigh","ThresholdCol"
+#' and "Flag"
 #'
 #' @examples
 #' dfInput <- AE_Map_Adam()
-#' dfTransformed <- Transform_EventCount( dfInput, strCountCol = 'Count', strExposureCol = "Exposure" )
-#' dfAnalyzed <- Analyze_Wilcoxon( dfTransformed)
-#' dfFlagged <- Flag( dfAnalyzed ) #PValue < 0.05 flagged
-#' dfFlagged10 <- Flag( dfAnalyzed, vThreshold=c(0.10,NA) ) #PValue <0.10 flagged
-#' #Flag direction set based on 'Statistic' column
-#' dfFlagged <- Flag( dfAnalyzed ,  strColumn = 'PValue', strValueColumn = 'Estimate')
+#' dfTransformed <- Transform_EventCount(dfInput, strCountCol = "Count", strExposureCol = "Exposure")
+#' dfAnalyzed <- Analyze_Wilcoxon(dfTransformed)
+#' dfFlagged <- Flag(dfAnalyzed) # PValue < 0.05 flagged
+#' dfFlagged10 <- Flag(dfAnalyzed, vThreshold = c(0.10, NA)) # PValue <0.10 flagged
+#' # Flag direction set based on 'Estimate' column
+#' dfFlagged <- Flag(dfAnalyzed, strColumn = "PValue", strValueColumn = "Estimate")
 #'
 #' @import dplyr
 #' @importFrom stats median
 #'
 #' @export
 
-Flag <- function( dfAnalyzed , strColumn="PValue", vThreshold=c(0.05,NA),strValueColumn = NULL){
+Flag <- function(
+  dfAnalyzed,
+  strColumn = "PValue",
+  vThreshold = c(0.05, NA),
+  strValueColumn = NULL
+) {
   stopifnot(
     "dfAnalyzed is not a data frame" = is.data.frame(dfAnalyzed),
     "strColumn is not character" = is.character(strColumn),
@@ -48,34 +61,41 @@ Flag <- function( dfAnalyzed , strColumn="PValue", vThreshold=c(0.05,NA),strValu
     "SiteID not found in dfAnalyzed" = "SiteID" %in% names(dfAnalyzed)
   )
 
-  if(all(!is.na(vThreshold))){
-    "vThreshold must contain a minimum and maximum value (i.e., vThreshold = c(1, 2))" = stopifnot(vThreshold[2]>vThreshold[1])
+  if (all(!is.na(vThreshold))) { # only compare the bounds when both are supplied
+    stopifnot("vThreshold must contain a minimum and maximum value (i.e., vThreshold = c(1, 2))" = vThreshold[2] > vThreshold[1])
+  }
 
-  dfFlagged<-dfAnalyzed %>%
-    mutate(ThresholdLow = vThreshold[1]) %>%
-    mutate(ThresholdHigh= vThreshold[2]) %>%
-    mutate(ThresholdCol = strColumn) %>%
-    mutate(Flag = case_when(
-      !is.na(vThreshold[1]) & (.data[[strColumn]] < vThreshold[1]) ~ -1,
-      !is.na(vThreshold[2]) & (.data[[strColumn]] > vThreshold[2]) ~ 1,
-      is.na(.data[[strColumn]]) ~ NA_real_,
-      is.nan(.data[[strColumn]]) ~ NA_real_,
-      TRUE~0 # All other values set to 0 (not flagged)
-    ))
+  dfFlagged <- dfAnalyzed %>%
+    mutate(
+      ThresholdLow = vThreshold[1],
+      ThresholdHigh = vThreshold[2],
+      ThresholdCol = strColumn,
+      Flag = case_when(
+        !is.na(vThreshold[1]) & (.data[[strColumn]] < vThreshold[1]) ~ -1,
+        !is.na(vThreshold[2]) & (.data[[strColumn]] > vThreshold[2]) ~ 1,
+        is.na(.data[[strColumn]]) ~ NA_real_,
+        is.nan(.data[[strColumn]]) ~ NA_real_,
+        TRUE ~ 0 # All other values set to 0 (not flagged)
+      )
+    )
 
   # if strValueColumn is supplied, it can only affect sign of Flag (1 or -1)
-  if(!is.null(strValueColumn)){
-    nMedian <-  dfFlagged %>% pull(strValueColumn) %>% stats::median(na.rm=TRUE)
-    dfFlagged <- dfFlagged  %>%
-      mutate(Flag = case_when(
-        Flag != 0 & .data[[strValueColumn]] >= nMedian ~ 1,
-        Flag != 0 & .data[[strValueColumn]] < nMedian ~ -1,
-        TRUE ~ Flag
-      ))
+  if (!is.null(strValueColumn)) {
+    nMedian <- dfFlagged %>%
+      pull(strValueColumn) %>%
+      stats::median(na.rm = TRUE)
+
+    dfFlagged <- dfFlagged %>%
+      mutate(
+        Flag = case_when(
+          Flag != 0 & .data[[strValueColumn]] >= nMedian ~ 1,
+          Flag != 0 & .data[[strValueColumn]] < nMedian ~ -1,
+          TRUE ~ Flag
+        )
+      )
   }
 
-  dfFlagged <- dfFlagged  %>% arrange(match(.data$Flag, c(1, -1, 0)))
+  dfFlagged <- dfFlagged %>% arrange(match(.data$Flag, c(1, -1, 0)))
 
-  return( dfFlagged )
+  return(dfFlagged)
 }
diff --git a/R/IE_Assess.R b/R/IE_Assess.R
@@ -21,10 +21,10 @@
 #' @return `list` Assessment, a named list with:
 #' - each data frame in the data pipeline
 #'   - `dfInput`
-#'   - `dfTransformed`, returned by {gsm::Transform_EventCount()}
-#'   - `dfAnalyzed`, a copy of `dfTransformed` and input to `gsm::Flag`
-#'   - `dfFlagged`, returned by {gsm::Flag()}
-#'   - `dfSummary`, returned by {gsm::Summarize()}
+#'   - `dfTransformed`, returned by [gsm::Transform_EventCount()]
+#'   - `dfAnalyzed`, a copy of `dfTransformed` and input to [gsm::Flag()]
+#'   - `dfFlagged`, returned by [gsm::Flag()]
+#'   - `dfSummary`, returned by [gsm::Summarize()]
 #' - assessment metadata
 #'   - `strFunctionName`
 #'   - `lParams`

diff --git a/R/IE_Map_Raw.R b/R/IE_Map_Raw.R
@@ -2,23 +2,23 @@
 #'
 #' @description
 #' Convert raw inclusion/exclusion (IE) data, typically processed case report form data, to formatted
-#' input data to {gsm::IE_Assess()}.
+#' input data to [gsm::IE_Assess()].
 #'
 #' @details
 #' `IE_Map_Raw` combines IE data with subject-level data to create formatted input data to
-#' {gsm::IE_Assess()}. This function creates an input dataset for the IE Assessment
-#' (${gsm::IE_Assess()}) by binding subject-level unmet IE criteria counts (derived from `dfIE`) to
+#' [gsm::IE_Assess()]. This function creates an input dataset for the IE Assessment
+#' ([gsm::IE_Assess()]) by binding subject-level unmet IE criteria counts (derived from `dfIE`) to
 #' subject-level data (from `dfSUBJ`). Note that the function can generate data summaries for
 #' specific types of IE criteria by passing filtered IE data to `dfIE`.
 #'
 #' @param dfs `list` Input data frames:
 #'  - `dfIE`: `data.frame` Criterion-level data with one record subject per criterion.
 #'  - `dfSUBJ`: `data.frame` Subject-level data with one record per subject.
 #' @param lMapping `list` Column metadata with structure `domain$key`, where `key` contains the name of the column.
-#' @param bReturnChecks `logical` Return input checks from {gsm::is_mapping_valid()}? Default: `FALSE`
+#' @param bReturnChecks `logical` Return input checks from [gsm::is_mapping_valid()]? Default: `FALSE`
 #' @param bQuiet `logical` Suppress warning messages? Default: `TRUE`
 #'
-#' @return `data.frame` Data frame with one record per subject, the input to {gsm::IE_Assess()}. If
+#' @return `data.frame` Data frame with one record per subject, the input to [gsm::IE_Assess()]. If
 #' `bReturnChecks` is `TRUE` `IE_Map_Raw` returns a named `list` with:
 #' - `df`: the data frame described above
 #' - `lChecks`: a named `list` of check results

diff --git a/R/PD_Assess.R b/R/PD_Assess.R
@@ -20,10 +20,10 @@
 #' @return `list` Assessment, a named list with:
 #' - each data frame in the data pipeline
 #'   - `dfInput`
-#'   - `dfTransformed`, returned by {gsm::Transform_EventCount()}
-#'   - `dfAnalyzed`, returned by {gsm::Analyze_Poisson} or {gsm::Analyze_Wilcoxon}
-#'   - `dfFlagged`, returned by {gsm::Flag()}
-#'   - `dfSummary`, returned by {gsm::Summarize()}
+#'   - `dfTransformed`, returned by [gsm::Transform_EventCount()]
+#'   - `dfAnalyzed`, returned by [gsm::Analyze_Poisson()] or [gsm::Analyze_Wilcoxon()]
+#'   - `dfFlagged`, returned by [gsm::Flag()]
+#'   - `dfSummary`, returned by [gsm::Summarize()]
 #' - assessment metadata
 #'   - `strFunctionName`
 #'   - `lParams`