Merge pull request #257 from Gilead-BioStats/release-v0.2.0
Release v0.2.0
jwildfire authored Mar 15, 2022
2 parents 178814e + 353d221 commit 9a2d654
Showing 75 changed files with 2,056 additions and 2,750 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: gsm
Title: Gilead Statistical Monitoring
Version: 0.1.0
Version: 0.2.0
Authors@R: c(
person("George", "Wu", email="george.wu@gilead.com", role = c("aut", "cre")),
person("Jeremy", "Wildfire", email="jeremy.wildfire@gilead.com", role = c("aut")))
@@ -11,6 +11,8 @@ Imports:
broom,
dplyr,
lubridate,
ggplot2,
lamW,
magrittr,
purrr,
tidyr
8 changes: 7 additions & 1 deletion NAMESPACE
@@ -7,6 +7,7 @@ export(AE_Map_Raw)
export(Analyze_Chisq)
export(Analyze_Fisher)
export(Analyze_Poisson)
export(Analyze_Poisson_PredictBounds)
export(Analyze_Wilcoxon)
export(Consent_Assess)
export(Consent_Map_Raw)
@@ -18,10 +19,14 @@ export(PD_Assess)
export(PD_Map_Raw)
export(Summarize)
export(Transform_EventCount)
export(Visualize_Count)
export(Visualize_Scatter)
import(dplyr)
import(lubridate)
import(ggplot2)
importFrom(broom,augment)
importFrom(broom,glance)
importFrom(lamW,lambertW0)
importFrom(lamW,lambertWm1)
importFrom(magrittr,"%>%")
importFrom(purrr,map)
importFrom(purrr,map_df)
@@ -33,5 +38,6 @@ importFrom(stats,median)
importFrom(stats,offset)
importFrom(stats,pnorm)
importFrom(stats,poisson)
importFrom(stats,reorder)
importFrom(stats,wilcox.test)
importFrom(tidyr,unnest)
29 changes: 16 additions & 13 deletions R/AE_Assess.R
@@ -13,6 +13,7 @@
#' - `SiteID` - Site ID
#' - `Count` - Number of Adverse Events
#' - `Exposure` - Number of days of exposure
#' - `Rate` - Rate of Adverse Events (Count / Exposure)
#'
#' The Assessment
#' - \code{\link{Transform_EventCount}} creates `dfTransformed`.
@@ -22,37 +23,38 @@
#'
#' @section Statistical Assumptions:
#'
#' A Poisson or Wilcoxon model is used to generate estimates and p-values for each site (as specified with the `cMethod` parameter). Those model outputs are then used to flag possible outliers using the thresholds specified in `vThreshold`. In the Poisson model, sites with an estimand less than -5 are flagged as -1 and greater than 5 are flagged as 1 by default. For Wilcoxon, sites with p-values less than 0.0001 are flagged by default.
#' A Poisson or Wilcoxon model is used to generate estimates and p-values for each site (as specified with the `strMethod` parameter). Those model outputs are then used to flag possible outliers using the thresholds specified in `vThreshold`. In the Poisson model, sites with an estimand less than -5 are flagged as -1 and greater than 5 are flagged as 1 by default. For Wilcoxon, sites with p-values less than 0.0001 are flagged by default.
#'
#' See \code{\link{Analyze_Poisson}} and \code{\link{Analyze_Wilcoxon}} for additional details about the statistical methods and their assumptions.
#'
#' @param dfInput input data with one record per person and the following required columns: SubjectID, SiteID, Count, Exposure
#' @param vThreshold numeric vector with 2 threshold values. Defaults to c(-5,5) for strMethod = "poisson" and c(0.0001,NA) for strMethod = "wilcoxon".
#' @param cLabel Assessment label
#' @param cMethod valid methods are "poisson" (the default), or "wilcoxon"
#' @param strLabel Assessment label
#' @param strMethod valid methods are "poisson" (the default), or "wilcoxon"
#' @param bDataList Should all assessment datasets be returned as a list? If False (the default), only the Summary data frame is returned
#'
#' @examples
#' dfInput <- AE_Map_Adam( safetyData::adam_adsl, safetyData::adam_adae )
#' SafetyAE <- AE_Assess( dfInput )
#' SafetyAE_Wilk <- AE_Assess( dfInput, cMethod="wilcoxon")
#' SafetyAE_Wilk <- AE_Assess( dfInput, strMethod="wilcoxon")
#'
#' @return If `bDataList` is false (the default), the summary data frame (`dfSummary`) is returned. If `bDataList` is true, a list containing all data in the standard data pipeline (`dfInput`, `dfTransformed`, `dfAnalyzed`, `dfFlagged` and `dfSummary`) is returned.
#'
#' @export

AE_Assess <- function( dfInput, vThreshold=NULL, cLabel="", cMethod="poisson",bDataList=FALSE){
AE_Assess <- function( dfInput, vThreshold=NULL, strLabel="", strMethod="poisson",bDataList=FALSE){
stopifnot(
"dfInput is not a data.frame" = is.data.frame(dfInput),
"cLabel is not character" = is.character(cLabel),
"cMethod is not 'poisson' or 'wilcoxon'" = cMethod %in% c("poisson","wilcoxon"),
"strLabel is not character" = is.character(strLabel),
"strMethod is not 'poisson' or 'wilcoxon'" = strMethod %in% c("poisson","wilcoxon"),
"bDataList is not logical" = is.logical(bDataList),
"One or more of these columns: SubjectID, SiteID, Count, Exposure, and Rate not found in dfInput"=all(c("SubjectID","SiteID", "Count","Exposure", "Rate") %in% names(dfInput))
"One or more of these columns: SubjectID, SiteID, Count, Exposure, and Rate not found in dfInput"=all(c("SubjectID","SiteID", "Count","Exposure", "Rate") %in% names(dfInput)),
"strMethod must be length 1" = length(strMethod) == 1
)
lAssess <- list()
lAssess$dfInput <- dfInput
lAssess$dfTransformed <- gsm::Transform_EventCount( lAssess$dfInput, cCountCol = 'Count', cExposureCol = "Exposure" )
if(cMethod == "poisson"){
lAssess$dfTransformed <- gsm::Transform_EventCount( lAssess$dfInput, strCountCol = 'Count', strExposureCol = "Exposure" )
if(strMethod == "poisson"){
if(is.null(vThreshold)){
vThreshold = c(-5,5)
}else{
@@ -64,7 +66,9 @@ AE_Assess <- function( dfInput, vThreshold=NULL, cLabel="", cMethod="poisson",bD
}
lAssess$dfAnalyzed <- gsm::Analyze_Poisson( lAssess$dfTransformed)
lAssess$dfFlagged <- gsm::Flag( lAssess$dfAnalyzed , strColumn = 'Residuals', vThreshold =vThreshold)
} else if(cMethod=="wilcoxon"){
lAssess$dfSummary <- gsm::Summarize( lAssess$dfFlagged, strScoreCol = 'Residuals', strAssessment="Safety", strLabel= strLabel)

} else if(strMethod=="wilcoxon"){
if(is.null(vThreshold)){
vThreshold = c(0.0001,NA)
}else{
@@ -77,10 +81,9 @@ AE_Assess <- function( dfInput, vThreshold=NULL, cLabel="", cMethod="poisson",bD
}
lAssess$dfAnalyzed <- gsm::Analyze_Wilcoxon( lAssess$dfTransformed)
lAssess$dfFlagged <- gsm::Flag( lAssess$dfAnalyzed , strColumn = 'PValue', vThreshold =vThreshold, strValueColumn = 'Estimate')
lAssess$dfSummary <- gsm::Summarize( lAssess$dfFlagged, strAssessment="Safety", strLabel= strLabel)
}

lAssess$dfSummary <- gsm::Summarize( lAssess$dfFlagged, cAssessment="Safety", cLabel= cLabel)

if(bDataList){
return(lAssess)
} else {
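The roxygen above documents the renamed arguments (cLabel to strLabel, cMethod to strMethod) and the standard data pipeline (Transform_EventCount, Analyze_*, Flag, Summarize). A minimal usage sketch, assuming the safetyData package used in the examples is installed; the label and the Wilcoxon threshold below are illustrative values, not package defaults:

library(gsm)

dfInput <- AE_Map_Adam(safetyData::adam_adsl, safetyData::adam_adae)

# Default Poisson method with default residual thresholds c(-5, 5)
dfSummary <- AE_Assess(dfInput)

# Wilcoxon method; c(0.01, NA) is an illustrative p-value threshold
dfSummaryWilcoxon <- AE_Assess(dfInput, vThreshold = c(0.01, NA), strMethod = "wilcoxon")

# Return the full pipeline: dfInput, dfTransformed, dfAnalyzed, dfFlagged, dfSummary
lAssess <- AE_Assess(dfInput, strLabel = "Safety AE Assessment", bDataList = TRUE)
names(lAssess)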
7 changes: 4 additions & 3 deletions R/AE_Map_Raw.R
@@ -20,8 +20,8 @@
#'
#' Note that the function can generate data summaries for specific types of AEs by passing filtered AE data to dfAE.
#'
#' @param dfAE AE dataset with columns SUBJID and rows for each AE record
#' @param dfRDSL Subject-level Raw Data (RDSL) required columns: SubjectID, SiteID, value specified in strExposureCol
#' @param dfAE AE dataset with required column SUBJID and rows for each AE record
#' @param dfRDSL Subject-level Raw Data (RDSL) with required columns: SubjectID, SiteID, value specified in strExposureCol
#' @param strExposureCol Name of exposure column. 'TimeOnTreatment' by default
#'
#' @return Data frame with one record per person with columns: SubjectID, SiteID, Count (number of AEs), Exposure (Time on Treatment in Days), Rate (AE/Day)
@@ -40,7 +40,8 @@ AE_Map_Raw <- function( dfAE, dfRDSL, strExposureCol="TimeOnTreatment"){
"SUBJID column not found in dfAE"="SUBJID" %in% names(dfAE),
"strExposureCol is not character"=is.character(strExposureCol),
"SubjectID, SiteID and strExposureCol columns not found in dfRDSL"=all(c("SubjectID","SiteID",strExposureCol) %in% names(dfRDSL)),
"NAs found in Subject ID column of dfAE" = all(!is.na(dfAE$SUBJID))
"NAs found in SUBJID column of dfAE" = all(!is.na(dfAE$SUBJID)),
"NAs found in Subject ID column of dfRDSL" = all(!is.na(dfRDSL$SubjectID))
)

dfInput <- dfRDSL %>%
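A minimal sketch of the AE_Map_Raw() interface documented above, using made-up input data; only the required columns (SUBJID in dfAE; SubjectID, SiteID, and the exposure column in dfRDSL) are shown, and the values are illustrative only:

# One row per AE record; SUBJID is the only required column in dfAE
dfAE <- data.frame(SUBJID = c("001", "001", "002"))

# Subject-level RDSL data with the required SubjectID, SiteID, and exposure columns
dfRDSL <- data.frame(
  SubjectID = c("001", "002", "003"),
  SiteID = c("S01", "S01", "S02"),
  TimeOnTreatment = c(120, 90, 45)
)

# One record per subject with Count, Exposure, and Rate (Count / Exposure)
dfInput <- AE_Map_Raw(dfAE, dfRDSL, strExposureCol = "TimeOnTreatment")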
2 changes: 1 addition & 1 deletion R/Analyze_Chisq.R
@@ -29,7 +29,7 @@
#'
#' @examples
#' dfInput <- Disp_Map(dfDisp = safetyData::adam_adsl, strCol = "DCREASCD",strReason = "Adverse Event")
#' dfTransformed <- Transform_EventCount( dfInput, cCountCol = 'Count' )
#' dfTransformed <- Transform_EventCount( dfInput, strCountCol = 'Count' )
#' dfAnalyzed <- Analyze_Chisq( dfTransformed )
#'
#' @export
22 changes: 13 additions & 9 deletions R/Analyze_Fisher.R
@@ -2,7 +2,7 @@
#'
#' Creates Analysis results data for count data using the Fisher's exact test
#'
#' @details
#' @details
#'
#' Analyzes count data using the Fisher's exact test
#'
@@ -12,16 +12,17 @@
#'
#' @section Data Specification:
#'
#' The input data (` dfTransformed`) for the Analyze_Fisher is typically created using \code{\link{Transform_EventCount}} and should be one record per Site with columns for:
#' The input data (`dfTransformed`) for Analyze_Fisher is typically created using \code{\link{Transform_EventCount}} and should be one record per site with required columns for:
#' - `SiteID` - Site ID
#' - `N` - Total number of participants at site
#' - `Count` - Total number of participants at site with event of interest
#' - `TotalCount` - Total number of participants at site with event of interest
#'
#'
#' @param dfTransformed data.frame in format produced by \code{\link{Transform_EventCount}}
#' @param strOutcome required, name of column in dfTransformed dataset to perform Fisher test on
#' @param strOutcome required, name of column in dfTransformed dataset to perform Fisher test on. Default is "TotalCount".
#'
#' @importFrom stats fisher.test as.formula
#' @import dplyr
#' @importFrom stats fisher.test
#' @importFrom purrr map
#' @importFrom broom glance
#' @importFrom tidyr unnest
@@ -30,16 +31,19 @@
#'
#' @examples
#' dfInput <- Disp_Map(dfDisp = safetyData::adam_adsl, strCol = "DCREASCD",strReason = "Adverse Event")
#' dfTransformed <- Transform_EventCount( dfInput, cCountCol = 'Count' )
#' dfTransformed <- Transform_EventCount( dfInput, strCountCol = 'Count' )
#' dfAnalyzed <- Analyze_Fisher( dfTransformed )
#'
#' @export

Analyze_Fisher <- function( dfTransformed , strOutcome = "TotalCount") {

stopifnot(
is.data.frame(dfTransformed),
all(c("SiteID", "N", strOutcome) %in% names(dfTransformed))
"dfTransformed is not a data.frame" = is.data.frame(dfTransformed),
"One or more of these columns: SiteID, N, or the value in strOutcome not found in dfTransformed" = all(c("SiteID", "N", strOutcome) %in% names(dfTransformed)),
"NA value(s) found in SiteID" = all(!is.na(dfTransformed[["SiteID"]])),
"strOutcome must be length 1" = length(strOutcome) == 1,
"strOutcome is not character" = is.character(strOutcome)
)

fisher_model<- function(site){
@@ -58,7 +62,7 @@ Analyze_Fisher <- function( dfTransformed , strOutcome = "TotalCount") {
dfAnalyzed <- dfTransformed %>%
mutate(model = map(.data$SiteID, fisher_model)) %>%
mutate(summary = map(.data$model, broom::glance)) %>%
unnest(summary) %>%
tidyr::unnest(summary) %>%
rename(
Estimate = .data$estimate,
PValue = .data[['p.value']]
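A minimal sketch of the updated Analyze_Fisher() interface, following the roxygen example above and assuming safetyData is installed; the last call illustrates one of the new stopifnot() messages:

dfInput <- Disp_Map(dfDisp = safetyData::adam_adsl, strCol = "DCREASCD", strReason = "Adverse Event")
dfTransformed <- Transform_EventCount(dfInput, strCountCol = "Count")

# Default outcome column ("TotalCount")
dfAnalyzed <- Analyze_Fisher(dfTransformed)

# The stricter checks now fail fast with readable messages, e.g. a length-2
# strOutcome stops with "strOutcome must be length 1"
try(Analyze_Fisher(dfTransformed, strOutcome = c("TotalCount", "N")))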
60 changes: 0 additions & 60 deletions R/Analyze_Poisson.R

This file was deleted.

60 changes: 60 additions & 0 deletions R/Analyze_Poisson_PredictBounds.R
@@ -0,0 +1,60 @@
#' Poisson Analysis - Predicted Boundaries
#'
#' @details
#'
#' Fits a Poisson model to site-level data and then calculates predicted count values and upper and lower bounds across the full range of exposure values.
#'
#' @section Statistical Methods:
#'
#' This function fits a Poisson model to site-level data and then calculates residuals for each site. The Poisson model is run using standard methods in the `stats` package by fitting a `glm` model with family set to `poisson` using a "log" link. Upper and lower boundary values are then calculated using the method described here TODO: Add link. In short, the residual thresholds in `vThreshold` are converted to count boundaries at each exposure value using the Lambert W function.
#'
#' @section Data Specification:
#'
#' The input data (`dfTransformed`) for Analyze_Poisson_PredictBounds is typically created using \code{\link{Transform_EventCount}} and should be one record per site with columns for:
#' - `SubjectID` - Unique subject ID
#' - `SiteID` - Site ID
#' - `TotalCount` - Number of Events
#' - `TotalExposure` - Number of days of exposure
#'
#' @param dfTransformed data.frame in format produced by \code{\link{Transform_EventCount}}. Must include SubjectID, SiteID, TotalCount and TotalExposure.
#' @param vThreshold upper and lower boundaries in residual space. Should be identical to the thresholds used in AE_Assess().
#'
#' @importFrom stats glm offset poisson pnorm
#' @importFrom broom augment
#' @importFrom lamW lambertW0 lambertWm1
#'
#' @return data frame containing predicted boundary values with upper and lower bounds across the range of observed values
#'
#' @examples
#' dfInput <- AE_Map_Adam( safetyData::adam_adsl, safetyData::adam_adae )
#' dfTransformed <- Transform_EventCount( dfInput, strCountCol = 'Count', strExposureCol = "Exposure" )
#' dfBounds <- Analyze_Poisson_PredictBounds(dfTransformed, c(-5,5))
#'
#' @export
Analyze_Poisson_PredictBounds <- function( dfTransformed, vThreshold=c(-5,5)){
dfTransformed$LogExposure <- log(dfTransformed$TotalExposure)
cModel <- glm(
TotalCount ~ stats::offset(LogExposure),
family=poisson(link="log"),
data=dfTransformed
)

dfBounds <- data.frame(
LogExposure = seq(
min(dfTransformed$LogExposure)-0.05,
max(dfTransformed$LogExposure)+0.05,
by=0.05
)) %>%
mutate( vMu = as.numeric( exp( .data$LogExposure * cModel$coefficients[2] + cModel$coefficients[1] ))) %>%
mutate( vWHi = (vThreshold[2]^2 - 2 * .data$vMu) / ( 2 * exp(1) * .data$vMu )) %>%
mutate( vWLo = (vThreshold[1]^2 - 2 * .data$vMu) / ( 2 * exp(1) * .data$vMu )) %>%
mutate( PredictYHigh = ( vThreshold[2]^2-2* .data$vMu) / (2*lamW::lambertW0( .data$vWHi ))) %>%
mutate( PredictYLo = ( vThreshold[1]^2-2* .data$vMu) / (2*lamW::lambertWm1( .data$vWLo ))) %>%
mutate( MeanCount = exp( .data$LogExposure * cModel$coefficients[2] + cModel$coefficients[1])) %>%
mutate( LowerCount = if_else(is.nan( .data$PredictYLo ), 0, .data$PredictYLo )) %>%
mutate( UpperCount = if_else(is.nan( .data$PredictYHigh ), 0, .data$PredictYHigh )) %>%
select( .data$LogExposure, .data$MeanCount, .data$LowerCount, .data$UpperCount )


return( dfBounds )
}
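The reference for the boundary calculation is still a TODO in the roxygen above, but the computation is consistent with inverting the Poisson deviance at a fixed residual threshold T: solving 2*(y*log(y/mu) - (y - mu)) = T^2 for the count y at a fitted mean mu gives y = (T^2 - 2*mu) / (2 * W((T^2 - 2*mu) / (2*exp(1)*mu))), where W is the Lambert W function (the principal branch lambertW0 for the upper bound, the lambertWm1 branch for the lower bound), hence the lamW imports. A plotting sketch under that reading, assuming safetyData is available; the ggplot2 code is illustrative only and is not the package's Visualize_* implementation:

library(ggplot2)

dfInput <- AE_Map_Adam(safetyData::adam_adsl, safetyData::adam_adae)
dfTransformed <- Transform_EventCount(dfInput, strCountCol = "Count", strExposureCol = "Exposure")
dfBounds <- Analyze_Poisson_PredictBounds(dfTransformed, vThreshold = c(-5, 5))

# Overlay the predicted mean and bounds (returned on the log-exposure scale) on site-level data
ggplot() +
  geom_line(data = dfBounds, aes(x = exp(LogExposure), y = MeanCount)) +
  geom_line(data = dfBounds, aes(x = exp(LogExposure), y = LowerCount), linetype = "dashed") +
  geom_line(data = dfBounds, aes(x = exp(LogExposure), y = UpperCount), linetype = "dashed") +
  geom_point(data = dfTransformed, aes(x = TotalExposure, y = TotalCount)) +
  labs(x = "Total exposure (days)", y = "Event count")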