Merge pull request #346 from Gilead-BioStats/fix-344-clindata-update

Fix #344 clindata update
Gilead-BioStats · Apr 6, 2022 · 47e47f7 · 47e47f7
2 parents c0be72b + 87c0e5e
commit 47e47f7
Show file tree

Hide file tree

Showing 61 changed files with 643 additions and 935 deletions.
diff --git a/.github/workflows/R-CMD-check-dev.yaml b/.github/workflows/R-CMD-check-dev.yaml
@@ -19,7 +19,7 @@ jobs:
           - {os: macOS-latest,   r: 'release'}
 
     env:
-      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}   
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
       R_KEEP_PKG_SOURCE: yes
 
     steps:
@@ -36,15 +36,15 @@ jobs:
           r-version: ${{ matrix.config.r }}
           http-user-agent: ${{ matrix.config.http-user-agent }}
           use-public-rspm: true
-      
+
       - name: Clone clindata
         run: |
           cd ..
           git clone git@github.com:Gilead-BioStats/clindata.git
           cd clindata
-          git checkout tags/v0.3.0
+          git checkout tags/v0.4.0
           cd ../gsm
-          
+
       - name: install packages
         shell: Rscript {0}
         run: |

diff --git a/.github/workflows/R-CMD-check-main.yaml b/.github/workflows/R-CMD-check-main.yaml
@@ -21,7 +21,7 @@ jobs:
           - {os: ubuntu-18.04,   r: 'release'}
 
     env:
-      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}   
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
       R_KEEP_PKG_SOURCE: yes
 
     steps:
@@ -38,15 +38,15 @@ jobs:
           r-version: ${{ matrix.config.r }}
           http-user-agent: ${{ matrix.config.http-user-agent }}
           use-public-rspm: true
-      
+
       - name: Clone clindata
         run: |
           cd ..
           git clone git@github.com:Gilead-BioStats/clindata.git
           cd clindata
-          git checkout tags/v0.3.0
+          git checkout tags/v0.4.0
           cd ../gsm
-          
+
       - name: install packages
         shell: Rscript {0}
         run: |

diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -30,9 +30,9 @@ jobs:
           cd ..
           git clone git@github.com:Gilead-BioStats/clindata.git
           cd clindata
-          git checkout tags/v0.3.0
+          git checkout tags/v0.4.0
           cd ../gsm
-          
+
       - name: install packages
         shell: Rscript {0}
         run: |

diff --git a/R/AE_Map_Raw.R b/R/AE_Map_Raw.R
@@ -4,7 +4,7 @@
 #'
 #' @details
 #'
-#' This function combines AE data with treatment exposure from subject-level Raw Data (RDSL) to create the required input for \code{\link{AE_Assess}}.
+#' This function combines AE data with treatment exposure from subject-level Raw Data to create the required input for \code{\link{AE_Assess}}.
 #'
 #' @section Data Specification:
 #'
@@ -13,36 +13,33 @@
 #' The following columns are required:
 #' - `dfAE`
 #'     - `SUBJID` - Unique subject ID
-#' - `dfRDSL`
+#' - `dfSUBJ`
 #'     - `SubjectID` - Unique subject ID
 #'     - `SiteID` - Site ID
 #'     - Value specified in strExposureCol - Treatment Exposure in days; "TimeOnTreatment" by default
 #'
 #' Note that the function can generate data summaries for specific types of AEs by passing filtered AE data to dfAE.
 #'
 #' @param dfAE AE dataset with required column SUBJID and rows for each AE record
-#' @param dfRDSL Subject-level Raw Data (RDSL) with required columns: SubjectID, SiteID, value specified in strExposureCol
-#' @param mapping List containing expected columns in each data set. By default, mapping for dfAE is: `strIDCol` = "SUBJID". By default, mapping for dfRDSL is: `strIDCol` = "SubjectID", `strSiteCol` = "SiteID", and `strExposureCol` = "TimeOnTreatment". TODO: add more descriptive info or reference to mapping.
+#' @param dfSUBJ Subject-level Raw Data with required columns: SubjectID, SiteID, value specified in strTimeOnTreatmentCol
+#' @param mapping List containing expected columns in each data set. By default, mapping for dfAE is: `strIDCol` = "SubjectID". By default, mapping for dfSUBJ is: `strIDCol` = "SubjectID", `strSiteCol` = "SiteID", and `strTimeOnTreatmentCol` = "TimeOnTreatment". TODO: add more descriptive info or reference to mapping.
 #'
 #' @return Data frame with one record per person and columns: SubjectID, SiteID, Count (number of AEs), Exposure (Time on Treatment in Days), Rate (AE/Day)
 #'
 #' @examples
-#' dfAE <- clindata::raw_ae %>% dplyr::filter(SUBJID != "")
-#' dfRDSL <- clindata::rawplus_rdsl %>% dplyr::filter(!is.na(TimeOnTreatment))
-#'
-#' dfInput <- AE_Map_Raw(dfAE, dfRDSL)
+#' dfInput <- AE_Map_Raw(dfAE = clindata::rawplus_ae, dfSUBJ = clindata::rawplus_subj)
 #'
 #' @import dplyr
 #'
 #' @export
 
-AE_Map_Raw <- function( dfAE, dfRDSL, mapping = NULL ){
+AE_Map_Raw <- function( dfAE, dfSUBJ, mapping = NULL ){
 
     # Set defaults for mapping if none is provided
     if(is.null(mapping)){
         mapping <- list(
-            dfAE = list(strIDCol="SUBJID"),
-            dfRDSL = list(strIDCol="SubjectID", strSiteCol="SiteID", strExposureCol="TimeOnTreatment")
+            dfAE = list(strIDCol="SubjectID"),
+            dfSUBJ = list(strIDCol="SubjectID", strSiteCol="SiteID", strTimeOnTreatmentCol="TimeOnTreatment")
         )
     }
 
@@ -54,29 +51,29 @@ AE_Map_Raw <- function( dfAE, dfRDSL, mapping = NULL ){
         bQuiet = FALSE
     )
 
-    is_rdsl_valid <- is_mapping_valid(
-        dfRDSL,
-        mapping$dfRDSL,
-        vRequiredParams = c("strIDCol", "strSiteCol", "strExposureCol"),
-        vUniqueCols = mapping$dfRDSL$strIDCol,
+    is_subj_valid <- is_mapping_valid(
+        dfSUBJ,
+        mapping$dfSUBJ,
+        vRequiredParams = c("strIDCol", "strSiteCol", "strTimeOnTreatmentCol"),
+        vUniqueCols = mapping$dfSUBJ$strIDCol,
         bQuiet = FALSE
     )
 
     stopifnot(
         "Errors found in dfAE." = is_ae_valid$status,
-        "Errors found in dfRDSL." = is_rdsl_valid$status
+        "Errors found in dfSUBJ." = is_subj_valid$status
     )
 
     # Standardize Column Names
     dfAE_mapped <- dfAE %>%
         rename(SubjectID = mapping[["dfAE"]][["strIDCol"]]) %>%
         select(.data$SubjectID)
 
-    dfRDSL_mapped <- dfRDSL %>%
+    dfSUBJ_mapped <- dfSUBJ %>%
         rename(
-            SubjectID = mapping[["dfRDSL"]][["strIDCol"]],
-            SiteID = mapping[["dfRDSL"]][["strSiteCol"]],
-            Exposure = mapping[["dfRDSL"]][["strExposureCol"]]
+            SubjectID = mapping[["dfSUBJ"]][["strIDCol"]],
+            SiteID = mapping[["dfSUBJ"]][["strSiteCol"]],
+            Exposure = mapping[["dfSUBJ"]][["strTimeOnTreatmentCol"]]
         ) %>%
         select(.data$SubjectID, .data$SiteID, .data$Exposure)
 
@@ -85,7 +82,7 @@ AE_Map_Raw <- function( dfAE, dfRDSL, mapping = NULL ){
         group_by(.data$SubjectID) %>%
         summarize(Count=n()) %>%
         ungroup() %>%
-        mergeSubjects(dfRDSL_mapped, vFillZero="Count") %>%
+        mergeSubjects(dfSUBJ_mapped, vFillZero="Count") %>%
         mutate(Rate = .data$Count/.data$Exposure) %>%
         select(.data$SubjectID,.data$SiteID, .data$Count, .data$Exposure, .data$Rate)
 

diff --git a/R/Analyze_Poisson.R b/R/Analyze_Poisson.R
@@ -26,7 +26,7 @@
 #' @return input data.frame with columns added for "Residuals" and "PredictedCount"
 #'
 #' @examples
-#' dfInput <- AE_Map_Adam( safetyData::adam_adsl, safetyData::adam_adae )
+#' dfInput <- AE_Map_Raw(clindata::rawplus_ae, clindata::rawplus_subj)
 #' dfTransformed <- Transform_EventCount( dfInput, strCountCol = 'Count', strExposureCol = "Exposure" )
 #' dfAnalyzed <- Analyze_Poisson( dfTransformed )
 #'

diff --git a/R/Analyze_Wilcoxon.R b/R/Analyze_Wilcoxon.R
@@ -32,7 +32,7 @@
 #' @return data.frame with one row per site, columns: SiteID, N, TotalCount, TotalExposure, Rate, Estimate, PValue
 #'
 #' @examples
-#' dfInput <- AE_Map_Adam( safetyData::adam_adsl, safetyData::adam_adae )
+#' dfInput <- AE_Map_Raw(clindata::rawplus_ae, clindata::rawplus_subj)
 #' dfTransformed <- Transform_EventCount( dfInput, strCountCol = 'Count', strExposureCol = "Exposure" )
 #' dfAnalyzed <- Analyze_Wilcoxon( dfTransformed , strOutcome ="Rate")
 #'

diff --git a/R/Consent_Assess.R b/R/Consent_Assess.R
@@ -39,8 +39,8 @@
 #'    filter(SUBJID != "")
 #'
 #' dfInput <- Consent_Map_Raw(
-#'    dfConsent = raw_consent,
-#'    dfRDSL = clindata::rawplus_rdsl,
+#'    dfCONSENT = clindata::rawplus_consent,
+#'    dfSUBJ = clindata::rawplus_subj,
 #'    strConsentTypeValue = "MAINCONSENT",
 #'    strConsentStatusValue="Y"
 #' )

diff --git a/R/Consent_Map_Raw.R b/R/Consent_Map_Raw.R
@@ -4,108 +4,101 @@
 #'
 #' @details
 #'
-#' This function uses raw Consent and RDSL data to create the required input for \code{\link{Consent_Assess}}.
+#' This function uses raw Consent and Subject data to create the required input for \code{\link{Consent_Assess}}.
 #'
 #' @section Data Specification:
 #' The following columns are required:
-#' - `dfConsent`
-#'     - `SUBJID` - Subject ID
+#' - `dfCONSENT`
+#'     - `SubjectID` - Subject ID
 #'     - `CONSCAT_STD` - Type of Consent_Coded value
 #'     - `CONSYN` - Did the subject give consent? Yes / No.
 #'     - `CONSDAT` - If yes, provide date consent signed
-#' - `dfRDSL`
+#' - `dfSUBJ`
 #'     - `SubjectID` - Unique subject ID
 #'     - `SiteID` - Site ID
 #'     - `RandDate` - Randomization Date
 #'
-#' @param dfConsent consent data frame with columns: SUBJID, CONSCAT_STD , CONSYN , CONSDAT.
-#' @param dfRDSL Subject-level Raw Data (RDSL) required columns: SubjectID SiteID RandDate.
+#' @param dfCONSENT consent data frame; with the default mapping the required columns are: SubjectID, CONSENT_TYPE, CONSENT_VALUE, CONSENT_DATE.
+#' @param dfSUBJ Subject-level Raw Data required columns: SubjectID SiteID RandDate.
 #' @param mapping List containing expected columns in each data set.
-#' @param strConsentTypeValue default = "mainconsent", filters on CONSCAT_STD of dfConsent, if NULL no filtering is done.
+#' @param strConsentTypeValue default = "mainconsent", filters on CONSCAT_STD of dfCONSENT, if NULL no filtering is done.
 #' @param strConsentStatusValue default = "Yes", expected Status value for valid consent.
 #'
 #' @return Data frame with one record per person and columns: SubjectID, SiteID, Count.
 #'
 #' @import dplyr
 #'
 #' @examples
-#' library(dplyr)
-#' raw_consent <- clindata::raw_ic_elig %>%
-#'    select( c("SUBJID","DSSTDAT_RAW") )%>%
-#'    mutate( CONSCAT_STD = "MAINCONSENT", CONSYN="Y") %>%
-#'    rename( CONSDAT = DSSTDAT_RAW ) %>%
-#'    mutate( CONSDAT = as.Date(CONSDAT, format="%d %B %Y") ) %>%
-#'    filter(SUBJID != "")
 #'
 #' input <- Consent_Map_Raw(
-#'    dfConsent = raw_consent,
-#'    dfRDSL = clindata::rawplus_rdsl,
+#'    dfCONSENT = clindata::rawplus_consent,
+#'    dfSUBJ = clindata::rawplus_subj,
 #'    strConsentTypeValue = "MAINCONSENT",
 #'    strConsentStatusValue="Y"
 #')
 #'
 #' @export
 
-Consent_Map_Raw <- function( dfConsent, dfRDSL, mapping = NULL, strConsentTypeValue = "mainconsent", strConsentStatusValue="Yes"){
+Consent_Map_Raw <- function( dfCONSENT, dfSUBJ, mapping = NULL, strConsentTypeValue = "mainconsent", strConsentStatusValue="Yes"){
 
   # Set defaults for mapping if none is provided
   if(is.null(mapping)){
     mapping <- list(
-      dfConsent = list(strIDCol = "SUBJID", strConsentTypeCol = "CONSCAT_STD", strConsentStatusCol = "CONSYN", strConsentDateCol = "CONSDAT"),
-      dfRDSL = list(strIDCol = "SubjectID", strSiteCol = "SiteID", strRandDateCol = "RandDate")
+      dfCONSENT = list(strIDCol = "SubjectID", strTypeCol = "CONSENT_TYPE", strValueCol = "CONSENT_VALUE", strDateCol = "CONSENT_DATE"),
+      dfSUBJ = list(strIDCol = "SubjectID", strSiteCol = "SiteID", strRandDateCol = "RandDate")
     )
   }
 
   # Check input data vs. mapping
   is_consent_valid <- is_mapping_valid(
-    df = dfConsent,
-    mapping = mapping$dfConsent,
-    vRequiredParams = c("strIDCol", "strConsentTypeCol", "strConsentStatusCol", "strConsentDateCol"),
-    vNACols = c("strConsentDateCol"),
+    df = dfCONSENT,
+    mapping = mapping$dfCONSENT,
+    vRequiredParams = c("strIDCol", "strTypeCol", "strValueCol", "strDateCol"),
+    vNACols = c("strDateCol"),
     bQuiet=FALSE
   )
 
-  is_rdsl_valid <- is_mapping_valid(
-    df = dfRDSL,
-    mapping = mapping$dfRDSL,
+  is_subj_valid <- is_mapping_valid(
+    df = dfSUBJ,
+    mapping = mapping$dfSUBJ,
     vRequiredParams = c("strIDCol", "strSiteCol", "strRandDateCol"),
     vUniqueCols = "strIDCol",
     bQuiet=FALSE
   )
 
   stopifnot(
-    "Errors found in dfConsent." = is_consent_valid$status,
-    "Errors found in dfRDSL." = is_rdsl_valid$status,
+    "Errors found in dfCONSENT." = is_consent_valid$status,
+    "Errors found in dfSUBJ." = is_subj_valid$status,
     "strConsentTypeValue is not character"= is.character(strConsentTypeValue),
     "strConsentTypeValue has multiple values, specify only one" = length(strConsentTypeValue)==1
   )
 
   # Standardize Column Names
-  dfRDSL_mapped <- dfRDSL %>%
+  dfSUBJ_mapped <- dfSUBJ %>%
     rename(
-      SubjectID = mapping[["dfRDSL"]][["strIDCol"]],
-      SiteID = mapping[["dfRDSL"]][["strSiteCol"]],
-      RandDate = mapping[["dfRDSL"]][["strRandDateCol"]]
+      SubjectID = mapping[["dfSUBJ"]][["strIDCol"]],
+      SiteID = mapping[["dfSUBJ"]][["strSiteCol"]],
+      RandDate = mapping[["dfSUBJ"]][["strRandDateCol"]]
     ) %>%
     select(.data$SubjectID, .data$SiteID, .data$RandDate)
 
-  dfConsent_mapped <- dfConsent %>%
+  dfCONSENT_mapped <- dfCONSENT %>%
     rename(
-      SubjectID = mapping[["dfConsent"]][["strIDCol"]],
-      ConsentType = mapping[["dfConsent"]][["strConsentTypeCol"]],
-      ConsentStatus = mapping[["dfConsent"]][["strConsentStatusCol"]],
-      ConsentDate = mapping[["dfConsent"]][["strConsentDateCol"]]
+      SubjectID = mapping[["dfCONSENT"]][["strIDCol"]],
+      ConsentType = mapping[["dfCONSENT"]][["strTypeCol"]],
+      ConsentStatus = mapping[["dfCONSENT"]][["strValueCol"]],
+      ConsentDate = mapping[["dfCONSENT"]][["strDateCol"]]
     ) %>%
     select(.data$SubjectID, .data$ConsentType , .data$ConsentStatus , .data$ConsentDate)
 
 
   if(!is.null(strConsentTypeValue)){
-    dfConsent_mapped <- dfConsent_mapped %>%
+    dfCONSENT_mapped <- dfCONSENT_mapped %>%
       filter(.data$ConsentType == strConsentTypeValue)
-    if(nrow(dfConsent_mapped)==0) stop("supplied strConsentTypeValue not found in data")
+    if(nrow(dfCONSENT_mapped)==0) stop("supplied strConsentTypeValue not found in data")
   }
 
-  dfInput <- mergeSubjects(dfConsent_mapped, dfRDSL_mapped)%>%
+  dfInput <- mergeSubjects(dfCONSENT_mapped, dfSUBJ_mapped)%>%
     mutate(flag_noconsent = .data$ConsentStatus != strConsentStatusValue) %>%
     mutate(flag_missing_consent = is.na(.data$ConsentDate))%>%
     mutate(flag_missing_rand = is.na(.data$RandDate))%>%

diff --git a/R/IE_Assess.R b/R/IE_Assess.R
@@ -31,8 +31,8 @@
 #' @examples
 #'
 #' dfInput <- IE_Map_Raw(
-#'    clindata::raw_ie_all %>% dplyr::filter(SUBJID != "" ),
-#'    clindata::rawplus_rdsl,
+#'    clindata::rawplus_ie,
+#'    clindata::rawplus_subj,
 #'    vCategoryValues= c("EXCL","INCL"),
 #'    vExpectedResultValues=c(0,1)
 #')