pharmaverse · thomas-neitmann · May 7, 2021 · Apr 21, 2021 · Apr 27, 2021 · Apr 27, 2021
diff --git a/R/assertions.R b/R/assertions.R
@@ -1,3 +1,31 @@
+#' Does a Dataset Exist?
+#'
+#' Checks if an object with the given name can be found, starting the search
+#' in the environment of the caller.
+#'
+#' @param dataset Name of the dataset as a character string (e.g. `"dm"`),
+#'   not the `data.frame` itself
+#'
+#' @author Samia Kabi
+#'
+#' @return The function throws an error if no object named `dataset` is found.
+#'   Otherwise `NULL` is returned invisibly.
+#'
+#' @export
+#'
+#' @examples
+#' data(dm)
+#' assert_dataset_exist("dm")
+assert_dataset_exist <- function(dataset) {
+  # Start the lookup in the caller's frame instead of this function's own
+  # frame. The own frame always contains the argument `dataset`, so the name
+  # "dataset" would always be reported as existing, and objects which are
+  # local to a calling function would never be found.
+  if (!exists(dataset, envir = parent.frame())) {
+    err_msg <- paste0(
+      "dataset ",
+      dataset,
+      " does not exist."
+    )
+    abort(err_msg)
+  }
+  invisible(NULL)
+}
+
+
 #' Does a Dataset Contain All Required Variables?
 #'
 #' Checks if a dataset contains all required variables
@@ -94,9 +122,9 @@ assert_has_only_one_baseline_record <- function(dataset, by) { # nolint
 #' @examples
 #' data(ex)
 #' assert_has_unique_records(ex,
-#'                           by_vars = rlang::exprs(USUBJID) ,
-#'                           order = rlang::exprs(desc(EXENDTC)))
-
+#'   by_vars = rlang::exprs(USUBJID),
+#'   order = rlang::exprs(desc(EXENDTC))
+#' )
 assert_has_unique_records <- function(dataset, by_vars, order, message, message_type = "error") {
   # variables used for check
   all_vars <- list()
@@ -139,9 +167,11 @@ assert_has_unique_records <- function(dataset, by_vars, order, message, message_
     # create message
     tbl <- capture.output(print(duplicates))
     if (missing(message)) {
-      message <- paste0("Dataset contains multiple records with respect to ",
-                       paste(all_vars_msg, collapse = ", "),
-                       ".")
+      message <- paste0(
+        "Dataset contains multiple records with respect to ",
+        paste(all_vars_msg, collapse = ", "),
+        "."
+      )
     }
     err_msg <- paste0(
       message,

diff --git a/R/derive_disposition_dt.R b/R/derive_disposition_dt.R
@@ -0,0 +1,98 @@
+#' Derive a Disposition Date
+#'
+#' Derive a disposition date (`new_var`) based on a --DTC variable of the
+#' disposition dataset and add it to the input dataset. The date is imputed
+#' based on user input.
+#'
+#' @param dataset Input dataset
+#'
+#'   The variables STUDYID and USUBJID are used to merge the new variable.
+#'
+#' @param dataset_ds Dataset containing the disposition information
+#'   (usually: ds)
+#'
+#'   The variable specified in the `dtc` parameter must be in `dataset_ds`.
+#'
+#' @param new_var Name of the disposition date variable
+#'
+#'   A variable name is expected.
+#'
+#' @param dtc The --DTC date used to derive/impute --DT
+#'
+#'   A character date is expected in a format like yyyy-mm-dd or yyyy-mm-ddThh:mm:ss.
+#'   If the year part is not recorded (missing date), no imputation is performed.
+#'
+#' @param filter Condition selecting the records of `dataset_ds` to be used
+#'
+#'   Either a single unevaluated expression (as created by `expr()`) or a list
+#'   of unevaluated expressions (as created by `exprs()`) is expected.
+#'
+#'   Default is NULL, i.e., all records of `dataset_ds` are used.
+#'
+#' @param date_imputation The value to impute the day/month when a datepart is missing.
+#'
+#'   If NULL: no date imputation is performed and partial dates are returned as missing.
+#'
+#'   Otherwise, a character value is expected, either as a
+#'   - format with day and month specified as 'dd-mm': e.g. '15-06' for the 15th of June
+#'   - or as a keyword: 'FIRST', 'MID', 'LAST' to impute to the first/mid/last day/month
+#'
+#'   Default is NULL
+#'
+#' @return the input dataset with the disposition date (new_var) added
+#'
+#' @author Samia Kabi
+#'
+#' @export
+#'
+#' @examples
+#' derive_disposition_dt(
+#'   dataset = dm,
+#'   dataset_ds = ds,
+#'   new_var = RFICDT,
+#'   dtc = DSSTDTC,
+#'   filter = expr(DSCAT == "PROTOCOL MILESTONE" & DSDECOD == "INFORMED CONSENT OBTAINED")
+#' )
+derive_disposition_dt <- function(dataset,
+                                  dataset_ds,
+                                  new_var,
+                                  dtc,
+                                  filter = NULL,
+                                  date_imputation = NULL) {
+  # check if dataset_ds exists
+  # NOTE(review): the check is based on the text of the argument, so it
+  # presumably only passes if `dataset_ds` is given as a plain object name and
+  # not as an expression (e.g. a pipeline) - confirm this is intended
+  assert_dataset_exist(deparse(substitute(dataset_ds)))
+  # Check DTC is present in dataset_ds
+  assert_has_variables(dataset_ds, deparse(substitute(dtc)))
+
+  # Warn if the variable to derive already exists in the input dataset
+  warn_if_vars_exist(dataset, deparse(substitute(new_var)))
+
+  # if DS needs to be filtered, filter
+  # A list of conditions (`exprs()`) is spliced with `!!!`, a single condition
+  # (`expr()`) is unquoted with `!!` because splicing a call is deprecated
+  if (is.null(filter)) {
+    ds_subset <- dataset_ds
+  } else if (is.list(filter)) {
+    ds_subset <- dataset_ds %>%
+      filter(!!!filter)
+  } else {
+    ds_subset <- dataset_ds %>%
+      filter(!!filter)
+  }
+
+  # only 1 record per subject is expected - issue a warning otherwise
+  assert_has_unique_records(
+    dataset = ds_subset,
+    by_vars = "USUBJID",
+    message_type = "warning",
+    message = "the filter used for DS results in several records per patient - please check"
+  )
+
+  # set DTC in datedtc (resolves in mutate)
+  ds_subset <- ds_subset %>%
+    mutate(datedtc = !!enquo(dtc))
+
+  # Prefix to use in derive_vars_dt ("RFIC--", "ENRL--",...)
+  # Only a trailing "DT" is removed. The former pattern "\\DT.*" started with
+  # the non-digit class `\D` and thus cut the name at the first "T" following
+  # any non-digit character (e.g. "TRTSDT" resulted in the prefix "T").
+  prefix <- sub("DT$", "", deparse(substitute(new_var)))
+  newvar <- paste0(prefix, "DT")
+  # Create the new dispo date
+  ds__ <- derive_vars_dt(
+    ds_subset,
+    new_vars_prefix = prefix,
+    dtc = datedtc,
+    date_imputation = date_imputation,
+    flag_imputation = FALSE
+  ) %>%
+    # keep the merge keys only and rename the derived date to the requested name
+    select(STUDYID, USUBJID, !!enquo(new_var) := !!sym(newvar))
+
+  # add the new dispo date to the input dataset
+  dataset %>%
+    left_join(ds__, by = c("STUDYID", "USUBJID"))
+}
diff --git a/R/derive_disposition_eoxxstt.R b/R/derive_disposition_eoxxstt.R
@@ -0,0 +1,82 @@
+#' Derive a Disposition Status
+#'
+#' Derive a disposition status from the relevant records in the disposition
+#' domain and add it to the input dataset.
+#'
+#' @param dataset Input dataset.
+#'
+#'   The variables STUDYID and USUBJID are used to merge the status.
+#'
+#' @param dataset_ds Dataset containing the disposition information
+#'   (usually: ds).
+#'
+#'   The variable specified in the `status_var` parameter must be in
+#'   `dataset_ds`.
+#'
+#' @param new_var Name of the disposition status variable.
+#'
+#'   A variable name is expected.
+#'
+#' @param status_var The variable used to derive the disposition status.
+#'
+#'   A character vector is expected:
+#'   If status_var equals 'COMPLETED' then the disposition status is set to COMPLETED,
+#'   If status_var is not equal to 'COMPLETED' then the disposition status is set to DISCONTINUED,
+#'   Otherwise if there is no record available for status_var in dataset_ds, set to ONGOING.
+#'
+#' @param filter Condition selecting the records of `dataset_ds` to be used.
+#'
+#'   Either a single unevaluated expression (as created by `expr()`) or a list
+#'   of unevaluated expressions (as created by `exprs()`) is expected.
+#'
+#'   Default is NULL, i.e., all records of `dataset_ds` are used.
+#'
+#' @return The input dataset with the disposition status (`new_var`) added.
+#'
+#' @author Samia Kabi
+#'
+#' @export
+#'
+#' @examples
+#' derive_disposition_eoxxstt(
+#'   dataset = dm,
+#'   dataset_ds = ds,
+#'   new_var = EOSSTT,
+#'   status_var = DSDECOD,
+#'   filter = expr(DSCAT == "DISPOSITION EVENT" & DSSCAT == "STUDY COMPLETION/EARLY DISCONTINUATION")
+#' )
+derive_disposition_eoxxstt <- function(dataset,
+                                       dataset_ds,
+                                       new_var,
+                                       status_var,
+                                       filter = NULL) {
+  # check if dataset_ds exists
+  # NOTE(review): the check is based on the text of the argument, so it
+  # presumably only passes if `dataset_ds` is given as a plain object name and
+  # not as an expression (e.g. a pipeline) - confirm this is intended
+  assert_dataset_exist(deparse(substitute(dataset_ds)))
+  # Check status_var is present in dataset_ds
+  assert_has_variables(dataset_ds, deparse(substitute(status_var)))
+
+  # Warn if the variable to derive already exists in the input dataset
+  warn_if_vars_exist(dataset, deparse(substitute(new_var)))
+
+  # if DS needs to be filtered, filter
+  # A list of conditions (`exprs()`) is spliced with `!!!`, a single condition
+  # (`expr()`) is unquoted with `!!` because splicing a call is deprecated
+  if (is.null(filter)) {
+    ds_subset <- dataset_ds
+  } else if (is.list(filter)) {
+    ds_subset <- dataset_ds %>%
+      filter(!!!filter)
+  } else {
+    ds_subset <- dataset_ds %>%
+      filter(!!filter)
+  }
+
+  # only 1 record per subject is expected - issue a warning otherwise
+  assert_has_unique_records(
+    dataset = ds_subset,
+    by_vars = "USUBJID",
+    message_type = "warning",
+    message = "the filter used for DS results in several records per patient - please check"
+  )
+
+  # set status_var in status (resolves in mutate)
+  ds_subset <- ds_subset %>%
+    mutate(status___ = !!enquo(status_var)) %>%
+    select(STUDYID, USUBJID, status___)
+
+  # Add the status var and Derive the new dispo status in the input dataset
+  # The pipeline is the last expression (no assignment) so that the result is
+  # returned visibly.
+  dataset %>%
+    left_join(ds_subset, by = c("STUDYID", "USUBJID")) %>%
+    mutate(
+      # a missing status (no matching DS record after the join) is neither
+      # equal nor unequal to "COMPLETED" and thus ends up as "ONGOING"
+      !!enquo(new_var) := case_when(
+        status___ == "COMPLETED" ~ "COMPLETED",
+        status___ != "COMPLETED" ~ "DISCONTINUED",
+        TRUE ~ "ONGOING"
+      )
+    ) %>%
+    # drop the temporary status variable
+    select(-ends_with("___"))
+}
diff --git a/R/derive_var_brthdt.R b/R/derive_var_brthdt.R
@@ -0,0 +1,8 @@
+
+#' Derive the Birth Date
+#'
+#' Thin wrapper around `derive_vars_dt()`: the function is called with the
+#' prefix `"BRTH"` for the new variables and `BRTHDTC` as character date.
+#'
+#' @param dataset Input dataset, passed on to `derive_vars_dt()`
+#'
+#' @param date_imputation Passed on to the `date_imputation` parameter of
+#'   `derive_vars_dt()`
+#'
+#'   Default is "MID"
+#'
+#' @return The value returned by `derive_vars_dt()`
+#'
+#' @noRd
+derive_var_brthdt <- function(dataset, date_imputation = "MID") {
+  derive_vars_dt(
+    dataset,
+    date_imputation = date_imputation,
+    dtc = BRTHDTC,
+    new_vars_prefix = "BRTH"
+  )
+}
diff --git a/R/derive_var_dcsreas.R b/R/derive_var_dcsreas.R
@@ -0,0 +1,16 @@
+
+#' Derive DCSREAS
+#'
+#' `DSDECOD` is added from `dataset_ds` by `derive_merged_vars()`. `DCSREAS`
+#' is set to this value if it differs from "COMPLETED" and to `NA` otherwise
+#' (including the case that the value is missing).
+#'
+#' @param dataset Input dataset, passed on to `derive_merged_vars()`
+#'
+#' @param dataset_ds Passed on to the `dataset_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @param filter_ds Passed on to the `filter_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @return The result of `derive_merged_vars()` with `DCSREAS` added. All
+#'   variables with a name ending in `___` are removed.
+#'
+#' @noRd
+derive_var_dcsreas <- function(dataset,
+                               dataset_ds,
+                               filter_ds = expr(
+                                 DSCAT == "DISPOSITION EVENT" &
+                                   DSSCAT == "STUDY COMPLETION/EARLY DISCONTINUATION"
+                               )) {
+  # DSDECOD is added under a temporary name
+  with_decod <- derive_merged_vars(
+    dataset,
+    dataset_add = dataset_ds,
+    filter_add = filter_ds,
+    new_vars = exprs(temp___ = DSDECOD)
+  )
+
+  with_decod %>%
+    mutate(
+      DCSREAS = case_when(
+        temp___ != "COMPLETED" ~ temp___,
+        TRUE ~ NA_character_
+      )
+    ) %>%
+    # remove the temporary variable
+    select(-ends_with("___"))
+}
diff --git a/R/derive_var_dcsreasp.R b/R/derive_var_dcsreasp.R
@@ -0,0 +1,18 @@
+
+#' Derive DCSREASP
+#'
+#' `DSTERM` and `DSDECOD` are added from `dataset_ds` by
+#' `derive_merged_vars()`. `DCSREASP` is set to `DSTERM` if `DSDECOD` differs
+#' from "COMPLETED" and to `NA` otherwise (including the case that `DSDECOD`
+#' is missing).
+#'
+#' @param dataset Input dataset, passed on to `derive_merged_vars()`
+#'
+#' @param dataset_ds Passed on to the `dataset_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @param filter_ds Passed on to the `filter_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @return The result of `derive_merged_vars()` with `DCSREASP` added. All
+#'   variables with a name ending in `___` are removed.
+#'
+#' @noRd
+derive_var_dcsreasp <- function(dataset,
+                                dataset_ds,
+                                filter_ds = expr(
+                                  DSCAT == "DISPOSITION EVENT" &
+                                    DSSCAT == "STUDY COMPLETION/EARLY DISCONTINUATION"
+                                )) {
+  # DSTERM and DSDECOD are added under temporary names
+  with_ds_vars <- derive_merged_vars(
+    dataset,
+    dataset_add = dataset_ds,
+    filter_add = filter_ds,
+    new_vars = exprs(term___ = DSTERM, decod___ = DSDECOD)
+  )
+
+  with_ds_vars %>%
+    mutate(DCSREASP = case_when(
+      decod___ != "COMPLETED" ~ term___,
+      TRUE ~ NA_character_
+    )) %>%
+    # remove the temporary variables
+    select(-ends_with("___"))
+}
diff --git a/R/derive_var_enrldt.R b/R/derive_var_enrldt.R
@@ -0,0 +1,16 @@
+
+#' Derive ENRLDT
+#'
+#' Calls `derive_merged_vars()` to add `ENRLDT`, which is defined as
+#' `convert_dtc_to_dt(impute_dtc(DSSTDTC, date_imputation))`.
+#'
+#' @param dataset Input dataset, passed on to `derive_merged_vars()`
+#'
+#' @param dataset_ds Passed on to the `dataset_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @param filter_ds Passed on to the `filter_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @param date_imputation Passed on to the `date_imputation` parameter of
+#'   `impute_dtc()`
+#'
+#'   Default is NULL
+#'
+#' @return The value returned by `derive_merged_vars()`
+#'
+#' @noRd
+derive_var_enrldt <- function(dataset,
+                              dataset_ds,
+                              filter_ds = expr(DSDECOD == "ENROLLED"),
+                              date_imputation = NULL) {
+  # capture the argument once, it is injected into the expression below
+  imputation <- enquo(date_imputation)
+
+  derive_merged_vars(
+    dataset,
+    dataset_add = dataset_ds,
+    filter_add = filter_ds,
+    new_vars = exprs(
+      ENRLDT = convert_dtc_to_dt(
+        impute_dtc(DSSTDTC, date_imputation = !!imputation)
+      )
+    )
+  )
+}
+
diff --git a/R/derive_var_eosdt.R b/R/derive_var_eosdt.R
@@ -0,0 +1,12 @@
+
+#' Derive EOSDT
+#'
+#' Calls `derive_merged_vars()` to add `EOSDT`, which is defined as
+#' `convert_dtc_to_dt(impute_dtc(DSSTDTC, date_imputation))`.
+#'
+#' @param dataset Input dataset, passed on to `derive_merged_vars()`
+#'
+#' @param dataset_ds Passed on to the `dataset_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @param filter_ds Passed on to the `filter_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#'   The default is a single expression created by `expr()`, as for the other
+#'   `derive_var_*()` functions calling `derive_merged_vars()`.
+#'
+#' @param date_imputation Passed on to the `date_imputation` parameter of
+#'   `impute_dtc()`
+#'
+#'   Default is NULL
+#'
+#' @return The value returned by `derive_merged_vars()`
+#'
+#' @noRd
+derive_var_eosdt <- function(dataset,
+                             dataset_ds,
+                             filter_ds = expr(
+                               DSCAT == "DISPOSITION EVENT" &
+                                 DSSCAT == "STUDY COMPLETION/EARLY DISCONTINUATION"
+                             ),
+                             date_imputation = NULL) {
+  derive_merged_vars(
+    dataset,
+    dataset_add = dataset_ds,
+    filter_add = filter_ds,
+    new_vars = exprs(
+      EOSDT = convert_dtc_to_dt(
+        impute_dtc(DSSTDTC, date_imputation = !!enquo(date_imputation))
+      )
+    )
+  )
+}
diff --git a/R/derive_var_eosdy.R b/R/derive_var_eosdy.R
@@ -0,0 +1,9 @@
+
+#' Derive EOSDY
+#'
+#' Thin wrapper around `derive_duration()` which is called with `EOSDY` as
+#' new variable.
+#'
+#' @param dataset Input dataset, passed on to `derive_duration()`
+#'
+#' @param start_date Passed on to the `start_date` parameter of
+#'   `derive_duration()`
+#'
+#'   Default is TRTSDT
+#'
+#' @param end_date Passed on to the `end_date` parameter of
+#'   `derive_duration()`
+#'
+#'   Default is EOSDT
+#'
+#' @return The value returned by `derive_duration()`
+#'
+#' @noRd
+derive_var_eosdy <- function(dataset, start_date = TRTSDT, end_date = EOSDT) {
+  # the unevaluated arguments are captured and forwarded
+  derive_duration(
+    dataset,
+    end_date = !!enquo(end_date),
+    start_date = !!enquo(start_date),
+    new_var = EOSDY
+  )
+}
diff --git a/R/derive_var_eosstt.R b/R/derive_var_eosstt.R
@@ -0,0 +1,17 @@
+
+#' Derive EOSSTT
+#'
+#' `DSDECOD` is added from `dataset_ds` by `derive_merged_vars()`. `EOSSTT` is
+#' set to "COMPLETED" if this value equals "COMPLETED", to "DISCONTINUED" if
+#' it differs from "COMPLETED", and to "ONGOING" otherwise, i.e., if the value
+#' is missing.
+#'
+#' @param dataset Input dataset, passed on to `derive_merged_vars()`
+#'
+#' @param dataset_ds Passed on to the `dataset_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @param filter_ds Passed on to the `filter_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @return The result of `derive_merged_vars()` with `EOSSTT` added. All
+#'   variables with a name ending in `___` are removed.
+#'
+#' @noRd
+derive_var_eosstt <- function(dataset,
+                              dataset_ds,
+                              filter_ds = expr(
+                                DSCAT == "DISPOSITION EVENT" &
+                                  DSSCAT == "STUDY COMPLETION/EARLY DISCONTINUATION"
+                              )) {
+  # DSDECOD is added under a temporary name
+  with_decod <- derive_merged_vars(
+    dataset,
+    dataset_add = dataset_ds,
+    filter_add = filter_ds,
+    new_vars = exprs(temp___ = DSDECOD)
+  )
+
+  with_decod %>%
+    mutate(
+      EOSSTT = case_when(
+        temp___ == "COMPLETED" ~ "COMPLETED",
+        temp___ != "COMPLETED" ~ "DISCONTINUED",
+        TRUE ~ "ONGOING"
+      )
+    ) %>%
+    # remove the temporary variable
+    select(-ends_with("___"))
+}
diff --git a/R/derive_var_randdt.R b/R/derive_var_randdt.R
@@ -0,0 +1,11 @@
+#' Derive RANDDT
+#'
+#' Calls `derive_merged_vars()` to add `RANDDT`, which is defined as
+#' `convert_dtc_to_dt(impute_dtc(DSSTDTC, date_imputation))`.
+#'
+#' @param dataset Input dataset, passed on to `derive_merged_vars()`
+#'
+#' @param dataset_ds Passed on to the `dataset_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @param filter_ds Passed on to the `filter_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @param date_imputation Passed on to the `date_imputation` parameter of
+#'   `impute_dtc()`
+#'
+#'   Default is NULL
+#'
+#' @return The value returned by `derive_merged_vars()`
+#'
+#' @noRd
+derive_var_randdt <- function(dataset,
+                              dataset_ds,
+                              filter_ds = expr(
+                                DSCAT == "PROTOCOL MILESTONE" &
+                                  DSDECOD == "RANDOMIZATION"
+                              ),
+                              date_imputation = NULL) {
+  # capture the argument once, it is injected into the expression below
+  imputation <- enquo(date_imputation)
+
+  derive_merged_vars(
+    dataset,
+    dataset_add = dataset_ds,
+    filter_add = filter_ds,
+    new_vars = exprs(
+      RANDDT = convert_dtc_to_dt(
+        impute_dtc(DSSTDTC, date_imputation = !!imputation)
+      )
+    )
+  )
+}
diff --git a/R/derive_var_rficdt.R b/R/derive_var_rficdt.R
@@ -0,0 +1,13 @@
+
+#' Derive RFICDT
+#'
+#' Calls `derive_merged_vars()` to add `RFICDT`, which is defined as
+#' `convert_dtc_to_dt(impute_dtc(DSSTDTC, date_imputation))`.
+#'
+#' @param dataset Input dataset, passed on to `derive_merged_vars()`
+#'
+#' @param dataset_ds Passed on to the `dataset_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @param filter_ds Passed on to the `filter_add` parameter of
+#'   `derive_merged_vars()`
+#'
+#' @param date_imputation Passed on to the `date_imputation` parameter of
+#'   `impute_dtc()`
+#'
+#'   Default is NULL
+#'
+#' @return The value returned by `derive_merged_vars()`
+#'
+#' @noRd
+derive_var_rficdt <- function(dataset,
+                              dataset_ds,
+                              filter_ds = expr(
+                                DSCAT == "PROTOCOL MILESTONE" &
+                                  startsWith(DSSCAT, "PROTOCOL") &
+                                  DSDECOD == "INFORMED CONSENT OBTAINED"
+                              ),
+                              date_imputation = NULL) {
+  # capture the argument once, it is injected into the expression below
+  imputation <- enquo(date_imputation)
+
+  derive_merged_vars(
+    dataset,
+    dataset_add = dataset_ds,
+    filter_add = filter_ds,
+    # exprs() creates a named list of expressions while expr() creates a
+    # single expression; the original author noted that expr() does not work
+    # here - presumably `new_vars` must be a named list, TODO confirm
+    new_vars = exprs(
+      RFICDT = convert_dtc_to_dt(
+        impute_dtc(DSSTDTC, date_imputation = !!imputation)
+      )
+    )
+  )
+}