diff --git a/NAMESPACE b/NAMESPACE index 8e9b124535..9bab92b1cf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -100,6 +100,7 @@ export(derive_var_trtdurd) export(derive_var_trtemfl) export(derive_vars_aage) export(derive_vars_atc) +export(derive_vars_cat) export(derive_vars_computed) export(derive_vars_crit_flag) export(derive_vars_dt) diff --git a/NEWS.md b/NEWS.md index 07b2f5c5ef..7bb95dc688 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ ## New Features +- New function `derive_vars_cat()` for deriving pairs of variables or more, e.g. +`AVALCATx` & `AVALCAxN`. (#2480) - New function `derive_vars_crit_flag()` for deriving criterion flag variables (`CRITy`, `CRITyFL`, `CRITyFLN`). (#2468) diff --git a/R/derive_vars_cat.R b/R/derive_vars_cat.R new file mode 100644 index 0000000000..7e0c12aabf --- /dev/null +++ b/R/derive_vars_cat.R @@ -0,0 +1,258 @@ +#' Derive Categorization Variables Like `AVALCATy` and `AVALCAyN` +#' @param dataset +#' `r roxygen_param_dataset(expected_vars = c("by_vars", "definition"))` +#' @param definition List of expressions created by `exprs()`. +#' Must be in rectangular format and specified using the same syntax as when creating +#' a `tibble` using the `tribble()` function. +#' The `definition` object will be converted to a `tibble` using `tribble()` inside this function. +#' +#' Must contain: +#' - the column `condition` which will be converted to a logical expression and +#' will be used on the `dataset` input. +#' - at least one additional column with the new column name and +#' the category value(s) used by the logical expression. +#' - the column specified in `by_vars` (if `by_vars` is specified) +#' +#' e.g. if `by_vars` is not specified: +#' +#' ```{r} +#' #| eval: false +#' exprs(~condition, ~AVALCAT1, ~AVALCA1N, +#' AVAL >= 140, ">=140 cm", 1, +#' AVAL < 140, "<140 cm", 2) +#' ``` +#' +#' e.g. 
if `by_vars` is specified as `exprs(VSTEST)`: +#' +#' ```{r} +#' #| eval: false +#' exprs(~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, +#' "Height", AVAL >= 140, ">=140 cm", 1, +#' "Height", AVAL < 140, "<140 cm", 2) +#' ``` +#' +#' @param by_vars list of expressions with one element. `NULL` by default. +#' Allows for specifying by groups, e.g. `exprs(PARAMCD)`. +#' Variable must be present in both `dataset` and `definition`. +#' The conditions in `definition` are applied only to those records that match `by_vars`. +#' The categorization variables are set to `NA` for records +#' not matching any of the by groups in `definition`. +#' +#' +#' @details +#' If conditions are overlapping, the row order of `definition` must be carefully considered. +#' The **first** match will determine the category. +#' i.e. if +#' +#' `AVAL = 155` +#' +#' and the `definition` is: +#' +#' ```{r} +#' #| eval: false +#' definition <- exprs( +#' ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, +#' "Height", AVAL > 170, ">170 cm", 1, +#' "Height", AVAL <= 170, "<=170 cm", 2, +#' "Height", AVAL <= 160, "<=160 cm", 3 +#' ) +#' ``` +#' then `AVALCAT1` will be `"<=170 cm"`, as this is the first match for `AVAL`. +#' If you specify: +#' +#' ```{r} +#' #| eval: false +#' definition <- exprs( +#' ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, +#' "Height", AVAL <= 160, "<=160 cm", 3, +#' "Height", AVAL <= 170, "<=170 cm", 2, +#' "Height", AVAL > 170, ">170 cm", 1 +#' ) +#' ``` +#' +#' Then `AVAL <= 160` will lead to `AVALCAT1 == "<=160 cm"`, +#' `AVAL` in-between `160` and `170` will lead to `AVALCAT1 == "<=170 cm"`, +#' and `AVAL > 170` will lead to `AVALCAT1 == ">170 cm"`. +#' +#' However, we suggest being more explicit when defining the `condition`, to avoid overlap. 
+#' In this case, the middle condition should be: +#' `AVAL <= 170 & AVAL > 160` +#' +#' @return The input dataset with the new variables defined in `definition` added +#' @family der_gen +#' @keywords der_gen +#' @export +#' +#' @examples +#' library(dplyr) +#' library(tibble) +#' +#' advs <- tibble::tribble( +#' ~USUBJID, ~VSTEST, ~AVAL, +#' "01-701-1015", "Height", 147.32, +#' "01-701-1015", "Weight", 53.98, +#' "01-701-1023", "Height", 162.56, +#' "01-701-1023", "Weight", NA, +#' "01-701-1028", "Height", NA, +#' "01-701-1028", "Weight", NA, +#' "01-701-1033", "Height", 175.26, +#' "01-701-1033", "Weight", 88.45 +#' ) +#' +#' definition <- exprs( +#' ~condition, ~AVALCAT1, ~AVALCA1N, ~NEWCOL, +#' VSTEST == "Height" & AVAL > 160, ">160 cm", 1, "extra1", +#' VSTEST == "Height" & AVAL <= 160, "<=160 cm", 2, "extra2" +#' ) +#' derive_vars_cat( +#' dataset = advs, +#' definition = definition +#' ) +#' +#' # Using by_vars: +#' definition2 <- exprs( +#' ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, +#' "Height", AVAL > 160, ">160 cm", 1, +#' "Height", AVAL <= 160, "<=160 cm", 2, +#' "Weight", AVAL > 70, ">70 kg", 1, +#' "Weight", AVAL <= 70, "<=70 kg", 2 +#' ) +#' +#' derive_vars_cat( +#' dataset = advs, +#' definition = definition2, +#' by_vars = exprs(VSTEST) +#' ) +#' +#' # With three conditions: +#' definition3 <- exprs( +#' ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, +#' "Height", AVAL > 170, ">170 cm", 1, +#' "Height", AVAL <= 170 & AVAL > 160, "<=170 cm", 2, +#' "Height", AVAL <= 160, "<=160 cm", 3 +#' ) +#' +#' derive_vars_cat( +#' dataset = advs, +#' definition = definition3, +#' by_vars = exprs(VSTEST) +#' ) +#' +#' # Let's derive both the MCRITyML and the MCRITyMN variables +#' adlb <- tibble::tribble( +#' ~USUBJID, ~PARAM, ~AVAL, ~AVALU, ~ANRHI, +#' "01-701-1015", "ALT", 150, "U/L", 40, +#' "01-701-1023", "ALT", 70, "U/L", 40, +#' "01-701-1036", "ALT", 130, "U/L", 40, +#' "01-701-1048", "ALT", 30, "U/L", 40, +#' "01-701-1015", "AST", 50, "U/L", 35 +#' ) +#' +#' 
definition_mcrit <- exprs( +#' ~PARAM, ~condition, ~MCRIT1ML, ~MCRIT1MN, +#' "ALT", AVAL <= ANRHI, "<=ANRHI", 1, +#' "ALT", ANRHI < AVAL & AVAL <= 3 * ANRHI, ">1-3*ANRHI", 2, +#' "ALT", 3 * ANRHI < AVAL, ">3*ANRHI", 3 +#' ) +#' +#' adlb %>% +#' derive_vars_cat( +#' definition = definition_mcrit, +#' by_vars = exprs(PARAM) +#' ) +derive_vars_cat <- function(dataset, + definition, + by_vars = NULL) { + assert_expr_list(definition) + assert_vars(by_vars, optional = TRUE) + if (length(by_vars) > 1) { + cli_abort("{.arg by_vars} must contain just one variable, e.g. {.code exprs(PARAMCD)}") + } + + assert_data_frame(dataset, + required_vars = c( + admiraldev::extract_vars(definition) %>% unique(), + by_vars + ) + ) + + # transform definition to tibble + names(definition) <- NULL + definition <- tryCatch( + { + tibble::tribble(!!!definition) + }, + error = function(e) { + # Catch the error and append your own message + cli_abort( + c( + paste( + "Failed to convert {.arg definition} to {.cls tibble}.", + "{.arg definition} should be specified similarly to how you would", + "specify a {.cls tibble} using the {.fun tibble::tribble} function so it", + "can be converted to {.cls tibble} using {.fun tibble::tribble}." 
+ ), + e$message + ) + ) + } + ) + assert_data_frame(definition, required_vars = c(exprs(condition), by_vars)) + if (!is.null(by_vars)) { + # add condition + definition <- definition %>% + mutate( + condition = extend_condition(as.character(condition), + as.character(by_vars), + is = !!sym(as.character(by_vars)) + ) %>% + parse_exprs() + ) %>% + select(-by_vars[[1]]) + } + + # extract new variable names and conditions + new_col_names <- names(definition)[!names(definition) == "condition"] + condition <- definition[["condition"]] + + # warn if new variables already exist + if (any(new_col_names %in% names(dataset))) { + cli_warn(paste( + "Column(s) in {.arg definition} already exist in {.arg dataset}.", + "Did you forget to specify {.arg by_vars},", + "or are you rerunning your code?" + )) + } + + # (re)apply the function for each new variable name and iteratively derive the categories + new_dataset <- reduce(new_col_names, function(.data, col_name) { + # extract conditions + values <- definition[[col_name]] + + .data %>% + mutate(!!sym(col_name) := eval(rlang::call2( + "case_when", + !!!map2(condition, values, ~ expr(!!.x ~ !!.y)) + ))) + }, .init = dataset) + + return(new_dataset) +} + +#' Extend a condition string by adding a new condition based on a variable and its value +#' +#' This internal helper function extends a condition string by appending a new condition +#' that checks if a variable equals a specific value. +#' +#' @param cond A character string representing an existing condition. +#' @param var A character string representing the name of the variable to check. +#' @param is A character string representing the value the variable should be equal to. +#' +#' @return A character string representing the extended condition. 
+#' @examples +#' # Extend an existing condition to include a check for 'AGE == "30"' +#' admiral:::extend_condition("SEX == 'M'", "AGE", "30") +#' @keywords internal +extend_condition <- function(cond, var, is) { + paste(cond, " & ", var, " == '", is, "'", sep = "") +} diff --git a/inst/templates/ad_adeg.R b/inst/templates/ad_adeg.R index ea0983e9e2..b9ed092c00 100644 --- a/inst/templates/ad_adeg.R +++ b/inst/templates/ad_adeg.R @@ -31,67 +31,45 @@ eg <- convert_blanks_to_na(eg) # Assign PARAMCD, PARAM, and PARAMN param_lookup <- tibble::tribble( - ~EGTESTCD, ~PARAMCD, ~PARAM, ~PARAMN, - "ECGINT", "EGINTP", "ECG Interpretation", 1, - "HR", "HR", "Heart Rate (beats/min)", 2, - "RR", "RR", "RR Duration (msec)", 3, - "RRR", "RRR", "RR Duration Rederived (msec)", 4, - "QT", "QT", "QT Duration (msec)", 10, - "QTCBR", "QTCBR", "QTcB - Bazett's Correction Formula Rederived (msec)", 11, - "QTCFR", "QTCFR", "QTcF - Fridericia's Correction Formula Rederived (msec)", 12, - "QTLCR", "QTLCR", "QTlc - Sagie's Correction Formula Rederived (msec)", 13, + ~EGTESTCD, ~PARAMCD, ~PARAM, ~PARAMN, + "ECGINT", "EGINTP", "ECG Interpretation", 1, + "HR", "HR", "Heart Rate (beats/min)", 2, + "RR", "RR", "RR Duration (msec)", 3, + "RRR", "RRR", "RR Duration Rederived (msec)", 4, + "QT", "QT", "QT Duration (msec)", 10, + "QTCBR", "QTCBR", "QTcB - Bazett's Correction Formula Rederived (msec)", 11, + "QTCFR", "QTCFR", "QTcF - Fridericia's Correction Formula Rederived (msec)", 12, + "QTLCR", "QTLCR", "QTlc - Sagie's Correction Formula Rederived (msec)", 13, ) range_lookup <- tibble::tribble( ~PARAMCD, ~ANRLO, ~ANRHI, - "EGINTP", NA, NA, - "HR", 40, 100, - "RR", 600, 1500, - "QT", 350, 450, - "RRR", 600, 1500, - "QTCBR", 350, 450, - "QTCFR", 350, 450, - "QTLCR", 350, 450, + "EGINTP", NA, NA, + "HR", 40, 100, + "RR", 600, 1500, + "QT", 350, 450, + "RRR", 600, 1500, + "QTCBR", 350, 450, + "QTCFR", 350, 450, + "QTLCR", 350, 450 ) -# ASSIGN AVALCAT1 -avalcat_lookup <- tibble::tribble( - ~AVALCA1N, 
~AVALCAT1, - 1, "<= 450 msec", - 2, ">450<=480 msec", - 3, ">480<=500 msec", - 4, ">500 msec" +# Assign AVALCAx +avalcax_lookup <- exprs( + ~condition, ~AVALCAT1, ~AVALCA1N, + startsWith(PARAMCD, "QT") & AVAL <= 450, "<= 450 msec", 1, + startsWith(PARAMCD, "QT") & AVAL > 450 & AVAL <= 480, ">450<=480 msec", 2, + startsWith(PARAMCD, "QT") & AVAL > 480 & AVAL <= 500, ">480<=500 msec", 3, + startsWith(PARAMCD, "QT") & AVAL > 500, ">500 msec", 4 ) - -# ASSIGN CHGCAT1 -chgcat_lookup <- tibble::tribble( - ~CHGCAT1N, ~CHGCAT1, - 1, "<= 30 msec", - 2, ">30<=60 msec", - 3, ">60 msec" +# Assign CHGCAx +chgcax_lookup <- exprs( + ~condition, ~CHGCAT1, ~CHGCAT1N, + startsWith(PARAMCD, "QT") & CHG <= 30, "<= 30 msec", 1, + startsWith(PARAMCD, "QT") & CHG > 30 & CHG <= 60, ">30<=60 msec", 2, + startsWith(PARAMCD, "QT") & CHG > 60, ">60 msec", 3 ) -# Here are some examples of how you can create your own functions that -# operates on vectors, which can be used in `mutate()`. Info then used for -# lookup table -format_avalca1n <- function(paramcd, aval) { - case_when( - str_detect(paramcd, "QT") & aval <= 450 ~ 1, - str_detect(paramcd, "QT") & aval > 450 & aval <= 480 ~ 2, - str_detect(paramcd, "QT") & aval > 480 & aval <= 500 ~ 3, - str_detect(paramcd, "QT") & aval > 500 ~ 4 - ) -} - -format_chgcat1n <- function(paramcd, chg) { - case_when( - str_detect(paramcd, "QT") & chg <= 30 ~ 1, - str_detect(paramcd, "QT") & chg > 30 & chg <= 60 ~ 2, - str_detect(paramcd, "QT") & chg > 60 ~ 3 - ) -} - - # Derivations ---- # Get list of ADSL vars required for derivations @@ -316,14 +294,13 @@ adeg <- adeg %>% check_type = "error" ) %>% # Derive AVALCA1N and AVALCAT1 - mutate(AVALCA1N = format_avalca1n(param = PARAMCD, aval = AVAL)) %>% - derive_vars_merged( - dataset_add = avalcat_lookup, - by_vars = exprs(AVALCA1N) + derive_vars_cat( + definition = avalcax_lookup ) %>% # Derive CHGCAT1N and CHGCAT1 - mutate(CHGCAT1N = format_chgcat1n(param = PARAMCD, chg = CHG)) %>% - 
derive_vars_merged(dataset_add = chgcat_lookup, by_vars = exprs(CHGCAT1N)) %>% + derive_vars_cat( + definition = chgcax_lookup + ) %>% # Derive PARAM and PARAMN derive_vars_merged( dataset_add = select(param_lookup, -EGTESTCD), diff --git a/inst/templates/ad_adex.R b/inst/templates/ad_adex.R index ca7bb17e34..ac0f980cac 100644 --- a/inst/templates/ad_adex.R +++ b/inst/templates/ad_adex.R @@ -257,42 +257,42 @@ adex <- adex %>% # Assign PARAMCD, PARAM, and PARAMN # ---- Lookup tables ---- param_lookup <- tibble::tribble( - ~PARAMCD, ~PARAM, ~PARAMN, - "DURD", "Study drug duration during constant dosing interval (days)", 1, - "DOSE", "Dose administered during constant dosing interval (mg)", 2, - "PLDOSE", "Planned dose during constant dosing interval (mg)", 3, - "ADJ", "Dose adjusted during constant dosing interval", 4, - "ADJAE", "Dose adjusted due to AE during constant dosing interval", 5, - "TDURD", "Overall duration (days)", 7, - "TDOSE", "Total dose administered (mg)", 8, - "AVDDSE", "Average daily dose administered (mg/mg)", 10, - "TPDOSE", "Total planned dose (mg)", 11, - "TADJ", "Dose adjusted during study", 13, - "TADJAE", "Dose adjusted during study due to AE", 14, - "PDURD", "Overall duration in W2-W24 (days)", 19, - "PDOSE", "Total dose administered in W2-W2 (mg)4", 20, - "PPDOSE", "Total planned dose in W2-W24 (mg)", 21, - "PAVDDSE", "Average daily dose administered in W2-W24 (mg)", 23, - "PADJ", "Dose adjusted during W2-W24", 24, - "PADJAE", "Dose adjusted in W2-W24 due to AE", 25, - "TDOSINT", "Overall dose intensity (%)", 90, - "PDOSINT", "W2-24 dose intensity (%)", 91 + ~PARAMCD, ~PARAM, ~PARAMN, + "DURD", "Study drug duration during constant dosing interval (days)", 1, + "DOSE", "Dose administered during constant dosing interval (mg)", 2, + "PLDOSE", "Planned dose during constant dosing interval (mg)", 3, + "ADJ", "Dose adjusted during constant dosing interval", 4, + "ADJAE", "Dose adjusted due to AE during constant dosing interval", 5, + "TDURD", 
"Overall duration (days)", 7, + "TDOSE", "Total dose administered (mg)", 8, + "AVDDSE", "Average daily dose administered (mg/mg)", 10, + "TPDOSE", "Total planned dose (mg)", 11, + "TADJ", "Dose adjusted during study", 13, + "TADJAE", "Dose adjusted during study due to AE", 14, + "PDURD", "Overall duration in W2-W24 (days)", 19, + "PDOSE", "Total dose administered in W2-W2 (mg)4", 20, + "PPDOSE", "Total planned dose in W2-W24 (mg)", 21, + "PAVDDSE", "Average daily dose administered in W2-W24 (mg)", 23, + "PADJ", "Dose adjusted during W2-W24", 24, + "PADJAE", "Dose adjusted in W2-W24 due to AE", 25, + "TDOSINT", "Overall dose intensity (%)", 90, + "PDOSINT", "W2-24 dose intensity (%)", 91 ) -# User defined functions ---- -# Derive AVALCAT1 -# Here are some examples of how you can create your own functions that -# operates on vectors, which can be used in `mutate()`. -format_avalcat1 <- function(param, aval) { - case_when( - param %in% c("TDURD", "PDURD") & aval < 30 & !is.na(aval) ~ "< 30 days", - param %in% c("TDURD", "PDURD") & aval >= 30 & aval < 90 ~ ">= 30 and < 90 days", - param %in% c("TDURD", "PDURD") & aval >= 90 ~ ">=90 days", - param %in% c("TDOSE", "PDOSE") & aval < 100 & !is.na(aval) ~ "< 100 mg", - param %in% c("TDOSE", "PDOSE") & aval >= 100 ~ ">= 100 mg", - TRUE ~ NA_character_ - ) -} +# Assign AVALCATx +avalcax_lookup <- exprs( + ~PARAMCD, ~condition, ~AVALCAT1, + "TDURD", AVAL >= 90, ">= 90 days", + "TDURD", AVAL >= 30 & AVAL < 90, ">= 30 and < 90 days", + "TDURD", AVAL < 30, "< 30 days", + "PDURD", AVAL >= 90, ">= 90 days", + "PDURD", AVAL >= 30 & AVAL < 90, ">= 30 and < 90 days", + "PDURD", AVAL < 30, "< 30 days", + "TDOSE", AVAL < 100, "< 100 mg", + "TDOSE", AVAL >= 100, ">= 100 mg", + "PDOSE", AVAL < 100, "< 100 mg", + "PDOSE", AVAL >= 100, ">= 100 mg" +) adex <- adex %>% # Add PARAMN and PARAM, AVALU @@ -301,7 +301,10 @@ adex <- adex %>% by_vars = exprs(PARAMCD) ) %>% # Derive AVALCATx - mutate(AVALCAT1 = format_avalcat1(param = PARAMCD, aval = 
AVAL)) %>% + derive_vars_cat( + definition = avalcax_lookup, + by_vars = exprs(PARAMCD) + ) %>% # Calculate ASEQ derive_var_obs_number( new_var = ASEQ, diff --git a/inst/templates/ad_adlb.R b/inst/templates/ad_adlb.R index b04807ec83..d6ad0f5ec3 100644 --- a/inst/templates/ad_adlb.R +++ b/inst/templates/ad_adlb.R @@ -29,54 +29,54 @@ lb <- convert_blanks_to_na(lb) # Assign PARAMCD, PARAM, and PARAMN param_lookup <- tibble::tribble( - ~LBTESTCD, ~PARAMCD, ~PARAM, ~PARAMN, - "ALB", "ALB", "Albumin (g/L)", 1, - "ALP", "ALKPH", "Alkaline Phosphatase (U/L)", 2, - "ALT", "ALT", "Alanine Aminotransferase (U/L)", 3, - "ANISO", "ANISO", "Anisocytes", 4, - "AST", "AST", "Aspartate Aminotransferase (U/L)", 5, - "BASO", "BASO", "Basophils Abs (10^9/L)", 6, - "BASOLE", "BASOLE", "Basophils/Leukocytes (FRACTION)", 7, - "BILI", "BILI", "Bilirubin (umol/L)", 8, - "BUN", "BUN", "Blood Urea Nitrogen (mmol/L)", 9, - "CA", "CA", "Calcium (mmol/L)", 10, - "CHOL", "CHOLES", "Cholesterol (mmol/L)", 11, - "CK", "CK", "Creatinine Kinase (U/L)", 12, - "CL", "CL", "Chloride (mmol/L)", 13, - "COLOR", "COLOR", "Color", 14, - "CREAT", "CREAT", "Creatinine (umol/L)", 15, - "EOS", "EOS", "Eosinophils (10^9/L)", 16, - "EOSLE", "EOSLE", "Eosinophils/Leukocytes (FRACTION)", 17, - "GGT", "GGT", "Gamma Glutamyl Transferase (U/L)", 18, - "GLUC", "GLUC", "Glucose (mmol/L)", 19, - "HBA1C", "HBA1C", "Hemoglobin A1C (1)", 20, - "HCT", "HCT", "Hematocrit (1)", 21, - "HGB", "HGB", "Hemoglobin (mmol/L)", 22, - "K", "POTAS", "Potassium (mmol/L)", 23, - "KETONES", "KETON", "Ketones", 24, - "LYM", "LYMPH", "Lymphocytes Abs (10^9/L)", 25, - "LYMLE", "LYMPHLE", "Lymphocytes/Leukocytes (FRACTION)", 26, - "MACROCY", "MACROC", "Macrocytes", 27, - "MCH", "MCH", "Ery. Mean Corpuscular Hemoglobin (fmol(Fe))", 28, - "MCHC", "MCHC", "Ery. Mean Corpuscular HGB Concentration (mmol/L)", 29, - "MCV", "MCV", "Ery. 
Mean Corpuscular Volume (f/L)", 30, - "MICROCY", "MICROC", "Microcytes", 31, - "MONO", "MONO", "Monocytes (10^9/L)", 32, - "MONOLE", "MONOLE", "Monocytes/Leukocytes (FRACTION)", 33, - "PH", "PH", "pH", 34, - "PHOS", "PHOS", "Phosphate (mmol/L)", 35, - "PLAT", "PLAT", "Platelet (10^9/L)", 36, - "POIKILO", "POIKIL", "Poikilocytes", 37, - "POLYCHR", "POLYCH", "Polychromasia", 38, - "PROT", "PROT", "Protein (g/L)", 39, - "RBC", "RBC", "Erythrocytes (TI/L)", 40, - "SODIUM", "SODIUM", "Sodium (mmol/L)", 41, - "SPGRAV", "SPGRAV", "Specific Gravity", 42, - "TSH", "TSH", "Thyrotropin (mU/L)", 43, - "URATE", "URATE", "Urate (umol/L)", 44, - "UROBIL", "UROBIL", "Urobilinogen", 45, - "VITB12", "VITB12", "Vitamin B12 (pmol/L)", 46, - "WBC", "WBC", "Leukocytes (10^9/L)", 47 + ~LBTESTCD, ~PARAMCD, ~PARAM, ~PARAMN, + "ALB", "ALB", "Albumin (g/L)", 1, + "ALP", "ALKPH", "Alkaline Phosphatase (U/L)", 2, + "ALT", "ALT", "Alanine Aminotransferase (U/L)", 3, + "ANISO", "ANISO", "Anisocytes", 4, + "AST", "AST", "Aspartate Aminotransferase (U/L)", 5, + "BASO", "BASO", "Basophils Abs (10^9/L)", 6, + "BASOLE", "BASOLE", "Basophils/Leukocytes (FRACTION)", 7, + "BILI", "BILI", "Bilirubin (umol/L)", 8, + "BUN", "BUN", "Blood Urea Nitrogen (mmol/L)", 9, + "CA", "CA", "Calcium (mmol/L)", 10, + "CHOL", "CHOLES", "Cholesterol (mmol/L)", 11, + "CK", "CK", "Creatinine Kinase (U/L)", 12, + "CL", "CL", "Chloride (mmol/L)", 13, + "COLOR", "COLOR", "Color", 14, + "CREAT", "CREAT", "Creatinine (umol/L)", 15, + "EOS", "EOS", "Eosinophils (10^9/L)", 16, + "EOSLE", "EOSLE", "Eosinophils/Leukocytes (FRACTION)", 17, + "GGT", "GGT", "Gamma Glutamyl Transferase (U/L)", 18, + "GLUC", "GLUC", "Glucose (mmol/L)", 19, + "HBA1C", "HBA1C", "Hemoglobin A1C (1)", 20, + "HCT", "HCT", "Hematocrit (1)", 21, + "HGB", "HGB", "Hemoglobin (mmol/L)", 22, + "K", "POTAS", "Potassium (mmol/L)", 23, + "KETONES", "KETON", "Ketones", 24, + "LYM", "LYMPH", "Lymphocytes Abs (10^9/L)", 25, + "LYMLE", "LYMPHLE", "Lymphocytes/Leukocytes 
(FRACTION)", 26, + "MACROCY", "MACROC", "Macrocytes", 27, + "MCH", "MCH", "Ery. Mean Corpuscular Hemoglobin (fmol(Fe))", 28, + "MCHC", "MCHC", "Ery. Mean Corpuscular HGB Concentration (mmol/L)", 29, + "MCV", "MCV", "Ery. Mean Corpuscular Volume (f/L)", 30, + "MICROCY", "MICROC", "Microcytes", 31, + "MONO", "MONO", "Monocytes (10^9/L)", 32, + "MONOLE", "MONOLE", "Monocytes/Leukocytes (FRACTION)", 33, + "PH", "PH", "pH", 34, + "PHOS", "PHOS", "Phosphate (mmol/L)", 35, + "PLAT", "PLAT", "Platelet (10^9/L)", 36, + "POIKILO", "POIKIL", "Poikilocytes", 37, + "POLYCHR", "POLYCH", "Polychromasia", 38, + "PROT", "PROT", "Protein (g/L)", 39, + "RBC", "RBC", "Erythrocytes (TI/L)", 40, + "SODIUM", "SODIUM", "Sodium (mmol/L)", 41, + "SPGRAV", "SPGRAV", "Specific Gravity", 42, + "TSH", "TSH", "Thyrotropin (mU/L)", 43, + "URATE", "URATE", "Urate (umol/L)", 44, + "UROBIL", "UROBIL", "Urobilinogen", 45, + "VITB12", "VITB12", "Vitamin B12 (pmol/L)", 46, + "WBC", "WBC", "Leukocytes (10^9/L)", 47 ) @@ -241,25 +241,25 @@ adlb <- adlb %>% # ATOXDSCL and ATOXDSCH hold terms defined by NCI-CTCAEv4. 
# See (https://pharmaverse.github.io/admiral/articles/lab_grading.html#implement_ctcv4) grade_lookup <- tibble::tribble( - ~PARAMCD, ~ATOXDSCL, ~ATOXDSCH, - "ALB", "Hypoalbuminemia", NA_character_, - "ALKPH", NA_character_, "Alkaline phosphatase increased", - "ALT", NA_character_, "Alanine aminotransferase increased", - "AST", NA_character_, "Aspartate aminotransferase increased", - "BILI", NA_character_, "Blood bilirubin increased", - "CA", "Hypocalcemia", "Hypercalcemia", - "CHOLES", NA_character_, "Cholesterol high", - "CK", NA_character_, "CPK increased", - "CREAT", NA_character_, "Creatinine increased", - "GGT", NA_character_, "GGT increased", - "GLUC", "Hypoglycemia", "Hyperglycemia", - "HGB", "Anemia", "Hemoglobin increased", - "POTAS", "Hypokalemia", "Hyperkalemia", - "LYMPH", "CD4 lymphocytes decreased", NA_character_, - "PHOS", "Hypophosphatemia", NA_character_, - "PLAT", "Platelet count decreased", NA_character_, - "SODIUM", "Hyponatremia", "Hypernatremia", - "WBC", "White blood cell decreased", "Leukocytosis", + ~PARAMCD, ~ATOXDSCL, ~ATOXDSCH, + "ALB", "Hypoalbuminemia", NA_character_, + "ALKPH", NA_character_, "Alkaline phosphatase increased", + "ALT", NA_character_, "Alanine aminotransferase increased", + "AST", NA_character_, "Aspartate aminotransferase increased", + "BILI", NA_character_, "Blood bilirubin increased", + "CA", "Hypocalcemia", "Hypercalcemia", + "CHOLES", NA_character_, "Cholesterol high", + "CK", NA_character_, "CPK increased", + "CREAT", NA_character_, "Creatinine increased", + "GGT", NA_character_, "GGT increased", + "GLUC", "Hypoglycemia", "Hyperglycemia", + "HGB", "Anemia", "Hemoglobin increased", + "POTAS", "Hypokalemia", "Hyperkalemia", + "LYMPH", "CD4 lymphocytes decreased", NA_character_, + "PHOS", "Hypophosphatemia", NA_character_, + "PLAT", "Platelet count decreased", NA_character_, + "SODIUM", "Hyponatremia", "Hypernatremia", + "WBC", "White blood cell decreased", "Leukocytosis", ) # Assign grade criteria diff --git 
a/inst/templates/ad_adpc.R b/inst/templates/ad_adpc.R index 850bf98082..0678b7e302 100644 --- a/inst/templates/ad_adpc.R +++ b/inst/templates/ad_adpc.R @@ -41,18 +41,6 @@ param_lookup <- tibble::tribble( "DOSE", "DOSE", "Xanomeline Patch Dose", 2, ) -# ---- User defined functions ---- - -# Here is an example of how you can create your own function that -# operates on vectors, which can be used in `mutate`. -format_avalcat1n <- function(param, aval) { - case_when( - param == "PKCONC" & aval < 1 ~ 1, - param == "PKCONC" & aval >= 1 ~ 2, - T ~ NA_real_ - ) -} - # ---- Derivations ---- # Get list of ADSL vars required for derivations diff --git a/inst/templates/ad_adpp.R b/inst/templates/ad_adpp.R index 544c976ae1..a95f7e5242 100644 --- a/inst/templates/ad_adpp.R +++ b/inst/templates/ad_adpp.R @@ -31,52 +31,40 @@ pp <- convert_blanks_to_na(pp) # Lookup tables ---- param_lookup <- tibble::tribble( - ~PPTESTCD, ~PARAMCD, ~PARAM, ~PARAMN, - "AUCALL", "AUCALL", "AUC All", 1, - "AUCIFO", "AUCIFO", "AUC Infinity Obs", 2, - "AUCIFOD", "AUCIFOD", "AUC Infinity Obs Norm by Dose", 3, - "AUCINT", "AUCINT", "AUC from T1 to T2", 4, - "AUCLST", "AUCLST", "AUC to Last Nonzero Conc", 5, - "AUCPEO", "AUCPEO", "AUC %Extrapolation Obs", 6, - "CEND", "CEND", "Concentration at the end of the infusion", 7, # non Cdisc Term - "CLO", "CLO", "Total CL Obs", 8, - "CLST", "CLST", "Last Nonzero Conc", 9, - "CMAX", "CMAX", "Max Conc", 10, - "CMAXD", "CMAXD", "Max Conc Norm by Dose", 11, - "CSF", "CSF", "CSF to Plasma Ratio", 12, # non Cdisc Term - "LAMZ", "LAMZ", "Lambda z", 13, - "LAMZHL", "LAMZHL", "Half-Life Lambda z", 14, - "LAMZNPT", "LAMZNPT", "Number of Points for Lambda z", 15, - "R2ADJ", "R2ADJ", "R Squared Adjusted", 16, - "TCEND", "TCEND", "Time of CEND", 17, # non Cdisc Term - "TLST", "TLST", "Time of Last Nonzero Conc", 18, - "TMAX", "TMAX", "Time of CMAX", 19, - "VSSO", "VSSO", "Vol Dist Steady State Obs", 20, - "RCAMINT", "RCAMINT", "Ae", 21, - "RENALCL", "RENALCL", "CLR", 22 + 
~PPTESTCD, ~PARAMCD, ~PARAM, ~PARAMN, + "AUCALL", "AUCALL", "AUC All", 1, + "AUCIFO", "AUCIFO", "AUC Infinity Obs", 2, + "AUCIFOD", "AUCIFOD", "AUC Infinity Obs Norm by Dose", 3, + "AUCINT", "AUCINT", "AUC from T1 to T2", 4, + "AUCLST", "AUCLST", "AUC to Last Nonzero Conc", 5, + "AUCPEO", "AUCPEO", "AUC %Extrapolation Obs", 6, + "CEND", "CEND", "Concentration at the end of the infusion", 7, # non Cdisc Term + "CLO", "CLO", "Total CL Obs", 8, + "CLST", "CLST", "Last Nonzero Conc", 9, + "CMAX", "CMAX", "Max Conc", 10, + "CMAXD", "CMAXD", "Max Conc Norm by Dose", 11, + "CSF", "CSF", "CSF to Plasma Ratio", 12, # non Cdisc Term + "LAMZ", "LAMZ", "Lambda z", 13, + "LAMZHL", "LAMZHL", "Half-Life Lambda z", 14, + "LAMZNPT", "LAMZNPT", "Number of Points for Lambda z", 15, + "R2ADJ", "R2ADJ", "R Squared Adjusted", 16, + "TCEND", "TCEND", "Time of CEND", 17, # non Cdisc Term + "TLST", "TLST", "Time of Last Nonzero Conc", 18, + "TMAX", "TMAX", "Time of CMAX", 19, + "VSSO", "VSSO", "Vol Dist Steady State Obs", 20, + "RCAMINT", "RCAMINT", "Ae", 21, + "RENALCL", "RENALCL", "CLR", 22 ) -# ASSIGN AVALCAT1 -avalcat_lookup <- tibble::tribble( - ~PARAMCD, ~AVALCA1N, ~AVALCAT1, - "AUCALL", 1, "< 19", - "AUCALL", 2, ">= 19" +# Assign AVALCATx +avalcax_lookup <- exprs( + ~PARAMCD, ~condition, ~AVALCAT1, ~AVALCA1N, + "AUCALL", AVAL < 19, "<19", 1, + "AUCALL", AVAL >= 19, ">=19", 2 ) attr(param_lookup$PPTESTCD, "label") <- "Parameter Short Name" -# User defined functions ---- - -# Here are some examples of how you can create your own functions that -# operates on vectors, which can be used in `mutate`. 
-format_avalcat1n <- function(param, aval) { - case_when( - param == "AUCALL" & aval < 19 ~ 1, - param == "AUCALL" & aval >= 19 ~ 2, - TRUE ~ NA_real_ - ) -} - # Derivations ---- # Get list of ADSL vars required for derivations @@ -137,9 +125,10 @@ adpp_avisit <- adpp_aval %>% TRTA = TRT01A ) %>% ## Derive AVALCA1N and AVALCAT1 ---- - mutate(AVALCA1N = format_avalcat1n(param = PARAMCD, aval = AVAL)) %>% - derive_vars_merged(dataset_add = avalcat_lookup, by_vars = exprs(PARAMCD, AVALCA1N)) - + derive_vars_cat( + definition = avalcax_lookup, + by_vars = exprs(PARAMCD) + ) # Add all ADSL variables adpp <- adpp_avisit %>% derive_vars_merged( diff --git a/inst/templates/ad_advs.R b/inst/templates/ad_advs.R index b68ddcc399..5a6151ee13 100644 --- a/inst/templates/ad_advs.R +++ b/inst/templates/ad_advs.R @@ -29,16 +29,16 @@ vs <- convert_blanks_to_na(vs) # Assign PARAMCD, PARAM, and PARAMN param_lookup <- tibble::tribble( - ~VSTESTCD, ~PARAMCD, ~PARAM, ~PARAMN, - "SYSBP", "SYSBP", "Systolic Blood Pressure (mmHg)", 1, - "DIABP", "DIABP", "Diastolic Blood Pressure (mmHg)", 2, - "PULSE", "PULSE", "Pulse Rate (beats/min)", 3, - "WEIGHT", "WEIGHT", "Weight (kg)", 4, - "HEIGHT", "HEIGHT", "Height (cm)", 5, - "TEMP", "TEMP", "Temperature (C)", 6, - "MAP", "MAP", "Mean Arterial Pressure (mmHg)", 7, - "BMI", "BMI", "Body Mass Index(kg/m^2)", 8, - "BSA", "BSA", "Body Surface Area(m^2)", 9 + ~VSTESTCD, ~PARAMCD, ~PARAM, ~PARAMN, + "SYSBP", "SYSBP", "Systolic Blood Pressure (mmHg)", 1, + "DIABP", "DIABP", "Diastolic Blood Pressure (mmHg)", 2, + "PULSE", "PULSE", "Pulse Rate (beats/min)", 3, + "WEIGHT", "WEIGHT", "Weight (kg)", 4, + "HEIGHT", "HEIGHT", "Height (cm)", 5, + "TEMP", "TEMP", "Temperature (C)", 6, + "MAP", "MAP", "Mean Arterial Pressure (mmHg)", 7, + "BMI", "BMI", "Body Mass Index(kg/m^2)", 8, + "BSA", "BSA", "Body Surface Area(m^2)", 9 ) attr(param_lookup$VSTESTCD, "label") <- "Vital Signs Test Short Name" @@ -46,28 +46,18 @@ attr(param_lookup$VSTESTCD, "label") <- "Vital 
Signs Test Short Name" # Assign ANRLO/HI, A1LO/HI range_lookup <- tibble::tribble( ~PARAMCD, ~ANRLO, ~ANRHI, ~A1LO, ~A1HI, - "SYSBP", 90, 130, 70, 140, - "DIABP", 60, 80, 40, 90, - "PULSE", 60, 100, 40, 110, - "TEMP", 36.5, 37.5, 35, 38 + "SYSBP", 90, 130, 70, 140, + "DIABP", 60, 80, 40, 90, + "PULSE", 60, 100, 40, 110, + "TEMP", 36.5, 37.5, 35, 38 ) -# ASSIGN AVALCAT1 -avalcat_lookup <- tibble::tribble( - ~PARAMCD, ~AVALCA1N, ~AVALCAT1, - "HEIGHT", 1, ">100 cm", - "HEIGHT", 2, "<= 100 cm" -) - -# User defined functions ---- -# Here are some examples of how you can create your own functions that -# operates on vectors, which can be used in `mutate()`. -format_avalcat1n <- function(param, aval) { - case_when( - param == "HEIGHT" & aval > 140 ~ 1, - param == "HEIGHT" & aval <= 140 ~ 2 - ) -} +# Assign AVALCATx +avalcax_lookup <- exprs( + ~PARAMCD, ~condition, ~AVALCAT1, ~AVALCA1N, + "HEIGHT", AVAL > 100, ">100 cm", 1, + "HEIGHT", AVAL <= 100, "<=100 cm", 2 +) # Derivations ---- @@ -283,13 +273,16 @@ advs <- advs %>% order = exprs(PARAMCD, ADT, AVISITN, VISITNUM, ATPTN, DTYPE), check_type = "error" ) %>% - # Derive AVALCA1N and AVALCAT1 - mutate(AVALCA1N = format_avalcat1n(param = PARAMCD, aval = AVAL)) %>% - derive_vars_merged(dataset_add = avalcat_lookup, by_vars = exprs(PARAMCD, AVALCA1N)) %>% + # Define condition and categories using derive_vars_cat + derive_vars_cat( + definition = avalcax_lookup, + by_vars = exprs(PARAMCD) + ) %>% # Derive PARAM and PARAMN derive_vars_merged(dataset_add = select(param_lookup, -VSTESTCD), by_vars = exprs(PARAMCD)) + # Add all ADSL variables advs <- advs %>% derive_vars_merged( diff --git a/man/derive_var_extreme_flag.Rd b/man/derive_var_extreme_flag.Rd index 7deac86681..933f5f0afa 100644 --- a/man/derive_var_extreme_flag.Rd +++ b/man/derive_var_extreme_flag.Rd @@ -268,6 +268,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_merged_summary}()}, 
\code{\link{derive_var_obs_number}()}, \code{\link{derive_var_relative_flag}()}, +\code{\link{derive_vars_cat}()}, \code{\link{derive_vars_computed}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged}()}, diff --git a/man/derive_var_joined_exist_flag.Rd b/man/derive_var_joined_exist_flag.Rd index 1a3d9da157..9334f60eb5 100644 --- a/man/derive_var_joined_exist_flag.Rd +++ b/man/derive_var_joined_exist_flag.Rd @@ -460,6 +460,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_merged_summary}()}, \code{\link{derive_var_obs_number}()}, \code{\link{derive_var_relative_flag}()}, +\code{\link{derive_vars_cat}()}, \code{\link{derive_vars_computed}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged}()}, diff --git a/man/derive_var_merged_ef_msrc.Rd b/man/derive_var_merged_ef_msrc.Rd index 538921116d..551c7f7c22 100644 --- a/man/derive_var_merged_ef_msrc.Rd +++ b/man/derive_var_merged_ef_msrc.Rd @@ -194,6 +194,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_merged_summary}()}, \code{\link{derive_var_obs_number}()}, \code{\link{derive_var_relative_flag}()}, +\code{\link{derive_vars_cat}()}, \code{\link{derive_vars_computed}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged}()}, diff --git a/man/derive_var_merged_exist_flag.Rd b/man/derive_var_merged_exist_flag.Rd index 70a3071677..ffe8701d4e 100644 --- a/man/derive_var_merged_exist_flag.Rd +++ b/man/derive_var_merged_exist_flag.Rd @@ -152,6 +152,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_merged_summary}()}, \code{\link{derive_var_obs_number}()}, \code{\link{derive_var_relative_flag}()}, +\code{\link{derive_vars_cat}()}, \code{\link{derive_vars_computed}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged}()}, diff --git a/man/derive_var_merged_summary.Rd 
b/man/derive_var_merged_summary.Rd index e5ce37aa4b..532fd263c0 100644 --- a/man/derive_var_merged_summary.Rd +++ b/man/derive_var_merged_summary.Rd @@ -160,6 +160,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_merged_exist_flag}()}, \code{\link{derive_var_obs_number}()}, \code{\link{derive_var_relative_flag}()}, +\code{\link{derive_vars_cat}()}, \code{\link{derive_vars_computed}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged}()}, diff --git a/man/derive_var_obs_number.Rd b/man/derive_var_obs_number.Rd index b727cc7f7d..8e4f3a544a 100644 --- a/man/derive_var_obs_number.Rd +++ b/man/derive_var_obs_number.Rd @@ -103,6 +103,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_merged_exist_flag}()}, \code{\link{derive_var_merged_summary}()}, \code{\link{derive_var_relative_flag}()}, +\code{\link{derive_vars_cat}()}, \code{\link{derive_vars_computed}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged}()}, diff --git a/man/derive_var_relative_flag.Rd b/man/derive_var_relative_flag.Rd index 4fc22c0565..6f2b709882 100644 --- a/man/derive_var_relative_flag.Rd +++ b/man/derive_var_relative_flag.Rd @@ -179,6 +179,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_merged_exist_flag}()}, \code{\link{derive_var_merged_summary}()}, \code{\link{derive_var_obs_number}()}, +\code{\link{derive_vars_cat}()}, \code{\link{derive_vars_computed}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged}()}, diff --git a/man/derive_vars_cat.Rd b/man/derive_vars_cat.Rd new file mode 100644 index 0000000000..808c333929 --- /dev/null +++ b/man/derive_vars_cat.Rd @@ -0,0 +1,185 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/derive_vars_cat.R +\name{derive_vars_cat} +\alias{derive_vars_cat} +\title{Derive Categorization Variables 
Like \code{AVALCATy} and \code{AVALCAyN}} +\usage{ +derive_vars_cat(dataset, definition, by_vars = NULL) +} +\arguments{ +\item{dataset}{Input dataset + +The variables specified by the \code{by_vars} and \code{definition} arguments are expected to be in the dataset.} + +\item{definition}{List of expressions created by \code{exprs()}. +Must be in rectangular format and specified using the same syntax as when creating +a \code{tibble} using the \code{tribble()} function. +The \code{definition} object will be converted to a \code{tibble} using \code{tribble()} inside this function. + +Must contain: +\itemize{ +\item the column \code{condition} which will be converted to a logical expression and +will be used on the \code{dataset} input. +\item at least one additional column with the new column name and +the category value(s) used by the logical expression. +\item the column specified in \code{by_vars} (if \code{by_vars} is specified) +} + +e.g. if \code{by_vars} is not specified: + +\if{html}{\out{
}}\preformatted{exprs(~condition, ~AVALCAT1, ~AVALCA1N, + AVAL >= 140, ">=140 cm", 1, + AVAL < 140, "<140 cm", 2) +}\if{html}{\out{
}} + +e.g. if \code{by_vars} is specified as \code{exprs(VSTEST)}: + +\if{html}{\out{
}}\preformatted{exprs(~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Height", AVAL >= 140, ">=140 cm", 1, + "Height", AVAL < 140, "<140 cm", 2) +}\if{html}{\out{
}}} + +\item{by_vars}{list of expressions with one element. \code{NULL} by default. +Allows for specifying by groups, e.g. \code{exprs(PARAMCD)}. +Variable must be present in both \code{dataset} and \code{definition}. +The conditions in \code{definition} are applied only to those records that match \code{by_vars}. +The categorization variables are set to \code{NA} for records +not matching any of the by groups in \code{definition}.} +} +\value{ +The input dataset with the new variables defined in \code{definition} added +} +\description{ +Derive Categorization Variables Like \code{AVALCATy} and \code{AVALCAyN} +} +\details{ +If conditions are overlapping, the row order of \code{definition} must be carefully considered. +The \strong{first} match will determine the category. +i.e. if + +\code{AVAL = 155} + +and the \code{definition} is: + +\if{html}{\out{
}}\preformatted{definition <- exprs( + ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Height", AVAL > 170, ">170 cm", 1, + "Height", AVAL <= 170, "<=170 cm", 2, + "Height", AVAL <= 160, "<=160 cm", 3 +) +}\if{html}{\out{
}} + +then \code{AVALCAT1} will be \code{"<=170 cm"}, as this is the first match for \code{AVAL}. +If you specify: + +\if{html}{\out{
}}\preformatted{definition <- exprs( + ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Height", AVAL <= 160, "<=160 cm", 3, + "Height", AVAL <= 170, "<=170 cm", 2, + "Height", AVAL > 170, ">170 cm", 1 +) +}\if{html}{\out{
}} + +Then \code{AVAL <= 160} will lead to \code{AVALCAT1 == "<=160 cm"}, +\code{AVAL} in-between \code{160} and \code{170} will lead to \code{AVALCAT1 == "<=170 cm"}, +and \code{AVAL > 170} will lead to \code{AVALCAT1 == ">170 cm"}. + +However, we suggest to be more explicit when defining the \code{condition}, to avoid overlap. +In this case, the middle condition should be: +\code{AVAL <= 170 & AVAL > 160} +} +\examples{ +library(dplyr) +library(tibble) + +advs <- tibble::tribble( + ~USUBJID, ~VSTEST, ~AVAL, + "01-701-1015", "Height", 147.32, + "01-701-1015", "Weight", 53.98, + "01-701-1023", "Height", 162.56, + "01-701-1023", "Weight", NA, + "01-701-1028", "Height", NA, + "01-701-1028", "Weight", NA, + "01-701-1033", "Height", 175.26, + "01-701-1033", "Weight", 88.45 +) + +definition <- exprs( + ~condition, ~AVALCAT1, ~AVALCA1N, ~NEWCOL, + VSTEST == "Height" & AVAL > 160, ">160 cm", 1, "extra1", + VSTEST == "Height" & AVAL <= 160, "<=160 cm", 2, "extra2" +) +derive_vars_cat( + dataset = advs, + definition = definition +) + +# Using by_vars: +definition2 <- exprs( + ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Height", AVAL > 160, ">160 cm", 1, + "Height", AVAL <= 160, "<=160 cm", 2, + "Weight", AVAL > 70, ">70 kg", 1, + "Weight", AVAL <= 70, "<=70 kg", 2 +) + +derive_vars_cat( + dataset = advs, + definition = definition2, + by_vars = exprs(VSTEST) +) + +# With three conditions: +definition3 <- exprs( + ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Height", AVAL > 170, ">170 cm", 1, + "Height", AVAL <= 170 & AVAL > 160, "<=170 cm", 2, + "Height", AVAL <= 160, "<=160 cm", 3 +) + +derive_vars_cat( + dataset = advs, + definition = definition3, + by_vars = exprs(VSTEST) +) + +# Let's derive both the MCRITyML and the MCRITyMN variables +adlb <- tibble::tribble( + ~USUBJID, ~PARAM, ~AVAL, ~AVALU, ~ANRHI, + "01-701-1015", "ALT", 150, "U/L", 40, + "01-701-1023", "ALT", 70, "U/L", 40, + "01-701-1036", "ALT", 130, "U/L", 40, + "01-701-1048", "ALT", 30, "U/L", 40, + 
"01-701-1015", "AST", 50, "U/L", 35 +) + +definition_mcrit <- exprs( + ~PARAM, ~condition, ~MCRIT1ML, ~MCRIT1MN, + "ALT", AVAL <= ANRHI, "<=ANRHI", 1, + "ALT", ANRHI < AVAL & AVAL <= 3 * ANRHI, ">1-3*ANRHI", 2, + "ALT", 3 * ANRHI < AVAL, ">3*ANRHI", 3 +) + +adlb \%>\% + derive_vars_cat( + definition = definition_mcrit, + by_vars = exprs(PARAM) + ) +} +\seealso{ +General Derivation Functions for all ADaMs that returns variable appended to dataset: +\code{\link{derive_var_extreme_flag}()}, +\code{\link{derive_var_joined_exist_flag}()}, +\code{\link{derive_var_merged_ef_msrc}()}, +\code{\link{derive_var_merged_exist_flag}()}, +\code{\link{derive_var_merged_summary}()}, +\code{\link{derive_var_obs_number}()}, +\code{\link{derive_var_relative_flag}()}, +\code{\link{derive_vars_computed}()}, +\code{\link{derive_vars_joined}()}, +\code{\link{derive_vars_merged}()}, +\code{\link{derive_vars_merged_lookup}()}, +\code{\link{derive_vars_transposed}()} +} +\concept{der_gen} +\keyword{der_gen} diff --git a/man/derive_vars_computed.Rd b/man/derive_vars_computed.Rd index 205f804d78..7f7542bdd5 100644 --- a/man/derive_vars_computed.Rd +++ b/man/derive_vars_computed.Rd @@ -173,6 +173,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_merged_summary}()}, \code{\link{derive_var_obs_number}()}, \code{\link{derive_var_relative_flag}()}, +\code{\link{derive_vars_cat}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged}()}, \code{\link{derive_vars_merged_lookup}()}, diff --git a/man/derive_vars_joined.Rd b/man/derive_vars_joined.Rd index 2730270870..6c7913918e 100644 --- a/man/derive_vars_joined.Rd +++ b/man/derive_vars_joined.Rd @@ -504,6 +504,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_merged_summary}()}, \code{\link{derive_var_obs_number}()}, \code{\link{derive_var_relative_flag}()}, +\code{\link{derive_vars_cat}()}, 
\code{\link{derive_vars_computed}()}, \code{\link{derive_vars_merged}()}, \code{\link{derive_vars_merged_lookup}()}, diff --git a/man/derive_vars_merged.Rd b/man/derive_vars_merged.Rd index 7856510b26..700612a4fc 100644 --- a/man/derive_vars_merged.Rd +++ b/man/derive_vars_merged.Rd @@ -333,6 +333,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_merged_summary}()}, \code{\link{derive_var_obs_number}()}, \code{\link{derive_var_relative_flag}()}, +\code{\link{derive_vars_cat}()}, \code{\link{derive_vars_computed}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged_lookup}()}, diff --git a/man/derive_vars_merged_lookup.Rd b/man/derive_vars_merged_lookup.Rd index 3494651149..6f057576b5 100644 --- a/man/derive_vars_merged_lookup.Rd +++ b/man/derive_vars_merged_lookup.Rd @@ -190,6 +190,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_merged_summary}()}, \code{\link{derive_var_obs_number}()}, \code{\link{derive_var_relative_flag}()}, +\code{\link{derive_vars_cat}()}, \code{\link{derive_vars_computed}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged}()}, diff --git a/man/derive_vars_transposed.Rd b/man/derive_vars_transposed.Rd index 53ebc55b92..4292ef297b 100644 --- a/man/derive_vars_transposed.Rd +++ b/man/derive_vars_transposed.Rd @@ -118,6 +118,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_merged_summary}()}, \code{\link{derive_var_obs_number}()}, \code{\link{derive_var_relative_flag}()}, +\code{\link{derive_vars_cat}()}, \code{\link{derive_vars_computed}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged}()}, diff --git a/man/extend_condition.Rd b/man/extend_condition.Rd new file mode 100644 index 0000000000..da3ceb3773 --- /dev/null +++ b/man/extend_condition.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% 
Please edit documentation in R/derive_vars_cat.R +\name{extend_condition} +\alias{extend_condition} +\title{Extend a condition string by adding a new condition based on a variable and its value} +\usage{ +extend_condition(cond, var, is) +} +\arguments{ +\item{cond}{A character string representing an existing condition.} + +\item{var}{A character string representing the name of the variable to check.} + +\item{is}{A character string representing the value the variable should be equal to.} +} +\value{ +A character string representing the extended condition. +} +\description{ +This internal helper function extends a condition string by appending a new condition +that checks if a variable equals a specific value. +} +\examples{ +# Extend an existing condition to include a check for 'AGE == "30"' +admiral:::extend_condition("SEX == 'M'", "AGE", "30") +} +\keyword{internal} diff --git a/tests/testthat/_snaps/derive_vars_cat.md b/tests/testthat/_snaps/derive_vars_cat.md new file mode 100644 index 0000000000..68282aeaed --- /dev/null +++ b/tests/testthat/_snaps/derive_vars_cat.md @@ -0,0 +1,13 @@ +# derive_vars_cat Test 3: Forgot to specify by_vars + + Column(s) in `definition` already exist in `dataset`. Did you forget to specify `by_vars`, or are you rerunning your code? + +# derive_vars_cat Test 13: definition has wrong shape + + Failed to convert `definition` to . `definition` should be specified similarly to how you would specify a using the `tibble::tribble()` function so it can be converted to using `tibble::tribble()`. + Data must be rectangular. + +# derive_vars_cat Test 14: two by_vars variables + + `by_vars` must contain just one variable, e.g. 
`exprs(PARAMCD)` + diff --git a/tests/testthat/test-derive_vars_cat.R b/tests/testthat/test-derive_vars_cat.R new file mode 100644 index 0000000000..d3cb6e6cdd --- /dev/null +++ b/tests/testthat/test-derive_vars_cat.R @@ -0,0 +1,348 @@ +expected_result <- tibble::tribble( + ~USUBJID, ~VSTEST, ~AVAL, ~AVALCAT1, ~AVALCA1N, + "01-701-1015", "Height", 147.32, "<160", 2, + "01-701-1023", "Height", 162.56, ">=160", 1, + "01-701-1028", "Height", 177.8, ">=160", 1, + "01-701-1033", "Height", 175.26, ">=160", 1, + "01-701-1034", "Height", NA, NA, NA, + "01-701-1047", "Height", NA, NA, NA, + "01-701-1097", "Height", 168.91, ">=160", 1, + "01-701-1111", "Height", 158.24, "<160", 2, + "01-701-1115", "Height", 181.61, ">=160", 1, + "01-701-1118", "Height", 180.34, ">=160", 1, + "01-701-1015", "Weight", 53.98, NA, NA, + "01-701-1023", "Weight", 78.47, NA, NA, + "01-701-1028", "Weight", 98.88, NA, NA, + "01-701-1033", "Weight", 88.45, NA, NA, + "01-701-1034", "Weight", NA, NA, NA, + "01-701-1047", "Weight", NA, NA, NA, + "01-701-1097", "Weight", 78.02, NA, NA, + "01-701-1111", "Weight", 60.33, NA, NA, + "01-701-1115", "Weight", 78.7, NA, NA, + "01-701-1118", "Weight", 71.67, NA, NA +) + +advs <- expected_result %>% select( + USUBJID, VSTEST, AVAL +) +## Test 1: Basic functionality without by_vars ---- +test_that("derive_vars_cat Test 1: Basic functionality without by_vars", { + # Define the condition and categories + definition <- exprs( + ~condition, ~AVALCAT1, ~AVALCA1N, + VSTEST == "Height" & AVAL >= 160, ">=160", 1, + VSTEST == "Height" & AVAL < 160, "<160", 2 + ) + + expect_dfs_equal( + base = + derive_vars_cat( + advs, + definition + ), + compare = expected_result, + keys = c("USUBJID", "VSTEST") + ) +}) + +## Test 2: Basic functionality with by_vars ---- +test_that("derive_vars_cat Test 2: Basic functionality with by_vars", { + # Define the condition and categories + definition <- exprs( + ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Height", AVAL >= 160, ">=160", 1, + 
"Height", AVAL < 160, "<160", 2 + ) + + + + expect_dfs_equal( + base = + derive_vars_cat( + advs, + definition, + by_vars = exprs(VSTEST) + ), + compare = expected_result, + keys = c("USUBJID", "VSTEST") + ) +}) + +## Test 3: Forgot to specify by_vars ---- +test_that("derive_vars_cat Test 3: Forgot to specify by_vars", { + definition <- exprs( + ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Height", AVAL >= 160, ">=160", 1, + "Height", AVAL < 160, "<160", 2 + ) + + expect_snapshot_warning(derive_vars_cat(advs, definition)) +}) + +## Test 4: Error when dataset is not a dataframe ---- +test_that("derive_vars_cat Test 4: Error when dataset is not a dataframe", { + # Define the condition and categories + definition <- exprs( + ~condition, ~AVALCAT1, ~AVALCA1N, + VSTEST == "Height" & AVAL >= 160, ">=160", 1, + VSTEST == "Height" & AVAL < 160, "<160", 2 + ) + + # Snapshot the error message + expect_error( + derive_vars_cat(list(1, 2, 3), definition), + class = "assert_data_frame" + ) +}) + +## Test 5: Error when definition is not an exprs object ---- +test_that("derive_vars_cat Test 5: Error when definition is not an exprs object", { + definition <- tribble( + ~condition, ~AVALCAT1, ~AVALCA1N, + "AVAL >= 160", ">=160", 1, + "AVAL < 160", "<160", 2 + ) + # Snapshot the error message + expect_error( + derive_vars_cat(advs, definition), + class = "assert_expr_list" + ) +}) + +## Test 6: Error when required columns are missing from dataset ---- +test_that("derive_vars_cat Test 6: Error when required columns are missing from dataset", { + # Define the condition and categories (without VSTEST in the dataset) + definition <- exprs( + ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Height", AVAL >= 160, ">=160", 1, + "Height", AVAL < 160, "<160", 2 + ) + + # Remove VSTEST column from dataset + advs_missing_col <- advs %>% select(-VSTEST) + + # Snapshot the error message + expect_error( + derive_vars_cat(advs_missing_col, definition, by_vars = exprs(VSTEST)), + class = 
"assert_data_frame" + ) +}) + +## Test 7: Correct behavior when no conditions are met ---- +test_that("derive_vars_cat Test 7: Correct behavior when no conditions are met", { + # Define conditions that do not match any rows + definition <- exprs( + ~condition, ~AVALCAT1, ~AVALCA1N, + VSTEST == "Height" & AVAL < 0, "<0", 1 + ) + + expected_result <- tibble::tribble( + ~USUBJID, ~VSTEST, ~AVAL, ~AVALCAT1, ~AVALCA1N, + "01-701-1015", "Height", 147.32, NA_character_, NA_real_, + "01-701-1023", "Height", 162.56, NA_character_, NA_real_, + "01-701-1028", "Height", 177.8, NA_character_, NA_real_, + "01-701-1033", "Height", 175.26, NA_character_, NA_real_, + "01-701-1034", "Height", NA, NA_character_, NA_real_, + "01-701-1047", "Height", NA, NA_character_, NA_real_, + "01-701-1097", "Height", 168.91, NA_character_, NA_real_, + "01-701-1111", "Height", 158.24, NA_character_, NA_real_, + "01-701-1115", "Height", 181.61, NA_character_, NA_real_, + "01-701-1118", "Height", 180.34, NA_character_, NA_real_, + "01-701-1015", "Weight", 53.98, NA_character_, NA_real_, + "01-701-1023", "Weight", 78.47, NA_character_, NA_real_, + "01-701-1028", "Weight", 98.88, NA_character_, NA_real_, + "01-701-1033", "Weight", 88.45, NA_character_, NA_real_, + "01-701-1034", "Weight", NA, NA_character_, NA_real_, + "01-701-1047", "Weight", NA, NA_character_, NA_real_, + "01-701-1097", "Weight", 78.02, NA_character_, NA_real_, + "01-701-1111", "Weight", 60.33, NA_character_, NA_real_, + "01-701-1115", "Weight", 78.7, NA_character_, NA_real_, + "01-701-1118", "Weight", 71.67, NA_character_, NA_real_ + ) + + expect_dfs_equal( + base = derive_vars_cat(advs, definition), + compare = expected_result, + keys = c("USUBJID", "VSTEST") + ) +}) + +## Test 8: Overlapping conditions handled correctly ---- +test_that("derive_vars_cat Test 8: Overlapping conditions handled correctly", { + # Define overlapping conditions + definition <- exprs( + ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Height", AVAL < 160, 
"<160", 3, + "Height", AVAL < 170, "<170", 2, + "Height", AVAL >= 170, ">=170", 1 + ) + + expected_result <- tibble::tribble( + ~USUBJID, ~VSTEST, ~AVAL, ~AVALCAT1, ~AVALCA1N, + "01-701-1015", "Height", 147.32, "<160", 3, + "01-701-1023", "Height", 162.56, "<170", 2, + "01-701-1028", "Height", 177.8, ">=170", 1, + "01-701-1033", "Height", 175.26, ">=170", 1, + "01-701-1034", "Height", NA, NA, NA, + "01-701-1047", "Height", NA, NA, NA, + "01-701-1097", "Height", 168.91, "<170", 2, + "01-701-1111", "Height", 158.24, "<160", 3, + "01-701-1115", "Height", 181.61, ">=170", 1, + "01-701-1118", "Height", 180.34, ">=170", 1, + "01-701-1015", "Weight", 53.98, NA, NA, + "01-701-1023", "Weight", 78.47, NA, NA, + "01-701-1028", "Weight", 98.88, NA, NA, + "01-701-1033", "Weight", 88.45, NA, NA, + "01-701-1034", "Weight", NA, NA, NA, + "01-701-1047", "Weight", NA, NA, NA, + "01-701-1097", "Weight", 78.02, NA, NA, + "01-701-1111", "Weight", 60.33, NA, NA, + "01-701-1115", "Weight", 78.7, NA, NA, + "01-701-1118", "Weight", 71.67, NA, NA + ) + + expect_dfs_equal( + base = derive_vars_cat(advs, definition, by_vars = exprs(VSTEST)), + compare = expected_result, + keys = c("USUBJID", "VSTEST") + ) +}) + + +## Test 9: Error when condition is missing from `definition` ---- +test_that("derive_vars_cat Test 9: Error when condition is missing from `definition`", { + # Define the condition but omit the 'condition' column from the definition + definition <- exprs( + ~AVALCAT1, ~AVALCA1N, + ">=160", 1, + "<160", 2 + ) + + # Snapshot the error message + expect_error( + derive_vars_cat(advs, definition), + class = "assert_data_frame" + ) +}) + +## Test 10: Conditions for multiple VSTESTs (Height and Weight) ---- +test_that("derive_vars_cat Test 10: Conditions for multiple VSTESTs (Height and Weight)", { + # Define conditions for two different VSTEST values: Height and BILI + definition <- exprs( + ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Height", AVAL >= 160, "Height >= 160", 1, + 
"Height", AVAL < 160, "Height < 160", 2, + "Weight", AVAL >= 66.68, "Weight >= 66.68", 1, + "Weight", AVAL < 66.68, "Weight < 66.68", 2 + ) + + expected_result <- tibble::tribble( + ~USUBJID, ~VSTEST, ~AVAL, ~AVALCAT1, ~AVALCA1N, + "01-701-1015", "Height", 147.32, "Height < 160", 2, + "01-701-1023", "Height", 162.56, "Height >= 160", 1, + "01-701-1028", "Height", 177.8, "Height >= 160", 1, + "01-701-1033", "Height", 175.26, "Height >= 160", 1, + "01-701-1034", "Height", NA, NA, NA, + "01-701-1047", "Height", NA, NA, NA, + "01-701-1097", "Height", 168.91, "Height >= 160", 1, + "01-701-1111", "Height", 158.24, "Height < 160", 2, + "01-701-1115", "Height", 181.61, "Height >= 160", 1, + "01-701-1118", "Height", 180.34, "Height >= 160", 1, + "01-701-1015", "Weight", 53.98, "Weight < 66.68", 2, + "01-701-1023", "Weight", 78.47, "Weight >= 66.68", 1, + "01-701-1028", "Weight", 98.88, "Weight >= 66.68", 1, + "01-701-1033", "Weight", 88.45, "Weight >= 66.68", 1, + "01-701-1034", "Weight", NA, NA, NA, + "01-701-1047", "Weight", NA, NA, NA, + "01-701-1097", "Weight", 78.02, "Weight >= 66.68", 1, + "01-701-1111", "Weight", 60.33, "Weight < 66.68", 2, + "01-701-1115", "Weight", 78.7, "Weight >= 66.68", 1, + "01-701-1118", "Weight", 71.67, "Weight >= 66.68", 1 + ) + expect_dfs_equal( + base = derive_vars_cat(advs, definition, by_vars = exprs(VSTEST)), + compare = expected_result, + keys = c("USUBJID", "VSTEST") + ) +}) + +## Test 11: Adding an extra variable (flag) to the dataset ---- +test_that("derive_vars_cat Test 11: Adding an extra variable (flag) to the dataset", { + # Define conditions and add a third variable (flag) that is TRUE or FALSE + definition <- exprs( + ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, ~extra_var, + "Height", AVAL >= 160, ">=160", 1, TRUE, + "Height", AVAL < 160, "<160", 2, FALSE + ) + + expected_result <- tibble::tribble( + ~USUBJID, ~VSTEST, ~AVAL, ~AVALCAT1, ~AVALCA1N, ~extra_var, + "01-701-1015", "Height", 147.32, "<160", 2, FALSE, + "01-701-1023", 
"Height", 162.56, ">=160", 1, TRUE, + "01-701-1028", "Height", 177.8, ">=160", 1, TRUE, + "01-701-1033", "Height", 175.26, ">=160", 1, TRUE, + "01-701-1034", "Height", NA, NA, NA, NA, + "01-701-1047", "Height", NA, NA, NA, NA, + "01-701-1097", "Height", 168.91, ">=160", 1, TRUE, + "01-701-1111", "Height", 158.24, "<160", 2, FALSE, + "01-701-1115", "Height", 181.61, ">=160", 1, TRUE, + "01-701-1118", "Height", 180.34, ">=160", 1, TRUE, + "01-701-1015", "Weight", 53.98, NA, NA, NA, + "01-701-1023", "Weight", 78.47, NA, NA, NA, + "01-701-1028", "Weight", 98.88, NA, NA, NA, + "01-701-1033", "Weight", 88.45, NA, NA, NA, + "01-701-1034", "Weight", NA, NA, NA, NA, + "01-701-1047", "Weight", NA, NA, NA, NA, + "01-701-1097", "Weight", 78.02, NA, NA, NA, + "01-701-1111", "Weight", 60.33, NA, NA, NA, + "01-701-1115", "Weight", 78.7, NA, NA, NA, + "01-701-1118", "Weight", 71.67, NA, NA, NA + ) + expect_dfs_equal( + base = derive_vars_cat(advs, definition, by_vars = exprs(VSTEST)), + compare = expected_result, + keys = c("USUBJID", "VSTEST") + ) +}) + +## Test 12: Wrong input for by_vars ---- +test_that("derive_vars_cat Test 12: Wrong input for by_vars", { + # Define conditions + definition <- exprs( + ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Height", AVAL >= 160, ">=160", 1, + "Height", AVAL < 160, "<160", 2 + ) + + expect_error(derive_vars_cat(advs, definition, by_vars = exprs(VSTEST == "Height")), + class = "assert_vars" + ) +}) + +## Test 13: definition has wrong shape ---- +test_that("derive_vars_cat Test 13: definition has wrong shape", { + # Define conditions + definition_wrong_shape <- exprs( + ~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Height", AVAL >= 160, ">=160", 1, + "Height", AVAL < 160, "<160" + ) + + expect_snapshot_error(derive_vars_cat(advs, definition_wrong_shape, by_vars = exprs(VSTEST))) +}) + +## Test 14: two by_vars variables ---- +test_that("derive_vars_cat Test 14: two by_vars variables", { + # Define conditions + definition <- exprs( + ~VISIT, 
~VSTEST, ~condition, ~AVALCAT1, ~AVALCA1N, + "Week 24", "Height", AVAL >= 160, ">=160", 1, + "Week 24", "Height", AVAL < 160, "<160", 2, + ) + + advs_visit <- advs %>% mutate( + VISIT = "Week 24" + ) + + expect_snapshot_error(derive_vars_cat(advs_visit, definition, by_vars = exprs(VSTEST, VISIT))) +}) diff --git a/vignettes/adsl.Rmd b/vignettes/adsl.Rmd index 0fd9648968..f620607768 100644 --- a/vignettes/adsl.Rmd +++ b/vignettes/adsl.Rmd @@ -677,16 +677,45 @@ dataset_vignette( Numeric and categorical variables (`AGE`, `RACE`, `COUNTRY`, etc.) may need to be grouped to perform the required analysis. -`{admiral}` does not **currently** have functionality to assist with all required groupings. So, the user will often need to create his/her own function to meet his/her study requirement. +`{admiral}` provides the `derive_vars_cat()` function to create such groups. +This function is especially useful if more than one variable needs to be created for +each condition, e.g., `AGEGR1` and `AGEGR1N`. -For example, if +Additionally, one needs to be careful when considering the order of the conditions in the lookup table. +The category is assigned based on the first match. +That means *catch-all* conditions must come after specific conditions, e.g. `!is.na(AGE)` +must come after `AGE < 18`. 
-- `AGEGR1` is required to categorize `AGE` into `<18`, `18-64` and `>64`, or -- `REGION1` is required to categorize `COUNTRY` in `North America`, `Rest of the World`, +```{r eval=TRUE} +# create lookup tables +agegr1_lookup <- exprs( + ~condition, ~AGEGR1, + AGE < 18, "<18", + between(AGE, 18, 64), "18-64", + AGE > 64, ">64", + is.na(AGE), "Missing" +) -the user defined functions would look like the following: +region1_lookup <- exprs( + ~condition, ~REGION1, + COUNTRY %in% c("CAN", "USA"), "North America", + !is.na(COUNTRY), "Rest of the World", + is.na(COUNTRY), "Missing" +) +``` ```{r eval=TRUE} +adsl <- adsl %>% + derive_vars_cat( + definition = agegr1_lookup + ) %>% + derive_vars_cat( + definition = region1_lookup + ) +``` +Alternatively, you can also solve this task with custom functions: +```{r} +#| eval: false format_agegr1 <- function(var_input) { case_when( var_input < 18 ~ "<18", @@ -695,7 +724,6 @@ format_agegr1 <- function(var_input) { TRUE ~ "Missing" ) } - format_region1 <- function(var_input) { case_when( var_input %in% c("CAN", "USA") ~ "North America", @@ -703,18 +731,16 @@ format_region1 <- function(var_input) { TRUE ~ "Missing" ) } -``` - -These functions are then used in a `mutate()` statement to derive the required grouping variables: -```{r eval=TRUE} -adsl <- adsl %>% +adsl %>% mutate( AGEGR1 = format_agegr1(AGE), REGION1 = format_region1(COUNTRY) ) ``` + + ```{r, eval=TRUE, echo=FALSE} dataset_vignette( adsl, diff --git a/vignettes/bds_exposure.Rmd b/vignettes/bds_exposure.Rmd index 8e9314741e..673f08e768 100644 --- a/vignettes/bds_exposure.Rmd +++ b/vignettes/bds_exposure.Rmd @@ -41,7 +41,7 @@ otherwise specified.* * [Create 1:1 Mapping Records](#onetoone) * [Create Summary Records](#summaryrec) * [Assign `PARAMCD`, `PARAMN`, etc. 
from Reference Tables](#paramcd) -* [Derive Categorization Variables (`AVALCATx`)](#cat) +* [Derive Categorization Variables (`AVALCATy`)](#cat) * [Assign `ASEQ`](#aseq) * [Add ADSL variables `ASEQ`](#adsl_vars) * [Add Labels and Attributes](#attributes) @@ -514,18 +514,18 @@ TNDOSINT | Overall dose intensity (%) | 12 ```{r eval=TRUE, include=FALSE, echo=FALSE} param_lookup <- tribble( - ~PARAMCD, ~PARAM, ~PARAMN, - "DURD", "Study drug duration during constant dosing interval (days)", 1, - "DOSE", "Dose administered during constant dosing interval (mg)", 2, - "PLDOSE", "Planned dose during constant dosing interval (mg)", 3, - "ADJ", "Dose adjusted during constant dosing interval", 4, - "ADJAE", "Dose adjusted due to AE during constant dosing interval", 5, - "TDURD", "Overall duration (days)", 6, - "TDOSE", "Total dose administered (mg)", 7, - "TPDOSE", "Total planned dose (mg)", 9, - "TADJ", "Dose adjusted during study", 10, - "TADJAE", "Dose adjusted during study due to AE", 11, - "TNDOSINT", "Overall dose intensity (%)", 12 + ~PARAMCD, ~PARAM, ~PARAMN, + "DURD", "Study drug duration during constant dosing interval (days)", 1, + "DOSE", "Dose administered during constant dosing interval (mg)", 2, + "PLDOSE", "Planned dose during constant dosing interval (mg)", 3, + "ADJ", "Dose adjusted during constant dosing interval", 4, + "ADJAE", "Dose adjusted due to AE during constant dosing interval", 5, + "TDURD", "Overall duration (days)", 6, + "TDOSE", "Total dose administered (mg)", 7, + "TPDOSE", "Total planned dose (mg)", 9, + "TADJ", "Dose adjusted during study", 10, + "TADJAE", "Dose adjusted during study due to AE", 11, + "TNDOSINT", "Overall dose intensity (%)", 12 ) ``` @@ -543,23 +543,26 @@ count(adex, PARAMCD, PARAM, PARAMN) Please note, this is an example only and additional columns may be needed for the join depending on your lookup/metadata table. 
-## Derive Categorization Variables (`AVALCATx`) {#cat} +## Derive Categorization Variables (`AVALCATy`) {#cat} -`{admiral}` does not currently have a generic function to aid in assigning `AVALCATx`/ -`AVALCAxN` values. Below is a simple example of how these values may be -assigned using the `dplyr::mutate` function: +We can use the `derive_vars_cat()` function to derive the categorization variables. ```{r eval=TRUE, echo=TRUE} +avalcax_lookup <- exprs( + ~PARAMCD, ~condition, ~AVALCAT1, + "TDURD", AVAL >= 90, ">= 90 days", + "TDURD", AVAL >= 30 & AVAL < 90, ">= 30 and < 90 days", + "TDURD", AVAL < 30, "< 30 days", + "TDOSE", AVAL < 1000, "< 1000 mg", + "TDOSE", AVAL >= 1000, ">= 1000 mg", + "TPDOSE", AVAL < 1000, "< 1000 mg", + "TPDOSE", AVAL >= 1000, ">= 1000 mg" +) + adex <- adex %>% - mutate( - AVALCAT1 = case_when( - PARAMCD %in% c("TDURD") & AVAL < 30 ~ "< 30 days", - PARAMCD %in% c("TDURD") & AVAL >= 30 & AVAL < 90 ~ ">= 30 and < 90 days", - PARAMCD %in% c("TDURD") & AVAL >= 90 ~ ">=90 days", - PARAMCD %in% c("TDOSE", "TPDOSE") & AVAL < 1000 ~ "< 1000 mg", - PARAMCD %in% c("TDOSE", "TPDOSE") & AVAL >= 1000 ~ ">= 1000 mg", - TRUE ~ NA_character_ - ) + derive_vars_cat( + definition = avalcax_lookup, + by_vars = exprs(PARAMCD) ) ``` diff --git a/vignettes/bds_finding.Rmd b/vignettes/bds_finding.Rmd index 59c2868475..ea28cb87f0 100644 --- a/vignettes/bds_finding.Rmd +++ b/vignettes/bds_finding.Rmd @@ -44,7 +44,7 @@ otherwise specified.* * [Derive Analysis Flags (e.g. 
`ANL01FL`)](#analysisrec) * [Assign Treatment (`TRTA`, `TRTP`)](#treatment) * [Assign `ASEQ`](#aseq) -* [Derive Categorization Variables (`AVALCATx`)](#cat) +* [Derive Categorization Variables (`AVALCATy`)](#cat) * [Derive Criterion Variables (`CRITy`, `CRITyFL`, `CRITyFLN`)](#crit_vars) * [Add ADSL variables](#adsl_vars) * [Derive New Rows](#additional) @@ -214,16 +214,16 @@ TEMP | TEMP | Temperature (C) | 7 | Vital Sign | 2 This lookup may now be joined to the source data: ```{r eval=TRUE, include=FALSE} -param_lookup <- tribble( - ~VSTESTCD, ~PARAMCD, ~PARAM, ~PARAMN, ~PARCAT1, ~PARCAT1N, - "HEIGHT", "HEIGHT", "Height (cm)", 1, "Subject Characteristic", 1, - "WEIGHT", "WEIGHT", "Weight (kg)", 2, "Subject Characteristic", 1, - "DIABP", "DIABP", "Diastolic Blood Pressure (mmHg)", 3, "Vital Sign", 2, - "MAP", "MAP", "Mean Arterial Pressure (mmHg)", 4, "Vital Sign", 2, - "BSA", "BSA", "Body Surface Area (m^2)", 5, "Vital Sign", 2, - "PULSE", "PULSE", "Pulse Rate (beats/min)", 6, "Vital Sign", 2, - "SYSBP", "SYSBP", "Systolic Blood Pressure (mmHg)", 7, "Vital Sign", 2, - "TEMP", "TEMP", "Temperature (C)", 8, "Vital Sign", 2 +param_lookup <- tibble::tribble( + ~VSTESTCD, ~PARAMCD, ~PARAM, ~PARAMN, ~PARCAT1, ~PARCAT1N, + "HEIGHT", "HEIGHT", "Height (cm)", 1, "Subject Characteristic", 1, + "WEIGHT", "WEIGHT", "Weight (kg)", 2, "Subject Characteristic", 1, + "DIABP", "DIABP", "Diastolic Blood Pressure (mmHg)", 3, "Vital Sign", 2, + "MAP", "MAP", "Mean Arterial Pressure (mmHg)", 4, "Vital Sign", 2, + "BSA", "BSA", "Body Surface Area (m^2)", 5, "Vital Sign", 2, + "PULSE", "PULSE", "Pulse Rate (beats/min)", 6, "Vital Sign", 2, + "SYSBP", "SYSBP", "Systolic Blood Pressure (mmHg)", 7, "Vital Sign", 2, + "TEMP", "TEMP", "Temperature (C)", 8, "Vital Sign", 2 ) attr(param_lookup$VSTESTCD, "label") <- "Vital Signs Test Short Name" ``` @@ -345,11 +345,11 @@ created with a function call. See example below for `PARAMCD` = `QTCF`. 
```{r eval=FALSE} adeg <- tibble::tribble( - ~USUBJID, ~EGSTRESU, ~PARAMCD, ~AVAL, ~VISIT, - "P01", "msec", "QT", 350, "CYCLE 1 DAY 1", - "P01", "msec", "QT", 370, "CYCLE 2 DAY 1", - "P01", "msec", "RR", 842, "CYCLE 1 DAY 1", - "P01", "msec", "RR", 710, "CYCLE 2 DAY 1" + ~USUBJID, ~EGSTRESU, ~PARAMCD, ~AVAL, ~VISIT, + "P01", "msec", "QT", 350, "CYCLE 1 DAY 1", + "P01", "msec", "QT", 370, "CYCLE 2 DAY 1", + "P01", "msec", "RR", 842, "CYCLE 1 DAY 1", + "P01", "msec", "RR", 710, "CYCLE 2 DAY 1" ) adeg <- derive_param_qtc( @@ -366,11 +366,11 @@ for lab differentials converted to absolute values. See example below: ```{r eval=FALSE} adlb <- tibble::tribble( - ~USUBJID, ~PARAMCD, ~AVAL, ~PARAM, ~VISIT, - "P01", "WBC", 33, "Leukocyte Count (10^9/L)", "CYCLE 1 DAY 1", - "P01", "WBC", 38, "Leukocyte Count (10^9/L)", "CYCLE 2 DAY 1", - "P01", "LYMLE", 0.90, "Lymphocytes (fraction of 1)", "CYCLE 1 DAY 1", - "P01", "LYMLE", 0.70, "Lymphocytes (fraction of 1)", "CYCLE 2 DAY 1" + ~USUBJID, ~PARAMCD, ~AVAL, ~PARAM, ~VISIT, + "P01", "WBC", 33, "Leukocyte Count (10^9/L)", "CYCLE 1 DAY 1", + "P01", "WBC", 38, "Leukocyte Count (10^9/L)", "CYCLE 2 DAY 1", + "P01", "LYMLE", 0.90, "Lymphocytes (fraction of 1)", "CYCLE 1 DAY 1", + "P01", "LYMLE", 0.70, "Lymphocytes (fraction of 1)", "CYCLE 2 DAY 1" ) derive_param_wbc_abs( @@ -515,10 +515,10 @@ and `ref_end_date = AP01EDT`. 
advs_pre <- select(advs, -ONTRTFL) advs <- tibble::tribble( - ~USUBJID, ~ASTDT, ~AP01SDT, ~AP01EDT, ~AENDT, + ~USUBJID, ~ASTDT, ~AP01SDT, ~AP01EDT, ~AENDT, "P01", ymd("2020-03-15"), ymd("2020-01-01"), ymd("2020-03-01"), ymd("2020-12-01"), "P02", ymd("2019-04-30"), ymd("2020-01-01"), ymd("2020-03-01"), ymd("2020-03-15"), - "P03", ymd("2019-04-30"), ymd("2020-01-01"), ymd("2020-03-01"), NA, + "P03", ymd("2019-04-30"), ymd("2020-01-01"), ymd("2020-03-01"), NA, ) ``` @@ -846,31 +846,20 @@ dataset_vignette( ) ``` -## Derive Categorization Variables (`AVALCATx`) {#cat} +## Derive Categorization Variables (`AVALCATy`) {#cat} -Admiral does not currently have a generic function to aid in assigning `AVALCATy`/ -`AVALCAvN` values. Below is a simple example of how these values may be -assigned: +We can use the `derive_vars_cat()` function to derive the categorization variables. ```{r eval=TRUE} -avalcat_lookup <- tibble::tribble( - ~PARAMCD, ~AVALCA1N, ~AVALCAT1, - "HEIGHT", 1, ">140 cm", - "HEIGHT", 2, "<= 140 cm" +avalcat_lookup <- exprs( + ~PARAMCD, ~condition, ~AVALCAT1, ~AVALCA1N, + "HEIGHT", AVAL > 140, ">140 cm", 1, + "HEIGHT", AVAL <= 140, "<= 140 cm", 2 ) - -format_avalcat1n <- function(param, aval) { - case_when( - param == "HEIGHT" & aval > 140 ~ 1, - param == "HEIGHT" & aval <= 140 ~ 2 - ) -} - advs <- advs %>% - mutate(AVALCA1N = format_avalcat1n(param = PARAMCD, aval = AVAL)) %>% - derive_vars_merged( - avalcat_lookup, - by = exprs(PARAMCD, AVALCA1N) + derive_vars_cat( + definition = avalcat_lookup, + by_vars = exprs(PARAMCD) ) ``` diff --git a/vignettes/questionnaires.Rmd b/vignettes/questionnaires.Rmd index 18edb6fc4e..165318f0f0 100644 --- a/vignettes/questionnaires.Rmd +++ b/vignettes/questionnaires.Rmd @@ -251,26 +251,25 @@ and `EVNTDESC`, it makes sense to create a separate time to event dataset for them. However, it might be useful to create flags or categorization variables in `ADQS`. 
For example: ```{r} +# Create AVALCATy lookup table +avalcat_lookup <- exprs( + ~PARAMCD, ~condition, ~AVALCAT1, ~AVALCAT1N, + "GDS02TS", AVAL <= 5, "Normal", 0L, + "GDS02TS", AVAL <= 10 & AVAL > 5, "Possible Depression", 1L, + "GDS02TS", AVAL > 10, "Likely Depression", 2L +) +# Create CHGCAT1 lookup table +chgcat_lookup <- exprs( + ~condition, ~CHGCAT1, + AVALCAT1N > BASECA1N, "WORSENED", + AVALCAT1N == BASECA1N, "NO CHANGE", + AVALCAT1N < BASECA1N, "IMPROVED" +) + adgdssf <- adgdssf %>% - mutate( - AVALCAT1 = if_else( - PARAMCD == "GDS02TS", - case_when( - AVAL <= 5 ~ "Normal", - AVAL <= 10 ~ "Possible Depression", - AVAL > 10 ~ "Likely Depression" - ), - NA_character_ - ), - AVALCAT1N = if_else( - PARAMCD == "GDS02TS", - case_when( - AVAL <= 5 ~ 0L, - AVAL <= 10 ~ 1L, - AVAL > 10 ~ 2L - ), - NA_integer_ - ) + derive_vars_cat( + definition = avalcat_lookup, + by_vars = exprs(PARAMCD) ) %>% derive_var_base( by_vars = exprs(STUDYID, USUBJID, PARAMCD), @@ -282,12 +281,8 @@ adgdssf <- adgdssf %>% source_var = AVALCAT1N, new_var = BASECA1N ) %>% - mutate( - CHGCAT1 = case_when( - AVALCAT1N > BASECA1N ~ "WORSENED", - AVALCAT1N == BASECA1N ~ "NO CHANGE", - AVALCAT1N < BASECA1N ~ "IMPROVED", - ) + derive_vars_cat( + definition = chgcat_lookup ) ```