Skip to content

Commit

Permalink
#2563 no_list_columns: split files (and clean up tests)
Browse files Browse the repository at this point in the history
  • Loading branch information
bundfussr committed Dec 13, 2024
1 parent 88d6223 commit c5d8b2c
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 268 deletions.
108 changes: 108 additions & 0 deletions R/derive_vars_atc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#' Derive ATC Class Variables
#'
#' @description Add Anatomical Therapeutic Chemical class variables from `FACM` to `ADCM`.
#'
#' **Note:** This is a wrapper function for the more generic `derive_vars_transposed()`.
#'
#' @param dataset
#' `r roxygen_param_dataset(expected_vars = c("by_vars"))`
#'
#' @param dataset_facm FACM dataset
#'
#' The variables specified by the `by_vars`, `id_vars`, and `value_var`
#' arguments and `FATESTCD` are required. The variables `by_vars`, `id_vars`,
#' and `FATESTCD` must be a unique key.
#'
#' @param by_vars Grouping variables
#'
#' Keys used to merge `dataset_facm` with `dataset`.
#'
#' @param id_vars ID variables
#'
#' Variables (excluding `by_vars`) that uniquely identify each observation in `dataset_facm`.
#'
#' `r roxygen_param_by_vars()`
#'
#' @param value_var The variable of `dataset_facm` containing the values of the
#' transposed variables
#'
#' @return The input dataset with ATC variables added
#'
#' @seealso [derive_vars_transposed()]
#'
#' @family der_occds
#' @keywords der_occds
#'
#' @export
#'
#' @examples
#' library(tibble)
#'
#' cm <- tribble(
#' ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD,
#' "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL",
#' "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL",
#' "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE"
#' )
#' facm <- tribble(
#' ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC,
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA"
#' )
#'
#' derive_vars_atc(cm, facm, id_vars = exprs(FAGRPID))
derive_vars_atc <- function(dataset,
                            dataset_facm,
                            by_vars = exprs(
                              !!!get_admiral_option("subject_keys"),
                              CMREFID = FAREFID
                            ),
                            id_vars = NULL,
                            value_var = FASTRESC) {
  # Validate the arguments before touching any data.
  value_var <- assert_symbol(enexpr(value_var))
  assert_vars(by_vars)
  assert_vars(id_vars, optional = TRUE)
  assert_data_frame(dataset, required_vars = replace_values_by_names(by_vars))
  assert_data_frame(
    dataset_facm,
    required_vars = exprs(!!!by_vars, !!value_var, !!!id_vars, FATESTCD)
  )

  # Transpose the ATC records of `dataset_facm` (FATESTCD matching CMATC1-4,
  # with or without the CD suffix) and merge them onto `dataset`. A
  # `merge_duplicates` condition raised by `derive_vars_transposed()` is
  # re-signalled with this function's argument name in the message.
  data_transposed <- tryCatch(
    derive_vars_transposed(
      dataset,
      select(
        dataset_facm,
        !!!unname(by_vars), !!value_var, !!!id_vars, FATESTCD
      ),
      by_vars = by_vars,
      id_vars = id_vars,
      key_var = FATESTCD,
      value_var = !!value_var,
      filter = str_detect(FATESTCD, "^CMATC[1-4](CD)?$")
    ),
    merge_duplicates = function(cnd) {
      cnd$message <- str_replace(cnd$message, "dataset_merge", "dataset_facm")
      cnd$body[[1]] <- "Please check data and `by_vars` and `id_vars` arguments."
      cnd_signal(cnd)
    }
  )

  # Only the columns introduced by the merge are candidates for removal.
  # Dropping every "FA"-prefixed column would also discard variables that
  # were already part of the input dataset (for example an `FASFL` flag).
  added_vars <- setdiff(names(data_transposed), names(dataset))

  data_transposed %>%
    select(-(starts_with("FA") & dplyr::all_of(added_vars))) %>%
    rename_with(.fn = ~ str_remove(.x, "^CM"), .cols = starts_with("CMATC"))
}
126 changes: 0 additions & 126 deletions R/derive_vars_transposed.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,132 +181,6 @@ derive_vars_transposed <- function(dataset,
),
call = parent.frame(n = 4)
)
},
"dplyr_error_join_relationship_one_to_many" = function(cnd) {
cli_abort(
message = c(
str_replace(
str_replace(
cnd$message, "`x`", "`dataset`"
), "`y`", "the transposed `dataset_merge`"
),
i = str_replace(
str_replace(
cnd$body, "`x`", "`dataset`"
), "`y`", "the transposed `dataset_merge`"
)
),
call = parent.frame(n = 4)
)
}
)
}

#' Derive ATC Class Variables
#'
#' @description Add Anatomical Therapeutic Chemical class variables from `FACM` to `ADCM`.
#'
#' **Note:** This is a wrapper function for the more generic `derive_vars_transposed()`.
#'
#' @param dataset
#' `r roxygen_param_dataset(expected_vars = c("by_vars"))`
#'
#' @param dataset_facm FACM dataset
#'
#' The variables specified by the `by_vars`, `id_vars`, and `value_var`
#' arguments and `FATESTCD` are required. The variables `by_vars`, `id_vars`,
#' and `FATESTCD` must be a unique key.
#'
#' @param by_vars Grouping variables
#'
#' Keys used to merge `dataset_facm` with `dataset`.
#'
#' @param id_vars ID variables
#'
#' Variables (excluding `by_vars`) that uniquely identify each observation in `dataset_facm`.
#'
#' `r roxygen_param_by_vars()`
#'
#' @param value_var The variable of `dataset_facm` containing the values of the
#' transposed variables
#'
#' @return The input dataset with ATC variables added
#'
#' @seealso [derive_vars_transposed()]
#'
#' @family der_occds
#' @keywords der_occds
#'
#' @export
#'
#' @examples
#' library(tibble)
#'
#' cm <- tribble(
#' ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD,
#' "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL",
#' "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL",
#' "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE"
#' )
#' facm <- tribble(
#' ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC,
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B",
#' "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A",
#' "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A",
#' "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A",
#' "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D",
#' "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA"
#' )
#'
#' derive_vars_atc(cm, facm, id_vars = exprs(FAGRPID))
derive_vars_atc <- function(dataset,
                            dataset_facm,
                            by_vars = exprs(
                              !!!get_admiral_option("subject_keys"),
                              CMREFID = FAREFID
                            ),
                            id_vars = NULL,
                            value_var = FASTRESC) {
  # Validate the arguments before touching any data.
  value_var <- assert_symbol(enexpr(value_var))
  assert_vars(by_vars)
  assert_vars(id_vars, optional = TRUE)
  assert_data_frame(dataset, required_vars = replace_values_by_names(by_vars))
  assert_data_frame(
    dataset_facm,
    required_vars = exprs(!!!by_vars, !!value_var, !!!id_vars, FATESTCD)
  )

  # Transpose the ATC records of `dataset_facm` (FATESTCD matching CMATC1-4,
  # with or without the CD suffix) and merge them onto `dataset`. A
  # `merge_duplicates` condition raised by `derive_vars_transposed()` is
  # re-signalled with this function's argument name in the message.
  data_transposed <- tryCatch(
    derive_vars_transposed(
      dataset,
      select(
        dataset_facm,
        !!!unname(by_vars), !!value_var, !!!id_vars, FATESTCD
      ),
      by_vars = by_vars,
      id_vars = id_vars,
      key_var = FATESTCD,
      value_var = !!value_var,
      filter = str_detect(FATESTCD, "^CMATC[1-4](CD)?$")
    ),
    merge_duplicates = function(cnd) {
      cnd$message <- str_replace(cnd$message, "dataset_merge", "dataset_facm")
      cnd$body[[1]] <- "Please check data and `by_vars` and `id_vars` arguments."
      cnd_signal(cnd)
    }
  )

  # Only the columns introduced by the merge are candidates for removal.
  # Dropping every "FA"-prefixed column would also discard variables that
  # were already part of the input dataset (for example an `FASFL` flag).
  added_vars <- setdiff(names(data_transposed), names(dataset))

  data_transposed %>%
    select(-(starts_with("FA") & dplyr::all_of(added_vars))) %>%
    rename_with(.fn = ~ str_remove(.x, "^CM"), .cols = starts_with("CMATC"))
}
24 changes: 18 additions & 6 deletions tests/testthat/_snaps/derive_vars_transposed.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,25 @@
2 STUDY01 P02 31 3
3 STUDY01 P03 42 NA

# derive_vars_atc Test 5: error if facm not unique
# derive_vars_transposed Test 4: error if `relationship` is unexpected

Code
derive_vars_atc(dataset = cm, dataset_facm = facm)
cm %>% derive_vars_transposed(facm, by_vars = exprs(USUBJID, CMREFID = FAREFID),
id_vars = exprs(FAGRPID), key_var = FATESTCD, value_var = FASTRESC,
relationship = "one-to-one")
Condition
Error in `signal_duplicate_records()`:
! Dataset `dataset_facm` contains duplicate records with respect to `STUDYID`, `USUBJID`, `FAREFID`, and `FATESTCD`
Please check data and `by_vars` and `id_vars` arguments.
i Run `admiral::get_duplicates_dataset()` to access the duplicate records
Error in `tryCatch()`:
! Each row in `dataset` must match at most 1 row in the transposed `dataset_merge`.
i Row 2 of `dataset` matches multiple rows in the transposed `dataset_merge`.

---

Code
cm %>% derive_vars_transposed(facm, by_vars = exprs(USUBJID, CMREFID = FAREFID),
id_vars = exprs(FAGRPID), key_var = FATESTCD, value_var = FASTRESC,
relationship = "many-to-one")
Condition
Error in `derive_vars_transposed()`:
! Each row in `dataset` must match at most 1 row in the transposed `dataset_merge`.
i Row 2 of `dataset` matches multiple rows in the transposed `dataset_merge`.

90 changes: 90 additions & 0 deletions tests/testthat/test-derive_vars_atc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
## Test 1: ATC variables are merged properly ----
test_that("derive_vars_atc Test 1: ATC variables are merged properly", {
  # Concomitant medication records that receive the ATC variables.
  cm_input <- tibble::tribble(
    ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD,
    "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL",
    "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL",
    "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE"
  )

  # FACM records; reference 2007001 carries three ATC groups (FAGRPID 1-3).
  facm_input <- tibble::tribble(
    ~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC,
    "STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N",
    "STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02",
    "STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B",
    "STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE",
    "STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D",
    "STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10",
    "STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A",
    "STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA",
    "STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D",
    "STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07",
    "STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A",
    "STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA",
    "STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H",
    "STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02",
    "STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A",
    "STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB",
    "STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C",
    "STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03",
    "STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D",
    "STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA"
  )

  # One output row per CM record and ATC group, without any FA* columns.
  # nolint start
  expected <- tibble::tribble(
    ~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD, ~ATC1CD, ~ATC2CD, ~ATC3CD, ~ATC4CD,
    "STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL", "N", "N02", "N02B", "N02BE",
    "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "D", "D07", "D07A", "D07AA",
    "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "D", "D10", "D10A", "D10AA",
    "STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "H", "H02", "H02A", "H02AB",
    "STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE", "C", "C03", "C03D", "C03DA"
  )
  # nolint end

  actual <- derive_vars_atc(cm_input, facm_input, id_vars = exprs(FAGRPID))

  expect_dfs_equal(
    expected,
    actual,
    keys = c("USUBJID", "CMDECOD", "ATC4CD")
  )
})

## Test 2: error if facm not unique ----
test_that("derive_vars_atc Test 2: error if facm not unique", {
# Concomitant medication records used as the input dataset.
cm <- tibble::tribble(
~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD,
"STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL",
"STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL",
"STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE"
)
# FACM records: FAREFID 2007001 has three records per FATESTCD, which differ
# only in FAGRPID (1, 2 and 3).
facm <- tibble::tribble(
~STUDYID, ~USUBJID, ~FAGRPID, ~FAREFID, ~FATESTCD, ~FASTRESC,
"STUDY01", "BP40257-1001", "1", "1192056", "CMATC1CD", "N",
"STUDY01", "BP40257-1001", "1", "1192056", "CMATC2CD", "N02",
"STUDY01", "BP40257-1001", "1", "1192056", "CMATC3CD", "N02B",
"STUDY01", "BP40257-1001", "1", "1192056", "CMATC4CD", "N02BE",
"STUDY01", "BP40257-1001", "1", "2007001", "CMATC1CD", "D",
"STUDY01", "BP40257-1001", "1", "2007001", "CMATC2CD", "D10",
"STUDY01", "BP40257-1001", "1", "2007001", "CMATC3CD", "D10A",
"STUDY01", "BP40257-1001", "1", "2007001", "CMATC4CD", "D10AA",
"STUDY01", "BP40257-1001", "2", "2007001", "CMATC1CD", "D",
"STUDY01", "BP40257-1001", "2", "2007001", "CMATC2CD", "D07",
"STUDY01", "BP40257-1001", "2", "2007001", "CMATC3CD", "D07A",
"STUDY01", "BP40257-1001", "2", "2007001", "CMATC4CD", "D07AA",
"STUDY01", "BP40257-1001", "3", "2007001", "CMATC1CD", "H",
"STUDY01", "BP40257-1001", "3", "2007001", "CMATC2CD", "H02",
"STUDY01", "BP40257-1001", "3", "2007001", "CMATC3CD", "H02A",
"STUDY01", "BP40257-1001", "3", "2007001", "CMATC4CD", "H02AB",
"STUDY01", "BP40257-1002", "1", "2791596", "CMATC1CD", "C",
"STUDY01", "BP40257-1002", "1", "2791596", "CMATC2CD", "C03",
"STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D",
"STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA"
)

# `id_vars` is deliberately omitted, so `facm` is not unique with respect to
# the default `by_vars` plus FATESTCD and the call is expected to fail.
# The call text is stored in the snapshot, so keep it unchanged.
expect_snapshot(
derive_vars_atc(
dataset = cm,
dataset_facm = facm
),
error = TRUE
)
})
Loading

0 comments on commit c5d8b2c

Please sign in to comment.