Skip to content

Commit

Permalink
#2563 no_list_columns: add check to avoid list columns
Browse files Browse the repository at this point in the history
  • Loading branch information
bundfussr committed Dec 2, 2024
1 parent 1e4f6c2 commit a43e1a4
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 29 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ or that the queries dataset contains duplicates. (#2543)

- In `get_summary_records()`, previously deprecated formal arguments `analysis_var` and `summary_fun` now removed from function, documentation, tests etc. (#2521)

- A check was added to `derive_vars_transposed()` and `derive_vars_atc()` which
stops execution if the records in `dataset_merge` or `dataset_facm` respectively
are not unique. (#2563)

## Breaking Changes

- The following function arguments are entering the next phase of the deprecation process: (#2487)
Expand Down
29 changes: 26 additions & 3 deletions R/derive_vars_transposed.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,21 @@ derive_vars_transposed <- function(dataset,
optional = TRUE
)

# check for duplicates in dataset_merge as these will create list columns,
# which is not acceptable for ADaM datasets
signal_duplicate_records(
dataset_merge,
by_vars = c(by_vars, id_vars, exprs(!!key_var)),
msg = c(
paste(
"Dataset {.arg dataset_merge} contains duplicate records with respect to",
"{.var {by_vars}}"
),
"Please check data and {.arg by_vars}, {.arg id_vars}, and {.arg key_var} arguments."
),
class = "merge_duplicates"
)

dataset_transposed <- dataset_merge %>%
filter_if(filter) %>%
pivot_wider(
Expand Down Expand Up @@ -270,15 +285,23 @@ derive_vars_atc <- function(dataset,
assert_data_frame(dataset, required_vars = replace_values_by_names(by_vars))
assert_data_frame(dataset_facm, required_vars = exprs(!!!by_vars, !!value_var, FAGRPID, FATESTCD))

dataset %>%
derive_vars_transposed(
tryCatch(
data_transposed <- derive_vars_transposed(
dataset,
select(dataset_facm, !!!unname(by_vars), !!value_var, FAGRPID, FATESTCD),
by_vars = by_vars,
id_vars = id_vars,
key_var = FATESTCD,
value_var = !!value_var,
filter = str_detect(FATESTCD, "^CMATC[1-4](CD)?$")
) %>%
),
merge_duplicates = function(cnd) {
cnd$message <- str_replace(cnd$message, "dataset_merge", "dataset_facm")
cnd$body[[1]] <- "Please check data and `by_vars` and `id_vars` arguments."
rlang::cnd_signal(cnd)
}
)
data_transposed %>%
select(-starts_with("FA")) %>%
rename_with(.fn = ~ str_remove(.x, "^CM"), .cols = starts_with("CMATC"))
}
14 changes: 12 additions & 2 deletions R/duplicates.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ extract_duplicate_records <- function(dataset, by_vars) {
#' @param msg The condition message
#' @param cnd_type Type of condition to signal when detecting duplicate records.
#' One of `"message"`, `"warning"` or `"error"`. Default is `"error"`.
#' @param class Class of the condition
#'
#' The specified classes are added to the classes of the condition.
#' `c("duplicate_records", "assert-admiral")` is always added.
#'
#' @return No return value, called for side effects
#'
Expand All @@ -113,11 +117,13 @@ signal_duplicate_records <- function(dataset,
"with respect to",
"{.var {replace_values_by_names(by_vars)}}"
),
cnd_type = "error") {
cnd_type = "error",
class = NULL) {
assert_expr_list(by_vars)
assert_data_frame(dataset, required_vars = extract_vars(by_vars), check_is_grouped = FALSE)
assert_character_vector(msg)
assert_character_scalar(cnd_type, values = c("message", "warning", "error"))
assert_character_vector(class, optional = TRUE)

cnd_funs <- list(message = cli_inform, warning = cli_warn, error = cli_abort)

Expand All @@ -134,7 +140,11 @@ signal_duplicate_records <- function(dataset,
msg,
i = "Run {.run admiral::get_duplicates_dataset()} to access the duplicate records"
)
cnd_funs[[cnd_type]](full_msg)
cnd_funs[[cnd_type]](
full_msg,
class = c(class, "duplicate_records", "assert-admiral"),
by_vars = by_vars
)
}
}

Expand Down
8 changes: 7 additions & 1 deletion man/signal_duplicate_records.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions tests/testthat/_snaps/derive_vars_transposed.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,13 @@
2 STUDY01 P02 31 3
3 STUDY01 P03 42 NA

# derive_vars_atc Test 5: error if facm not unique

Code
derive_vars_atc(dataset = cm, dataset_facm = facm)
Condition
Error in `signal_duplicate_records()`:
! Dataset `dataset_facm` contains duplicate records with respect to `STUDYID`, `USUBJID`, `FAREFID`, and `FATESTCD`
Please check data and `by_vars` and `id_vars` arguments.
i Run `admiral::get_duplicates_dataset()` to access the duplicate records

35 changes: 12 additions & 23 deletions tests/testthat/test-derive_vars_transposed.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dataset_merge <- tibble::tribble(
"STUDY01", "P03", "T02", 9
)

# derive_vars_transposed ----
## Test 1: the merge dataset is transposed and merged correctly ----
test_that("derive_vars_transposed Test 1: the merge dataset is transposed and merged correctly", {
expected_output <- tibble::tribble(
Expand Down Expand Up @@ -52,7 +53,7 @@ test_that("derive_vars_transposed Test 2: filtering the merge dataset works", {
expect_dfs_equal(expected_output, actual_output, keys = "USUBJID")
})

## Test 3: filtering the merge dataset works with relationship 'many-to-one' ----
## Test 3: filter merge dataset 'many-to-one' ----
test_that("derive_vars_transposed Test 3: filter merge dataset 'many-to-one'", {
expect_snapshot(
derive_vars_transposed(
Expand All @@ -67,8 +68,9 @@ test_that("derive_vars_transposed Test 3: filter merge dataset 'many-to-one'", {
)
})

# derive_vars_atc ----
## Test 4: ATC variables are merged properly ----
test_that("derive_vars_transposed Test 4: ATC variables are merged properly", {
test_that("derive_vars_atc Test 4: ATC variables are merged properly", {
cm <- tibble::tribble(
~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD,
"STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL",
Expand Down Expand Up @@ -117,8 +119,8 @@ test_that("derive_vars_transposed Test 4: ATC variables are merged properly", {
expect_dfs_equal(expected_output, actual_output, keys = c("USUBJID", "CMDECOD", "ATC4CD"))
})

## Test 5: ATC variables are merged properly ----
test_that("derive_vars_transposed Test 5: ATC variables are merged properly", {
## Test 5: error if facm not unique ----
test_that("derive_vars_atc Test 5: error if facm not unique", {
cm <- tibble::tribble(
~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD,
"STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL",
Expand Down Expand Up @@ -148,25 +150,12 @@ test_that("derive_vars_transposed Test 5: ATC variables are merged properly", {
"STUDY01", "BP40257-1002", "1", "2791596", "CMATC3CD", "C03D",
"STUDY01", "BP40257-1002", "1", "2791596", "CMATC4CD", "C03DA"
)
# nolint start
expected_output <- tibble::tribble(
~STUDYID, ~USUBJID, ~CMGRPID, ~CMREFID, ~CMDECOD, ~ATC1CD, ~ATC2CD, ~ATC3CD, ~ATC4CD,
"STUDY01", "BP40257-1001", "14", "1192056", "PARACETAMOL", "N", "N02", "N02B", "N02BE",
"STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "D", "D07", "D07A", "D07AA",
"STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "D", "D10", "D10A", "D10AA",
"STUDY01", "BP40257-1001", "18", "2007001", "SOLUMEDROL", "H", "H02", "H02A", "H02AB",
"STUDY01", "BP40257-1002", "19", "2791596", "SPIRONOLACTONE", "C", "C03", "C03D", "C03DA"
)
# nolint end
actual_output <- derive_vars_atc(
dataset = cm,
dataset_facm = facm,
id_vars = exprs(FAGRPID)
)

expect_dfs_equal(
expected_output,
actual_output,
keys = c("STUDYID", "USUBJID", "CMDECOD", "ATC4CD")
expect_snapshot(
derive_vars_atc(
dataset = cm,
dataset_facm = facm
),
error = TRUE
)
})

0 comments on commit a43e1a4

Please sign in to comment.