Closes #2563 no_list_columns: add check to avoid list columns (#2592)

* #2563 no_list_columns: add check to avoid list columns * #2563 no_list_columns: update documentation * #2563 no_list_columns: fix example and add see also * #2563 no_list_columns: avoid package name prefix * #2563 no_list_columns: update required admiraldev version * #2563 no_list_columns: split files (and clean up tests) * #2563 no_list_columns: update man --------- Co-authored-by: Daniel Sjoberg <danield.sjoberg@gmail.com> Co-authored-by: Ben Straub <ben.x.straub@gsk.com>
pharmaverse · Dec 13, 2024 · e2415a0 · e2415a0
1 parent 29c2804
commit e2415a0
Show file tree

Hide file tree

Showing 13 changed files with 297 additions and 289 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -46,7 +46,7 @@ BugReports: https://github.com/pharmaverse/admiral/issues
 Depends: 
     R (>= 4.0)
 Imports:
-    admiraldev (>= 1.1.0),
+    admiraldev (>= 1.1.0.9007),
     cli (>= 3.6.2),
     dplyr (>= 1.0.5),
     hms (>= 0.5.3),

diff --git a/NAMESPACE b/NAMESPACE
@@ -258,6 +258,7 @@ importFrom(rlang,call2)
 importFrom(rlang,call_name)
 importFrom(rlang,caller_env)
 importFrom(rlang,cnd_muffle)
+importFrom(rlang,cnd_signal)
 importFrom(rlang,current_env)
 importFrom(rlang,enexpr)
 importFrom(rlang,enexprs)

diff --git a/NEWS.md b/NEWS.md
@@ -45,6 +45,10 @@ or that the queries dataset contains duplicates. (#2543)
 
 - In `get_summary_records()`, previously deprecated formal arguments `analysis_var` and `summary_fun` now removed from function, documentation, tests etc. (#2521)
 
+- A check was added to `derive_vars_transposed()` and `derive_vars_atc()` which
+stops execution if the records in `dataset_merge` or `dataset_facm` respectively
+are not unique. (#2563)
+
 - The functions `derive_vars_joined()`, `derive_var_joined_exist_flag()`,
 `derive_extreme_event()`, and `filter_joined()` were updated to reduce their
 memory consumption. (#2590)

diff --git a/R/admiral-package.R b/R/admiral-package.R
@@ -17,11 +17,11 @@
 #'             map_if map_lgl map2 modify_at modify_if pmap reduce transpose
 #'             walk
 #' @importFrom rlang := abort arg_match as_data_mask as_function as_label
-#'   as_name as_string call2 call_name caller_env cnd_muffle current_env .data
-#'   enexpr enexprs eval_bare eval_tidy expr expr_interp exec expr_label exprs
-#'   f_lhs f_rhs  inform is_call is_expression is_missing is_named list2
-#'   new_environment new_formula parse_expr parse_exprs set_names sym syms
-#'   type_of warn
+#'   as_name as_string call2 call_name caller_env cnd_muffle cnd_signal
+#'   current_env .data enexpr enexprs eval_bare eval_tidy expr expr_interp exec
+#'   expr_label exprs f_lhs f_rhs  inform is_call is_expression is_missing
+#'   is_named list2 new_environment new_formula parse_expr parse_exprs set_names
+#'   sym syms type_of warn
 #' @importFrom stats setNames
 #' @importFrom stringr str_c str_count str_detect str_extract str_glue
 #'             str_length str_locate str_locate_all str_match str_remove

diff --git a/R/derive_vars_atc.R b/R/derive_vars_atc.R
@@ -0,0 +1,108 @@
+#' Derive ATC Class Variables
+#'
+#' @description Add Anatomical Therapeutic Chemical class variables from `FACM` to `ADCM`.
+#'
+#' **Note:** This is a wrapper function for the more generic `derive_vars_transposed()`.
+#'
+#' @param dataset
+#'   `r roxygen_param_dataset(expected_vars = c("by_vars"))`
+#'
+#' @param dataset_facm FACM dataset
+#'
+#'   The variables specified by the `by_vars`, `id_vars`, and `value_var`
+#'   arguments and `FATESTCD` are required. The variables `by_vars`, `id_vars`,
+#'   and `FATESTCD` must be a unique key.
+#'
+#' @param by_vars Grouping variables
+#'
+#'  Keys used to merge `dataset_facm` with `dataset`.
+#'
+#' @param id_vars ID variables
+#'
+#'  Variables (excluding by_vars) that uniquely identify each observation in `dataset_merge`.
+#'
+#' `r roxygen_param_by_vars()`
+#'
+#' @param value_var The variable of `dataset_facm` containing the values of the
+#'   transposed variables
+#'
+#' @return The input dataset with ATC variables added
+#'
+#' @seealso [derive_vars_transposed()]
+#'
+#' @family der_occds
+#' @keywords der_occds
+#'
+#' @export
+#'
+#' @examples
+#' library(tibble)
+#'
+#' cm <- tribble(
+#'   ~STUDYID,  ~USUBJID,       ~CMGRPID, ~CMREFID,  ~CMDECOD,
+#'   "STUDY01", "BP40257-1001", "14",     "1192056", "PARACETAMOL",
+#'   "STUDY01", "BP40257-1001", "18",     "2007001", "SOLUMEDROL",
+#'   "STUDY01", "BP40257-1002", "19",     "2791596", "SPIRONOLACTONE"
+#' )
+#' facm <- tribble(
+#'   ~STUDYID,  ~USUBJID,       ~FAGRPID, ~FAREFID,  ~FATESTCD,  ~FASTRESC,
+#'   "STUDY01", "BP40257-1001", "1",      "1192056", "CMATC1CD", "N",
+#'   "STUDY01", "BP40257-1001", "1",      "1192056", "CMATC2CD", "N02",
+#'   "STUDY01", "BP40257-1001", "1",      "1192056", "CMATC3CD", "N02B",
+#'   "STUDY01", "BP40257-1001", "1",      "1192056", "CMATC4CD", "N02BE",
+#'   "STUDY01", "BP40257-1001", "1",      "2007001", "CMATC1CD", "D",
+#'   "STUDY01", "BP40257-1001", "1",      "2007001", "CMATC2CD", "D10",
+#'   "STUDY01", "BP40257-1001", "1",      "2007001", "CMATC3CD", "D10A",
+#'   "STUDY01", "BP40257-1001", "1",      "2007001", "CMATC4CD", "D10AA",
+#'   "STUDY01", "BP40257-1001", "2",      "2007001", "CMATC1CD", "D",
+#'   "STUDY01", "BP40257-1001", "2",      "2007001", "CMATC2CD", "D07",
+#'   "STUDY01", "BP40257-1001", "2",      "2007001", "CMATC3CD", "D07A",
+#'   "STUDY01", "BP40257-1001", "2",      "2007001", "CMATC4CD", "D07AA",
+#'   "STUDY01", "BP40257-1001", "3",      "2007001", "CMATC1CD", "H",
+#'   "STUDY01", "BP40257-1001", "3",      "2007001", "CMATC2CD", "H02",
+#'   "STUDY01", "BP40257-1001", "3",      "2007001", "CMATC3CD", "H02A",
+#'   "STUDY01", "BP40257-1001", "3",      "2007001", "CMATC4CD", "H02AB",
+#'   "STUDY01", "BP40257-1002", "1",      "2791596", "CMATC1CD", "C",
+#'   "STUDY01", "BP40257-1002", "1",      "2791596", "CMATC2CD", "C03",
+#'   "STUDY01", "BP40257-1002", "1",      "2791596", "CMATC3CD", "C03D",
+#'   "STUDY01", "BP40257-1002", "1",      "2791596", "CMATC4CD", "C03DA"
+#' )
+#'
+#' derive_vars_atc(cm, facm, id_vars = exprs(FAGRPID))
+derive_vars_atc <- function(dataset,
+                            dataset_facm,
+                            by_vars = exprs(
+                              !!!get_admiral_option("subject_keys"),
+                              CMREFID = FAREFID
+                            ),
+                            id_vars = NULL,
+                            value_var = FASTRESC) {
+  value_var <- assert_symbol(enexpr(value_var))
+  assert_vars(by_vars)
+  assert_vars(id_vars, optional = TRUE)
+  assert_data_frame(dataset, required_vars = replace_values_by_names(by_vars))
+  assert_data_frame(
+    dataset_facm,
+    required_vars = exprs(!!!by_vars, !!value_var, !!!id_vars, FATESTCD)
+  )
+
+  tryCatch(
+    data_transposed <- derive_vars_transposed(
+      dataset,
+      select(dataset_facm, !!!unname(by_vars), !!value_var, !!!id_vars, FATESTCD),
+      by_vars = by_vars,
+      id_vars = id_vars,
+      key_var = FATESTCD,
+      value_var = !!value_var,
+      filter = str_detect(FATESTCD, "^CMATC[1-4](CD)?$")
+    ),
+    merge_duplicates = function(cnd) {
+      cnd$message <- str_replace(cnd$message, "dataset_merge", "dataset_facm")
+      cnd$body[[1]] <- "Please check data and `by_vars` and `id_vars` arguments."
+      cnd_signal(cnd)
+    }
+  )
+  data_transposed %>%
+    select(-starts_with("FA")) %>%
+    rename_with(.fn = ~ str_remove(.x, "^CM"), .cols = starts_with("CMATC"))
+}
diff --git a/R/derive_vars_transposed.R b/R/derive_vars_transposed.R
@@ -7,8 +7,9 @@
 #'
 #' @param dataset_merge Dataset to transpose and merge
 #'
-#'   The variables specified by the `by_vars`, `key_var` and `value_var` parameters
-#'   are expected
+#'   The variables specified by the `by_vars`, `id_vars`, `key_var` and
+#'   `value_var` arguments are expected. The variables `by_vars`, `id_vars`,
+#'   `key_var` have to be a unique key.
 #'
 #' @param by_vars Grouping variables
 #'
@@ -35,17 +36,18 @@
 #'   <https://dplyr.tidyverse.org/reference/mutate-joins.html#arguments> for
 #'   more details.
 #'
-#'   Permitted Values for `relationship`: `"one-to-one"`, `"one-to-many"`,
-#'   `"many-to-one"`, `"many-to-many"`, `NULL`.
+#'   *Permitted Values*: `"one-to-one"`, `"one-to-many"`, `"many-to-one"`,
+#'   `"many-to-many"`, `NULL`
 #'
 #' @details
 #' After filtering `dataset_merge` based upon the condition provided in `filter`, this
 #' dataset is transposed and subsequently merged onto `dataset` using `by_vars` as
 #' keys.
 #'
-#'
 #' @return The input dataset with transposed variables from `dataset_merge` added
 #'
+#' @seealso [derive_vars_atc()]
+#'
 #' @family der_gen
 #' @keywords der_gen
 #'
@@ -116,6 +118,21 @@ derive_vars_transposed <- function(dataset,
     optional = TRUE
   )
 
+  # check for duplicates in dataset_merge as these will create list columns,
+  # which is not acceptable for ADaM datasets
+  signal_duplicate_records(
+    dataset_merge,
+    by_vars = c(by_vars, id_vars, exprs(!!key_var)),
+    msg = c(
+      paste(
+        "Dataset {.arg dataset_merge} contains duplicate records with respect to",
+        "{.var {by_vars}}"
+      ),
+      "Please check data and {.arg by_vars}, {.arg id_vars}, and {.arg key_var} arguments."
+    ),
+    class = "merge_duplicates"
+  )
+
   dataset_transposed <- dataset_merge %>%
     filter_if(filter) %>%
     pivot_wider(
@@ -164,121 +181,6 @@ derive_vars_transposed <- function(dataset,
         ),
         call = parent.frame(n = 4)
       )
-    },
-    "dplyr_error_join_relationship_one_to_many" = function(cnd) {
-      cli_abort(
-        message = c(
-          str_replace(
-            str_replace(
-              cnd$message, "`x`", "`dataset`"
-            ), "`y`", "the transposed `dataset_merge`"
-          ),
-          i = str_replace(
-            str_replace(
-              cnd$body, "`x`", "`dataset`"
-            ), "`y`", "the transposed `dataset_merge`"
-          )
-        ),
-        call = parent.frame(n = 4)
-      )
     }
   )
 }
-
-#' Derive ATC Class Variables
-#'
-#' @description Add Anatomical Therapeutic Chemical class variables from `FACM` to `ADCM`.
-#'
-#' **Note:** This is a wrapper function for the more generic `derive_vars_transposed()`.
-#'
-#' @param dataset
-#'   `r roxygen_param_dataset(expected_vars = c("by_vars"))`
-#'
-#' @param dataset_facm FACM dataset
-#'
-#'   The variables specified by the `by_vars` and `value_var` parameters,
-#'   `FAGRPID` and `FATESTCD` are required
-#'
-#' @param by_vars Grouping variables
-#'
-#'  Keys used to merge `dataset_facm` with `dataset`.
-#'
-#' @param id_vars ID variables
-#'
-#'  Variables (excluding by_vars) that uniquely identify each observation in `dataset_merge`.
-#'
-#' `r roxygen_param_by_vars()`
-#'
-#' @param value_var The variable of `dataset_facm` containing the values of the
-#'   transposed variables
-#'
-#'   Default: `FASTRESC`
-#'
-#'
-#' @return The input dataset with ATC variables added
-#'
-#' @family der_occds
-#' @keywords der_occds
-#'
-#' @export
-#'
-#' @examples
-#' library(tibble)
-#'
-#' cm <- tribble(
-#'   ~STUDYID,  ~USUBJID,       ~CMGRPID, ~CMREFID,  ~CMDECOD,
-#'   "STUDY01", "BP40257-1001", "14",     "1192056", "PARACETAMOL",
-#'   "STUDY01", "BP40257-1001", "18",     "2007001", "SOLUMEDROL",
-#'   "STUDY01", "BP40257-1002", "19",     "2791596", "SPIRONOLACTONE"
-#' )
-#' facm <- tribble(
-#'   ~STUDYID,  ~USUBJID,       ~FAGRPID, ~FAREFID,  ~FATESTCD,  ~FASTRESC,
-#'   "STUDY01", "BP40257-1001", "1",      "1192056", "CMATC1CD", "N",
-#'   "STUDY01", "BP40257-1001", "1",      "1192056", "CMATC2CD", "N02",
-#'   "STUDY01", "BP40257-1001", "1",      "1192056", "CMATC3CD", "N02B",
-#'   "STUDY01", "BP40257-1001", "1",      "1192056", "CMATC4CD", "N02BE",
-#'   "STUDY01", "BP40257-1001", "1",      "2007001", "CMATC1CD", "D",
-#'   "STUDY01", "BP40257-1001", "1",      "2007001", "CMATC2CD", "D10",
-#'   "STUDY01", "BP40257-1001", "1",      "2007001", "CMATC3CD", "D10A",
-#'   "STUDY01", "BP40257-1001", "1",      "2007001", "CMATC4CD", "D10AA",
-#'   "STUDY01", "BP40257-1001", "2",      "2007001", "CMATC1CD", "D",
-#'   "STUDY01", "BP40257-1001", "2",      "2007001", "CMATC2CD", "D07",
-#'   "STUDY01", "BP40257-1001", "2",      "2007001", "CMATC3CD", "D07A",
-#'   "STUDY01", "BP40257-1001", "2",      "2007001", "CMATC4CD", "D07AA",
-#'   "STUDY01", "BP40257-1001", "3",      "2007001", "CMATC1CD", "H",
-#'   "STUDY01", "BP40257-1001", "3",      "2007001", "CMATC2CD", "H02",
-#'   "STUDY01", "BP40257-1001", "3",      "2007001", "CMATC3CD", "H02A",
-#'   "STUDY01", "BP40257-1001", "3",      "2007001", "CMATC4CD", "H02AB",
-#'   "STUDY01", "BP40257-1002", "1",      "2791596", "CMATC1CD", "C",
-#'   "STUDY01", "BP40257-1002", "1",      "2791596", "CMATC2CD", "C03",
-#'   "STUDY01", "BP40257-1002", "1",      "2791596", "CMATC3CD", "C03D",
-#'   "STUDY01", "BP40257-1002", "1",      "2791596", "CMATC4CD", "C03DA"
-#' )
-#'
-#' derive_vars_atc(cm, facm)
-derive_vars_atc <- function(dataset,
-                            dataset_facm,
-                            by_vars = exprs(
-                              !!!get_admiral_option("subject_keys"),
-                              CMREFID = FAREFID
-                            ),
-                            id_vars = NULL,
-                            value_var = FASTRESC) {
-  value_var <- assert_symbol(enexpr(value_var))
-  assert_vars(by_vars)
-  assert_vars(id_vars, optional = TRUE)
-  assert_data_frame(dataset, required_vars = replace_values_by_names(by_vars))
-  assert_data_frame(dataset_facm, required_vars = exprs(!!!by_vars, !!value_var, FAGRPID, FATESTCD))
-
-  dataset %>%
-    derive_vars_transposed(
-      select(dataset_facm, !!!unname(by_vars), !!value_var, FAGRPID, FATESTCD),
-      by_vars = by_vars,
-      id_vars = id_vars,
-      key_var = FATESTCD,
-      value_var = !!value_var,
-      filter = str_detect(FATESTCD, "^CMATC[1-4](CD)?$")
-    ) %>%
-    select(-starts_with("FA")) %>%
-    rename_with(.fn = ~ str_remove(.x, "^CM"), .cols = starts_with("CMATC"))
-}
diff --git a/R/duplicates.R b/R/duplicates.R
@@ -92,6 +92,10 @@ extract_duplicate_records <- function(dataset, by_vars) {
 #' @param msg The condition message
 #' @param cnd_type Type of condition to signal when detecting duplicate records.
 #'   One of `"message"`, `"warning"` or `"error"`. Default is `"error"`.
+#' @param class Class of the condition
+#'
+#'   The specified classes are added to the classes of the condition.
+#'   `c("duplicate_records", "assert-admiral")` is always added.
 #'
 #' @return No return value, called for side effects
 #'
@@ -113,11 +117,13 @@ signal_duplicate_records <- function(dataset,
                                        "with respect to",
                                        "{.var {replace_values_by_names(by_vars)}}"
                                      ),
-                                     cnd_type = "error") {
+                                     cnd_type = "error",
+                                     class = NULL) {
   assert_expr_list(by_vars)
   assert_data_frame(dataset, required_vars = extract_vars(by_vars), check_is_grouped = FALSE)
   assert_character_vector(msg)
   assert_character_scalar(cnd_type, values = c("message", "warning", "error"))
+  assert_character_vector(class, optional = TRUE)
 
   cnd_funs <- list(message = cli_inform, warning = cli_warn, error = cli_abort)
 
@@ -134,7 +140,11 @@ signal_duplicate_records <- function(dataset,
       msg,
       i = "Run {.run admiral::get_duplicates_dataset()} to access the duplicate records"
     )
-    cnd_funs[[cnd_type]](full_msg)
+    cnd_funs[[cnd_type]](
+      full_msg,
+      class = c(class, "duplicate_records", "assert-admiral"),
+      by_vars = by_vars
+    )
   }
 }