From 1b7e9abaa1bb1d3e2440530f31c581f7e3ad031b Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Fri, 6 Oct 2023 19:40:54 +0000 Subject: [PATCH 01/31] feat: #2142 intiial superseding step --- R/get_summary_records.R | 14 ++++++++++++++ man/get_summary_records.Rd | 6 ++++++ 2 files changed, 20 insertions(+) diff --git a/R/get_summary_records.R b/R/get_summary_records.R index 52406841a8..bc32316034 100644 --- a/R/get_summary_records.R +++ b/R/get_summary_records.R @@ -1,6 +1,13 @@ #' Create Summary Records #' #' @description +#' +#' `r lifecycle::badge("superseded")` +#' +#' Development on `get_summary_records()` is complete, and for new code we recommend +#' switching to using the `dataset_add` argument in `derive_summary_records()`, +#' which is easier to use, more featureful, and still under active development. +#' #' It is not uncommon to have an analysis need whereby one needs to derive an #' analysis value (`AVAL`) from multiple records. The ADaM basic dataset #' structure variable `DTYPE` is available to indicate when a new derived @@ -151,6 +158,13 @@ get_summary_records <- function(dataset, analysis_var, summary_fun, set_values_to = NULL) { + inform( + message = paste0( + "`get_summary_records()` has been superseded, ", + "please use the `dataset_add` argument in `derive_summary_records()`", + sep = "" + ) + ) assert_vars(by_vars) analysis_var <- assert_symbol(enexpr(analysis_var)) filter <- assert_filter_cond(enexpr(filter), optional = TRUE) diff --git a/man/get_summary_records.Rd b/man/get_summary_records.Rd index ba0f3786d9..55ac3bf4b0 100644 --- a/man/get_summary_records.Rd +++ b/man/get_summary_records.Rd @@ -55,6 +55,12 @@ value, an expression or NA. A data frame of derived records. 
} \description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} + +Development on \code{get_summary_records()} is complete, and for new code we recommend +switching to using the \code{dataset_add} argument in \code{derive_summary_records()}, +which is easier to use, more featureful, and still under active development. + It is not uncommon to have an analysis need whereby one needs to derive an analysis value (\code{AVAL}) from multiple records. The ADaM basic dataset structure variable \code{DTYPE} is available to indicate when a new derived From 6050dc9140150a8380fad41e6360c3b58b9f9597 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Fri, 6 Oct 2023 21:01:04 +0000 Subject: [PATCH 02/31] rough draft without missing_values implementation --- R/derive_summary_records.R | 78 +++++++++++++++++++++++++++++++------- R/get_summary_records.R | 2 +- man/get_summary_records.Rd | 2 +- 3 files changed, 66 insertions(+), 16 deletions(-) diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index 9418e0cafd..5f811029cc 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -13,6 +13,10 @@ #' #' @param dataset A data frame. #' +#' @param dataset_add Additional dataset +#' +#' @param dataset_ref +#' #' @param by_vars Variables to consider for generation of groupwise summary #' records. Providing the names of variables in [exprs()] will create a #' groupwise summary and generate summary records for the specified groups. @@ -47,6 +51,8 @@ #' symbol, a numeric value, an expression, or `NA`, e.g., `exprs(PARAMCD = #' "TDOSE", PARCAT1 = "OVERALL")`. #' +#' @param missing_values +#' #' @return A data frame with derived records appended to original dataset. 
#' #' @family der_prm_bds_findings @@ -147,31 +153,75 @@ #' set_values_to = exprs(DTYPE = "AVERAGE") #' ) derive_summary_records <- function(dataset, + dataset_add = NULL, + dataset_ref = NULL, by_vars, filter = NULL, analysis_var, summary_fun, - set_values_to = NULL) { + set_values_to = NULL, + missing_values = NULL) { assert_vars(by_vars) - analysis_var <- assert_symbol(enexpr(analysis_var)) filter <- assert_filter_cond(enexpr(filter), optional = TRUE) - assert_s3_class(summary_fun, "function") assert_data_frame( dataset, - required_vars = expr_c(by_vars, analysis_var) + required_vars = expr_c(by_vars) + ) + assert_data_frame( + dataset_add, + required_vars = expr_c(by_vars), + optional = TRUE + ) + assert_data_frame( + dataset_ref, + required_vars = expr_c(by_vars), + optional = TRUE ) assert_varval_list(set_values_to, optional = TRUE) + assert_expr_list(missing_values, named = TRUE, optional = TRUE) - # Summarise the analysis value and bind to the original dataset - bind_rows( - dataset, - get_summary_records( - dataset, - by_vars = by_vars, - filter = !!filter, - analysis_var = !!analysis_var, - summary_fun = summary_fun, - set_values_to = set_values_to + if (!missing(analysis_var) || !missing(summary_fun)) { + deprecate_warn( + "1.0.0", + I("derive_summary_records(anaylsis_var = , summary_fun = )"), + "derive_summary_records(set_values_to = )" ) + analysis_var <- assert_symbol(enexpr(analysis_var)) + assert_s3_class(summary_fun, "function") + set_values_to <- exprs(!!analysis_var := {{ summary_fun }}(!!analysis_var), !!!set_values_to) + } + + if (is.null(dataset_add)) { + dataset_add <- dataset + } + + summary_records <- dataset_add %>% + group_by(!!!by_vars) %>% + filter_if(filter) %>% + ungroup() %>% + process_set_values_to(set_values_to) + + df_return <- bind_rows( + dataset, + summary_records ) + + if (!is.null(dataset_ref)) { + add_vars <- colnames(dataset_add) + ref_vars <- colnames(dataset_ref) + + new_ref_obs <- anti_join( + select(dataset_ref, 
intersect(add_vars, ref_vars)), + select(new_add_obs, !!!by_vars), + by = map_chr(by_vars, as_name) + ) + + df_return <- bind_rows( + df_return, + new_ref_obs + ) + } + + + return(df_return) } diff --git a/R/get_summary_records.R b/R/get_summary_records.R index bc32316034..95d8a96af3 100644 --- a/R/get_summary_records.R +++ b/R/get_summary_records.R @@ -6,7 +6,7 @@ #' #' Development on `get_summary_records()` is complete, and for new code we recommend #' switching to using the `dataset_add` argument in `derive_summary_records()`, -#' which is easier to use, more featureful, and still under active development. +#' which is easier to use, has more features, and still under active development. #' #' It is not uncommon to have an analysis need whereby one needs to derive an #' analysis value (`AVAL`) from multiple records. The ADaM basic dataset diff --git a/man/get_summary_records.Rd b/man/get_summary_records.Rd index 55ac3bf4b0..18c8cf31ff 100644 --- a/man/get_summary_records.Rd +++ b/man/get_summary_records.Rd @@ -59,7 +59,7 @@ A data frame of derived records. Development on \code{get_summary_records()} is complete, and for new code we recommend switching to using the \code{dataset_add} argument in \code{derive_summary_records()}, -which is easier to use, more featureful, and still under active development. +which is easier to use, has more features, and still under active development. It is not uncommon to have an analysis need whereby one needs to derive an analysis value (\code{AVAL}) from multiple records. 
The ADaM basic dataset From 836cb0a756c1fffa033c83d8825c77c04c0597b9 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Fri, 6 Oct 2023 21:01:55 +0000 Subject: [PATCH 03/31] rough draft of new enhancement --- man/derive_summary_records.Rd | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/man/derive_summary_records.Rd b/man/derive_summary_records.Rd index 6f4154e20a..85643d8f1e 100644 --- a/man/derive_summary_records.Rd +++ b/man/derive_summary_records.Rd @@ -6,11 +6,14 @@ \usage{ derive_summary_records( dataset, + dataset_add = NULL, + dataset_ref = NULL, by_vars, filter = NULL, analysis_var, summary_fun, - set_values_to = NULL + set_values_to = NULL, + missing_values = NULL ) } \arguments{ @@ -18,6 +21,10 @@ derive_summary_records( The variables specified by the \code{by_vars} and \code{analysis_var} argument(s) to be expected.} +\item{dataset_add}{Additional dataset} + +\item{dataset_ref}{} + \item{by_vars}{Variables to consider for generation of groupwise summary records. Providing the names of variables in \code{\link[=exprs]{exprs()}} will create a groupwise summary and generate summary records for the specified groups.} @@ -53,6 +60,8 @@ A list of variable name-value pairs is expected. \item RHS refers to the values to set to the variable. This can be a string, a symbol, a numeric value, an expression, or \code{NA}, e.g., \code{exprs(PARAMCD = "TDOSE", PARCAT1 = "OVERALL")}. }} + +\item{missing_values}{} } \value{ A data frame with derived records appended to original dataset. 
From 0e6ec6c3fb4357228cec8a7c9b6647a48cceebd8 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Tue, 24 Oct 2023 20:30:24 +0000 Subject: [PATCH 04/31] feat: #2142 get a good clean slate --- R/derive_summary_records.R | 32 ++++++++++++-------------------- R/get_summary_records.R | 6 +++--- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index c678b93d14..e359768872 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -157,14 +157,6 @@ derive_summary_records <- function(dataset, ) assert_varval_list(set_values_to, optional = TRUE) assert_expr_list(missing_values, named = TRUE, optional = TRUE) - - if (!missing(analysis_var) || !missing(summary_fun)) { - deprecate_warn( - "1.0.0", - I("derive_summary_records(anaylsis_var = , summary_fun = )"), - "derive_summary_records(set_values_to = )" - required_vars = by_vars - ) assert_varval_list(set_values_to) if (!missing(analysis_var) || !missing(summary_fun)) { @@ -179,18 +171,18 @@ derive_summary_records <- function(dataset, } # Summarise the analysis value and bind to the original dataset - bind_rows( - dataset, - get_summary_records( - dataset, - by_vars = by_vars, - filter = !!filter, - set_values_to = set_values_to - ) - analysis_var <- assert_symbol(enexpr(analysis_var)) - assert_s3_class(summary_fun, "function") - set_values_to <- exprs(!!analysis_var := {{ summary_fun }}(!!analysis_var), !!!set_values_to) - } + # bind_rows( + # dataset, + # get_summary_records( + # dataset, + # by_vars = by_vars, + # filter = !!filter, + # set_values_to = set_values_to + # ) + # analysis_var <- assert_symbol(enexpr(analysis_var)) + # assert_s3_class(summary_fun, "function") + # set_values_to <- exprs(!!analysis_var := {{ summary_fun }}(!!analysis_var), !!!set_values_to) + # } if (is.null(dataset_add)) { dataset_add <- dataset diff --git a/R/get_summary_records.R b/R/get_summary_records.R index c2dd0a1a9f..eec0b0b634 100644 --- 
a/R/get_summary_records.R +++ b/R/get_summary_records.R @@ -71,9 +71,9 @@ #' #' @return A data frame of derived records. #' -#' @family der_gen +#' @family superseded #' -#' @keywords der_gen +#' @keywords internal #' #' @seealso [derive_summary_records()], [derive_var_merged_summary()] #' @@ -162,6 +162,7 @@ get_summary_records <- function(dataset, analysis_var, summary_fun, set_values_to = NULL) { + # lifecycle::signal_stage("superseded", "get_summary_records()", "derive_summary_records(dataset_add= )") inform( message = paste0( "`get_summary_records()` has been superseded, ", @@ -169,7 +170,6 @@ get_summary_records <- function(dataset, sep = "" ) ) - set_values_to) { assert_vars(by_vars) filter <- assert_filter_cond(enexpr(filter), optional = TRUE) assert_data_frame( From 346f595dbdf662982cb3e1c8f4cfa3527321c959 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Thu, 26 Oct 2023 19:16:54 +0000 Subject: [PATCH 05/31] feat: #2142 working enhanced function --- R/derive_summary_records.R | 34 ++++++++---------- R/get_summary_records.R | 14 ++++---- man/derive_summary_records.Rd | 6 ++-- man/derive_var_extreme_flag.Rd | 3 +- man/derive_var_joined_exist_flag.Rd | 3 +- man/derive_var_merged_exist_flag.Rd | 3 +- man/derive_var_merged_summary.Rd | 3 +- man/derive_var_obs_number.Rd | 3 +- man/derive_var_relative_flag.Rd | 3 +- man/derive_vars_joined.Rd | 3 +- man/derive_vars_merged.Rd | 3 +- man/derive_vars_merged_lookup.Rd | 3 +- man/derive_vars_transposed.Rd | 3 +- man/get_summary_records.Rd | 18 ++-------- tests/testthat/test-derive_summary_records.R | 38 ++++++++++++++++++++ 15 files changed, 75 insertions(+), 65 deletions(-) diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index e359768872..7df704f641 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -155,9 +155,9 @@ derive_summary_records <- function(dataset, required_vars = expr_c(by_vars), optional = TRUE ) - assert_varval_list(set_values_to, optional = TRUE) - 
assert_expr_list(missing_values, named = TRUE, optional = TRUE) + assert_varval_list(set_values_to) + assert_expr_list(missing_values, named = TRUE, optional = TRUE) if (!missing(analysis_var) || !missing(summary_fun)) { deprecate_warn( @@ -170,29 +170,15 @@ derive_summary_records <- function(dataset, set_values_to <- exprs(!!analysis_var := {{ summary_fun }}(!!analysis_var), !!!set_values_to) } - # Summarise the analysis value and bind to the original dataset - # bind_rows( - # dataset, - # get_summary_records( - # dataset, - # by_vars = by_vars, - # filter = !!filter, - # set_values_to = set_values_to - # ) - # analysis_var <- assert_symbol(enexpr(analysis_var)) - # assert_s3_class(summary_fun, "function") - # set_values_to <- exprs(!!analysis_var := {{ summary_fun }}(!!analysis_var), !!!set_values_to) - # } - if (is.null(dataset_add)) { dataset_add <- dataset } summary_records <- dataset_add %>% group_by(!!!by_vars) %>% - filter_if(filter) %>% - ungroup() %>% - process_set_values_to(set_values_to) + filter_if(filter) %>% + summarise(!!!set_values_to) %>% + ungroup() df_return <- bind_rows( dataset, @@ -215,6 +201,16 @@ derive_summary_records <- function(dataset, ) } + if (!is.null(missing_values)) { + update_missings <- map2( + syms(names(missing_values)), + missing_values, + ~ expr(if_else(is.na(!!missing_var), !!.y, !!.x)) + ) + names(update_missings) <- names(missing_values) + df_return <- df_return %>% + mutate(!!!update_missings) + } return(df_return) } diff --git a/R/get_summary_records.R b/R/get_summary_records.R index eec0b0b634..21328e9f5f 100644 --- a/R/get_summary_records.R +++ b/R/get_summary_records.R @@ -163,13 +163,13 @@ get_summary_records <- function(dataset, summary_fun, set_values_to = NULL) { # lifecycle::signal_stage("superseded", "get_summary_records()", "derive_summary_records(dataset_add= )") - inform( - message = paste0( - "`get_summary_records()` has been superseded, ", - "please use the `dataset_add` argument in 
`derive_summary_records()`", - sep = "" - ) - ) + # inform( + # message = paste0( + # "`get_summary_records()` has been superseded, ", + # "please use the `dataset_add` argument in `derive_summary_records()`", + # sep = "" + # ) + # ) assert_vars(by_vars) filter <- assert_filter_cond(enexpr(filter), optional = TRUE) assert_data_frame( diff --git a/man/derive_summary_records.Rd b/man/derive_summary_records.Rd index 1a7ecb6563..e3ad7d1945 100644 --- a/man/derive_summary_records.Rd +++ b/man/derive_summary_records.Rd @@ -63,10 +63,6 @@ Set a list of variables to some specified value for the new records \itemize{ \item LHS refer to a variable. \item RHS refers to the values to set to the variable. This can be a string, a -symbol, a numeric value, an expression, or \code{NA}, e.g., \code{exprs(PARAMCD = "TDOSE", PARCAT1 = "OVERALL")}. -}} - -\item{missing_values}{} symbol, a numeric value, an expression or NA. If summary functions are used, the values are summarized by the variables specified for \code{by_vars}. } @@ -79,6 +75,8 @@ For example: PARCAT1 = "OVERALL" ) }\if{html}{\out{}}} + +\item{missing_values}{} } \value{ A data frame with derived records appended to original dataset. 
diff --git a/man/derive_var_extreme_flag.Rd b/man/derive_var_extreme_flag.Rd index 0375ef0309..05ea1d8359 100644 --- a/man/derive_var_extreme_flag.Rd +++ b/man/derive_var_extreme_flag.Rd @@ -267,8 +267,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged_lookup}()}, \code{\link{derive_vars_merged}()}, -\code{\link{derive_vars_transposed}()}, -\code{\link{get_summary_records}()} +\code{\link{derive_vars_transposed}()} } \concept{der_gen} \keyword{der_gen} diff --git a/man/derive_var_joined_exist_flag.Rd b/man/derive_var_joined_exist_flag.Rd index b1f6f4455d..1469dbdb93 100644 --- a/man/derive_var_joined_exist_flag.Rd +++ b/man/derive_var_joined_exist_flag.Rd @@ -362,8 +362,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged_lookup}()}, \code{\link{derive_vars_merged}()}, -\code{\link{derive_vars_transposed}()}, -\code{\link{get_summary_records}()} +\code{\link{derive_vars_transposed}()} } \concept{der_gen} \keyword{der_gen} diff --git a/man/derive_var_merged_exist_flag.Rd b/man/derive_var_merged_exist_flag.Rd index ae0728a4bd..76c08d7083 100644 --- a/man/derive_var_merged_exist_flag.Rd +++ b/man/derive_var_merged_exist_flag.Rd @@ -159,8 +159,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged_lookup}()}, \code{\link{derive_vars_merged}()}, -\code{\link{derive_vars_transposed}()}, -\code{\link{get_summary_records}()} +\code{\link{derive_vars_transposed}()} } \concept{der_gen} \keyword{der_gen} diff --git a/man/derive_var_merged_summary.Rd b/man/derive_var_merged_summary.Rd index 08220316cb..057bdf569c 100644 --- a/man/derive_var_merged_summary.Rd +++ b/man/derive_var_merged_summary.Rd @@ -175,8 +175,7 @@ General Derivation Functions for all ADaMs that returns variable 
appended to dat \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged_lookup}()}, \code{\link{derive_vars_merged}()}, -\code{\link{derive_vars_transposed}()}, -\code{\link{get_summary_records}()} +\code{\link{derive_vars_transposed}()} } \concept{der_gen} \keyword{der_gen} diff --git a/man/derive_var_obs_number.Rd b/man/derive_var_obs_number.Rd index d4c54e6881..ebdd3375f1 100644 --- a/man/derive_var_obs_number.Rd +++ b/man/derive_var_obs_number.Rd @@ -102,8 +102,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged_lookup}()}, \code{\link{derive_vars_merged}()}, -\code{\link{derive_vars_transposed}()}, -\code{\link{get_summary_records}()} +\code{\link{derive_vars_transposed}()} } \concept{der_gen} \keyword{der_gen} diff --git a/man/derive_var_relative_flag.Rd b/man/derive_var_relative_flag.Rd index ac017c896d..679df63d95 100644 --- a/man/derive_var_relative_flag.Rd +++ b/man/derive_var_relative_flag.Rd @@ -178,8 +178,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged_lookup}()}, \code{\link{derive_vars_merged}()}, -\code{\link{derive_vars_transposed}()}, -\code{\link{get_summary_records}()} +\code{\link{derive_vars_transposed}()} } \concept{der_gen} \keyword{der_gen} diff --git a/man/derive_vars_joined.Rd b/man/derive_vars_joined.Rd index 5092e89ab8..7bd1c3f664 100644 --- a/man/derive_vars_joined.Rd +++ b/man/derive_vars_joined.Rd @@ -372,8 +372,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_relative_flag}()}, \code{\link{derive_vars_merged_lookup}()}, \code{\link{derive_vars_merged}()}, -\code{\link{derive_vars_transposed}()}, -\code{\link{get_summary_records}()} +\code{\link{derive_vars_transposed}()} } \concept{der_gen} \keyword{der_gen} diff --git a/man/derive_vars_merged.Rd 
b/man/derive_vars_merged.Rd index dca45a6222..d021aba1fd 100644 --- a/man/derive_vars_merged.Rd +++ b/man/derive_vars_merged.Rd @@ -326,8 +326,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_relative_flag}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged_lookup}()}, -\code{\link{derive_vars_transposed}()}, -\code{\link{get_summary_records}()} +\code{\link{derive_vars_transposed}()} } \concept{der_gen} \keyword{der_gen} diff --git a/man/derive_vars_merged_lookup.Rd b/man/derive_vars_merged_lookup.Rd index 3024d10083..3016da5a55 100644 --- a/man/derive_vars_merged_lookup.Rd +++ b/man/derive_vars_merged_lookup.Rd @@ -182,8 +182,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_relative_flag}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged}()}, -\code{\link{derive_vars_transposed}()}, -\code{\link{get_summary_records}()} +\code{\link{derive_vars_transposed}()} } \concept{der_gen} \keyword{der_gen} diff --git a/man/derive_vars_transposed.Rd b/man/derive_vars_transposed.Rd index 8195dd1166..9e7e753d5d 100644 --- a/man/derive_vars_transposed.Rd +++ b/man/derive_vars_transposed.Rd @@ -97,8 +97,7 @@ General Derivation Functions for all ADaMs that returns variable appended to dat \code{\link{derive_var_relative_flag}()}, \code{\link{derive_vars_joined}()}, \code{\link{derive_vars_merged_lookup}()}, -\code{\link{derive_vars_merged}()}, -\code{\link{get_summary_records}()} +\code{\link{derive_vars_merged}()} } \concept{der_gen} \keyword{der_gen} diff --git a/man/get_summary_records.Rd b/man/get_summary_records.Rd index 2f56e23183..beddbdf179 100644 --- a/man/get_summary_records.Rd +++ b/man/get_summary_records.Rd @@ -10,7 +10,7 @@ get_summary_records( filter = NULL, analysis_var, summary_fun, - set_values_to + set_values_to = NULL ) } \arguments{ @@ -169,18 +169,6 @@ get_summary_records( } \seealso{ 
\code{\link[=derive_summary_records]{derive_summary_records()}}, \code{\link[=derive_var_merged_summary]{derive_var_merged_summary()}} - -General Derivation Functions for all ADaMs that returns variable appended to dataset: -\code{\link{derive_var_extreme_flag}()}, -\code{\link{derive_var_joined_exist_flag}()}, -\code{\link{derive_var_merged_exist_flag}()}, -\code{\link{derive_var_merged_summary}()}, -\code{\link{derive_var_obs_number}()}, -\code{\link{derive_var_relative_flag}()}, -\code{\link{derive_vars_joined}()}, -\code{\link{derive_vars_merged_lookup}()}, -\code{\link{derive_vars_merged}()}, -\code{\link{derive_vars_transposed}()} } -\concept{der_gen} -\keyword{der_gen} +\concept{superseded} +\keyword{internal} diff --git a/tests/testthat/test-derive_summary_records.R b/tests/testthat/test-derive_summary_records.R index c7b26c927e..8bfcf00e71 100644 --- a/tests/testthat/test-derive_summary_records.R +++ b/tests/testthat/test-derive_summary_records.R @@ -143,3 +143,41 @@ test_that("derive_summary_records Test 4: deprecation warning for analysis_var a keys = c("subj", "visit", "seq", "type") ) }) + +test_that("make sure dataset_add works", { + input <- tibble::tribble( + ~subj, ~visit, ~val, ~seq, + "1", 1, 10, 1, + "1", 1, 14, 2, + "1", 1, 9, 3, + "1", 2, 11, 4, + "2", 2, NA_real_, 1 + ) + input_add <- tibble::tribble( + ~subj, ~visit, ~add_val, ~seq, + "1", 1, 100, 1, + "1", 1, 140, 2, + "1", 1, 90, 3 + ) + expected_output<- bind_rows( + input, + tibble::tribble( + ~subj, ~visit, ~val, ~type, + "1", 1, 110, "AVERAGE" + ) + ) + actual_output <- input %>% + derive_summary_records( + dataset_add = input_add, + by_vars = exprs(subj, visit), + set_values_to = exprs( + val = mean(add_val, na.rm = TRUE), + type = "AVERAGE" + ) + ) + expect_dfs_equal( + base = expected_output, + compare = actual_output, + keys = c("subj", "visit", "seq", "type") + ) +}) From 5a6263ebe521520081ba1d7a5ca111217e8f4bf0 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Thu, 26 Oct 2023 
21:23:33 +0000 Subject: [PATCH 06/31] feat: #2142 add appropriate test suite --- R/derive_summary_records.R | 26 +++--- man/derive_param_extreme_record.Rd | 4 + man/get_summary_records.Rd | 3 + tests/testthat/test-derive_summary_records.R | 92 +++++++++++++++++++- 4 files changed, 111 insertions(+), 14 deletions(-) diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index 7df704f641..59dd1f8035 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -152,7 +152,7 @@ derive_summary_records <- function(dataset, ) assert_data_frame( dataset_ref, - required_vars = expr_c(by_vars), + required_vars = by_vars, optional = TRUE ) @@ -180,6 +180,17 @@ derive_summary_records <- function(dataset, summarise(!!!set_values_to) %>% ungroup() + if (!is.null(missing_values)) { + update_missings <- map2( + syms(names(missing_values)), + missing_values, + ~ expr(if_else(is.na(!!.x), !!.y, !!.x)) + ) + names(update_missings) <- names(missing_values) + summary_records <- summary_records %>% + mutate(!!!update_missings) + } + df_return <- bind_rows( dataset, summary_records @@ -191,7 +202,7 @@ derive_summary_records <- function(dataset, new_ref_obs <- anti_join( select(dataset_ref, intersect(add_vars, ref_vars)), - select(new_add_obs, !!!by_vars), + select(summary_records, !!!by_vars), by = map_chr(by_vars, as_name) ) @@ -201,16 +212,5 @@ derive_summary_records <- function(dataset, ) } - if (!is.null(missing_values)) { - update_missings <- map2( - syms(names(missing_values)), - missing_values, - ~ expr(if_else(is.na(!!missing_var), !!.y, !!.x)) - ) - names(update_missings) <- names(missing_values) - df_return <- df_return %>% - mutate(!!!update_missings) - } - return(df_return) } diff --git a/man/derive_param_extreme_record.Rd b/man/derive_param_extreme_record.Rd index 3b68ae01d2..35739e25cf 100644 --- a/man/derive_param_extreme_record.Rd +++ b/man/derive_param_extreme_record.Rd @@ -141,5 +141,9 @@ derive_param_extreme_record( ) ) } +\seealso{ +Other 
superseded: +\code{\link{get_summary_records}()} +} \concept{superseded} \keyword{superseded} diff --git a/man/get_summary_records.Rd b/man/get_summary_records.Rd index beddbdf179..5fa6d1441e 100644 --- a/man/get_summary_records.Rd +++ b/man/get_summary_records.Rd @@ -169,6 +169,9 @@ get_summary_records( } \seealso{ \code{\link[=derive_summary_records]{derive_summary_records()}}, \code{\link[=derive_var_merged_summary]{derive_var_merged_summary()}} + +Other superseded: +\code{\link{derive_param_extreme_record}()} } \concept{superseded} \keyword{internal} diff --git a/tests/testthat/test-derive_summary_records.R b/tests/testthat/test-derive_summary_records.R index 8bfcf00e71..18e47c4bef 100644 --- a/tests/testthat/test-derive_summary_records.R +++ b/tests/testthat/test-derive_summary_records.R @@ -144,7 +144,8 @@ test_that("derive_summary_records Test 4: deprecation warning for analysis_var a ) }) -test_that("make sure dataset_add works", { +## Test 5: make sure dataset_add works ---- +test_that("derive_summary_records Test 5: make sure dataset_add works", { input <- tibble::tribble( ~subj, ~visit, ~val, ~seq, "1", 1, 10, 1, @@ -181,3 +182,92 @@ test_that("make sure dataset_add works", { keys = c("subj", "visit", "seq", "type") ) }) + +## Test 6: test missing values ---- +test_that("derive_summary_records Test 6: test missing values", { + input <- tibble::tribble( + ~subj, ~visit, ~val, ~seq, + "1", 1, 10, 1, + "1", 1, 14, 2, + "1", 1, 9, 3, + "1", 2, 11, 4, + "2", 2, NA_real_, 1 + ) + + expected_output <- bind_rows( + input, + tibble::tribble( + ~subj, ~visit, ~val, + "1", 1, 11, + "1", 2, 11, + "2", 2, 999999 + ) %>% + mutate(type = "AVERAGE") + ) + + actual_output <- input %>% + derive_summary_records( + by_vars = exprs(subj, visit), + set_values_to = exprs( + mean_val = mean(val, na.rm = TRUE), + type = "AVERAGE" + ), + missing_values = exprs(mean_val = 999999) + ) + + expect_dfs_equal( + base = expected_output, + compare = actual_output, + keys = c("subj", 
"visit", "seq", "type") + ) +}) + +## Test 7: make sure dataset_ref works ---- +test_that("derive_summary_records Test 7: make sure dataset_ref works", { + input <- tibble::tribble( + ~subj, ~visit, ~val, ~seq, + "1", 1, 10, 1, + "1", 1, 14, 2, + "1", 1, 9, 3, + "1", 2, 11, 4, + "2", 2, NA_real_, 1 + ) + + input_ref <- tibble::tribble( + ~subj, ~visit, + "1", 1, + "1", 2, + "2", 1, + "2", 2, + ) + expected_output <- bind_rows( + input, + tibble::tribble( + ~subj, ~visit, ~val, + "1", 1, 11, + "1", 2, 11, + "2", 2, NA_real_ + ) %>% + mutate(type = "AVERAGE"), + tibble::tribble( + ~subj, ~visit, + "2", 1, + ) + ) + + actual_output <- input %>% + derive_summary_records( + dataset_ref = input_ref, + by_vars = exprs(subj, visit), + set_values_to = exprs( + val = mean(val, na.rm = TRUE), + type = "AVERAGE" + ) + ) + + expect_dfs_equal( + base = expected_output, + compare = actual_output, + keys = c("subj", "visit", "seq", "type") + ) +}) From 843d8524f77ddaae9cfc9b876a2e925ac8fce2fc Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Thu, 26 Oct 2023 21:37:04 +0000 Subject: [PATCH 07/31] feat: #2142 run styler, lintr, add news and roxygen documentation --- NEWS.md | 2 + R/derive_summary_records.R | 42 +++++++++++++++++--- man/derive_extreme_event.Rd | 3 +- man/derive_extreme_records.Rd | 3 +- man/derive_summary_records.Rd | 38 +++++++++--------- tests/testthat/test-derive_summary_records.R | 16 ++++---- 6 files changed, 69 insertions(+), 35 deletions(-) diff --git a/NEWS.md b/NEWS.md index f4732e6a7c..14a7f12b0c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,8 @@ were enhanced such that more than one summary variable can be derived, e.g., `AVAL` as the sum and `ADT` as the maximum of the contributing records. (#1792) +- `derive_summary_records()` was enhanced with the following optional arguments: `dataset_add`, `dataset_ref`, `missing_values`. 
These arguments, respectively, generate summary variables from additional datasets, retain/add specific records, and impute user-defined missing values. (#2142) + ## Breaking Changes diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index 59dd1f8035..5484f2be77 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -11,11 +11,19 @@ #' retain those common values in the newly derived records. Otherwise new value #' will be set to `NA`. #' -#' @param dataset `r roxygen_param_dataset(expected_vars = c("by_vars", "analysis_var"))` +#' @param dataset `r roxygen_param_dataset(expected_vars = c("by_vars"))` #' #' @param dataset_add Additional dataset #' -#' @param dataset_ref +#' The variables specified for `by_vars` are expected. +#' Observations from the specified dataset are going to be used to calculate and added +#' as new records to the input dataset (`dataset`). +#' +#' @param dataset_ref Reference dataset +#' +#' The variables specified for `by_vars` are expected. For each +#' observation of the specified dataset a new observation is added to the +#' input dataset. #' #' @param by_vars Variables to consider for generation of groupwise summary #' records. Providing the names of variables in [exprs()] will create a @@ -33,9 +41,33 @@ #' + `filter = (dplyr::n() > 2)` will filter n count of `by_vars` greater #' than 2. #' -#' @inheritParams get_summary_records +#' @param set_values_to Variables to be set +#' +#' The specified variables are set to the specified values for the new +#' observations. +#' +#' Set a list of variables to some specified value for the new records +#' + LHS refer to a variable. +#' + RHS refers to the values to set to the variable. This can be a string, a +#' symbol, a numeric value, an expression or NA. If summary functions are +#' used, the values are summarized by the variables specified for `by_vars`. 
+#' +#' For example: +#' ``` +#' set_values_to = exprs( +#' AVAL = sum(AVAL), +#' DTYPE = "AVERAGE", +#' ) +#' ``` +#' +#' @param missing_values Values for missing summary values +#' +#' For observations of the input dataset (`dataset`) or (`dataset_add`) which do not have a +#' complete mapping defined by the summarization in `set_values_to`, the specified values are +#' used. Only variables specified for `set_values_to` can be specified for `missing_values`. #' -#' @param missing_values +#' *Permitted Values*: named list of expressions, e.g., +#' `exprs(AVAL = -9999)` #' #' @return A data frame with derived records appended to original dataset. #' @@ -176,7 +208,7 @@ derive_summary_records <- function(dataset, summary_records <- dataset_add %>% group_by(!!!by_vars) %>% - filter_if(filter) %>% + filter_if(filter) %>% summarise(!!!set_values_to) %>% ungroup() diff --git a/man/derive_extreme_event.Rd b/man/derive_extreme_event.Rd index c21de5a24c..acfff30a87 100644 --- a/man/derive_extreme_event.Rd +++ b/man/derive_extreme_event.Rd @@ -96,8 +96,7 @@ For example: \if{html}{\out{
}}\preformatted{ set_values_to = exprs( AVAL = sum(AVAL), - PARAMCD = "TDOSE", - PARCAT1 = "OVERALL" + DTYPE = "AVERAGE", ) }\if{html}{\out{
}}} diff --git a/man/derive_extreme_records.Rd b/man/derive_extreme_records.Rd index 48b88a38ac..a726f9605f 100644 --- a/man/derive_extreme_records.Rd +++ b/man/derive_extreme_records.Rd @@ -129,8 +129,7 @@ For example: \if{html}{\out{
}}\preformatted{ set_values_to = exprs( AVAL = sum(AVAL), - PARAMCD = "TDOSE", - PARCAT1 = "OVERALL" + DTYPE = "AVERAGE", ) }\if{html}{\out{
}}} } diff --git a/man/derive_summary_records.Rd b/man/derive_summary_records.Rd index 80b2a77422..d077cce1d1 100644 --- a/man/derive_summary_records.Rd +++ b/man/derive_summary_records.Rd @@ -19,11 +19,19 @@ derive_summary_records( \arguments{ \item{dataset}{Input dataset -The variables specified by the \code{by_vars} and \code{analysis_var} argument(s) to be expected.} +The variables specified by the \code{by_vars} argument(s) to be expected.} -\item{dataset_add}{Additional dataset} +\item{dataset_add}{Additional dataset -\item{dataset_ref}{} +The variables specified for \code{by_vars} are expected. +Observations from the specified dataset are going to be used to calculate and added +as new records to the input dataset (\code{dataset}).} + +\item{dataset_ref}{Reference dataset + +The variables specified for \code{by_vars} are expected. For each +observation of the specified dataset a new observation is added to the +input dataset.} \item{by_vars}{Variables to consider for generation of groupwise summary records. Providing the names of variables in \code{\link[=exprs]{exprs()}} will create a @@ -42,18 +50,6 @@ values greater than mean of \code{AVAL} with in \code{by_vars}. than 2. }} -\item{analysis_var}{Analysis variable. - -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Please use \code{set_values_to} instead.} - -\item{summary_fun}{Function that takes as an input the \code{analysis_var} and -performs the calculation. - -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Please use \code{set_values_to} instead. 
- -This can include built-in functions as well as user defined functions, -for example \code{mean} or \code{function(x) mean(x, na.rm = TRUE)}.} - \item{set_values_to}{Variables to be set The specified variables are set to the specified values for the new @@ -71,12 +67,18 @@ For example: \if{html}{\out{
}}\preformatted{ set_values_to = exprs( AVAL = sum(AVAL), - PARAMCD = "TDOSE", - PARCAT1 = "OVERALL" + DTYPE = "AVERAGE", ) }\if{html}{\out{
}}} -\item{missing_values}{} +\item{missing_values}{Values for missing summary values + +For observations of the input dataset (\code{dataset}) or (\code{dataset_add}) which do not have an +complete mapping defined by the summarization defined in \code{set_values_to}. Only variables +specified for \code{set_values_to} can be specified for \code{missing_values}. + +\emph{Permitted Values}: named list of expressions, e.g., +\code{exprs(AVAL = -9999)}} } \value{ A data frame with derived records appended to original dataset. diff --git a/tests/testthat/test-derive_summary_records.R b/tests/testthat/test-derive_summary_records.R index 18e47c4bef..502bb9c2fd 100644 --- a/tests/testthat/test-derive_summary_records.R +++ b/tests/testthat/test-derive_summary_records.R @@ -160,11 +160,11 @@ test_that("derive_summary_records Test 5: make sure dataset_add works", { "1", 1, 140, 2, "1", 1, 90, 3 ) - expected_output<- bind_rows( + expected_output <- bind_rows( input, tibble::tribble( - ~subj, ~visit, ~val, ~type, - "1", 1, 110, "AVERAGE" + ~subj, ~visit, ~val, ~type, + "1", 1, 110, "AVERAGE" ) ) actual_output <- input %>% @@ -235,10 +235,10 @@ test_that("derive_summary_records Test 7: make sure dataset_ref works", { input_ref <- tibble::tribble( ~subj, ~visit, - "1", 1, - "1", 2, - "2", 1, - "2", 2, + "1", 1, + "1", 2, + "2", 1, + "2", 2, ) expected_output <- bind_rows( input, @@ -251,7 +251,7 @@ test_that("derive_summary_records Test 7: make sure dataset_ref works", { mutate(type = "AVERAGE"), tibble::tribble( ~subj, ~visit, - "2", 1, + "2", 1, ) ) From 2ea2c50ef1489bf889013e72545fdef6e0931627 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Thu, 26 Oct 2023 21:43:12 +0000 Subject: [PATCH 08/31] chore: #2142 spelling/grammar --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 14a7f12b0c..5c1d2ac884 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,7 +12,7 @@ were enhanced such that more than one summary variable can be derived, e.g., 
`AVAL` as the sum and `ADT` as the maximum of the contributing records. (#1792) -- `derive_summary_records()` was enhanced with the following optional arguments: `dataset_add`, `dataset_ref`, `missing_values`. These arguemnts respectively, generate summary variables from additional datasets, retaining/add specific records that and impute user-defined missing values. (#2142) +- `derive_summary_records()` was enhanced with the following optional arguments: `dataset_add`, `dataset_ref`, `missing_values`. These arguments respectively, generate summary variables from additional datasets, retain/add specific records that and impute user-defined missing values. (#2142) ## Breaking Changes From 252e3c2f2d34f696bf82d31f1ba3998be46a204a Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Thu, 26 Oct 2023 21:47:28 +0000 Subject: [PATCH 09/31] chore: #2142 fix test --- tests/testthat/test-derive_summary_records.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-derive_summary_records.R b/tests/testthat/test-derive_summary_records.R index 502bb9c2fd..69fb2f6d3d 100644 --- a/tests/testthat/test-derive_summary_records.R +++ b/tests/testthat/test-derive_summary_records.R @@ -197,10 +197,10 @@ test_that("derive_summary_records Test 6: test missing values", { expected_output <- bind_rows( input, tibble::tribble( - ~subj, ~visit, ~val, - "1", 1, 11, - "1", 2, 11, - "2", 2, 999999 + ~subj, ~visit, ~aval, + "1", 1, 11, + "1", 2, 11, + "2", 2, 999999 ) %>% mutate(type = "AVERAGE") ) @@ -209,10 +209,10 @@ test_that("derive_summary_records Test 6: test missing values", { derive_summary_records( by_vars = exprs(subj, visit), set_values_to = exprs( - mean_val = mean(val, na.rm = TRUE), + aval = mean(val, na.rm = TRUE), type = "AVERAGE" ), - missing_values = exprs(mean_val = 999999) + missing_values = exprs(aval = 999999) ) expect_dfs_equal( From bf9470f6f3fe2e10c50bf24e48fd8d88075153b1 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Thu, 26 Oct 
2023 21:55:19 +0000 Subject: [PATCH 10/31] should we inform superseded --- R/get_summary_records.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/R/get_summary_records.R b/R/get_summary_records.R index 21328e9f5f..3058fa3f11 100644 --- a/R/get_summary_records.R +++ b/R/get_summary_records.R @@ -162,14 +162,6 @@ get_summary_records <- function(dataset, analysis_var, summary_fun, set_values_to = NULL) { - # lifecycle::signal_stage("superseded", "get_summary_records()", "derive_summary_records(dataset_add= )") - # inform( - # message = paste0( - # "`get_summary_records()` has been superseded, ", - # "please use the `dataset_add` argument in `derive_summary_records()`", - # sep = "" - # ) - # ) assert_vars(by_vars) filter <- assert_filter_cond(enexpr(filter), optional = TRUE) assert_data_frame( From 1adf60cb4a5b309d2c901a226ee5e18d99fc1bd6 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Thu, 26 Oct 2023 21:57:18 +0000 Subject: [PATCH 11/31] retain deprecated arguments to pass cicd --- R/derive_summary_records.R | 2 ++ man/derive_summary_records.Rd | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index 5484f2be77..453fba2cb4 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -69,6 +69,8 @@ #' *Permitted Values*: named list of expressions, e.g., #' `exprs(AVAL = -9999)` #' +#' @inheritParams get_summary_records +#' #' @return A data frame with derived records appended to original dataset. #' #' @family der_prm_bds_findings diff --git a/man/derive_summary_records.Rd b/man/derive_summary_records.Rd index d077cce1d1..e4d8da4c9e 100644 --- a/man/derive_summary_records.Rd +++ b/man/derive_summary_records.Rd @@ -50,6 +50,18 @@ values greater than mean of \code{AVAL} with in \code{by_vars}. than 2. }} +\item{analysis_var}{Analysis variable. 
+ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Please use \code{set_values_to} instead.} + +\item{summary_fun}{Function that takes as an input the \code{analysis_var} and +performs the calculation. + +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Please use \code{set_values_to} instead. + +This can include built-in functions as well as user defined functions, +for example \code{mean} or \code{function(x) mean(x, na.rm = TRUE)}.} + \item{set_values_to}{Variables to be set The specified variables are set to the specified values for the new From 7ba57b6553c31f03c449df7f6436c137f52cb99d Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Fri, 27 Oct 2023 17:45:12 +0000 Subject: [PATCH 12/31] chore: #2142 add remotes for admiraldev for proper branching strategy --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 02f6075009..1367d7e943 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,6 +38,7 @@ LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Depends: R (>= 4.0) +Remotes: pharmaverse/admiraldev Imports: admiraldev (>= 0.4.0), dplyr (>= 0.8.4), From fc7e5f3a3726b0d7808377cac22e9d667ac236ff Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Fri, 27 Oct 2023 18:02:33 +0000 Subject: [PATCH 13/31] min dev versioning --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1367d7e943..d8ade14048 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,7 @@ LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Depends: R (>= 4.0) -Remotes: pharmaverse/admiraldev +Remotes: pharmaverse/admiraldev (>= 0.5.0.9000) Imports: admiraldev (>= 0.4.0), dplyr (>= 0.8.4), From ef2ac9418214d4a840c7f2ea92c2e7b0b7431e01 Mon Sep 17 
00:00:00 2001 From: Daniel Sjoberg Date: Fri, 27 Oct 2023 12:39:35 -0700 Subject: [PATCH 14/31] Update DESCRIPTION --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d8ade14048..f9623c26b8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,9 +38,9 @@ LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Depends: R (>= 4.0) -Remotes: pharmaverse/admiraldev (>= 0.5.0.9000) +Remotes: pharmaverse/admiraldev Imports: - admiraldev (>= 0.4.0), + admiraldev (>= 0.5.0.9000), dplyr (>= 0.8.4), hms (>= 0.5.3), lifecycle (>= 0.1.0), From 053a419bc92d7a04d7ba1610b26e26afc1ed956e Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Tue, 31 Oct 2023 18:50:44 +0000 Subject: [PATCH 15/31] chore: #2142 address feedback --- NEWS.md | 2 +- R/get_summary_records.R | 5 ++--- man/get_summary_records.Rd | 5 ++--- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 38a0f62c8a..6fd2f1da52 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,7 +12,7 @@ were enhanced such that more than one summary variable can be derived, e.g., `AVAL` as the sum and `ADT` as the maximum of the contributing records. (#1792) -- `derive_summary_records()` was enhanced with the following optional arguments: `dataset_add`, `dataset_ref`, `missing_values`. These arguments respectively, generate summary variables from additional datasets, retain/add specific records that and impute user-defined missing values. (#2142) +- `derive_summary_records()` was enhanced with the following optional arguments: `dataset_add`, `dataset_ref`, `missing_values`. These arguments respectively, generate summary variables from additional datasets, retain/add specific records from a reference dataset, and impute user-defined missing values. 
(#2142) ## Breaking Changes diff --git a/R/get_summary_records.R b/R/get_summary_records.R index 3058fa3f11..e859be443b 100644 --- a/R/get_summary_records.R +++ b/R/get_summary_records.R @@ -5,8 +5,7 @@ #' `r lifecycle::badge("superseded")` #' #' Development on `get_summary_records()` is complete, and for new code we recommend -#' switching to using the `dataset_add` argument in `derive_summary_records()`, -#' which is easier to use, has more features, and still under active development. +#' switching to using the `dataset_add` argument in `derive_summary_records()`. #' #' It is not uncommon to have an analysis need whereby one needs to derive an #' analysis value (`AVAL`) from multiple records. The ADaM basic dataset @@ -73,7 +72,7 @@ #' #' @family superseded #' -#' @keywords internal +#' @keywords superseded #' #' @seealso [derive_summary_records()], [derive_var_merged_summary()] #' diff --git a/man/get_summary_records.Rd b/man/get_summary_records.Rd index 5fa6d1441e..c8f0473c03 100644 --- a/man/get_summary_records.Rd +++ b/man/get_summary_records.Rd @@ -76,8 +76,7 @@ A data frame of derived records. \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Development on \code{get_summary_records()} is complete, and for new code we recommend -switching to using the \code{dataset_add} argument in \code{derive_summary_records()}, -which is easier to use, has more features, and still under active development. +switching to using the \code{dataset_add} argument in \code{derive_summary_records()}. It is not uncommon to have an analysis need whereby one needs to derive an analysis value (\code{AVAL}) from multiple records. 
The ADaM basic dataset @@ -174,4 +173,4 @@ Other superseded: \code{\link{derive_param_extreme_record}()} } \concept{superseded} -\keyword{internal} +\keyword{superseded} From 0b4ef6004808435e80f46054526e30a09ae37b4a Mon Sep 17 00:00:00 2001 From: Jerry Johnson Date: Tue, 31 Oct 2023 19:10:16 +0000 Subject: [PATCH 16/31] upversion our description page to match current version up on github --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0ad321d0cf..367b18fcbb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: admiral Type: Package Title: ADaM in R Asset Library -Version: 0.12.2.9001 +Version: 0.12.3.9001 Authors@R: c( person("Ben", "Straub", email = "ben.x.straub@gsk.com", role = c("aut", "cre")), person("Stefan", "Bundfuss", role = "aut"), From 6ee85002f271d5ceaa9be391c219acab834ed571 Mon Sep 17 00:00:00 2001 From: Ben Straub Date: Thu, 2 Nov 2023 19:30:46 +0000 Subject: [PATCH 17/31] docs: little note for running website versions --- vignettes/articles/website-versions.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/vignettes/articles/website-versions.Rmd b/vignettes/articles/website-versions.Rmd index 8b1763402b..fdb51e2805 100644 --- a/vignettes/articles/website-versions.Rmd +++ b/vignettes/articles/website-versions.Rmd @@ -5,6 +5,7 @@ title: "Previous Versions of Website" ```{r, include = FALSE} # TO USE THIS ARTICLE, THE DESCRIPTION FILE MUST INCLUDE # Config/Needs/website: gert +# Make sure to copy the gh-pages branch to your local git knitr::opts_chunk$set( collapse = TRUE, From 19ced7765858b4f834cdba398e318ed9eea13edd Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Fri, 3 Nov 2023 17:47:38 +0000 Subject: [PATCH 18/31] feat: #2142 rename filter to filter_add --- NEWS.md | 2 ++ R/derive_param_exposure.R | 13 +++++++++++-- R/derive_summary_records.R | 14 ++++++++++++-- inst/templates/ad_adeg.R | 2 +- inst/templates/ad_advs.R | 2 +- man/derive_param_exposure.Rd | 1 + 
man/derive_summary_records.Rd | 3 ++- tests/testthat/test-call_derivation.R | 8 ++++---- tests/testthat/test-compute_scale.R | 2 +- tests/testthat/test-derive_summary_records.R | 2 +- vignettes/questionnaires.Rmd | 8 ++++---- 11 files changed, 40 insertions(+), 17 deletions(-) diff --git a/NEWS.md b/NEWS.md index a6e2dfb262..d366432902 100644 --- a/NEWS.md +++ b/NEWS.md @@ -26,6 +26,8 @@ for the event number, which can be used in `order`. (#2140) `analysis_var` and `summary_fun` were deprecated in favor of `set_values_to`. (#1792) +- In `derive_summary_records()` and `derive_param_exposure()` the argument `filter` was renamed to `filter_add` (#2142) + - In `derive_var_merged_summary()` the arguments `new_var`, `analysis_var`, and `summary_fun` were deprecated in favor of `new_vars`. (#1792) diff --git a/R/derive_param_exposure.R b/R/derive_param_exposure.R index 3c0e81c6b3..4b88da68f6 100644 --- a/R/derive_param_exposure.R +++ b/R/derive_param_exposure.R @@ -131,6 +131,7 @@ derive_param_exposure <- function(dataset, analysis_var, summary_fun, filter = NULL, + filter_add = NULL, set_values_to = NULL) { by_vars <- assert_vars(by_vars) analysis_var <- assert_symbol(enexpr(analysis_var)) @@ -158,7 +159,15 @@ derive_param_exposure <- function(dataset, assert_data_frame(dataset, required_vars = expr_c(by_vars, analysis_var, exprs(PARAMCD), dates) ) - filter <- assert_filter_cond(enexpr(filter), optional = TRUE) + if (!missing(filter)) { + deprecate_warn( + "1.0.0", + I("derive_param_exposure(filter = )"), + "derive_param_exposure(filter_add = )" + ) + filter <- assert_filter_cond(enexpr(filter), optional = TRUE) + } + filter <- assert_filter_cond(enexpr(filter_add), optional = TRUE) assert_varval_list(set_values_to, required_elements = "PARAMCD") assert_param_does_not_exist(dataset, set_values_to$PARAMCD) assert_character_scalar(input_code) @@ -173,7 +182,7 @@ derive_param_exposure <- function(dataset, derive_summary_records( dataset, by_vars = by_vars, - filter = 
PARAMCD == !!input_code & !!filter, + filter_add = PARAMCD == !!input_code & !!filter, set_values_to = exprs( !!analysis_var := {{ summary_fun }}(!!analysis_var), !!!set_dtm, diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index 453fba2cb4..ca2a3bc2e1 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -164,17 +164,17 @@ #' ) #' ) %>% #' arrange(USUBJID, AVISIT) -derive_summary_records <- function(dataset, +derive_summary_records <- function(dataset = NULL, dataset_add = NULL, dataset_ref = NULL, by_vars, filter = NULL, + filter_add = NULL, analysis_var, summary_fun, set_values_to = NULL, missing_values = NULL) { assert_vars(by_vars) - filter <- assert_filter_cond(enexpr(filter), optional = TRUE) assert_data_frame( dataset, required_vars = expr_c(by_vars) @@ -204,6 +204,16 @@ derive_summary_records <- function(dataset, set_values_to <- exprs(!!analysis_var := {{ summary_fun }}(!!analysis_var), !!!set_values_to) } + if (!missing(filter)) { + deprecate_warn( + "1.0.0", + I("derive_summary_records(filter = )"), + "derive_summary_records(filter_add = )" + ) + filter <- assert_filter_cond(enexpr(filter), optional = TRUE) + } + filter <- assert_filter_cond(enexpr(filter_add), optional = TRUE) + if (is.null(dataset_add)) { dataset_add <- dataset } diff --git a/inst/templates/ad_adeg.R b/inst/templates/ad_adeg.R index 7df467df42..4e574220d8 100644 --- a/inst/templates/ad_adeg.R +++ b/inst/templates/ad_adeg.R @@ -198,7 +198,7 @@ adeg <- adeg %>% adeg <- adeg %>% derive_summary_records( by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, PARAMCD, AVISITN, AVISIT, ADT), - filter = dplyr::n() >= 2 & PARAMCD != "EGINTP", + filter_add = dplyr::n() >= 2 & PARAMCD != "EGINTP", set_values_to = exprs( AVAL = mean(AVAL, na.rm = TRUE), DTYPE = "AVERAGE" diff --git a/inst/templates/ad_advs.R b/inst/templates/ad_advs.R index 85f00152c6..191be64b22 100644 --- a/inst/templates/ad_advs.R +++ b/inst/templates/ad_advs.R @@ -157,7 +157,7 @@ advs <- 
advs %>% advs <- advs %>% derive_summary_records( by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, PARAMCD, AVISITN, AVISIT, ADT, ADY), - filter = !is.na(AVAL), + filter_add = !is.na(AVAL), set_values_to = exprs( AVAL = mean(AVAL), DTYPE = "AVERAGE" diff --git a/man/derive_param_exposure.Rd b/man/derive_param_exposure.Rd index e54fd5704d..ff66659f43 100644 --- a/man/derive_param_exposure.Rd +++ b/man/derive_param_exposure.Rd @@ -11,6 +11,7 @@ derive_param_exposure( analysis_var, summary_fun, filter = NULL, + filter_add = NULL, set_values_to = NULL ) } diff --git a/man/derive_summary_records.Rd b/man/derive_summary_records.Rd index e4d8da4c9e..311e2f0e9d 100644 --- a/man/derive_summary_records.Rd +++ b/man/derive_summary_records.Rd @@ -5,11 +5,12 @@ \title{Add New Records Within By Groups Using Aggregation Functions} \usage{ derive_summary_records( - dataset, + dataset = NULL, dataset_add = NULL, dataset_ref = NULL, by_vars, filter = NULL, + filter_add = NULL, analysis_var, summary_fun, set_values_to = NULL, diff --git a/tests/testthat/test-call_derivation.R b/tests/testthat/test-call_derivation.R index c5e9e10abc..a2c0484ca7 100644 --- a/tests/testthat/test-call_derivation.R +++ b/tests/testthat/test-call_derivation.R @@ -10,7 +10,7 @@ test_that("call_derivation Test 1: Test that call_derivation generates expected VSSTRESN = mean(VSSTRESN, na.rm = TRUE), DTYPE = "AVERAGE" ), - filter = dplyr::n() >= 2L + filter_add = dplyr::n() >= 2L ) %>% derive_summary_records( by_vars = exprs(USUBJID, VSTESTCD), @@ -18,7 +18,7 @@ test_that("call_derivation Test 1: Test that call_derivation generates expected VSSTRESN = max(VSSTRESN, na.rm = TRUE), DTYPE = "MAXIMUM" ), - filter = dplyr::n() >= 2L + filter_add = dplyr::n() >= 2L ) %>% derive_summary_records( by_vars = exprs(USUBJID, VSTESTCD), @@ -26,7 +26,7 @@ test_that("call_derivation Test 1: Test that call_derivation generates expected VSSTRESN = min(VSSTRESN, na.rm = TRUE), DTYPE = "MINIMUM" ), - filter = dplyr::n() >= 2L + 
filter_add = dplyr::n() >= 2L ) actual_output <- call_derivation( @@ -53,7 +53,7 @@ test_that("call_derivation Test 1: Test that call_derivation generates expected ) ), by_vars = exprs(USUBJID, VSTESTCD), - filter = dplyr::n() >= 2L + filter_add = dplyr::n() >= 2L ) expect_dfs_equal( diff --git a/tests/testthat/test-compute_scale.R b/tests/testthat/test-compute_scale.R index afb20f819f..74ae6d7ec9 100644 --- a/tests/testthat/test-compute_scale.R +++ b/tests/testthat/test-compute_scale.R @@ -96,7 +96,7 @@ test_that("compute_scale Test 5: compute_scale() works as expected within derive_summary_records( input, by_vars = exprs(STUDYID, USUBJID, AVISIT, AVISITN), - filter = (PARAMCD %in% c("ITEM1", "ITEM2", "ITEM3")), + filter_add = (PARAMCD %in% c("ITEM1", "ITEM2", "ITEM3")), set_values_to = exprs( AVAL = compute_scale(AVAL, c(1, 5), c(0, 100), flip_direction = TRUE, min_n = 3), PARAMCD = "ITEMAVG" diff --git a/tests/testthat/test-derive_summary_records.R b/tests/testthat/test-derive_summary_records.R index 69fb2f6d3d..78bf2bc614 100644 --- a/tests/testthat/test-derive_summary_records.R +++ b/tests/testthat/test-derive_summary_records.R @@ -51,7 +51,7 @@ test_that("derive_summary_records Test 2: Filter record within `by_vars`", { actual_output <- input %>% derive_summary_records( by_vars = exprs(subj, visit), - filter = n() > 2, + filter_add = n() > 2, set_values_to = exprs( val = mean(val), seq = max(seq), diff --git a/vignettes/questionnaires.Rmd b/vignettes/questionnaires.Rmd index 5a1b353e70..dc24569f09 100644 --- a/vignettes/questionnaires.Rmd +++ b/vignettes/questionnaires.Rmd @@ -143,7 +143,7 @@ adgad7 <- adqs %>% derive_summary_records( by_vars = exprs(STUDYID, USUBJID, AVISIT, ADT, ADY, TRTSDT, DTHCAUS), # Select records contributing to total score - filter = str_detect(PARAMCD, "GAD020[1-7]"), + filter_add = str_detect(PARAMCD, "GAD020[1-7]"), set_values_to = exprs( AVAL = sum(AVAL, na.rm = TRUE), PARAMCD = "GAD02TS", @@ -171,7 +171,7 @@ adgdssf <- adqs %>% 
derive_summary_records( by_vars = exprs(STUDYID, USUBJID, AVISIT, ADT, ADY, TRTSDT, DTHCAUS), # Select records contributing to total score - filter = str_detect(PARAMCD, "GDS02[01][0-9]"), + filter_add = str_detect(PARAMCD, "GDS02[01][0-9]"), set_values_to = exprs( AVAL = compute_scale( AVAL, @@ -518,7 +518,7 @@ be derived by `derive_summary_records()`. ```{r} adgdssf <- adgdssf %>% derive_summary_records( - filter = str_detect(PARAMCD, "GDS02[01][0-9]"), + filter_add = str_detect(PARAMCD, "GDS02[01][0-9]"), by_vars = exprs(USUBJID, AVISIT), set_values_to = exprs( AVAL = sum(!is.na(AVAL)) / 15 >= 0.9, @@ -567,7 +567,7 @@ adgdssf <- adgdssf %>% ) ) %>% derive_summary_records( - filter = str_detect(PARAMCD, "GDS02[01][0-9]"), + filter_add = str_detect(PARAMCD, "GDS02[01][0-9]"), by_vars = exprs(USUBJID, AVISIT), set_values_to = exprs( AVAL = all(!is.na(AVAL)), From 48cbbc667f23a3f61b3a7562359b952a75304d61 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Fri, 3 Nov 2023 13:48:25 -0400 Subject: [PATCH 19/31] Update R/derive_summary_records.R Co-authored-by: Ben Straub --- R/derive_summary_records.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index ca2a3bc2e1..0ac2e09529 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -62,7 +62,7 @@ #' #' @param missing_values Values for missing summary values #' -#' For observations of the input dataset (`dataset`) or (`dataset_add`) which do not have an +#' For observations of the input dataset (`dataset`) or (`dataset_add`) which do not have a #' complete mapping defined by the summarization defined in `set_values_to`. Only variables #' specified for `set_values_to` can be specified for `missing_values`. 
#' From 53bdf6391f5aa9a95fa455ee8c0685eae70405b8 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Fri, 3 Nov 2023 18:36:12 +0000 Subject: [PATCH 20/31] feat: #2142 get checks appropriately running --- R/derive_param_exposure.R | 4 ++++ R/derive_summary_records.R | 9 ++------- man/derive_param_exposure.Rd | 1 + man/derive_summary_records.Rd | 4 ++-- tests/testthat/test-call_derivation.R | 4 ++++ tests/testthat/test-derive_param_exposure.R | 3 +++ tests/testthat/test-derive_summary_records.R | 5 +++++ 7 files changed, 21 insertions(+), 9 deletions(-) diff --git a/R/derive_param_exposure.R b/R/derive_param_exposure.R index 4b88da68f6..d1e8b1d564 100644 --- a/R/derive_param_exposure.R +++ b/R/derive_param_exposure.R @@ -126,6 +126,7 @@ #' ) %>% #' select(-ASTDTM, -AENDTM) derive_param_exposure <- function(dataset, + dataset_add, by_vars, input_code, analysis_var, @@ -159,6 +160,8 @@ derive_param_exposure <- function(dataset, assert_data_frame(dataset, required_vars = expr_c(by_vars, analysis_var, exprs(PARAMCD), dates) ) + assert_data_frame(dataset_add, required_vars = by_vars) + if (!missing(filter)) { deprecate_warn( "1.0.0", @@ -181,6 +184,7 @@ derive_param_exposure <- function(dataset, derive_summary_records( dataset, + dataset_add, by_vars = by_vars, filter_add = PARAMCD == !!input_code & !!filter, set_values_to = exprs( diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index ca2a3bc2e1..d885ad275c 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -164,8 +164,8 @@ #' ) #' ) %>% #' arrange(USUBJID, AVISIT) -derive_summary_records <- function(dataset = NULL, - dataset_add = NULL, +derive_summary_records <- function(dataset, + dataset_add, dataset_ref = NULL, by_vars, filter = NULL, @@ -182,7 +182,6 @@ derive_summary_records <- function(dataset = NULL, assert_data_frame( dataset_add, required_vars = expr_c(by_vars), - optional = TRUE ) assert_data_frame( dataset_ref, @@ -214,10 +213,6 @@ derive_summary_records <- 
function(dataset = NULL, } filter <- assert_filter_cond(enexpr(filter_add), optional = TRUE) - if (is.null(dataset_add)) { - dataset_add <- dataset - } - summary_records <- dataset_add %>% group_by(!!!by_vars) %>% filter_if(filter) %>% diff --git a/man/derive_param_exposure.Rd b/man/derive_param_exposure.Rd index ff66659f43..4073aa867c 100644 --- a/man/derive_param_exposure.Rd +++ b/man/derive_param_exposure.Rd @@ -6,6 +6,7 @@ \usage{ derive_param_exposure( dataset, + dataset_add, by_vars, input_code, analysis_var, diff --git a/man/derive_summary_records.Rd b/man/derive_summary_records.Rd index 311e2f0e9d..cca5c59232 100644 --- a/man/derive_summary_records.Rd +++ b/man/derive_summary_records.Rd @@ -5,8 +5,8 @@ \title{Add New Records Within By Groups Using Aggregation Functions} \usage{ derive_summary_records( - dataset = NULL, - dataset_add = NULL, + dataset, + dataset_add, dataset_ref = NULL, by_vars, filter = NULL, diff --git a/tests/testthat/test-call_derivation.R b/tests/testthat/test-call_derivation.R index a2c0484ca7..667a960b52 100644 --- a/tests/testthat/test-call_derivation.R +++ b/tests/testthat/test-call_derivation.R @@ -5,6 +5,7 @@ test_that("call_derivation Test 1: Test that call_derivation generates expected expected_output <- input %>% derive_summary_records( + dataset_add = input, by_vars = exprs(USUBJID, VSTESTCD), set_values_to = exprs( VSSTRESN = mean(VSSTRESN, na.rm = TRUE), @@ -13,6 +14,7 @@ test_that("call_derivation Test 1: Test that call_derivation generates expected filter_add = dplyr::n() >= 2L ) %>% derive_summary_records( + dataset_add = input, by_vars = exprs(USUBJID, VSTESTCD), set_values_to = exprs( VSSTRESN = max(VSSTRESN, na.rm = TRUE), @@ -21,6 +23,7 @@ test_that("call_derivation Test 1: Test that call_derivation generates expected filter_add = dplyr::n() >= 2L ) %>% derive_summary_records( + dataset_add = input, by_vars = exprs(USUBJID, VSTESTCD), set_values_to = exprs( VSSTRESN = min(VSSTRESN, na.rm = TRUE), @@ -31,6 +34,7 @@ 
test_that("call_derivation Test 1: Test that call_derivation generates expected actual_output <- call_derivation( dataset = input, + dataset_add = input, derivation = derive_summary_records, variable_params = list( params( diff --git a/tests/testthat/test-derive_param_exposure.R b/tests/testthat/test-derive_param_exposure.R index dd3582f406..6b5c7c7ae3 100644 --- a/tests/testthat/test-derive_param_exposure.R +++ b/tests/testthat/test-derive_param_exposure.R @@ -61,6 +61,7 @@ test_that("derive_param_exposure Test 1: works with DTM variables", { actual_output <- input %>% derive_param_exposure( + dataset_add = input, by_vars = exprs(USUBJID), input_code = "DOSE", analysis_var = AVAL, @@ -68,6 +69,7 @@ test_that("derive_param_exposure Test 1: works with DTM variables", { set_values_to = exprs(PARAMCD = "TDOSE", PARCAT1 = "OVERALL") ) %>% derive_param_exposure( + dataset_add = input, by_vars = exprs(USUBJID), input_code = "DOSE", analysis_var = AVAL, @@ -75,6 +77,7 @@ test_that("derive_param_exposure Test 1: works with DTM variables", { set_values_to = exprs(PARAMCD = "AVDOSE", PARCAT1 = "OVERALL") ) %>% derive_param_exposure( + dataset_add = input, by_vars = exprs(USUBJID), input_code = "ADJ", analysis_var = AVALC, diff --git a/tests/testthat/test-derive_summary_records.R b/tests/testthat/test-derive_summary_records.R index 78bf2bc614..6bc5354943 100644 --- a/tests/testthat/test-derive_summary_records.R +++ b/tests/testthat/test-derive_summary_records.R @@ -11,6 +11,7 @@ test_that("derive_summary_records Test 1: creates new record per group and group actual_output <- input %>% derive_summary_records( + dataset_add = input, by_vars = exprs(subj, visit), set_values_to = exprs( val = mean(val), @@ -50,6 +51,7 @@ test_that("derive_summary_records Test 2: Filter record within `by_vars`", { actual_output <- input %>% derive_summary_records( + dataset_add = input, by_vars = exprs(subj, visit), filter_add = n() > 2, set_values_to = exprs( @@ -129,6 +131,7 @@ 
test_that("derive_summary_records Test 4: deprecation warning for analysis_var a expect_warning( actual_output <- input %>% derive_summary_records( + dataset_add = input, by_vars = exprs(subj, visit), analysis_var = val, summary_fun = mean, @@ -207,6 +210,7 @@ test_that("derive_summary_records Test 6: test missing values", { actual_output <- input %>% derive_summary_records( + dataset_add = input, by_vars = exprs(subj, visit), set_values_to = exprs( aval = mean(val, na.rm = TRUE), @@ -257,6 +261,7 @@ test_that("derive_summary_records Test 7: make sure dataset_ref works", { actual_output <- input %>% derive_summary_records( + dataset_add = input, dataset_ref = input_ref, by_vars = exprs(subj, visit), set_values_to = exprs( From a15e58a4058f90a47f14c82c041b18b25f729a9e Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Fri, 3 Nov 2023 19:33:47 +0000 Subject: [PATCH 21/31] roxygen stuff and vignettes --- R/derive_param_exposure.R | 10 ++++++++++ R/derive_summary_records.R | 15 ++++++--------- man/derive_param_exposure.Rd | 9 +++++++++ man/derive_summary_records.Rd | 7 +++++-- tests/testthat/test-compute_scale.R | 1 + tests/testthat/test-derive_param_exposure.R | 7 +++++++ vignettes/bds_exposure.Rmd | 2 ++ vignettes/bds_finding.Rmd | 3 ++- vignettes/questionnaires.Rmd | 7 +++++++ 9 files changed, 49 insertions(+), 12 deletions(-) diff --git a/R/derive_param_exposure.R b/R/derive_param_exposure.R index d1e8b1d564..27cb9bdeee 100644 --- a/R/derive_param_exposure.R +++ b/R/derive_param_exposure.R @@ -8,6 +8,13 @@ #' `PARAMCD` is expected as well, #' + Either `ASTDTM` and `AENDTM` or `ASTDT` and `AENDT` are also expected. #' +#' @param dataset_add Additional dataset +#' +#' The variables specified for `by_vars` are expected. +#' Observations from the specified dataset are going to be used to calculate and added +#' as new records to the input dataset (`dataset`). 
+#' +#' #' @param filter Filter condition #' #' The specified condition is applied to the input dataset before deriving the @@ -95,6 +102,7 @@ #' # Cumulative dose #' adex %>% #' derive_param_exposure( +#' dataset_add = adex, #' by_vars = exprs(USUBJID), #' set_values_to = exprs(PARAMCD = "TDOSE", PARCAT1 = "OVERALL"), #' input_code = "DOSE", @@ -106,6 +114,7 @@ #' # average dose in w2-24 #' adex %>% #' derive_param_exposure( +#' dataset_add = adex, #' by_vars = exprs(USUBJID), #' filter = VISIT %in% c("WEEK 2", "WEEK 24"), #' set_values_to = exprs(PARAMCD = "AVDW224", PARCAT1 = "WEEK2-24"), @@ -118,6 +127,7 @@ #' # Any dose adjustment? #' adex %>% #' derive_param_exposure( +#' dataset_add = adex, #' by_vars = exprs(USUBJID), #' set_values_to = exprs(PARAMCD = "TADJ", PARCAT1 = "OVERALL"), #' input_code = "ADJ", diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index a9bece644a..ea4ffeb7b1 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -112,6 +112,7 @@ #' # Summarize the average of the triplicate ECG interval values (AVAL) #' derive_summary_records( #' adeg, +#' dataset_add = adeg, #' by_vars = exprs(USUBJID, PARAM, AVISIT), #' set_values_to = exprs( #' AVAL = mean(AVAL, na.rm = TRUE), @@ -123,6 +124,7 @@ #' # Derive more than one summary variable #' derive_summary_records( #' adeg, +#' dataset_add = adeg, #' by_vars = exprs(USUBJID, PARAM, AVISIT), #' set_values_to = exprs( #' AVAL = mean(AVAL), @@ -156,6 +158,7 @@ #' # by group #' derive_summary_records( #' adeg, +#' dataset_add = adeg, #' by_vars = exprs(USUBJID, PARAM, AVISIT), #' filter = n() > 2, #' set_values_to = exprs( @@ -172,17 +175,11 @@ derive_summary_records <- function(dataset, filter_add = NULL, analysis_var, summary_fun, - set_values_to = NULL, + set_values_to, missing_values = NULL) { assert_vars(by_vars) - assert_data_frame( - dataset, - required_vars = expr_c(by_vars) - ) - assert_data_frame( - dataset_add, - required_vars = expr_c(by_vars), - ) + 
assert_data_frame(dataset, required_vars = by_vars) + assert_data_frame(dataset_add, required_vars = by_vars) assert_data_frame( dataset_ref, required_vars = by_vars, diff --git a/man/derive_param_exposure.Rd b/man/derive_param_exposure.Rd index 4073aa867c..21f11a511f 100644 --- a/man/derive_param_exposure.Rd +++ b/man/derive_param_exposure.Rd @@ -25,6 +25,12 @@ The variables specified by the \code{by_vars} and \code{analysis_var} argument(s \item Either \code{ASTDTM} and \code{AENDTM} or \code{ASTDT} and \code{AENDT} are also expected. }} +\item{dataset_add}{Additional dataset + +The variables specified for \code{by_vars} are expected. +Observations from the specified dataset are going to be used to calculate and added +as new records to the input dataset (\code{dataset}).} + \item{by_vars}{Grouping variables For each group defined by \code{by_vars} an observation is added to the output @@ -119,6 +125,7 @@ adex <- tribble( # Cumulative dose adex \%>\% derive_param_exposure( + dataset_add = adex, by_vars = exprs(USUBJID), set_values_to = exprs(PARAMCD = "TDOSE", PARCAT1 = "OVERALL"), input_code = "DOSE", @@ -130,6 +137,7 @@ adex \%>\% # average dose in w2-24 adex \%>\% derive_param_exposure( + dataset_add = adex, by_vars = exprs(USUBJID), filter = VISIT \%in\% c("WEEK 2", "WEEK 24"), set_values_to = exprs(PARAMCD = "AVDW224", PARCAT1 = "WEEK2-24"), @@ -142,6 +150,7 @@ adex \%>\% # Any dose adjustment? 
adex \%>\% derive_param_exposure( + dataset_add = adex, by_vars = exprs(USUBJID), set_values_to = exprs(PARAMCD = "TADJ", PARCAT1 = "OVERALL"), input_code = "ADJ", diff --git a/man/derive_summary_records.Rd b/man/derive_summary_records.Rd index cca5c59232..ecad30758f 100644 --- a/man/derive_summary_records.Rd +++ b/man/derive_summary_records.Rd @@ -13,7 +13,7 @@ derive_summary_records( filter_add = NULL, analysis_var, summary_fun, - set_values_to = NULL, + set_values_to, missing_values = NULL ) } @@ -86,7 +86,7 @@ For example: \item{missing_values}{Values for missing summary values -For observations of the input dataset (\code{dataset}) or (\code{dataset_add}) which do not have an +For observations of the input dataset (\code{dataset}) or (\code{dataset_add}) which do not have a complete mapping defined by the summarization defined in \code{set_values_to}. Only variables specified for \code{set_values_to} can be specified for \code{missing_values}. @@ -139,6 +139,7 @@ adeg <- tribble( # Summarize the average of the triplicate ECG interval values (AVAL) derive_summary_records( adeg, + dataset_add = adeg, by_vars = exprs(USUBJID, PARAM, AVISIT), set_values_to = exprs( AVAL = mean(AVAL, na.rm = TRUE), @@ -150,6 +151,7 @@ derive_summary_records( # Derive more than one summary variable derive_summary_records( adeg, + dataset_add = adeg, by_vars = exprs(USUBJID, PARAM, AVISIT), set_values_to = exprs( AVAL = mean(AVAL), @@ -183,6 +185,7 @@ adeg <- tribble( # by group derive_summary_records( adeg, + dataset_add = adeg, by_vars = exprs(USUBJID, PARAM, AVISIT), filter = n() > 2, set_values_to = exprs( diff --git a/tests/testthat/test-compute_scale.R b/tests/testthat/test-compute_scale.R index 74ae6d7ec9..0db6250394 100644 --- a/tests/testthat/test-compute_scale.R +++ b/tests/testthat/test-compute_scale.R @@ -95,6 +95,7 @@ test_that("compute_scale Test 5: compute_scale() works as expected within expect_equal( derive_summary_records( input, + dataset_add = input, by_vars = 
exprs(STUDYID, USUBJID, AVISIT, AVISITN), filter_add = (PARAMCD %in% c("ITEM1", "ITEM2", "ITEM3")), set_values_to = exprs( diff --git a/tests/testthat/test-derive_param_exposure.R b/tests/testthat/test-derive_param_exposure.R index 6b5c7c7ae3..ca80f75a60 100644 --- a/tests/testthat/test-derive_param_exposure.R +++ b/tests/testthat/test-derive_param_exposure.R @@ -128,6 +128,7 @@ test_that("derive_param_exposure Test 2: works with DT variables", { actual_output <- input_no_dtm %>% derive_param_exposure( + dataset_add = input_no_dtm, by_vars = exprs(USUBJID), input_code = "DOSE", analysis_var = AVAL, @@ -135,6 +136,7 @@ test_that("derive_param_exposure Test 2: works with DT variables", { set_values_to = exprs(PARAMCD = "TDOSE", PARCAT1 = "OVERALL") ) %>% derive_param_exposure( + dataset_add = input_no_dtm, by_vars = exprs(USUBJID), input_code = "DOSE", analysis_var = AVAL, @@ -142,6 +144,7 @@ test_that("derive_param_exposure Test 2: works with DT variables", { set_values_to = exprs(PARAMCD = "AVDOSE", PARCAT1 = "OVERALL") ) %>% derive_param_exposure( + dataset_add = input_no_dtm, by_vars = exprs(USUBJID), input_code = "ADJ", analysis_var = AVALC, @@ -162,6 +165,7 @@ test_that("derive_param_exposure Test 3: Errors", { expect_error( input <- input %>% derive_param_exposure( + dataset_add = input, by_vars = exprs(USUBJID), input_code = "DOSE", analysis_var = AVAL, @@ -174,6 +178,7 @@ test_that("derive_param_exposure Test 3: Errors", { expect_error( input <- input %>% derive_param_exposure( + dataset_add = input, by_vars = exprs(USUBJID), input_code = "DOSED", analysis_var = AVAL, @@ -191,6 +196,8 @@ test_that("derive_param_exposure Test 3: Errors", { input <- input %>% select(-starts_with("AST"), -starts_with("AEN")) %>% derive_param_exposure( + dataset = ., + dataset_add = ., by_vars = exprs(USUBJID), input_code = "DOSE", analysis_var = AVAL, diff --git a/vignettes/bds_exposure.Rmd b/vignettes/bds_exposure.Rmd index 0258e1fc8a..fd88cd7913 100644 --- 
a/vignettes/bds_exposure.Rmd +++ b/vignettes/bds_exposure.Rmd @@ -361,6 +361,7 @@ For example, to calculate the total dose by subject and treatment, ```{r eval=TRUE, echo=TRUE} adex <- derive_param_exposure( adex, + dataset_add = adex, by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars), input_code = "DOSE", analysis_var = AVAL, @@ -428,6 +429,7 @@ adex <- adex %>% summary_fun = function(x) if_else(sum(!is.na(x)) > 0, "Y", NA_character_) ) ), + dataset_add = adex, by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars) ) diff --git a/vignettes/bds_finding.Rmd b/vignettes/bds_finding.Rmd index 20e2fbdf62..64e13c31bc 100644 --- a/vignettes/bds_finding.Rmd +++ b/vignettes/bds_finding.Rmd @@ -989,9 +989,10 @@ Set `DTYPE` to `AVERAGE`. ```{r eval=TRUE} advs_ex2 <- derive_summary_records( advs, + dataset_add = advs, by_vars = exprs(STUDYID, USUBJID, PARAMCD, VISITNUM, ADT), set_values_to = exprs( - AVAL = mean(AVAL), + AVAL = mean(AVAL, na.rm = TRUE), DTYPE = "AVERAGE" ) ) diff --git a/vignettes/questionnaires.Rmd b/vignettes/questionnaires.Rmd index dc24569f09..652c0036e0 100644 --- a/vignettes/questionnaires.Rmd +++ b/vignettes/questionnaires.Rmd @@ -141,6 +141,8 @@ adgad7 <- adqs %>% # Select records to keep in the GAD-7 ADaM filter(PARCAT1 == "GAD-7 V2") %>% derive_summary_records( + dataset = ., + dataset_add = ., by_vars = exprs(STUDYID, USUBJID, AVISIT, ADT, ADY, TRTSDT, DTHCAUS), # Select records contributing to total score filter_add = str_detect(PARAMCD, "GAD020[1-7]"), @@ -169,6 +171,8 @@ adgdssf <- adqs %>% # Select records to keep in the GDS-SF ADaM filter(PARCAT1 == "GDS SHORT FORM") %>% derive_summary_records( + dataset = ., + dataset_add = ., by_vars = exprs(STUDYID, USUBJID, AVISIT, ADT, ADY, TRTSDT, DTHCAUS), # Select records contributing to total score filter_add = str_detect(PARAMCD, "GDS02[01][0-9]"), @@ -518,6 +522,7 @@ be derived by `derive_summary_records()`. 
```{r} adgdssf <- adgdssf %>% derive_summary_records( + dataset_add = adgdssf, filter_add = str_detect(PARAMCD, "GDS02[01][0-9]"), by_vars = exprs(USUBJID, AVISIT), set_values_to = exprs( @@ -567,6 +572,8 @@ adgdssf <- adgdssf %>% ) ) %>% derive_summary_records( + dataset = ., + dataset_add = ., filter_add = str_detect(PARAMCD, "GDS02[01][0-9]"), by_vars = exprs(USUBJID, AVISIT), set_values_to = exprs( From 5c690f7bde99ec44a02fc59620059931e7089412 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Fri, 3 Nov 2023 19:53:48 +0000 Subject: [PATCH 22/31] chore: #2142 roxygen stuff --- R/derive_param_exposure.R | 30 +++++++++++++++++++++++++----- R/derive_summary_records.R | 18 +++++++++++++++++- man/derive_param_exposure.Rd | 30 +++++++++++++++++++++++++----- man/derive_summary_records.Rd | 17 ++++++++++++++++- 4 files changed, 83 insertions(+), 12 deletions(-) diff --git a/R/derive_param_exposure.R b/R/derive_param_exposure.R index 27cb9bdeee..d5bd1cde27 100644 --- a/R/derive_param_exposure.R +++ b/R/derive_param_exposure.R @@ -15,13 +15,33 @@ #' as new records to the input dataset (`dataset`). #' #' -#' @param filter Filter condition +#' @param filter #' -#' The specified condition is applied to the input dataset before deriving the -#' new parameter, i.e., only observations fulfilling the condition are taken -#' into account. +#' `r lifecycle::badge("deprecated")` Please use `filter_add` instead. #' -#' *Permitted Values:* a condition +#' Filter condition as logical expression to apply during +#' summary calculation. By default, filtering expressions are computed within +#' `by_vars` as this will help when an aggregating, lagging, or ranking +#' function is involved. +#' +#' For example, +#' +#' + `filter = (AVAL > mean(AVAL, na.rm = TRUE))` will filter all `AVAL` +#' values greater than mean of `AVAL` with in `by_vars`. +#' + `filter = (dplyr::n() > 2)` will filter n count of `by_vars` greater +#' than 2. 
+#' +#' @param filter_add Filter condition as logical expression to apply during +#' summary calculation. By default, filtering expressions are computed within +#' `by_vars` as this will help when an aggregating, lagging, or ranking +#' function is involved. +#' +#' For example, +#' +#' + `filter_add = (AVAL > mean(AVAL, na.rm = TRUE))` will filter all `AVAL` +#' values greater than mean of `AVAL` with in `by_vars`. +#' + `filter_add = (dplyr::n() > 2)` will filter n count of `by_vars` greater +#' than 2. #' #' @param input_code Required parameter code #' diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index ea4ffeb7b1..d9363602ff 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -29,7 +29,11 @@ #' records. Providing the names of variables in [exprs()] will create a #' groupwise summary and generate summary records for the specified groups. #' -#' @param filter Filter condition as logical expression to apply during +#' @param filter +#' +#' `r lifecycle::badge("deprecated")` Please use `filter_add` instead. +#' +#' Filter condition as logical expression to apply during #' summary calculation. By default, filtering expressions are computed within #' `by_vars` as this will help when an aggregating, lagging, or ranking #' function is involved. @@ -41,6 +45,18 @@ #' + `filter = (dplyr::n() > 2)` will filter n count of `by_vars` greater #' than 2. #' +#' @param filter_add Filter condition as logical expression to apply during +#' summary calculation. By default, filtering expressions are computed within +#' `by_vars` as this will help when an aggregating, lagging, or ranking +#' function is involved. +#' +#' For example, +#' +#' + `filter_add = (AVAL > mean(AVAL, na.rm = TRUE))` will filter all `AVAL` +#' values greater than mean of `AVAL` with in `by_vars`. +#' + `filter_add = (dplyr::n() > 2)` will filter n count of `by_vars` greater +#' than 2. 
+#' #' @param set_values_to Variables to be set #' #' The specified variables are set to the specified values for the new diff --git a/man/derive_param_exposure.Rd b/man/derive_param_exposure.Rd index 21f11a511f..ce04b182d9 100644 --- a/man/derive_param_exposure.Rd +++ b/man/derive_param_exposure.Rd @@ -53,13 +53,33 @@ performs the calculation. This can include built-in functions as well as user defined functions, for example \code{mean} or \code{function(x) mean(x, na.rm = TRUE)}.} -\item{filter}{Filter condition +\item{filter}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Please use \code{filter_add} instead. -The specified condition is applied to the input dataset before deriving the -new parameter, i.e., only observations fulfilling the condition are taken -into account. +Filter condition as logical expression to apply during +summary calculation. By default, filtering expressions are computed within +\code{by_vars} as this will help when an aggregating, lagging, or ranking +function is involved. -\emph{Permitted Values:} a condition} +For example, +\itemize{ +\item \code{filter = (AVAL > mean(AVAL, na.rm = TRUE))} will filter all \code{AVAL} +values greater than mean of \code{AVAL} with in \code{by_vars}. +\item \code{filter = (dplyr::n() > 2)} will filter n count of \code{by_vars} greater +than 2. +}} + +\item{filter_add}{Filter condition as logical expression to apply during +summary calculation. By default, filtering expressions are computed within +\code{by_vars} as this will help when an aggregating, lagging, or ranking +function is involved. + +For example, +\itemize{ +\item \code{filter_add = (AVAL > mean(AVAL, na.rm = TRUE))} will filter all \code{AVAL} +values greater than mean of \code{AVAL} with in \code{by_vars}. +\item \code{filter_add = (dplyr::n() > 2)} will filter n count of \code{by_vars} greater +than 2. 
+}} \item{set_values_to}{Variable-value pairs diff --git a/man/derive_summary_records.Rd b/man/derive_summary_records.Rd index ecad30758f..66a3ae2fa5 100644 --- a/man/derive_summary_records.Rd +++ b/man/derive_summary_records.Rd @@ -38,7 +38,9 @@ input dataset.} records. Providing the names of variables in \code{\link[=exprs]{exprs()}} will create a groupwise summary and generate summary records for the specified groups.} -\item{filter}{Filter condition as logical expression to apply during +\item{filter}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Please use \code{filter_add} instead. + +Filter condition as logical expression to apply during summary calculation. By default, filtering expressions are computed within \code{by_vars} as this will help when an aggregating, lagging, or ranking function is involved. @@ -51,6 +53,19 @@ values greater than mean of \code{AVAL} with in \code{by_vars}. than 2. }} +\item{filter_add}{Filter condition as logical expression to apply during +summary calculation. By default, filtering expressions are computed within +\code{by_vars} as this will help when an aggregating, lagging, or ranking +function is involved. + +For example, +\itemize{ +\item \code{filter_add = (AVAL > mean(AVAL, na.rm = TRUE))} will filter all \code{AVAL} +values greater than mean of \code{AVAL} with in \code{by_vars}. +\item \code{filter_add = (dplyr::n() > 2)} will filter n count of \code{by_vars} greater +than 2. +}} + \item{analysis_var}{Analysis variable. 
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Please use \code{set_values_to} instead.} From 01bc54d6c9936000128c93d6a49adddfc65d29ec Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Fri, 3 Nov 2023 20:00:48 +0000 Subject: [PATCH 23/31] get past check-templates --- inst/templates/ad_adeg.R | 1 + inst/templates/ad_adex.R | 1 + inst/templates/ad_advs.R | 1 + 3 files changed, 3 insertions(+) diff --git a/inst/templates/ad_adeg.R b/inst/templates/ad_adeg.R index 4e574220d8..46abfac143 100644 --- a/inst/templates/ad_adeg.R +++ b/inst/templates/ad_adeg.R @@ -197,6 +197,7 @@ adeg <- adeg %>% # (if least 2 records available) for all parameter except EGINTP adeg <- adeg %>% derive_summary_records( + dataset_add = adeg, by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, PARAMCD, AVISITN, AVISIT, ADT), filter_add = dplyr::n() >= 2 & PARAMCD != "EGINTP", set_values_to = exprs( diff --git a/inst/templates/ad_adex.R b/inst/templates/ad_adex.R index d64e0df635..977ddf2a33 100644 --- a/inst/templates/ad_adex.R +++ b/inst/templates/ad_adex.R @@ -185,6 +185,7 @@ adex <- adex %>% summary_fun = function(x) if_else(sum(!is.na(x)) > 0, "Y", NA_character_) ) ), + dataset_add = adex, filter = VISIT %in% c("WEEK 2", "WEEK 24"), by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars) ) %>% diff --git a/inst/templates/ad_advs.R b/inst/templates/ad_advs.R index b8c4635afb..8cb14d7470 100644 --- a/inst/templates/ad_advs.R +++ b/inst/templates/ad_advs.R @@ -156,6 +156,7 @@ advs <- advs %>% ## Derive a new record as a summary record (e.g. 
mean of the triplicates at each time point) ---- advs <- advs %>% derive_summary_records( + dataset_add = advs, by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars, PARAMCD, AVISITN, AVISIT, ADT, ADY), filter_add = !is.na(AVAL), set_values_to = exprs( From 0f7a8abd1afe83976f72cfd7794f30d12a651724 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Fri, 3 Nov 2023 20:07:38 +0000 Subject: [PATCH 24/31] finally get past templates --- inst/templates/ad_adex.R | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/templates/ad_adex.R b/inst/templates/ad_adex.R index 977ddf2a33..fca0ace57e 100644 --- a/inst/templates/ad_adex.R +++ b/inst/templates/ad_adex.R @@ -148,6 +148,7 @@ adex <- adex %>% summary_fun = function(x) if_else(sum(!is.na(x)) > 0, "Y", NA_character_) ) ), + dataset_add = adex, by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars) ) %>% # W2-W24 exposure From eaf98921de8a2fbe0e28f6040ccdc31e5c98e79c Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Mon, 6 Nov 2023 18:55:32 +0000 Subject: [PATCH 25/31] feat: #2142 clear up missing_values usage --- R/derive_summary_records.R | 30 ++++++---- tests/testthat/test-derive_summary_records.R | 62 ++++---------------- 2 files changed, 28 insertions(+), 64 deletions(-) diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index d9363602ff..0bb92227d3 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -232,17 +232,6 @@ derive_summary_records <- function(dataset, summarise(!!!set_values_to) %>% ungroup() - if (!is.null(missing_values)) { - update_missings <- map2( - syms(names(missing_values)), - missing_values, - ~ expr(if_else(is.na(!!.x), !!.y, !!.x)) - ) - names(update_missings) <- names(missing_values) - summary_records <- summary_records %>% - mutate(!!!update_missings) - } - df_return <- bind_rows( dataset, summary_records @@ -258,11 +247,28 @@ derive_summary_records <- function(dataset, by = map_chr(by_vars, as_name) ) + tmp_ref_obs <- get_new_tmp_var(new_ref_obs, prefix = "tmp_ref_obs") + 
+ new_ref_obs <- new_ref_obs %>% + mutate(!!tmp_ref_obs := 1L) + df_return <- bind_rows( df_return, new_ref_obs ) } - return(df_return) + if (!is.null(missing_values)) { + update_missings <- map2( + syms(names(missing_values)), + missing_values, + ~ expr(if_else(is.na(!!.x) & tmp_ref_obs_1 == 1, !!.y, !!.x)) + ) + names(update_missings) <- names(missing_values) + df_return <- df_return %>% + mutate(!!!update_missings) + } + + df_return %>% + remove_tmp_vars() } diff --git a/tests/testthat/test-derive_summary_records.R b/tests/testthat/test-derive_summary_records.R index 6bc5354943..4300d05938 100644 --- a/tests/testthat/test-derive_summary_records.R +++ b/tests/testthat/test-derive_summary_records.R @@ -187,47 +187,7 @@ test_that("derive_summary_records Test 5: make sure dataset_add works", { }) ## Test 6: test missing values ---- -test_that("derive_summary_records Test 6: test missing values", { - input <- tibble::tribble( - ~subj, ~visit, ~val, ~seq, - "1", 1, 10, 1, - "1", 1, 14, 2, - "1", 1, 9, 3, - "1", 2, 11, 4, - "2", 2, NA_real_, 1 - ) - - expected_output <- bind_rows( - input, - tibble::tribble( - ~subj, ~visit, ~aval, - "1", 1, 11, - "1", 2, 11, - "2", 2, 999999 - ) %>% - mutate(type = "AVERAGE") - ) - - actual_output <- input %>% - derive_summary_records( - dataset_add = input, - by_vars = exprs(subj, visit), - set_values_to = exprs( - aval = mean(val, na.rm = TRUE), - type = "AVERAGE" - ), - missing_values = exprs(aval = 999999) - ) - - expect_dfs_equal( - base = expected_output, - compare = actual_output, - keys = c("subj", "visit", "seq", "type") - ) -}) - -## Test 7: make sure dataset_ref works ---- -test_that("derive_summary_records Test 7: make sure dataset_ref works", { +test_that("derive_summary_records Test 6: test missing values with dataset_ref", { input <- tibble::tribble( ~subj, ~visit, ~val, ~seq, "1", 1, 10, 1, @@ -244,18 +204,15 @@ test_that("derive_summary_records Test 7: make sure dataset_ref works", { "2", 1, "2", 2, ) + 
expected_output <- bind_rows( input, tibble::tribble( - ~subj, ~visit, ~val, - "1", 1, 11, - "1", 2, 11, - "2", 2, NA_real_ - ) %>% - mutate(type = "AVERAGE"), - tibble::tribble( - ~subj, ~visit, - "2", 1, + ~subj, ~visit, ~aval, ~type, + "1", 1, 11, "AVERAGE", + "1", 2, 11, "AVERAGE", + "2", 1, 999999, "MISSING", + "2", 2, NA_real_, "AVERAGE", ) ) @@ -265,9 +222,10 @@ test_that("derive_summary_records Test 7: make sure dataset_ref works", { dataset_ref = input_ref, by_vars = exprs(subj, visit), set_values_to = exprs( - val = mean(val, na.rm = TRUE), + aval = mean(val, na.rm = TRUE), type = "AVERAGE" - ) + ), + missing_values = exprs(aval = 999999, type = "MISSING") ) expect_dfs_equal( From 5c3748ae8ee9b1dbe95c5ca89f4cd7280795776b Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Mon, 6 Nov 2023 18:58:20 +0000 Subject: [PATCH 26/31] chore: #2142 rename filter to filter_add internally in codebase too --- R/derive_param_exposure.R | 6 +++--- R/derive_summary_records.R | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/derive_param_exposure.R b/R/derive_param_exposure.R index d5bd1cde27..5403ef7a31 100644 --- a/R/derive_param_exposure.R +++ b/R/derive_param_exposure.R @@ -198,9 +198,9 @@ derive_param_exposure <- function(dataset, I("derive_param_exposure(filter = )"), "derive_param_exposure(filter_add = )" ) - filter <- assert_filter_cond(enexpr(filter), optional = TRUE) + filter_add <- assert_filter_cond(enexpr(filter), optional = TRUE) } - filter <- assert_filter_cond(enexpr(filter_add), optional = TRUE) + filter_add <- assert_filter_cond(enexpr(filter_add), optional = TRUE) assert_varval_list(set_values_to, required_elements = "PARAMCD") assert_param_does_not_exist(dataset, set_values_to$PARAMCD) assert_character_scalar(input_code) @@ -216,7 +216,7 @@ derive_param_exposure <- function(dataset, dataset, dataset_add, by_vars = by_vars, - filter_add = PARAMCD == !!input_code & !!filter, + filter_add = PARAMCD == !!input_code & !!filter_add, 
set_values_to = exprs( !!analysis_var := {{ summary_fun }}(!!analysis_var), !!!set_dtm, diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index 0bb92227d3..4aa6410c6c 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -222,13 +222,13 @@ derive_summary_records <- function(dataset, I("derive_summary_records(filter = )"), "derive_summary_records(filter_add = )" ) - filter <- assert_filter_cond(enexpr(filter), optional = TRUE) + filter_add <- assert_filter_cond(enexpr(filter), optional = TRUE) } - filter <- assert_filter_cond(enexpr(filter_add), optional = TRUE) + filter_add <- assert_filter_cond(enexpr(filter_add), optional = TRUE) summary_records <- dataset_add %>% group_by(!!!by_vars) %>% - filter_if(filter) %>% + filter_if(filter_add) %>% summarise(!!!set_values_to) %>% ungroup() From 89a673e95f34552d7195a3a4d6b88e7d70ed4620 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Mon, 6 Nov 2023 19:02:02 +0000 Subject: [PATCH 27/31] chore: #2142 adopt and address all other feedback --- R/derive_param_exposure.R | 2 +- R/derive_summary_records.R | 2 +- inst/templates/ad_adex.R | 2 +- man/derive_param_exposure.Rd | 2 +- man/derive_summary_records.Rd | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/derive_param_exposure.R b/R/derive_param_exposure.R index 5403ef7a31..3bf1d05be3 100644 --- a/R/derive_param_exposure.R +++ b/R/derive_param_exposure.R @@ -155,7 +155,7 @@ #' summary_fun = function(x) if_else(sum(!is.na(x)) > 0, "Y", NA_character_) #' ) %>% #' select(-ASTDTM, -AENDTM) -derive_param_exposure <- function(dataset, +derive_param_exposure <- function(dataset = NULL, dataset_add, by_vars, input_code, diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index 4aa6410c6c..83ac05daea 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -183,7 +183,7 @@ #' ) #' ) %>% #' arrange(USUBJID, AVISIT) -derive_summary_records <- function(dataset, +derive_summary_records <- 
function(dataset = NULL, dataset_add, dataset_ref = NULL, by_vars, diff --git a/inst/templates/ad_adex.R b/inst/templates/ad_adex.R index fca0ace57e..a9b84b6e5b 100644 --- a/inst/templates/ad_adex.R +++ b/inst/templates/ad_adex.R @@ -187,7 +187,7 @@ adex <- adex %>% ) ), dataset_add = adex, - filter = VISIT %in% c("WEEK 2", "WEEK 24"), + filter_add = VISIT %in% c("WEEK 2", "WEEK 24"), by_vars = exprs(STUDYID, USUBJID, !!!adsl_vars) ) %>% # Overall Dose intensity and W2-24 dose intensity diff --git a/man/derive_param_exposure.Rd b/man/derive_param_exposure.Rd index ce04b182d9..88092ca61f 100644 --- a/man/derive_param_exposure.Rd +++ b/man/derive_param_exposure.Rd @@ -5,7 +5,7 @@ \title{Add an Aggregated Parameter and Derive the Associated Start and End Dates} \usage{ derive_param_exposure( - dataset, + dataset = NULL, dataset_add, by_vars, input_code, diff --git a/man/derive_summary_records.Rd b/man/derive_summary_records.Rd index 66a3ae2fa5..588f2d5599 100644 --- a/man/derive_summary_records.Rd +++ b/man/derive_summary_records.Rd @@ -5,7 +5,7 @@ \title{Add New Records Within By Groups Using Aggregation Functions} \usage{ derive_summary_records( - dataset, + dataset = NULL, dataset_add, dataset_ref = NULL, by_vars, From 85f4f9ec981189676afdd3f40af696450f683f41 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Mon, 6 Nov 2023 19:11:09 +0000 Subject: [PATCH 28/31] missed a renaming --- R/derive_param_exposure.R | 4 ++-- R/derive_summary_records.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/derive_param_exposure.R b/R/derive_param_exposure.R index 3bf1d05be3..a21e8c1d32 100644 --- a/R/derive_param_exposure.R +++ b/R/derive_param_exposure.R @@ -208,8 +208,8 @@ derive_param_exposure <- function(dataset = NULL, assert_character_vector(input_code, values = params_available) assert_s3_class(summary_fun, "function") - if (is.null(filter)) { - filter <- TRUE + if (is.null(filter_add)) { + filter_add <- TRUE } derive_summary_records( diff --git 
a/R/derive_summary_records.R b/R/derive_summary_records.R index 83ac05daea..9e983c4034 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -250,7 +250,7 @@ derive_summary_records <- function(dataset = NULL, tmp_ref_obs <- get_new_tmp_var(new_ref_obs, prefix = "tmp_ref_obs") new_ref_obs <- new_ref_obs %>% - mutate(!!tmp_ref_obs := 1L) + mutate(!!tmp_ref_obs := 1L) df_return <- bind_rows( df_return, @@ -266,7 +266,7 @@ derive_summary_records <- function(dataset = NULL, ) names(update_missings) <- names(missing_values) df_return <- df_return %>% - mutate(!!!update_missings) + mutate(!!!update_missings) } df_return %>% From b3fc993c7dfadb7d60c723167aa0ba22dcbd9e1c Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Tue, 7 Nov 2023 18:49:54 +0000 Subject: [PATCH 29/31] feat: #2142 remove extra fluff for missing values --- R/derive_param_exposure.R | 8 ++++---- R/derive_summary_records.R | 28 ++++++++-------------------- man/derive_param_exposure.Rd | 12 ++++++------ man/derive_summary_records.Rd | 4 ++-- 4 files changed, 20 insertions(+), 32 deletions(-) diff --git a/R/derive_param_exposure.R b/R/derive_param_exposure.R index a21e8c1d32..11552b94ed 100644 --- a/R/derive_param_exposure.R +++ b/R/derive_param_exposure.R @@ -6,11 +6,11 @@ #' @param dataset #' `r roxygen_param_dataset(expected_vars = c("by_vars", "analysis_var"))` #' `PARAMCD` is expected as well, -#' + Either `ASTDTM` and `AENDTM` or `ASTDT` and `AENDT` are also expected. #' #' @param dataset_add Additional dataset #' -#' The variables specified for `by_vars` are expected. +#' The variables specified for `by_vars`, `analysis_var`, +#' + alongside either `ASTDTM` and `AENDTM` or `ASTDT` and `AENDT` are also expected. #' Observations from the specified dataset are going to be used to calculate and added #' as new records to the input dataset (`dataset`). 
#' @@ -187,10 +187,10 @@ derive_param_exposure <- function(dataset = NULL, ) } - assert_data_frame(dataset, + assert_data_frame(dataset, required_vars = by_vars, optional = TRUE) + assert_data_frame(dataset_add, required_vars = expr_c(by_vars, analysis_var, exprs(PARAMCD), dates) ) - assert_data_frame(dataset_add, required_vars = by_vars) if (!missing(filter)) { deprecate_warn( diff --git a/R/derive_summary_records.R b/R/derive_summary_records.R index 9e983c4034..77e8a7d8a9 100644 --- a/R/derive_summary_records.R +++ b/R/derive_summary_records.R @@ -78,7 +78,7 @@ #' #' @param missing_values Values for missing summary values #' -#' For observations of the input dataset (`dataset`) or (`dataset_add`) which do not have a +#' For observations of the reference dataset (`dataset_ref`) which do not have a #' complete mapping defined by the summarization defined in `set_values_to`. Only variables #' specified for `set_values_to` can be specified for `missing_values`. #' @@ -176,7 +176,7 @@ #' adeg, #' dataset_add = adeg, #' by_vars = exprs(USUBJID, PARAM, AVISIT), -#' filter = n() > 2, +#' filter_add = n() > 2, #' set_values_to = exprs( #' AVAL = mean(AVAL, na.rm = TRUE), #' DTYPE = "AVERAGE" @@ -194,7 +194,7 @@ derive_summary_records <- function(dataset = NULL, set_values_to, missing_values = NULL) { assert_vars(by_vars) - assert_data_frame(dataset, required_vars = by_vars) + assert_data_frame(dataset, required_vars = by_vars, optional = TRUE) assert_data_frame(dataset_add, required_vars = by_vars) assert_data_frame( dataset_ref, @@ -247,10 +247,10 @@ derive_summary_records <- function(dataset = NULL, by = map_chr(by_vars, as_name) ) - tmp_ref_obs <- get_new_tmp_var(new_ref_obs, prefix = "tmp_ref_obs") - - new_ref_obs <- new_ref_obs %>% - mutate(!!tmp_ref_obs := 1L) + if (!is.null(missing_values)) { + new_ref_obs <- new_ref_obs %>% + mutate(!!!missing_values) + } df_return <- bind_rows( df_return, @@ -258,17 +258,5 @@ derive_summary_records <- function(dataset = NULL, ) } 
- if (!is.null(missing_values)) { - update_missings <- map2( - syms(names(missing_values)), - missing_values, - ~ expr(if_else(is.na(!!.x) & tmp_ref_obs_1 == 1, !!.y, !!.x)) - ) - names(update_missings) <- names(missing_values) - df_return <- df_return %>% - mutate(!!!update_missings) - } - - df_return %>% - remove_tmp_vars() + df_return } diff --git a/man/derive_param_exposure.Rd b/man/derive_param_exposure.Rd index 88092ca61f..9199ebd5d8 100644 --- a/man/derive_param_exposure.Rd +++ b/man/derive_param_exposure.Rd @@ -20,16 +20,16 @@ derive_param_exposure( \item{dataset}{Input dataset The variables specified by the \code{by_vars} and \code{analysis_var} argument(s) to be expected. -\code{PARAMCD} is expected as well, -\itemize{ -\item Either \code{ASTDTM} and \code{AENDTM} or \code{ASTDT} and \code{AENDT} are also expected. -}} +\code{PARAMCD} is expected as well,} \item{dataset_add}{Additional dataset -The variables specified for \code{by_vars} are expected. +The variables specified for \code{by_vars}, \code{analysis_var}, +\itemize{ +\item alongside either \code{ASTDTM} and \code{AENDTM} or \code{ASTDT} and \code{AENDT} are also expected. Observations from the specified dataset are going to be used to calculate and added -as new records to the input dataset (\code{dataset}).} +as new records to the input dataset (\code{dataset}). +}} \item{by_vars}{Grouping variables diff --git a/man/derive_summary_records.Rd b/man/derive_summary_records.Rd index 588f2d5599..793a2a2c5b 100644 --- a/man/derive_summary_records.Rd +++ b/man/derive_summary_records.Rd @@ -101,7 +101,7 @@ For example: \item{missing_values}{Values for missing summary values -For observations of the input dataset (\code{dataset}) or (\code{dataset_add}) which do not have a +For observations of the reference dataset (\code{dataset_ref}) which do not have a complete mapping defined by the summarization defined in \code{set_values_to}. 
Only variables specified for \code{set_values_to} can be specified for \code{missing_values}. @@ -202,7 +202,7 @@ derive_summary_records( adeg, dataset_add = adeg, by_vars = exprs(USUBJID, PARAM, AVISIT), - filter = n() > 2, + filter_add = n() > 2, set_values_to = exprs( AVAL = mean(AVAL, na.rm = TRUE), DTYPE = "AVERAGE" From 2f242828e0f45a481953ac026f5f0dd92d7c6af6 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Tue, 7 Nov 2023 18:54:33 +0000 Subject: [PATCH 30/31] update news blurb --- NEWS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 01d88f899e..4e06535aa4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -17,7 +17,9 @@ character vector (`'--DTC'`), was imputed. (#2146) were enhanced such that more than one summary variable can be derived, e.g., `AVAL` as the sum and `ADT` as the maximum of the contributing records. (#1792) -- `derive_summary_records()` was enhanced with the following optional arguments: `dataset_add`, `dataset_ref`, `missing_values`. These arguments respectively, generate summary variables from additional datasets, retain/add specific records from a reference dataset, and impute user-defined missing values. (#2142) +- `derive_summary_records()` was enhanced with the following optional arguments: `dataset_add`, `dataset_ref`, `missing_values`. These arguments respectively, generate summary variables from additional datasets, retain/add specific records from a reference dataset, and impute user-defined missing values. `derive_param_exposure()` was enhanced with `dataset_add` as well. (#2142) + +- The argument `dataset` is now optional for `derive_summary_records()` and `derive_param_exposure()`. 
(#2142) - The "joined" functions (`derive_vars_joined()`, `derive_var_joined_exist_flag()`, `filter_joined()`, and `event_joined()`) were unified: (#2126) From a0dc08a2961ea1b5404a1504fd6c567566bd18ea Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Wed, 8 Nov 2023 18:48:26 +0000 Subject: [PATCH 31/31] chore: #2142 update documentation based on feedback --- R/derive_param_exposure.R | 7 +++---- man/derive_param_exposure.Rd | 11 ++++------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/R/derive_param_exposure.R b/R/derive_param_exposure.R index 11552b94ed..8430aa7d89 100644 --- a/R/derive_param_exposure.R +++ b/R/derive_param_exposure.R @@ -4,13 +4,12 @@ #' start (`ASTDT(M)`)and end date (`AENDT(M)`) as the minimum and maximum date by `by_vars`. #' #' @param dataset -#' `r roxygen_param_dataset(expected_vars = c("by_vars", "analysis_var"))` -#' `PARAMCD` is expected as well, +#' `r roxygen_param_dataset(expected_vars = c("by_vars"))` #' #' @param dataset_add Additional dataset #' -#' The variables specified for `by_vars`, `analysis_var`, -#' + alongside either `ASTDTM` and `AENDTM` or `ASTDT` and `AENDT` are also expected. +#' The variables specified for `by_vars`, `analysis_var`, `PARAMCD`, +#' alongside either `ASTDTM` and `AENDTM` or `ASTDT` and `AENDT` are also expected. #' Observations from the specified dataset are going to be used to calculate and added #' as new records to the input dataset (`dataset`). #' diff --git a/man/derive_param_exposure.Rd b/man/derive_param_exposure.Rd index 9199ebd5d8..cea65120e1 100644 --- a/man/derive_param_exposure.Rd +++ b/man/derive_param_exposure.Rd @@ -19,17 +19,14 @@ derive_param_exposure( \arguments{ \item{dataset}{Input dataset -The variables specified by the \code{by_vars} and \code{analysis_var} argument(s) to be expected. 
-\code{PARAMCD} is expected as well,} +The variables specified by the \code{by_vars} argument(s) to be expected.} \item{dataset_add}{Additional dataset -The variables specified for \code{by_vars}, \code{analysis_var}, -\itemize{ -\item alongside either \code{ASTDTM} and \code{AENDTM} or \code{ASTDT} and \code{AENDT} are also expected. +The variables specified for \code{by_vars}, \code{analysis_var}, \code{PARAMCD}, +alongside either \code{ASTDTM} and \code{AENDTM} or \code{ASTDT} and \code{AENDT} are also expected. Observations from the specified dataset are going to be used to calculate and added -as new records to the input dataset (\code{dataset}). -}} +as new records to the input dataset (\code{dataset}).} \item{by_vars}{Grouping variables