From 9aa635808a7c8dd7038c7208870ff69a45d38c9f Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Wed, 14 Jun 2023 19:13:13 -0400 Subject: [PATCH 01/33] Refactor a_summary, deprecate create_afun_summary --- NAMESPACE | 4 - R/summarize_colvars.R | 9 +- R/summarize_variables.R | 307 +++++++------------ R/utils.R | 21 +- man/create_afun_summary.Rd | 34 -- man/extract_by_name.Rd | 7 +- man/summarize_colvars.Rd | 3 + man/summarize_variables.Rd | 107 ++----- man/summary_formats.Rd | 2 +- man/summary_labels.Rd | 7 +- tests/testthat/_snaps/summarize_variables.md | 41 --- tests/testthat/test-summarize_variables.R | 27 -- 12 files changed, 176 insertions(+), 393 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 778c6f17da..8c20ce6c27 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,10 +4,6 @@ S3method(a_compare,character) S3method(a_compare,factor) S3method(a_compare,logical) S3method(a_compare,numeric) -S3method(a_summary,character) -S3method(a_summary,factor) -S3method(a_summary,logical) -S3method(a_summary,numeric) S3method(as.rtable,data.frame) S3method(h_coxreg_inter_effect,factor) S3method(h_coxreg_inter_effect,numeric) diff --git a/R/summarize_colvars.R b/R/summarize_colvars.R index b3386ebbc1..8d75b41b0e 100644 --- a/R/summarize_colvars.R +++ b/R/summarize_colvars.R @@ -62,15 +62,16 @@ #' @export summarize_colvars <- function(lyt, ..., + na_level = NA_character_, .stats = c("n", "mean_sd", "median", "range", "count_fraction"), .formats = NULL, .labels = NULL, .indent_mods = NULL) { - afun <- create_afun_summary(.stats, .formats, .labels, .indent_mods) - analyze_colvars( lyt, - afun = afun, - extra_args = list(...) + afun = a_summary, + extra_args = list( + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_level = na_level, ... + ) ) } diff --git a/R/summarize_variables.R b/R/summarize_variables.R index 9d82632d8c..3c25f7bc4c 100644 --- a/R/summarize_variables.R +++ b/R/summarize_variables.R @@ -22,14 +22,14 @@ control_summarize_vars <- function(conf_level = 0.95, checkmate::assert_vector(quantiles, len = 2) checkmate::assert_int(quantile_type, lower = 1, upper = 9) checkmate::assert_numeric(test_mean) - nullo <- lapply(quantiles, assert_proportion_value) + lapply(quantiles, assert_proportion_value) assert_proportion_value(conf_level) list(conf_level = conf_level, quantiles = quantiles, quantile_type = quantile_type, test_mean = test_mean) } #' Format Function for Descriptive Statistics #' -#' Returns format patterns for descriptive statistics. The format is understood by the `rtables`. +#' Returns format patterns for descriptive statistics. The format is understood by `rtables`. #' #' @param type (`string`)\cr choice of a summary data type. Only `counts` and `numeric` types are currently supported. #' @@ -77,29 +77,46 @@ summary_formats <- function(type = "numeric") { #' #' Returns labels of descriptive statistics for numeric variables. #' -#' @return A named `vector` of default statistic labels. +#' @param type (`string`)\cr choice of a summary data type. Only `counts` and `numeric` types are currently supported. +#' +#' @return A named `vector` of default statistic labels for the given data type. #' #' @keywords internal -summary_labels <- function() { - c( - mean = "Mean", - sum = "Sum", - sd = "SD", - se = "SE", - mean_sd = "Mean (SD)", - mean_se = "Mean (SE)", - median = "Median", - mad = "Median Absolute Deviation", - iqr = "IQR", - range = "Min - Max", - median_range = "Median (Min - Max)", - cv = "CV (%)", - min = "Minimum", - max = "Maximum", - geom_mean = "Geometric Mean", - geom_cv = "CV % Geometric Mean", - n = "n" - ) +summary_labels <- function(type = "numeric") { + if (type == "counts") { + c( + n = "n", + count = "count", + count_fraction = "count_fraction", + n_blq = "n_blq" + ) + } else { + c( + n = "n", + sum = "Sum", + mean = "Mean", + sd = "SD", + se = "SE", + mean_sd = "Mean (SD)", + mean_se = "Mean (SE)", + mean_ci = "Mean 95% CI", + mean_sei = "Mean -/+ 1xSE", + mean_sdi = "Mean -/+ 1xSD", + mean_pval = "Mean p-value (H0: mean = 0)", + median = "Median", + mad = "Median Absolute Deviation", + median_ci = "Median 95% CI", + quantiles = "25% and 75%-ile", + iqr = "IQR", + range = "Min - Max", + cv = "CV (%)", + min = "Minimum", + max = "Maximum", + median_range = "Median (Min - Max)", + geom_mean = "Geometric Mean", + geom_cv = "CV % Geometric Mean" + ) + } } #' Summarize Variables @@ -408,9 +425,9 @@ s_summary.character <- function(x, verbose = TRUE, ...) { if (na.rm) { - y <- as_factor_keep_attributes(x, x_name = .var, verbose = verbose) + y <- as_factor_keep_attributes(x, verbose = verbose) } else { - y <- as_factor_keep_attributes(x, x_name = .var, verbose = verbose, na_level = "NA") + y <- as_factor_keep_attributes(x, verbose = verbose, na_level = "NA") } s_summary( @@ -478,86 +495,83 @@ s_summary.logical <- function(x, y } -#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()`. -#' -#' @return -#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()]. -#' -#' @export -a_summary <- function(x, - ..., - .N_row, # nolint - .N_col, # nolint - .var) { - UseMethod("a_summary", x) -} - .a_summary_numeric_formats <- summary_formats() .a_summary_numeric_labels <- summary_labels() - -#' @describeIn summarize_variables Formatted analysis function method for `numeric` class. -#' -#' @examples -#' # `a_summary.numeric` -#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") -#' -#' @export -a_summary.numeric <- make_afun( - s_summary.numeric, - .formats = .a_summary_numeric_formats, - .labels = .a_summary_numeric_labels -) - +.a_summary_numeric_indent_mods <- rep(0L, length(summary_labels())) %>% `names<-`(names(.a_summary_numeric_labels)) .a_summary_counts_formats <- summary_formats(type = "counts") +.a_summary_counts_labels <- summary_labels(type = "counts") +.a_summary_counts_indent_mods <- rep(0L, length(.a_summary_counts_labels)) %>% `names<-`(names(.a_summary_counts_labels)) -#' @describeIn summarize_variables Formatted analysis function method for `factor` class. +#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and +#' `cfun` in `summarize_colvars()`. #' -#' @examples -#' # `a_summary.factor` -#' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting -#' # functions can be applied correctly. -#' afun <- make_afun( -#' getS3method("a_summary", "factor"), -#' .ungroup_stats = c("count", "count_fraction") -#' ) -#' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) -#' -#' @export -a_summary.factor <- make_afun( - s_summary.factor, - .formats = .a_summary_counts_formats -) - -#' @describeIn summarize_variables Formatted analysis function method for `character` class. +#' @return +#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()]. #' #' @examples -#' # `a_summary.character` -#' afun <- make_afun( -#' getS3method("a_summary", "character"), -#' .ungroup_stats = c("count", "count_fraction") -#' ) -#' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) +#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") +#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) +#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) +#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) #' #' @export -a_summary.character <- make_afun( - s_summary.character, - .formats = .a_summary_counts_formats -) +a_summary <- function(x, + .N_col, + .N_row, + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_level = NA_character_, + ...) { + # browser() + if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats) + if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats + if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_summary_numeric_labels else .a_summary_counts_labels + if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) { + .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats) + } + x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, ...) + if (is.numeric(x)) { + .labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply( + c("mean_ci", "mean_pval", "median_ci", "quantiles"), + function(x) attr(x_stats[[x]], "label") + ) + } + .stats <- intersect(.stats, names(x_stats)) + x_stats <- x_stats[.stats] + # browser() + if (!is.numeric(x) && !is.logical(x)) { + for (stat in c("count", "count_fraction")) { + for (a in names(x_stats[[stat]])) { + a_lvl <- paste(stat, a, sep = ".") + .stats <- c(.stats, a_lvl) + .formats[a_lvl] <- .formats[stat] + .labels[a_lvl] <- a + .indent_mods[a_lvl] <- .indent_mods[stat] + } + } -#' @describeIn summarize_variables Formatted analysis function method for `logical` class. -#' -#' @examples -#' # `a_summary.logical` -#' afun <- make_afun( -#' getS3method("a_summary", "logical") -#' ) -#' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) -#' -#' @export -a_summary.logical <- make_afun( - s_summary.logical, - .formats = .a_summary_counts_formats -) + x_stats <- unlist(x_stats, recursive = FALSE) + .stats <- names(x_stats) + } + .formats_x <- extract_by_name( + .formats, .stats, if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats + ) + .labels_x <- extract_by_name(.labels, .stats, if (is.numeric(x)) .a_summary_numeric_labels) + .indent_mods_x <- extract_by_name( + .indent_mods, .stats, if (is.numeric(x)) .a_summary_numeric_indent_mods else .a_summary_counts_indent_mods + ) + + in_rows( + .list = x_stats, + .formats = .formats_x, + .names = .labels_x, + .labels = .labels_x, + .indent_mods = .indent_mods_x, + .format_na_strs = na_level + ) +} #' Constructor Function for [summarize_vars()] and [summarize_colvars()] #' @@ -572,97 +586,13 @@ a_summary.logical <- make_afun( #' #' @return Combined formatted analysis function for use in [summarize_vars()]. #' -#' @note Since [a_summary()] is generic and we want customization of the formatting arguments -#' via [rtables::make_afun()], we need to create another temporary generic function, with -#' corresponding customized methods. Then in order for the methods to be found, -#' we need to wrap them in a combined `afun`. Since this is required by two layout creating -#' functions (and possibly others in the future), we provide a constructor that does this: -#' [create_afun_summary()]. -#' -#' @examples -#' # `create_afun_summary()` to create combined `afun` -#' -#' afun <- create_afun_summary( -#' .stats = NULL, -#' .formats = c(median = "xx."), -#' .labels = c(median = "My median"), -#' .indent_mods = c(median = 1L) -#' ) -#' ## Fabricated dataset. -#' dta_test <- data.frame( -#' USUBJID = rep(1:6, each = 3), -#' PARAMCD = rep("lab", 6 * 3), -#' AVISIT = rep(paste0("V", 1:3), 6), -#' ARM = rep(LETTERS[1:3], rep(6, 3)), -#' AVAL = c(9:1, rep(NA, 9)) -#' ) -#' -#' l <- basic_table() %>% -#' split_cols_by(var = "ARM") %>% -#' split_rows_by(var = "AVISIT") %>% -#' analyze(vars = "AVAL", afun = afun) -#' -#' build_table(l, df = dta_test) -#' #' @export create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) { - function(x, - ..., - .N_row, # nolint - .N_col, # nolint - .var) { - afun <- function(x, ...) { - UseMethod("afun", x) - } - - numeric_stats <- afun_selected_stats( - .stats, - all_stats = names(.a_summary_numeric_formats) - ) - afun.numeric <- make_afun( # nolint - a_summary.numeric, - .stats = numeric_stats, - .formats = extract_by_name(.formats, numeric_stats), - .labels = extract_by_name(.labels, numeric_stats), - .indent_mods = extract_by_name(.indent_mods, numeric_stats) - ) - - factor_stats <- afun_selected_stats(.stats, c("n", "count", "count_fraction")) - ungroup_stats <- afun_selected_stats(.stats, c("count", "count_fraction")) - afun.factor <- make_afun( # nolint - a_summary.factor, - .stats = factor_stats, - .formats = extract_by_name(.formats, factor_stats), - .labels = extract_by_name(.labels, factor_stats), - .indent_mods = extract_by_name(.indent_mods, factor_stats), - .ungroup_stats = ungroup_stats - ) - - afun.character <- make_afun( # nolint - a_summary.character, - .stats = factor_stats, - .formats = extract_by_name(.formats, factor_stats), - .labels = extract_by_name(.labels, factor_stats), - .indent_mods = extract_by_name(.indent_mods, factor_stats), - .ungroup_stats = ungroup_stats - ) - - afun.logical <- make_afun( # nolint - a_summary.logical, - .stats = factor_stats, - .formats = extract_by_name(.formats, factor_stats), - .labels = extract_by_name(.labels, factor_stats), - .indent_mods = extract_by_name(.indent_mods, factor_stats) - ) - - afun( - x = x, - ..., - .N_row = .N_row, - .N_col = .N_col, - .var = .var - ) - } + lifecycle::deprecate_stop( + "0.8.2", + "create_afun_summary()", + "a_summary()" + ) } #' @describeIn summarize_variables Layout-creating function which can take statistics function arguments @@ -745,16 +675,15 @@ summarize_vars <- function(lyt, .formats = NULL, .labels = NULL, .indent_mods = NULL) { - afun <- create_afun_summary(.stats, .formats, .labels, .indent_mods) - analyze( lyt = lyt, vars = vars, var_labels = var_labels, - afun = afun, + afun = a_summary, nested = nested, - extra_args = list(...), - na_str = na_level, + extra_args = list( + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_level = na_level, ... + ), inclNAs = TRUE, show_labels = show_labels, table_names = table_names, diff --git a/R/utils.R b/R/utils.R index fa25eedad2..3a1cbf3f33 100644 --- a/R/utils.R +++ b/R/utils.R @@ -259,25 +259,34 @@ combine_vectors <- function(x, y) { #' - If `x` is `NULL`, then still always `NULL` is returned (same as in base function). #' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those #' elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s. +#' - If `x_defaults` is not `NULL`, these values will be used to fill in any values that are in +#' `names` but not in `x`. #' #' @param x (named `vector`)\cr where to extract named elements from. #' @param names (`character`)\cr vector of names to extract. +#' @param x_defaults (named `vector`)\cr named vector of default values to fill in for each value in `names` if no +#' corresponding value in `x` exists. #' #' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`. #' #' @keywords internal -extract_by_name <- function(x, names) { - if (is.null(x)) { - return(NULL) - } +extract_by_name <- function(x, names, x_defaults = NULL) { + if (is.null(x)) return(NULL) checkmate::assert_named(x) checkmate::assert_character(names) which_extract <- intersect(names(x), names) + x_fill <- c() + if (!is.null(x_defaults)) { + checkmate::assert_named(x_defaults) + x_fill <- x_defaults + } if (length(which_extract) > 0) { - x[which_extract] + x_fill[which_extract] <- x[which_extract] + x_fill <- x_fill[names] } else { - NULL + x_fill <- NULL } + x_fill } #' Labels for Adverse Event Baskets diff --git a/man/create_afun_summary.Rd b/man/create_afun_summary.Rd index 69c3086158..e7df14a6d2 100644 --- a/man/create_afun_summary.Rd +++ b/man/create_afun_summary.Rd @@ -25,37 +25,3 @@ Combined formatted analysis function for use in \code{\link[=summarize_vars]{sum Constructor function which creates a combined formatted analysis function. } -\note{ -Since \code{\link[=a_summary]{a_summary()}} is generic and we want customization of the formatting arguments -via \code{\link[rtables:make_afun]{rtables::make_afun()}}, we need to create another temporary generic function, with -corresponding customized methods. Then in order for the methods to be found, -we need to wrap them in a combined \code{afun}. Since this is required by two layout creating -functions (and possibly others in the future), we provide a constructor that does this: -\code{\link[=create_afun_summary]{create_afun_summary()}}. -} -\examples{ -# `create_afun_summary()` to create combined `afun` - -afun <- create_afun_summary( - .stats = NULL, - .formats = c(median = "xx."), - .labels = c(median = "My median"), - .indent_mods = c(median = 1L) -) -## Fabricated dataset. -dta_test <- data.frame( - USUBJID = rep(1:6, each = 3), - PARAMCD = rep("lab", 6 * 3), - AVISIT = rep(paste0("V", 1:3), 6), - ARM = rep(LETTERS[1:3], rep(6, 3)), - AVAL = c(9:1, rep(NA, 9)) -) - -l <- basic_table() \%>\% - split_cols_by(var = "ARM") \%>\% - split_rows_by(var = "AVISIT") \%>\% - analyze(vars = "AVAL", afun = afun) - -build_table(l, df = dta_test) - -} diff --git a/man/extract_by_name.Rd b/man/extract_by_name.Rd index bc328b2a20..186478ec0a 100644 --- a/man/extract_by_name.Rd +++ b/man/extract_by_name.Rd @@ -4,12 +4,15 @@ \alias{extract_by_name} \title{Extract Elements by Name} \usage{ -extract_by_name(x, names) +extract_by_name(x, names, x_defaults = NULL) } \arguments{ \item{x}{(named \code{vector})\cr where to extract named elements from.} \item{names}{(\code{character})\cr vector of names to extract.} + +\item{x_defaults}{(named \code{vector})\cr named vector of default values to fill in for each value in \code{names} if no +corresponding value in \code{x} exists.} } \value{ \code{NULL} if \code{x} is \code{NULL}, otherwise the extracted elements from \code{x}. @@ -23,6 +26,8 @@ Differences to the standard \code{[} function are: \item If \code{x} is \code{NULL}, then still always \code{NULL} is returned (same as in base function). \item If \code{x} is not \code{NULL}, then the intersection of its names is made with \code{names} and those elements are returned. That is, \code{names} which don't appear in \code{x} are not returned as \code{NA}s. +\item If \code{x_defaults} is not \code{NULL}, these values will be used to fill in any values that are in +\code{names} but not in \code{x}. } } \keyword{internal} diff --git a/man/summarize_colvars.Rd b/man/summarize_colvars.Rd index 005cfae98f..d278f80b11 100644 --- a/man/summarize_colvars.Rd +++ b/man/summarize_colvars.Rd @@ -7,6 +7,7 @@ summarize_colvars( lyt, ..., + na_level = NA_character_, .stats = c("n", "mean_sd", "median", "range", "count_fraction"), .formats = NULL, .labels = NULL, @@ -18,6 +19,8 @@ summarize_colvars( \item{...}{arguments passed to \code{s_summary()}.} +\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} + \item{.stats}{(\code{character})\cr statistics to select for the table.} \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} diff --git a/man/summarize_variables.Rd b/man/summarize_variables.Rd index f8aa8e3934..f0b624a61c 100644 --- a/man/summarize_variables.Rd +++ b/man/summarize_variables.Rd @@ -8,10 +8,6 @@ \alias{s_summary.character} \alias{s_summary.logical} \alias{a_summary} -\alias{a_summary.numeric} -\alias{a_summary.factor} -\alias{a_summary.character} -\alias{a_summary.logical} \alias{summarize_vars} \title{Summarize Variables} \usage{ @@ -57,45 +53,15 @@ s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...) ... ) -a_summary(x, ..., .N_row, .N_col, .var) - -\method{a_summary}{numeric}( - x, - na.rm = TRUE, - denom, - .N_row, - .N_col, - .var, - control = control_summarize_vars(), - ... -) - -\method{a_summary}{factor}( - x, - na.rm = TRUE, - denom = c("n", "N_row", "N_col"), - .N_row, - .N_col, - ... -) - -\method{a_summary}{character}( +a_summary( x, - na.rm = TRUE, - denom = c("n", "N_row", "N_col"), - .N_row, .N_col, - .var, - verbose = TRUE, - ... -) - -\method{a_summary}{logical}( - x, - na.rm = TRUE, - denom = c("n", "N_row", "N_col"), .N_row, - .N_col, + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_level = NA_character_, ... ) @@ -150,6 +116,18 @@ See more about \code{type} in \code{\link[stats:quantile]{stats::quantile()}}. \item{verbose}{(\code{logical})\cr Defaults to \code{TRUE}, which prints out warnings and messages. It is mainly used to print out information about factor casting.} +\item{.stats}{(\code{character})\cr statistics to select for the table.} + +\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} + +\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} + +\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector +should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation +for that statistic's row label.} + +\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} + \item{lyt}{(\code{layout})\cr input layout where analyses will be added to.} \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.} @@ -160,8 +138,6 @@ to print out information about factor casting.} possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split underneath analyses, which is not allowed.} -\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} - \item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".} \item{table_names}{(\code{character})\cr this can be customized in case that the same \code{vars} are analyzed multiple times, @@ -169,16 +145,6 @@ to avoid warnings from \code{rtables}.} \item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group defined by this split instruction, or \code{NA_character_} (the default) for no section divider.} - -\item{.stats}{(\code{character})\cr statistics to select for the table.} - -\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} - -\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} - -\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector -should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation -for that statistic's row label.} } \value{ \itemize{ @@ -264,15 +230,8 @@ conversion to factor (with a warning) and then forwards to the method for factor \item \code{s_summary(logical)}: Method for \code{logical} class. -\item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{summarize_vars()}. - -\item \code{a_summary(numeric)}: Formatted analysis function method for \code{numeric} class. - -\item \code{a_summary(factor)}: Formatted analysis function method for \code{factor} class. - -\item \code{a_summary(character)}: Formatted analysis function method for \code{character} class. - -\item \code{a_summary(logical)}: Formatted analysis function method for \code{logical} class. +\item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{summarize_vars()} and +\code{cfun} in \code{summarize_colvars()}. \item \code{summarize_vars()}: Layout-creating function which can take statistics function arguments and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}. @@ -374,30 +333,10 @@ x <- c(TRUE, FALSE, TRUE, TRUE) s_summary(x, denom = "N_row", .N_row = 10L) s_summary(x, denom = "N_col", .N_col = 20L) -# `a_summary.numeric` a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") - -# `a_summary.factor` -# We need to ungroup `count` and `count_fraction` first so that the rtables formatting -# functions can be applied correctly. -afun <- make_afun( - getS3method("a_summary", "factor"), - .ungroup_stats = c("count", "count_fraction") -) -afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) - -# `a_summary.character` -afun <- make_afun( - getS3method("a_summary", "character"), - .ungroup_stats = c("count", "count_fraction") -) -afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) - -# `a_summary.logical` -afun <- make_afun( - getS3method("a_summary", "logical") -) -afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) +a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) +a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) +a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) ## Fabricated dataset. dta_test <- data.frame( diff --git a/man/summary_formats.Rd b/man/summary_formats.Rd index 690994c351..b22355fa50 100644 --- a/man/summary_formats.Rd +++ b/man/summary_formats.Rd @@ -13,6 +13,6 @@ summary_formats(type = "numeric") A named \code{vector} of default statistic formats for the given data type. } \description{ -Returns format patterns for descriptive statistics. The format is understood by the \code{rtables}. +Returns format patterns for descriptive statistics. The format is understood by \code{rtables}. } \keyword{internal} diff --git a/man/summary_labels.Rd b/man/summary_labels.Rd index a7cc88596a..b30caa625c 100644 --- a/man/summary_labels.Rd +++ b/man/summary_labels.Rd @@ -4,10 +4,13 @@ \alias{summary_labels} \title{Label Function for Descriptive Statistics} \usage{ -summary_labels() +summary_labels(type = "numeric") +} +\arguments{ +\item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.} } \value{ -A named \code{vector} of default statistic labels. +A named \code{vector} of default statistic labels for the given data type. } \description{ Returns labels of descriptive statistics for numeric variables. diff --git a/tests/testthat/_snaps/summarize_variables.md b/tests/testthat/_snaps/summarize_variables.md index 66a86bcb1f..a7a3db1ea3 100644 --- a/tests/testthat/_snaps/summarize_variables.md +++ b/tests/testthat/_snaps/summarize_variables.md @@ -920,47 +920,6 @@ [1] 0 -# create_afun_summary creates an `afun` that works - - Code - res - Output - A B C - ———————————————————————————————————————————————————————— - V1 - AVAL - n 2 1 0 - My median 8 3 NA - Min - Max 6.0 - 9.0 3.0 - 3.0 NA - Mean 95% CI (-11.56, 26.56) NA NA - ARM - n 2 2 2 - A 2 (100%) 0 0 - B 0 2 (100%) 0 - C 0 0 2 (100%) - V2 - AVAL - n 2 1 0 - My median 6 2 NA - Min - Max 5.0 - 8.0 2.0 - 2.0 NA - Mean 95% CI (-12.56, 25.56) NA NA - ARM - n 2 2 2 - A 2 (100%) 0 0 - B 0 2 (100%) 0 - C 0 0 2 (100%) - V3 - AVAL - n 2 1 0 - My median 6 1 NA - Min - Max 4.0 - 7.0 1.0 - 1.0 NA - Mean 95% CI (-13.56, 24.56) NA NA - ARM - n 2 2 2 - A 2 (100%) 0 0 - B 0 2 (100%) 0 - C 0 0 2 (100%) - # `summarize_vars` works with healthy input, default `na.rm = TRUE`. Code diff --git a/tests/testthat/test-summarize_variables.R b/tests/testthat/test-summarize_variables.R index c98635e7dc..6e163c6d76 100644 --- a/tests/testthat/test-summarize_variables.R +++ b/tests/testthat/test-summarize_variables.R @@ -160,33 +160,6 @@ testthat::test_that("s_summary works with logical vectors and by if requested do testthat::expect_snapshot(res) }) -testthat::test_that("create_afun_summary creates an `afun` that works", { - afun <- create_afun_summary( - .stats = c("n", "count_fraction", "median", "range", "mean_ci"), - .formats = c(median = "xx."), - .labels = c(median = "My median"), - .indent_mods = c(median = 1L) - ) - dta_test <- data.frame( - USUBJID = rep(1:6, each = 3), - PARAMCD = rep("lab", 6 * 3), - AVISIT = rep(paste0("V", 1:3), 6), - ARM = rep(LETTERS[1:3], rep(6, 3)), - AVAL = c(9:1, rep(NA, 9)), - stringsAsFactors = TRUE - ) - - l <- basic_table() %>% - split_cols_by(var = "ARM") %>% - split_rows_by(var = "AVISIT") %>% - analyze(vars = c("AVAL", "ARM"), afun = afun) - - result <- build_table(l, df = dta_test) - - res <- testthat::expect_silent(result) - testthat::expect_snapshot(res) -}) - testthat::test_that("`summarize_vars` works with healthy input, default `na.rm = TRUE`.", { dta_test <- data.frame(AVAL = c(1:4, NA, NA)) From c0596583de14ce12c1343bf07c6712a18b21cc07 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Wed, 14 Jun 2023 20:32:18 -0400 Subject: [PATCH 02/33] Refactor a_compare, deprecate create_afun_compare --- NAMESPACE | 4 - R/compare_variables.R | 284 +++++++++++-------------------------- R/summarize_variables.R | 8 +- man/compare_variables.Rd | 103 +++++--------- man/create_afun_compare.Rd | 36 +---- man/create_afun_summary.Rd | 8 +- 6 files changed, 130 insertions(+), 313 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8c20ce6c27..35026811bf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,5 @@ # Generated by roxygen2: do not edit by hand -S3method(a_compare,character) -S3method(a_compare,factor) -S3method(a_compare,logical) -S3method(a_compare,numeric) S3method(as.rtable,data.frame) S3method(h_coxreg_inter_effect,factor) S3method(h_coxreg_inter_effect,numeric) diff --git a/R/compare_variables.R b/R/compare_variables.R index 4ce447f9bc..246b72634d 100644 --- a/R/compare_variables.R +++ b/R/compare_variables.R @@ -244,119 +244,90 @@ s_compare.logical <- function(x, y } +.a_compare_numeric_formats <- c(.a_summary_numeric_formats, pval = "x.xxxx | (<0.0001)") +.a_compare_numeric_labels <- c(.a_summary_numeric_labels, pval = "p-value (t-test)") +.a_compare_numeric_indent_mods <- c(.a_summary_numeric_indent_mods, pval = 0L) +.a_compare_counts_formats <- c(.a_summary_counts_formats, pval = "x.xxxx | (<0.0001)") +.a_compare_counts_labels <- c(.a_summary_counts_labels, pval = "p-value (chi-squared test)") +.a_compare_counts_indent_mods <- c(.a_summary_counts_indent_mods, pval = 0L) + #' @describeIn compare_variables Formatted analysis function which is used as `afun` #' in `compare_vars()`. #' #' @return #' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()]. #' +#' @examples +#' a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla") +#' a_compare(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE) +#' a_compare(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE) +#' a_compare(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE) +#' #' @export a_compare <- function(x, + .N_col, + .N_row, .ref_group, .in_ref_col, - ..., - .var) { - UseMethod("a_compare", x) -} - -#' @describeIn compare_variables Formatted analysis function method for `numeric` class. -#' -#' @examples -#' # `a_compare.numeric` -#' a_compare( -#' rnorm(10, 5, 1), -#' .ref_group = rnorm(20, -5, 1), -#' .in_ref_col = FALSE, -#' .var = "bla" -#' ) -#' -#' @export -a_compare.numeric <- make_afun( - s_compare.numeric, - .formats = c( - .a_summary_numeric_formats, - pval = "x.xxxx | (<0.0001)" - ), - .labels = c( - .a_summary_numeric_labels, - pval = "p-value (t-test)" - ), - .null_ref_cells = FALSE -) - -.a_compare_counts_formats <- c( - .a_summary_counts_formats, - pval = "x.xxxx | (<0.0001)" -) - -.a_compare_counts_labels <- c( - pval = "p-value (chi-squared test)" -) - -#' @describeIn compare_variables Formatted analysis function method for `factor` class. -#' -#' @examples -#' # `a_compare.factor` -#' # We need to ungroup `count` and `count_fraction` first so that the `rtables` formatting -#' # functions can be applied correctly. -#' afun <- make_afun( -#' getS3method("a_compare", "factor"), -#' .ungroup_stats = c("count", "count_fraction") -#' ) -#' x <- factor(c("a", "a", "b", "c", "a")) -#' y <- factor(c("a", "a", "b", "c")) -#' afun(x, .ref_group = y, .in_ref_col = FALSE) -#' -#' @export -a_compare.factor <- make_afun( - s_compare.factor, - .formats = .a_compare_counts_formats, - .labels = .a_compare_counts_labels, - .null_ref_cells = FALSE -) - -#' @describeIn compare_variables Formatted analysis function method for `character` class. -#' -#' @examples -#' # `a_compare.character` -#' afun <- make_afun( -#' getS3method("a_compare", "character"), -#' .ungroup_stats = c("count", "count_fraction") -#' ) -#' x <- c("A", "B", "A", "C") -#' y <- c("B", "A", "C") -#' afun(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE) -#' -#' @export -a_compare.character <- make_afun( - s_compare.character, - .formats = .a_compare_counts_formats, - .labels = .a_compare_counts_labels, - .null_ref_cells = FALSE -) + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_level = NA_character_, + ...) { + # browser() + if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats) + if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats + if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels + if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) { + .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats) + } + x_stats <- s_compare(x = x, .N_col = .N_col, .N_row = .N_row, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...) + if (is.numeric(x)) { + .labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply( + c("mean_ci", "mean_pval", "median_ci", "quantiles"), + function(x) attr(x_stats[[x]], "label") + ) + } + .stats <- intersect(.stats, names(x_stats)) + x_stats <- x_stats[.stats] + # browser() + if (!is.numeric(x) && !is.logical(x)) { + for (stat in c("count", "count_fraction")) { + for (a in names(x_stats[[stat]])) { + a_lvl <- paste(stat, a, sep = ".") + .stats <- c(.stats, a_lvl) + .formats[a_lvl] <- .formats[stat] + .labels[a_lvl] <- a + .indent_mods[a_lvl] <- .indent_mods[stat] + } + } + if (.in_ref_col) x_stats[["pval"]] <- "pvalue" + x_stats <- unlist(x_stats, recursive = FALSE) + if (.in_ref_col) x_stats[["pval"]] <- character() + .stats <- names(x_stats) + } + .formats_x <- extract_by_name( + .formats, .stats, if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats + ) + .labels_x <- extract_by_name(.labels, .stats, if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels) + .indent_mods_x <- extract_by_name( + .indent_mods, .stats, if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods + ) -#' @describeIn compare_variables Formatted analysis function method for `logical` class. -#' -#' @examples -#' # `a_compare.logical` -#' afun <- make_afun( -#' getS3method("a_compare", "logical") -#' ) -#' x <- c(TRUE, FALSE, FALSE, TRUE, TRUE) -#' y <- c(TRUE, FALSE) -#' afun(x, .ref_group = y, .in_ref_col = FALSE) -#' -#' @export -a_compare.logical <- make_afun( - s_compare.logical, - .formats = .a_compare_counts_formats, - .labels = .a_compare_counts_labels, - .null_ref_cells = FALSE -) + in_rows( + .list = x_stats, + .formats = .formats_x, + .names = .labels_x, + .labels = .labels_x, + .indent_mods = .indent_mods_x, + .format_na_strs = na_level + ) +} #' Constructor Function for [compare_vars()] #' -#' @description `r lifecycle::badge("stable")` +#' @description `r lifecycle::badge("deprecated")` #' #' Constructor function which creates a combined formatted analysis function. #' @@ -367,110 +338,20 @@ a_compare.logical <- make_afun( #' #' @return Combined formatted analysis function for use in [compare_vars()]. #' -#' @note Since [a_compare()] is generic and we want customization of the formatting arguments -#' via [rtables::make_afun()], we need to create another temporary generic function, with -#' corresponding customized methods. Then in order for the methods to be found, -#' we need to wrap them in a combined `afun`. Since this is required by two layout creating -#' functions (and possibly others in the future), we provide a constructor that does this: -#' [create_afun_compare()]. +#' @note This function has been deprecated in favor of direct implementation of `a_compare()`. #' #' @seealso [compare_vars()] #' -#' @examples -#' # `create_afun_compare()` to create combined `afun` -#' -#' afun <- create_afun_compare( -#' .stats = c("n", "count_fraction", "mean_sd", "pval"), -#' .indent_mods = c(pval = 1L) -#' ) -#' -#' lyt <- basic_table() %>% -#' split_cols_by("ARMCD", ref_group = "ARM A") %>% -#' analyze( -#' "AGE", -#' afun = afun, -#' show_labels = "visible" -#' ) -#' build_table(lyt, df = tern_ex_adsl) -#' -#' lyt <- basic_table() %>% -#' split_cols_by("ARMCD", ref_group = "ARM A") %>% -#' analyze( -#' "SEX", -#' afun = afun, -#' show_labels = "visible" -#' ) -#' build_table(lyt, df = tern_ex_adsl) -#' #' @export create_afun_compare <- function(.stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL) { - function(x, - .ref_group, - .in_ref_col, - ..., - .var) { - afun <- function(x, ...) { - UseMethod("afun", x) - } - - numeric_stats <- afun_selected_stats( - .stats, - all_stats = c(names(.a_summary_numeric_formats), "pval") - ) - afun.numeric <- make_afun( # nolint - a_compare.numeric, - .stats = numeric_stats, - .formats = extract_by_name(.formats, numeric_stats), - .labels = extract_by_name(.labels, numeric_stats), - .indent_mods = extract_by_name(.indent_mods, numeric_stats), - .null_ref_cells = FALSE - ) - - factor_stats <- afun_selected_stats( - .stats, - all_stats = names(.a_compare_counts_formats) - ) - ungroup_stats <- afun_selected_stats(.stats, c("count", "count_fraction")) - afun.factor <- make_afun( # nolint - a_compare.factor, - .stats = factor_stats, - .formats = extract_by_name(.formats, factor_stats), - .labels = extract_by_name(.labels, factor_stats), - .indent_mods = extract_by_name(.indent_mods, factor_stats), - .ungroup_stats = ungroup_stats, - .null_ref_cells = FALSE - ) - - afun.character <- make_afun( # nolint - a_compare.character, - .stats = factor_stats, - .formats = extract_by_name(.formats, factor_stats), - .labels = extract_by_name(.labels, factor_stats), - .indent_mods = extract_by_name(.indent_mods, factor_stats), - .ungroup_stats = ungroup_stats, - .null_ref_cells = FALSE - ) - - afun.logical <- make_afun( # nolint - a_compare.logical, - .stats = factor_stats, - .formats = extract_by_name(.formats, factor_stats), - .labels = extract_by_name(.labels, factor_stats), - .indent_mods = extract_by_name(.indent_mods, factor_stats), - .null_ref_cells = FALSE - ) - - afun( - x = x, - .ref_group = .ref_group, - .in_ref_col = .in_ref_col, - ..., - .var = .var - ) - } + lifecycle::deprecate_stop( + "0.8.2", + "create_afun_compare()", + "a_compare()" + ) } #' @describeIn compare_variables Layout-creating function which can take statistics function arguments @@ -515,22 +396,23 @@ compare_vars <- function(lyt, na_level = NA_character_, show_labels = "default", table_names = vars, + section_div = NA_character_, .stats = c("n", "mean_sd", "count_fraction", "pval"), .formats = NULL, .labels = NULL, .indent_mods = NULL) { - afun <- create_afun_compare(.stats, .formats, .labels, .indent_mods) - analyze( lyt = lyt, vars = vars, var_labels = var_labels, - afun = afun, + afun = a_compare, nested = nested, - extra_args = list(...), - na_str = na_level, + extra_args = list( + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_level = na_level, ... + ), inclNAs = TRUE, show_labels = show_labels, - table_names = table_names + table_names = table_names, + section_div = section_div ) } diff --git a/R/summarize_variables.R b/R/summarize_variables.R index 3c25f7bc4c..e5f8861571 100644 --- a/R/summarize_variables.R +++ b/R/summarize_variables.R @@ -558,7 +558,7 @@ a_summary <- function(x, .formats_x <- extract_by_name( .formats, .stats, if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats ) - .labels_x <- extract_by_name(.labels, .stats, if (is.numeric(x)) .a_summary_numeric_labels) + .labels_x <- extract_by_name(.labels, .stats, if (is.numeric(x)) .a_summary_numeric_labels else .a_summary_counts_labels) .indent_mods_x <- extract_by_name( .indent_mods, .stats, if (is.numeric(x)) .a_summary_numeric_indent_mods else .a_summary_counts_indent_mods ) @@ -575,7 +575,7 @@ a_summary <- function(x, #' Constructor Function for [summarize_vars()] and [summarize_colvars()] #' -#' @description `r lifecycle::badge("stable")` +#' @description `r lifecycle::badge("deprecated")` #' #' Constructor function which creates a combined formatted analysis function. #' @@ -586,6 +586,10 @@ a_summary <- function(x, #' #' @return Combined formatted analysis function for use in [summarize_vars()]. #' +#' @note This function has been deprecated in favor of direct implementation of `a_summary()`. +#' +#' @seealso [summarize_vars()] +#' #' @export create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) { lifecycle::deprecate_stop( diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd index 2626f42de8..b61c7e8f00 100644 --- a/man/compare_variables.Rd +++ b/man/compare_variables.Rd @@ -8,10 +8,6 @@ \alias{s_compare.character} \alias{s_compare.logical} \alias{a_compare} -\alias{a_compare.numeric} -\alias{a_compare.factor} -\alias{a_compare.character} -\alias{a_compare.logical} \alias{compare_vars} \title{Compare Variables Between Groups} \usage{ @@ -34,25 +30,20 @@ s_compare(x, .ref_group, .in_ref_col, ...) \method{s_compare}{logical}(x, .ref_group, .in_ref_col, na.rm = TRUE, denom = "n", ...) -a_compare(x, .ref_group, .in_ref_col, ..., .var) - -\method{a_compare}{numeric}(x, .ref_group, .in_ref_col, ...) - -\method{a_compare}{factor}(x, .ref_group, .in_ref_col, denom = "n", na.rm = TRUE, ...) - -\method{a_compare}{character}( +a_compare( x, + .N_col, + .N_row, .ref_group, .in_ref_col, - denom = "n", - na.rm = TRUE, - .var, - verbose = TRUE, + .stats = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL, + na_level = NA_character_, ... ) -\method{a_compare}{logical}(x, .ref_group, .in_ref_col, na.rm = TRUE, denom = "n", ...) - compare_vars( lyt, vars, @@ -62,6 +53,7 @@ compare_vars( na_level = NA_character_, show_labels = "default", table_names = vars, + section_div = NA_character_, .stats = c("n", "mean_sd", "count_fraction", "pval"), .formats = NULL, .labels = NULL, @@ -88,6 +80,23 @@ by a statistics function.} \item{verbose}{(\code{logical})\cr Whether warnings and messages should be printed. Mainly used to print out information about factor casting. Defaults to \code{TRUE}.} +\item{.N_col}{(\code{count})\cr row-wise N (row group count) for the group of observations being analyzed +(i.e. with no column-based subsetting) that is passed by \code{rtables}.} + +\item{.N_row}{(\code{count})\cr column-wise N (column count) for the full column that is passed by \code{rtables}.} + +\item{.stats}{(\code{character})\cr statistics to select for the table.} + +\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} + +\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} + +\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector +should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation +for that statistic's row label.} + +\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} + \item{lyt}{(\code{layout})\cr input layout where analyses will be added to.} \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.} @@ -98,22 +107,13 @@ to print out information about factor casting. Defaults to \code{TRUE}.} possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split underneath analyses, which is not allowed.} -\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} - \item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".} \item{table_names}{(\code{character})\cr this can be customized in case that the same \code{vars} are analyzed multiple times, to avoid warnings from \code{rtables}.} -\item{.stats}{(\code{character})\cr statistics to select for the table.} - -\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} - -\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} - -\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector -should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation -for that statistic's row label.} +\item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group +defined by this split instruction, or \code{NA_character_} (the default) for no section divider.} } \value{ \itemize{ @@ -154,14 +154,6 @@ is used. If missing values are not removed, then they are counted as \code{FALSE \item \code{a_compare()}: Formatted analysis function which is used as \code{afun} in \code{compare_vars()}. -\item \code{a_compare(numeric)}: Formatted analysis function method for \code{numeric} class. - -\item \code{a_compare(factor)}: Formatted analysis function method for \code{factor} class. - -\item \code{a_compare(character)}: Formatted analysis function method for \code{character} class. - -\item \code{a_compare(logical)}: Formatted analysis function method for \code{logical} class. - \item \code{compare_vars()}: Layout-creating function which can take statistics function arguments and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}. @@ -240,41 +232,10 @@ y <- c(NA, NA, NA, NA, FALSE) s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE) s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE) -# `a_compare.numeric` -a_compare( - rnorm(10, 5, 1), - .ref_group = rnorm(20, -5, 1), - .in_ref_col = FALSE, - .var = "bla" -) - -# `a_compare.factor` -# We need to ungroup `count` and `count_fraction` first so that the `rtables` formatting -# functions can be applied correctly. -afun <- make_afun( - getS3method("a_compare", "factor"), - .ungroup_stats = c("count", "count_fraction") -) -x <- factor(c("a", "a", "b", "c", "a")) -y <- factor(c("a", "a", "b", "c")) -afun(x, .ref_group = y, .in_ref_col = FALSE) - -# `a_compare.character` -afun <- make_afun( - getS3method("a_compare", "character"), - .ungroup_stats = c("count", "count_fraction") -) -x <- c("A", "B", "A", "C") -y <- c("B", "A", "C") -afun(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE) - -# `a_compare.logical` -afun <- make_afun( - getS3method("a_compare", "logical") -) -x <- c(TRUE, FALSE, FALSE, TRUE, TRUE) -y <- c(TRUE, FALSE) -afun(x, .ref_group = y, .in_ref_col = FALSE) +a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla") +a_compare(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE) +a_compare(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE) +a_compare(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE) # `compare_vars()` in `rtables` pipelines diff --git a/man/create_afun_compare.Rd b/man/create_afun_compare.Rd index a82994f874..14f71af618 100644 --- a/man/create_afun_compare.Rd +++ b/man/create_afun_compare.Rd @@ -26,44 +26,12 @@ for that statistic's row label.} Combined formatted analysis function for use in \code{\link[=compare_vars]{compare_vars()}}. } \description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Constructor function which creates a combined formatted analysis function. } \note{ -Since \code{\link[=a_compare]{a_compare()}} is generic and we want customization of the formatting arguments -via \code{\link[rtables:make_afun]{rtables::make_afun()}}, we need to create another temporary generic function, with -corresponding customized methods. Then in order for the methods to be found, -we need to wrap them in a combined \code{afun}. Since this is required by two layout creating -functions (and possibly others in the future), we provide a constructor that does this: -\code{\link[=create_afun_compare]{create_afun_compare()}}. -} -\examples{ -# `create_afun_compare()` to create combined `afun` - -afun <- create_afun_compare( - .stats = c("n", "count_fraction", "mean_sd", "pval"), - .indent_mods = c(pval = 1L) -) - -lyt <- basic_table() \%>\% - split_cols_by("ARMCD", ref_group = "ARM A") \%>\% - analyze( - "AGE", - afun = afun, - show_labels = "visible" - ) -build_table(lyt, df = tern_ex_adsl) - -lyt <- basic_table() \%>\% - split_cols_by("ARMCD", ref_group = "ARM A") \%>\% - analyze( - "SEX", - afun = afun, - show_labels = "visible" - ) -build_table(lyt, df = tern_ex_adsl) - +This function has been deprecated in favor of direct implementation of \code{a_compare()}. } \seealso{ \code{\link[=compare_vars]{compare_vars()}} diff --git a/man/create_afun_summary.Rd b/man/create_afun_summary.Rd index e7df14a6d2..0c22e921ad 100644 --- a/man/create_afun_summary.Rd +++ b/man/create_afun_summary.Rd @@ -21,7 +21,13 @@ for that statistic's row label.} Combined formatted analysis function for use in \code{\link[=summarize_vars]{summarize_vars()}}. } \description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Constructor function which creates a combined formatted analysis function. } +\note{ +This function has been deprecated in favor of direct implementation of \code{a_summary()}. +} +\seealso{ +\code{\link[=summarize_vars]{summarize_vars()}} +} From 6df46b958f8b723360514b6a0c8cc7c18410df1c Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Wed, 14 Jun 2023 21:17:51 -0400 Subject: [PATCH 03/33] Update pkgdown, remove browser calls --- R/compare_variables.R | 2 -- R/summarize_variables.R | 2 -- _pkgdown.yml | 1 - 3 files changed, 5 deletions(-) diff --git a/R/compare_variables.R b/R/compare_variables.R index 246b72634d..1e1af32c3d 100644 --- a/R/compare_variables.R +++ b/R/compare_variables.R @@ -275,7 +275,6 @@ a_compare <- function(x, .indent_mods = NULL, na_level = NA_character_, ...) { - # browser() if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats) if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels @@ -291,7 +290,6 @@ a_compare <- function(x, } .stats <- intersect(.stats, names(x_stats)) x_stats <- x_stats[.stats] - # browser() if (!is.numeric(x) && !is.logical(x)) { for (stat in c("count", "count_fraction")) { for (a in names(x_stats[[stat]])) { diff --git a/R/summarize_variables.R b/R/summarize_variables.R index e5f8861571..17f36a6adb 100644 --- a/R/summarize_variables.R +++ b/R/summarize_variables.R @@ -524,7 +524,6 @@ a_summary <- function(x, .indent_mods = NULL, na_level = NA_character_, ...) { - # browser() if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats) if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_summary_numeric_labels else .a_summary_counts_labels @@ -540,7 +539,6 @@ a_summary <- function(x, } .stats <- intersect(.stats, names(x_stats)) x_stats <- x_stats[.stats] - # browser() if (!is.numeric(x) && !is.logical(x)) { for (stat in c("count", "count_fraction")) { for (a in names(x_stats[[stat]])) { diff --git a/_pkgdown.yml b/_pkgdown.yml index aa1bc386aa..d905a742e9 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -67,7 +67,6 @@ reference: - title: Analysis Helper Functions desc: These functions are useful in defining an analysis. contents: - - starts_with("create_", internal = TRUE) - starts_with("h_") - starts_with("or_") - starts_with("prop_") From cf35bcef13d76ddb79b8300e4eca08157fe1259d Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Wed, 14 Jun 2023 21:26:45 -0400 Subject: [PATCH 04/33] Fix extract_by_name --- R/utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 3a1cbf3f33..9a93fbc52d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -282,7 +282,7 @@ extract_by_name <- function(x, names, x_defaults = NULL) { } if (length(which_extract) > 0) { x_fill[which_extract] <- x[which_extract] - x_fill <- x_fill[names] + x_fill <- x_fill[which_extract] } else { x_fill <- NULL } From c3329062172d4eabd1a0ec09ede6a53a002801fd Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 15 Jun 2023 17:15:05 -0400 Subject: [PATCH 05/33] Fix NA handling --- R/compare_variables.R | 34 +++++++++++++++++++++++++++------- R/summarize_variables.R | 32 +++++++++++++++++++++++++++----- R/utils.R | 2 +- 3 files changed, 55 insertions(+), 13 deletions(-) diff --git a/R/compare_variables.R b/R/compare_variables.R index 1e1af32c3d..2f0470afa4 100644 --- a/R/compare_variables.R +++ b/R/compare_variables.R @@ -267,42 +267,62 @@ s_compare.logical <- function(x, a_compare <- function(x, .N_col, .N_row, + .var, + .df_row, .ref_group, .in_ref_col, .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL, + na.rm = TRUE, na_level = NA_character_, ...) { - if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats) + if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels) if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels + if (is.null(.indent_mods)) .indent_mods <- if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) { .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats) } - x_stats <- s_compare(x = x, .N_col = .N_col, .N_row = .N_row, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...) + if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") + x_stats <- s_compare(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...) if (is.numeric(x)) { .labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply( c("mean_ci", "mean_pval", "median_ci", "quantiles"), function(x) attr(x_stats[[x]], "label") ) } + # browser() .stats <- intersect(.stats, names(x_stats)) x_stats <- x_stats[.stats] if (!is.numeric(x) && !is.logical(x)) { for (stat in c("count", "count_fraction")) { for (a in names(x_stats[[stat]])) { + a <- if (a == "na-level") "NA" else a a_lvl <- paste(stat, a, sep = ".") .stats <- c(.stats, a_lvl) - .formats[a_lvl] <- .formats[stat] - .labels[a_lvl] <- a - .indent_mods[a_lvl] <- .indent_mods[stat] + .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) { + which(names(.formats) == stat) - 1 + which(names(x_stats[[stat]]) == a) + } else { + length(.formats) + }) + .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) { + which(names(.labels) == stat) - 1 + which(names(x_stats[[stat]]) == a) + } else { + length(.labels) + }) + .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) { + which(names(.indent_mods) == stat) - 1 + which(names(x_stats[[stat]]) == a) + } else { + length(.indent_mods) + }) } } if (.in_ref_col) x_stats[["pval"]] <- "pvalue" x_stats <- unlist(x_stats, recursive = FALSE) if (.in_ref_col) x_stats[["pval"]] <- character() + names(x_stats) <- gsub("na-level", "NA", names(x_stats)) .stats <- names(x_stats) } .formats_x <- extract_by_name( @@ -312,7 +332,6 @@ a_compare <- function(x, .indent_mods_x <- extract_by_name( .indent_mods, .stats, if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods ) - in_rows( .list = x_stats, .formats = .formats_x, @@ -391,6 +410,7 @@ compare_vars <- function(lyt, var_labels = vars, nested = TRUE, ..., + na.rm = TRUE, na_level = NA_character_, show_labels = "default", table_names = vars, @@ -406,7 +426,7 @@ compare_vars <- function(lyt, afun = a_compare, nested = nested, extra_args = list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_level = na_level, ... + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na.rm = na.rm, na_level = na_level, ... ), inclNAs = TRUE, show_labels = show_labels, diff --git a/R/summarize_variables.R b/R/summarize_variables.R index 17f36a6adb..37d22eb8b0 100644 --- a/R/summarize_variables.R +++ b/R/summarize_variables.R @@ -518,19 +518,26 @@ s_summary.logical <- function(x, a_summary <- function(x, .N_col, .N_row, + .var, + .df_row, .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL, + na.rm = TRUE, na_level = NA_character_, ...) { if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats) if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_summary_numeric_labels else .a_summary_counts_labels + if (is.null(.indent_mods)) { + .indent_mods <- if (is.numeric(x)) .a_summary_numeric_indent_mods else .a_summary_counts_indent_mods + } if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) { .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats) } - x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, ...) + if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") + x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) if (is.numeric(x)) { .labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply( c("mean_ci", "mean_pval", "median_ci", "quantiles"), @@ -542,15 +549,29 @@ a_summary <- function(x, if (!is.numeric(x) && !is.logical(x)) { for (stat in c("count", "count_fraction")) { for (a in names(x_stats[[stat]])) { + a <- if (a == "na-level") "NA" else a a_lvl <- paste(stat, a, sep = ".") .stats <- c(.stats, a_lvl) - .formats[a_lvl] <- .formats[stat] - .labels[a_lvl] <- a - .indent_mods[a_lvl] <- .indent_mods[stat] + .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) { + which(names(.formats) == stat) - 1 + which(names(x_stats[[stat]]) == a) + } else { + length(.formats) + }) + .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) { + which(names(.labels) == stat) - 1 + which(names(x_stats[[stat]]) == a) + } else { + length(.labels) + }) + .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) { + which(names(.indent_mods) == stat) - 1 + which(names(x_stats[[stat]]) == a) + } else { + length(.indent_mods) + }) } } x_stats <- unlist(x_stats, recursive = FALSE) + names(x_stats) <- gsub("na-level", "NA", names(x_stats)) .stats <- names(x_stats) } .formats_x <- extract_by_name( @@ -669,6 +690,7 @@ summarize_vars <- function(lyt, var_labels = vars, nested = TRUE, ..., + na.rm = TRUE, na_level = NA_character_, show_labels = "default", table_names = vars, @@ -684,7 +706,7 @@ summarize_vars <- function(lyt, afun = a_summary, nested = nested, extra_args = list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_level = na_level, ... + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na.rm = na.rm, na_level = na_level, ... ), inclNAs = TRUE, show_labels = show_labels, diff --git a/R/utils.R b/R/utils.R index 9a93fbc52d..5b9e27142f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -282,7 +282,7 @@ extract_by_name <- function(x, names, x_defaults = NULL) { } if (length(which_extract) > 0) { x_fill[which_extract] <- x[which_extract] - x_fill <- x_fill[which_extract] + x_fill <- if (!is.null(x_defaults)) x_fill[names] else x_fill[which_extract] } else { x_fill <- NULL } From f2cd0628c6b215ba449829d5cbdb458d48c921fe Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 15 Jun 2023 18:28:38 -0400 Subject: [PATCH 06/33] Modularize --- R/compare_variables.R | 39 +++++++--------------------- R/summarize_variables.R | 32 +++++------------------ R/utils_factor.R | 53 ++++++++++++++++++++++++++++++++++++++ man/compare_variables.Rd | 6 +++++ man/summarize_variables.Rd | 6 +++++ man/ungroup_stats.Rd | 34 ++++++++++++++++++++++++ 6 files changed, 115 insertions(+), 55 deletions(-) create mode 100644 man/ungroup_stats.Rd diff --git a/R/compare_variables.R b/R/compare_variables.R index 2f0470afa4..5aa334c8a1 100644 --- a/R/compare_variables.R +++ b/R/compare_variables.R @@ -281,7 +281,9 @@ a_compare <- function(x, if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels) if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels - if (is.null(.indent_mods)) .indent_mods <- if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods + if (is.null(.indent_mods)) { + .indent_mods <- if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods + } if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) { .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats) } @@ -293,37 +295,15 @@ a_compare <- function(x, function(x) attr(x_stats[[x]], "label") ) } - # browser() .stats <- intersect(.stats, names(x_stats)) x_stats <- x_stats[.stats] if (!is.numeric(x) && !is.logical(x)) { - for (stat in c("count", "count_fraction")) { - for (a in names(x_stats[[stat]])) { - a <- if (a == "na-level") "NA" else a - a_lvl <- paste(stat, a, sep = ".") - .stats <- c(.stats, a_lvl) - .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) { - which(names(.formats) == stat) - 1 + which(names(x_stats[[stat]]) == a) - } else { - length(.formats) - }) - .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) { - which(names(.labels) == stat) - 1 + which(names(x_stats[[stat]]) == a) - } else { - length(.labels) - }) - .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) { - which(names(.indent_mods) == stat) - 1 + which(names(x_stats[[stat]]) == a) - } else { - length(.indent_mods) - }) - } - } - if (.in_ref_col) x_stats[["pval"]] <- "pvalue" - x_stats <- unlist(x_stats, recursive = FALSE) - if (.in_ref_col) x_stats[["pval"]] <- character() - names(x_stats) <- gsub("na-level", "NA", names(x_stats)) - .stats <- names(x_stats) + x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col) + x_stats <- x_ungrp[["x"]] + .stats <- x_ungrp[[".stats"]] + .formats <- x_ungrp[[".formats"]] + .labels <- x_ungrp[[".labels"]] + .indent_mods <- x_ungrp[[".indent_mods"]] } .formats_x <- extract_by_name( .formats, .stats, if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats @@ -332,6 +312,7 @@ a_compare <- function(x, .indent_mods_x <- extract_by_name( .indent_mods, .stats, if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods ) + in_rows( .list = x_stats, .formats = .formats_x, diff --git a/R/summarize_variables.R b/R/summarize_variables.R index 37d22eb8b0..d45650f24e 100644 --- a/R/summarize_variables.R +++ b/R/summarize_variables.R @@ -547,32 +547,12 @@ a_summary <- function(x, .stats <- intersect(.stats, names(x_stats)) x_stats <- x_stats[.stats] if (!is.numeric(x) && !is.logical(x)) { - for (stat in c("count", "count_fraction")) { - for (a in names(x_stats[[stat]])) { - a <- if (a == "na-level") "NA" else a - a_lvl <- paste(stat, a, sep = ".") - .stats <- c(.stats, a_lvl) - .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) { - which(names(.formats) == stat) - 1 + which(names(x_stats[[stat]]) == a) - } else { - length(.formats) - }) - .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) { - which(names(.labels) == stat) - 1 + which(names(x_stats[[stat]]) == a) - } else { - length(.labels) - }) - .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) { - which(names(.indent_mods) == stat) - 1 + which(names(x_stats[[stat]]) == a) - } else { - length(.indent_mods) - }) - } - } - - x_stats <- unlist(x_stats, recursive = FALSE) - names(x_stats) <- gsub("na-level", "NA", names(x_stats)) - .stats <- names(x_stats) + x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods) + x_stats <- x_ungrp[["x"]] + .stats <- x_ungrp[[".stats"]] + .formats <- x_ungrp[[".formats"]] + .labels <- x_ungrp[[".labels"]] + .indent_mods <- x_ungrp[[".indent_mods"]] } .formats_x <- extract_by_name( .formats, .stats, if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats diff --git a/R/utils_factor.R b/R/utils_factor.R index 151512fec8..8e5888ac22 100644 --- a/R/utils_factor.R +++ b/R/utils_factor.R @@ -296,3 +296,56 @@ fct_collapse_only <- function(.f, ..., .na_level = "") { x <- forcats::fct_collapse(.f, ..., other_level = .na_level) do.call(forcats::fct_relevel, args = c(list(.f = x), as.list(new_lvls))) } + +#' Ungroup and Format Non-Numeric Statistics +#' +#' Ungroups grouped non-numeric statistics (`count` and `count_fraction`) within input vectors `.stats`, `.formats`, +#' `.labels`, and `.indent_mods`. Formats explicit `NA` level in each input vector and changes p-value to +#' `character()` if current column is a reference column. Used within `a_summary()` and `a_compare()`. +#' +#' @inheritParams argument_convention +#' +#' @return A `list` with modified elements `x`, `.stats`, `.formats`, `.labels`, and `.indent_mods`. +#' +#' @seealso [a_summary()] and [a_compare()] which use this function internally. +#' +#' @keywords internal +ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_col = FALSE) { + checkmate::assert_true(!is.numeric(x) & !is.logical(x)) + for (stat in c("count", "count_fraction")) { + for (a in names(x[[stat]])) { + a <- if (a == "na-level") "NA" else a + a_lvl <- paste(stat, a, sep = ".") + .stats <- c(.stats, a_lvl) + .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) { + which(names(.formats) == stat) - 1 + which(names(x[[stat]]) == a) + } else { + length(.formats) + }) + .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) { + which(names(.labels) == stat) - 1 + which(names(x[[stat]]) == a) + } else { + length(.labels) + }) + .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) { + which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == a) + } else { + length(.indent_mods) + }) + } + } + + if (.in_ref_col && "pval" %in% names(x)) x[["pval"]] <- "pvalue" + x <- unlist(x, recursive = FALSE) + if (.in_ref_col && "pval" %in% names(x)) x[["pval"]] <- character() + names(x) <- gsub("na-level", "NA", names(x)) + .stats <- names(x) + + list( + x = x, + .stats = .stats, + .formats = .formats, + .labels = .labels, + .indent_mods = .indent_mods + ) +} diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd index b61c7e8f00..13818ae632 100644 --- a/man/compare_variables.Rd +++ b/man/compare_variables.Rd @@ -34,12 +34,15 @@ a_compare( x, .N_col, .N_row, + .var, + .df_row, .ref_group, .in_ref_col, .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL, + na.rm = TRUE, na_level = NA_character_, ... ) @@ -50,6 +53,7 @@ compare_vars( var_labels = vars, nested = TRUE, ..., + na.rm = TRUE, na_level = NA_character_, show_labels = "default", table_names = vars, @@ -85,6 +89,8 @@ to print out information about factor casting. Defaults to \code{TRUE}.} \item{.N_row}{(\code{count})\cr column-wise N (column count) for the full column that is passed by \code{rtables}.} +\item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.} + \item{.stats}{(\code{character})\cr statistics to select for the table.} \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} diff --git a/man/summarize_variables.Rd b/man/summarize_variables.Rd index f0b624a61c..b06ec1aeeb 100644 --- a/man/summarize_variables.Rd +++ b/man/summarize_variables.Rd @@ -57,10 +57,13 @@ a_summary( x, .N_col, .N_row, + .var, + .df_row, .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL, + na.rm = TRUE, na_level = NA_character_, ... ) @@ -71,6 +74,7 @@ summarize_vars( var_labels = vars, nested = TRUE, ..., + na.rm = TRUE, na_level = NA_character_, show_labels = "default", table_names = vars, @@ -116,6 +120,8 @@ See more about \code{type} in \code{\link[stats:quantile]{stats::quantile()}}. \item{verbose}{(\code{logical})\cr Defaults to \code{TRUE}, which prints out warnings and messages. It is mainly used to print out information about factor casting.} +\item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.} + \item{.stats}{(\code{character})\cr statistics to select for the table.} \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} diff --git a/man/ungroup_stats.Rd b/man/ungroup_stats.Rd new file mode 100644 index 0000000000..47bedd3df1 --- /dev/null +++ b/man/ungroup_stats.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_factor.R +\name{ungroup_stats} +\alias{ungroup_stats} +\title{Ungroup and Format Non-Numeric Statistics} +\usage{ +ungroup_stats(x, .stats, .formats, .labels, .indent_mods, .in_ref_col = FALSE) +} +\arguments{ +\item{x}{(\code{numeric})\cr vector of numbers we want to analyze.} + +\item{.stats}{(\code{character})\cr statistics to select for the table.} + +\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} + +\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} + +\item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the +unmodified default behavior. Can be negative.} + +\item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.} +} +\value{ +A \code{list} with modified elements \code{x}, \code{.stats}, \code{.formats}, \code{.labels}, and \code{.indent_mods}. +} +\description{ +Ungroups grouped non-numeric statistics (\code{count} and \code{count_fraction}) within input vectors \code{.stats}, \code{.formats}, +\code{.labels}, and \code{.indent_mods}. Formats explicit \code{NA} level in each input vector and changes p-value to +\code{character()} if current column is a reference column. Used within \code{a_summary()} and \code{a_compare()}. +} +\seealso{ +\code{\link[=a_summary]{a_summary()}} and \code{\link[=a_compare]{a_compare()}} which use this function internally. +} +\keyword{internal} From 6e89c201b6996217a4358e7803844c788512b3a2 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 15 Jun 2023 20:23:05 -0400 Subject: [PATCH 07/33] Remove duplication (a_compare) --- NAMESPACE | 1 - R/compare_variables.R | 79 +++++++------------------------ R/summarize_variables.R | 96 ++++++++++++++++++++++++-------------- R/utils_factor.R | 4 +- man/compare_variables.Rd | 13 +++--- man/create_afun_compare.Rd | 3 +- man/summarize_variables.Rd | 43 +++++++++++------ man/ungroup_stats.Rd | 4 +- 8 files changed, 118 insertions(+), 125 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 35026811bf..bfadcbd091 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,7 +20,6 @@ S3method(tidy,glm) S3method(tidy,step) S3method(tidy,summary.coxph) export(CombinationFunction) -export(a_compare) export(a_count_occurrences) export(a_count_occurrences_by_grade) export(a_count_patients_with_event) diff --git a/R/compare_variables.R b/R/compare_variables.R index 5aa334c8a1..8d8f6e0072 100644 --- a/R/compare_variables.R +++ b/R/compare_variables.R @@ -21,8 +21,9 @@ #' * For `compare_vars()`, the column split must define a reference group via `ref_group` so that the comparison #' is well defined. #' -#' @seealso Relevant constructor function [create_afun_compare()], and [s_summary()] which is used internally -#' to compute a summary within `s_compare()`. +#' @seealso Relevant constructor function [create_afun_compare()], [s_summary()] which is used internally +#' to compute a summary within `s_compare()`, and [a_compare()] which is used (with `compare = TRUE`) as the analysis +#' function for `compare_vars()`. #' #' @name compare_variables #' @include summarize_variables.R @@ -177,8 +178,8 @@ s_compare.character <- function(x, .var, verbose = TRUE, ...) { - x <- as_factor_keep_attributes(x, x_name = .var, verbose = verbose) - .ref_group <- as_factor_keep_attributes(.ref_group, x_name = .var, verbose = verbose) + x <- as_factor_keep_attributes(x, verbose = verbose) + .ref_group <- as_factor_keep_attributes(.ref_group, verbose = verbose) s_compare( x = x, .ref_group = .ref_group, @@ -244,26 +245,15 @@ s_compare.logical <- function(x, y } -.a_compare_numeric_formats <- c(.a_summary_numeric_formats, pval = "x.xxxx | (<0.0001)") -.a_compare_numeric_labels <- c(.a_summary_numeric_labels, pval = "p-value (t-test)") -.a_compare_numeric_indent_mods <- c(.a_summary_numeric_indent_mods, pval = 0L) -.a_compare_counts_formats <- c(.a_summary_counts_formats, pval = "x.xxxx | (<0.0001)") -.a_compare_counts_labels <- c(.a_summary_counts_labels, pval = "p-value (chi-squared test)") -.a_compare_counts_indent_mods <- c(.a_summary_counts_indent_mods, pval = 0L) - #' @describeIn compare_variables Formatted analysis function which is used as `afun` #' in `compare_vars()`. #' #' @return #' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()]. #' -#' @examples -#' a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla") -#' a_compare(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE) -#' a_compare(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE) -#' a_compare(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE) +#' @note This function has been deprecated in favor of `a_summary()` with argument `compare` set to `TRUE`. #' -#' @export +#' @keywords internal a_compare <- function(x, .N_col, .N_row, @@ -278,48 +268,10 @@ a_compare <- function(x, na.rm = TRUE, na_level = NA_character_, ...) { - if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels) - if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats - if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels - if (is.null(.indent_mods)) { - .indent_mods <- if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods - } - if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) { - .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats) - } - if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") - x_stats <- s_compare(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...) - if (is.numeric(x)) { - .labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply( - c("mean_ci", "mean_pval", "median_ci", "quantiles"), - function(x) attr(x_stats[[x]], "label") - ) - } - .stats <- intersect(.stats, names(x_stats)) - x_stats <- x_stats[.stats] - if (!is.numeric(x) && !is.logical(x)) { - x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col) - x_stats <- x_ungrp[["x"]] - .stats <- x_ungrp[[".stats"]] - .formats <- x_ungrp[[".formats"]] - .labels <- x_ungrp[[".labels"]] - .indent_mods <- x_ungrp[[".indent_mods"]] - } - .formats_x <- extract_by_name( - .formats, .stats, if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats - ) - .labels_x <- extract_by_name(.labels, .stats, if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels) - .indent_mods_x <- extract_by_name( - .indent_mods, .stats, if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods - ) - - in_rows( - .list = x_stats, - .formats = .formats_x, - .names = .labels_x, - .labels = .labels_x, - .indent_mods = .indent_mods_x, - .format_na_strs = na_level + lifecycle::deprecate_stop( + "0.8.2", + "a_compare()", + "a_summary(compare = TRUE)" ) } @@ -336,7 +288,8 @@ a_compare <- function(x, #' #' @return Combined formatted analysis function for use in [compare_vars()]. #' -#' @note This function has been deprecated in favor of direct implementation of `a_compare()`. +#' @note This function has been deprecated in favor of direct implementation of `a_summary()` with argument `compare` +#' set to `TRUE`. #' #' @seealso [compare_vars()] #' @@ -348,7 +301,7 @@ create_afun_compare <- function(.stats = NULL, lifecycle::deprecate_stop( "0.8.2", "create_afun_compare()", - "a_compare()" + "a_summary(compare = TRUE)" ) } @@ -404,10 +357,10 @@ compare_vars <- function(lyt, lyt = lyt, vars = vars, var_labels = var_labels, - afun = a_compare, + afun = a_summary, nested = nested, extra_args = list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na.rm = na.rm, na_level = na_level, ... + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na.rm = na.rm, na_level = na_level, compare = TRUE, ... ), inclNAs = TRUE, show_labels = show_labels, diff --git a/R/summarize_variables.R b/R/summarize_variables.R index d45650f24e..6aace87616 100644 --- a/R/summarize_variables.R +++ b/R/summarize_variables.R @@ -495,59 +495,89 @@ s_summary.logical <- function(x, y } -.a_summary_numeric_formats <- summary_formats() -.a_summary_numeric_labels <- summary_labels() -.a_summary_numeric_indent_mods <- rep(0L, length(summary_labels())) %>% `names<-`(names(.a_summary_numeric_labels)) -.a_summary_counts_formats <- summary_formats(type = "counts") -.a_summary_counts_labels <- summary_labels(type = "counts") -.a_summary_counts_indent_mods <- rep(0L, length(.a_summary_counts_labels)) %>% `names<-`(names(.a_summary_counts_labels)) +.a_compare_numeric_formats <- c(summary_formats(), pval = "x.xxxx | (<0.0001)") +.a_compare_numeric_labels <- c(summary_labels(), pval = "p-value (t-test)") +.a_compare_numeric_indents <- c(rep(0L, length(.a_compare_numeric_labels)) %>% `names<-`(names(.a_compare_numeric_labels))) +.a_compare_counts_formats <- c(summary_formats(type = "counts"), pval = "x.xxxx | (<0.0001)") +.a_compare_counts_labels <- c(summary_labels(type = "counts"), pval = "p-value (chi-squared test)") +.a_compare_counts_indents <- c(rep(0L, length(.a_compare_counts_labels)) %>% `names<-`(names(.a_compare_counts_labels))) #' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and -#' `cfun` in `summarize_colvars()`. +#' `compare_vars()` and as `cfun` in `summarize_colvars()`. +#' +#' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics +#' (`compare = TRUE` adds `pval` statistic comparing against reference group). #' #' @return #' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()]. #' +#' @note To use for comparison (with p-value statistic added), parameter `compare` must be set to `TRUE`. +#' #' @examples -#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") -#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) -#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) -#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) +#' # summary analysis - compare = FALSE +#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla", .df_row = c()) +#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10, .df_row = c()) +#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, .df_row = c(), verbose = FALSE) +#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10, .df_row = c()) +#' +#' # comparison analysis - compare = TRUE +#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla", compare = TRUE) +#' a_summary(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE, compare = TRUE) +#' a_summary(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE, compare = TRUE) +#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE, compare = TRUE) #' #' @export a_summary <- function(x, .N_col, .N_row, - .var, - .df_row, + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = NULL, .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL, na.rm = TRUE, na_level = NA_character_, + compare = FALSE, ...) { - if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats) - if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats - if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_summary_numeric_labels else .a_summary_counts_labels + x_stats <- if (!compare) { + s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) + } else { + s_compare( + x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ... + ) + } + + if (is.null(.stats)) { + .stats <- names(get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats"))) + if (!compare) .stats <- head(.stats, -1) + } + if (is.null(.formats)) { + .formats <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats")) + if (!compare) .formats <- head(.formats, -1) + } + if (is.null(.labels)) { + if (is.numeric(x)) .a_compare_numeric_labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply( + c("mean_ci", "mean_pval", "median_ci", "quantiles"), function(x) attr(x_stats[[x]], "label") + ) + .labels <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels")) + if (!compare) .labels <- head(.labels, -1) + } if (is.null(.indent_mods)) { - .indent_mods <- if (is.numeric(x)) .a_summary_numeric_indent_mods else .a_summary_counts_indent_mods + .indent_mods <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents")) + if (!compare) .indent_mods <- head(.indent_mods, -1) } if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) { .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats) } if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") - x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) - if (is.numeric(x)) { - .labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply( - c("mean_ci", "mean_pval", "median_ci", "quantiles"), - function(x) attr(x_stats[[x]], "label") - ) - } + .stats <- intersect(.stats, names(x_stats)) x_stats <- x_stats[.stats] if (!is.numeric(x) && !is.logical(x)) { - x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods) + x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, if (compare) .in_ref_col else FALSE) x_stats <- x_ungrp[["x"]] .stats <- x_ungrp[[".stats"]] .formats <- x_ungrp[[".formats"]] @@ -555,11 +585,11 @@ a_summary <- function(x, .indent_mods <- x_ungrp[[".indent_mods"]] } .formats_x <- extract_by_name( - .formats, .stats, if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats + .formats, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats")) ) - .labels_x <- extract_by_name(.labels, .stats, if (is.numeric(x)) .a_summary_numeric_labels else .a_summary_counts_labels) + .labels_x <- extract_by_name(.labels, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels"))) .indent_mods_x <- extract_by_name( - .indent_mods, .stats, if (is.numeric(x)) .a_summary_numeric_indent_mods else .a_summary_counts_indent_mods + .indent_mods, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents")) ) in_rows( @@ -641,8 +671,7 @@ create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) { #' .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3")) #' ) #' -#' results <- build_table(l, df = dta_test) -#' as_html(results) +#' build_table(l, df = dta_test) #' #' ## Use arguments interpreted by `s_summary`. #' l <- basic_table() %>% @@ -650,7 +679,7 @@ create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) { #' split_rows_by(var = "AVISIT") %>% #' summarize_vars(vars = "AVAL", na.rm = FALSE) #' -#' results <- build_table(l, df = dta_test) +#' build_table(l, df = dta_test) #' #' ## Handle `NA` levels first when summarizing factors. #' dta_test$AVISIT <- NA_character_ @@ -659,10 +688,7 @@ create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) { #' split_cols_by(var = "ARM") %>% #' summarize_vars(vars = "AVISIT", na.rm = FALSE) #' -#' results <- build_table(l, df = dta_test) -#' \dontrun{ -#' Viewer(results) -#' } +#' build_table(l, df = dta_test) #' #' @export summarize_vars <- function(lyt, diff --git a/R/utils_factor.R b/R/utils_factor.R index 8e5888ac22..0ddfaf5db7 100644 --- a/R/utils_factor.R +++ b/R/utils_factor.R @@ -301,13 +301,13 @@ fct_collapse_only <- function(.f, ..., .na_level = "") { #' #' Ungroups grouped non-numeric statistics (`count` and `count_fraction`) within input vectors `.stats`, `.formats`, #' `.labels`, and `.indent_mods`. Formats explicit `NA` level in each input vector and changes p-value to -#' `character()` if current column is a reference column. Used within `a_summary()` and `a_compare()`. +#' `character()` if current column is a reference column. Used within [`a_summary()`]. #' #' @inheritParams argument_convention #' #' @return A `list` with modified elements `x`, `.stats`, `.formats`, `.labels`, and `.indent_mods`. #' -#' @seealso [a_summary()] and [a_compare()] which use this function internally. +#' @seealso [a_summary()] which uses this function internally. #' #' @keywords internal ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_col = FALSE) { diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd index 13818ae632..670c6cc8df 100644 --- a/man/compare_variables.Rd +++ b/man/compare_variables.Rd @@ -180,6 +180,8 @@ Therefore it is always better to manually convert character variables to factors \item For \code{compare_vars()}, the column split must define a reference group via \code{ref_group} so that the comparison is well defined. } + +This function has been deprecated in favor of \code{a_summary()} with argument \code{compare} set to \code{TRUE}. } \examples{ # `s_compare.numeric` @@ -238,11 +240,6 @@ y <- c(NA, NA, NA, NA, FALSE) s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE) s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE) -a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla") -a_compare(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE) -a_compare(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE) -a_compare(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE) - # `compare_vars()` in `rtables` pipelines ## Default output within a `rtables` pipeline. @@ -264,6 +261,8 @@ build_table(lyt, df = tern_ex_adsl) } \seealso{ -Relevant constructor function \code{\link[=create_afun_compare]{create_afun_compare()}}, and \code{\link[=s_summary]{s_summary()}} which is used internally -to compute a summary within \code{s_compare()}. +Relevant constructor function \code{\link[=create_afun_compare]{create_afun_compare()}}, \code{\link[=s_summary]{s_summary()}} which is used internally +to compute a summary within \code{s_compare()}, and \code{\link[=a_compare]{a_compare()}} which is used (with \code{compare = TRUE}) as the analysis +function for \code{compare_vars()}. } +\keyword{internal} diff --git a/man/create_afun_compare.Rd b/man/create_afun_compare.Rd index 14f71af618..cea746b451 100644 --- a/man/create_afun_compare.Rd +++ b/man/create_afun_compare.Rd @@ -31,7 +31,8 @@ Combined formatted analysis function for use in \code{\link[=compare_vars]{compa Constructor function which creates a combined formatted analysis function. } \note{ -This function has been deprecated in favor of direct implementation of \code{a_compare()}. +This function has been deprecated in favor of direct implementation of \code{a_summary()} with argument \code{compare} +set to \code{TRUE}. } \seealso{ \code{\link[=compare_vars]{compare_vars()}} diff --git a/man/summarize_variables.Rd b/man/summarize_variables.Rd index b06ec1aeeb..1f2d0455cd 100644 --- a/man/summarize_variables.Rd +++ b/man/summarize_variables.Rd @@ -57,14 +57,17 @@ a_summary( x, .N_col, .N_row, - .var, - .df_row, + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = NULL, .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL, na.rm = TRUE, na_level = NA_character_, + compare = FALSE, ... ) @@ -122,6 +125,10 @@ to print out information about factor casting.} \item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.} +\item{.ref_group}{(\code{data.frame} or \code{vector})\cr the data corresponding to the reference group.} + +\item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.} + \item{.stats}{(\code{character})\cr statistics to select for the table.} \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} @@ -134,6 +141,9 @@ for that statistic's row label.} \item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} +\item{compare}{(\code{logical})\cr Whether comparison statistics should be analyzed instead of summary statistics +(\code{compare = TRUE} adds \code{pval} statistic comparing against reference group).} + \item{lyt}{(\code{layout})\cr input layout where analyses will be added to.} \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.} @@ -237,7 +247,7 @@ conversion to factor (with a warning) and then forwards to the method for factor \item \code{s_summary(logical)}: Method for \code{logical} class. \item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{summarize_vars()} and -\code{cfun} in \code{summarize_colvars()}. +\code{compare_vars()} and as \code{cfun} in \code{summarize_colvars()}. \item \code{summarize_vars()}: Layout-creating function which can take statistics function arguments and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}. @@ -266,6 +276,8 @@ can be generated correctly. In particular for sparse tables this very likely can It is therefore better to always pre-process the dataset such that factors are manually created from character variables before passing the dataset to \code{\link[rtables:build_table]{rtables::build_table()}}. } + +To use for comparison (with p-value statistic added), parameter \code{compare} must be set to \code{TRUE}. } \examples{ # `s_summary.numeric` @@ -339,10 +351,17 @@ x <- c(TRUE, FALSE, TRUE, TRUE) s_summary(x, denom = "N_row", .N_row = 10L) s_summary(x, denom = "N_col", .N_col = 20L) -a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") -a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) -a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) -a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) +# summary analysis - compare = FALSE +a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla", .df_row = c()) +a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10, .df_row = c()) +a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, .df_row = c(), verbose = FALSE) +a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10, .df_row = c()) + +# comparison analysis - compare = TRUE +a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla", compare = TRUE) +a_summary(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE, compare = TRUE) +a_summary(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE, compare = TRUE) +a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE, compare = TRUE) ## Fabricated dataset. dta_test <- data.frame( @@ -373,8 +392,7 @@ l <- basic_table() \%>\% .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3")) ) -results <- build_table(l, df = dta_test) -as_html(results) +build_table(l, df = dta_test) ## Use arguments interpreted by `s_summary`. l <- basic_table() \%>\% @@ -382,7 +400,7 @@ l <- basic_table() \%>\% split_rows_by(var = "AVISIT") \%>\% summarize_vars(vars = "AVAL", na.rm = FALSE) -results <- build_table(l, df = dta_test) +build_table(l, df = dta_test) ## Handle `NA` levels first when summarizing factors. dta_test$AVISIT <- NA_character_ @@ -391,9 +409,6 @@ l <- basic_table() \%>\% split_cols_by(var = "ARM") \%>\% summarize_vars(vars = "AVISIT", na.rm = FALSE) -results <- build_table(l, df = dta_test) -\dontrun{ -Viewer(results) -} +build_table(l, df = dta_test) } diff --git a/man/ungroup_stats.Rd b/man/ungroup_stats.Rd index 47bedd3df1..c9dffda957 100644 --- a/man/ungroup_stats.Rd +++ b/man/ungroup_stats.Rd @@ -26,9 +26,9 @@ A \code{list} with modified elements \code{x}, \code{.stats}, \code{.formats}, \ \description{ Ungroups grouped non-numeric statistics (\code{count} and \code{count_fraction}) within input vectors \code{.stats}, \code{.formats}, \code{.labels}, and \code{.indent_mods}. Formats explicit \code{NA} level in each input vector and changes p-value to -\code{character()} if current column is a reference column. Used within \code{a_summary()} and \code{a_compare()}. +\code{character()} if current column is a reference column. Used within \code{\link[=a_summary]{a_summary()}}. } \seealso{ -\code{\link[=a_summary]{a_summary()}} and \code{\link[=a_compare]{a_compare()}} which use this function internally. +\code{\link[=a_summary]{a_summary()}} which uses this function internally. } \keyword{internal} From c0c783aab225d09c4fb012bd7eeabb0de771c962 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 16 Jun 2023 13:54:59 -0400 Subject: [PATCH 08/33] Add workaround for all-NA rows --- R/summarize_variables.R | 7 ++++++- R/utils_factor.R | 6 +++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/R/summarize_variables.R b/R/summarize_variables.R index 6aace87616..1622dfd7a8 100644 --- a/R/summarize_variables.R +++ b/R/summarize_variables.R @@ -542,6 +542,12 @@ a_summary <- function(x, na_level = NA_character_, compare = FALSE, ...) { + # Remove all-NA rows + in_tot_col <- nrow(.df_row) == length(x) + .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ] + if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]] + + if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") x_stats <- if (!compare) { s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) } else { @@ -572,7 +578,6 @@ a_summary <- function(x, if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) { .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats) } - if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") .stats <- intersect(.stats, names(x_stats)) x_stats <- x_stats[.stats] diff --git a/R/utils_factor.R b/R/utils_factor.R index 0ddfaf5db7..4fb93b8c34 100644 --- a/R/utils_factor.R +++ b/R/utils_factor.R @@ -318,17 +318,17 @@ ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_co a_lvl <- paste(stat, a, sep = ".") .stats <- c(.stats, a_lvl) .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) { - which(names(.formats) == stat) - 1 + which(names(x[[stat]]) == a) + which(names(.formats) == stat) - 1 + which(names(x[[stat]]) == if (a != "NA" | "NA" %in% names(x[[stat]])) a else "na-level") } else { length(.formats) }) .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) { - which(names(.labels) == stat) - 1 + which(names(x[[stat]]) == a) + which(names(.labels) == stat) - 1 + which(names(x[[stat]]) == if (a != "NA" | "NA" %in% names(x[[stat]])) a else "na-level") } else { length(.labels) }) .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) { - which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == a) + which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == if (a != "NA" | "NA" %in% names(x[[stat]])) a else "na-level") } else { length(.indent_mods) }) From eb626293950b8cb9bc4fe5de63c1a91b484be625 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 22 Jun 2023 15:21:06 -0400 Subject: [PATCH 09/33] Add tests, fix lint, update NEWS --- NEWS.md | 8 +- R/compare_variables.R | 41 +++- R/summarize_variables.R | 86 ++++--- R/utils.R | 2 + R/utils_factor.R | 23 +- man/compare_variables.Rd | 10 +- man/summarize_variables.Rd | 29 ++- tests/testthat/_snaps/summarize_variables.md | 223 +++++++++++++++++++ tests/testthat/test-summarize_variables.R | 86 +++++++ 9 files changed, 437 insertions(+), 71 deletions(-) diff --git a/NEWS.md b/NEWS.md index 0da41e0802..177d666b9a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # tern 0.8.2.9014 +### Breaking Changes + ### Enhancements * Added explicit zero counts to `g_km` plot "at risk" annotation tables. * Added a flag for total level split in `analyze_patients_exposure_in_cols`. @@ -7,6 +9,8 @@ * Updated `summarize_coxreg` to print covariates in data rows for univariate Cox regression with no interactions and content rows otherwise. * Removed "baseline status" text from `d_count_abnormal_by_baseline` labels. * Improved default sizing of annotation tables in `g_km` and added dynamic scaling of the `surv_med` and `coxph` annotation tables, with customization via the `width_annots` argument. +* Refactored `a_summary` to no longer use helper function `create_afun_summary`. +* Refactored `summarize_vars` and `compare_vars` to use refactored `a_summary`. ### Bug Fixes * Fixed bug in `split_text_grob` preventing titles and footnotes from being properly formatted and printed by `decorate_grob`. @@ -15,7 +19,9 @@ ### Miscellaneous * Updated README to include installation instructions for CRAN. -* Began deprecation of `indent_mod` argument and replace it with the `.indent_mods` argument in `summarize_num_patients` and `analyze_num_patients`. +* Began deprecation of `indent_mod` argument and replaced it with the `.indent_mods` argument in `summarize_num_patients` and `analyze_num_patients`. +* Deprecated `a_compare` and replaced it with `a_summary` with argument `compare = TRUE`. +* Deprecated helper functions `create_afun_summary` and `create_afun_compare` which are no longer used by `a_summary` and `a_compare` respectively. # tern 0.8.2 diff --git a/R/compare_variables.R b/R/compare_variables.R index 8d8f6e0072..739f4ecdce 100644 --- a/R/compare_variables.R +++ b/R/compare_variables.R @@ -128,6 +128,7 @@ s_compare.factor <- function(x, .ref_group <- .ref_group %>% explicit_na(label = "NA") } + if ("NA" %in% levels(x)) levels(.ref_group) <- c(levels(.ref_group), "NA") checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2) y$pval <- if (!.in_ref_col && length(x) > 0 && length(.ref_group) > 0) { @@ -251,28 +252,45 @@ s_compare.logical <- function(x, #' @return #' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()]. #' -#' @note This function has been deprecated in favor of `a_summary()` with argument `compare` set to `TRUE`. +#' @note `a_compare()` has been deprecated in favor of `a_summary()` with argument `compare` set to `TRUE`. #' #' @keywords internal a_compare <- function(x, - .N_col, - .N_row, - .var, - .df_row, - .ref_group, - .in_ref_col, + .N_col, # nolint + .N_row, # nolint + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL, - na.rm = TRUE, + na.rm = TRUE, # nolint na_level = NA_character_, ...) { - lifecycle::deprecate_stop( + lifecycle::deprecate_warn( "0.8.2", "a_compare()", "a_summary(compare = TRUE)" ) + a_summary( + x = x, + .N_col = .N_col, + .N_row = .N_row, + .var = .var, + .df_row = .df_row, + .ref_group = .ref_group, + .in_ref_col = .in_ref_col, + .stats = .stats, + .formats = .formats, + .labels = .labels, + .indent_mods = .indent_mods, + na.rm = na.rm, + na_level = na_level, + compare = TRUE, + ... + ) } #' Constructor Function for [compare_vars()] @@ -344,7 +362,7 @@ compare_vars <- function(lyt, var_labels = vars, nested = TRUE, ..., - na.rm = TRUE, + na.rm = TRUE, # nolint na_level = NA_character_, show_labels = "default", table_names = vars, @@ -360,7 +378,8 @@ compare_vars <- function(lyt, afun = a_summary, nested = nested, extra_args = list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na.rm = na.rm, na_level = na_level, compare = TRUE, ... + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, + na.rm = na.rm, na_level = na_level, compare = TRUE, ... ), inclNAs = TRUE, show_labels = show_labels, diff --git a/R/summarize_variables.R b/R/summarize_variables.R index 1622dfd7a8..f80c4e0840 100644 --- a/R/summarize_variables.R +++ b/R/summarize_variables.R @@ -497,7 +497,9 @@ s_summary.logical <- function(x, .a_compare_numeric_formats <- c(summary_formats(), pval = "x.xxxx | (<0.0001)") .a_compare_numeric_labels <- c(summary_labels(), pval = "p-value (t-test)") -.a_compare_numeric_indents <- c(rep(0L, length(.a_compare_numeric_labels)) %>% `names<-`(names(.a_compare_numeric_labels))) +.a_compare_numeric_indents <- c( + rep(0L, length(.a_compare_numeric_labels)) %>% `names<-`(names(.a_compare_numeric_labels)) +) .a_compare_counts_formats <- c(summary_formats(type = "counts"), pval = "x.xxxx | (<0.0001)") .a_compare_counts_labels <- c(summary_labels(type = "counts"), pval = "p-value (chi-squared test)") .a_compare_counts_indents <- c(rep(0L, length(.a_compare_counts_labels)) %>% `names<-`(names(.a_compare_counts_labels))) @@ -511,41 +513,51 @@ s_summary.logical <- function(x, #' @return #' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()]. #' -#' @note To use for comparison (with p-value statistic added), parameter `compare` must be set to `TRUE`. +#' @note +#' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`. +#' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is. #' #' @examples #' # summary analysis - compare = FALSE -#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla", .df_row = c()) -#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10, .df_row = c()) -#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, .df_row = c(), verbose = FALSE) -#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10, .df_row = c()) +#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") +#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) +#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) +#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) #' #' # comparison analysis - compare = TRUE -#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla", compare = TRUE) -#' a_summary(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE, compare = TRUE) -#' a_summary(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE, compare = TRUE) -#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE, compare = TRUE) +#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) +#' a_summary( +#' factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE +#' ) +#' a_summary( +#' c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE +#' ) +#' a_summary( +#' c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE +#' ) #' #' @export a_summary <- function(x, - .N_col, - .N_row, + .N_col, # nolint + .N_row, # nolint .var = NULL, .df_row = NULL, .ref_group = NULL, - .in_ref_col = NULL, + .in_ref_col = FALSE, .stats = NULL, .formats = NULL, .labels = NULL, .indent_mods = NULL, - na.rm = TRUE, + na.rm = TRUE, # nolint na_level = NA_character_, compare = FALSE, ...) { # Remove all-NA rows - in_tot_col <- nrow(.df_row) == length(x) - .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ] - if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]] + if (!is.null(.df_row) && ncol(.df_row) > 1) { + in_tot_col <- nrow(.df_row) == length(x) + .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ] + if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]] + } if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") x_stats <- if (!compare) { @@ -564,10 +576,12 @@ a_summary <- function(x, .formats <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats")) if (!compare) .formats <- head(.formats, -1) } - if (is.null(.labels)) { - if (is.numeric(x)) .a_compare_numeric_labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply( + if (is.numeric(x)) { + .a_compare_numeric_labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply( c("mean_ci", "mean_pval", "median_ci", "quantiles"), function(x) attr(x_stats[[x]], "label") ) + } + if (is.null(.labels)) { .labels <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels")) if (!compare) .labels <- head(.labels, -1) } @@ -575,34 +589,37 @@ a_summary <- function(x, .indent_mods <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents")) if (!compare) .indent_mods <- head(.indent_mods, -1) } - if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) { + if (length(.indent_mods) == 1 && is.null(names(.indent_mods))) { .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats) } .stats <- intersect(.stats, names(x_stats)) x_stats <- x_stats[.stats] + .formats <- extract_by_name( + .formats, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats")) + ) + .labels <- extract_by_name( + .labels, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels")) + ) + .indent_mods <- extract_by_name( + .indent_mods, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents")) + ) + if (!is.numeric(x) && !is.logical(x)) { - x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, if (compare) .in_ref_col else FALSE) + x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col) x_stats <- x_ungrp[["x"]] .stats <- x_ungrp[[".stats"]] .formats <- x_ungrp[[".formats"]] .labels <- x_ungrp[[".labels"]] .indent_mods <- x_ungrp[[".indent_mods"]] } - .formats_x <- extract_by_name( - .formats, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats")) - ) - .labels_x <- extract_by_name(.labels, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels"))) - .indent_mods_x <- extract_by_name( - .indent_mods, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents")) - ) in_rows( .list = x_stats, - .formats = .formats_x, - .names = .labels_x, - .labels = .labels_x, - .indent_mods = .indent_mods_x, + .formats = .formats, + .names = .labels, + .labels = .labels, + .indent_mods = .indent_mods, .format_na_strs = na_level ) } @@ -701,7 +718,7 @@ summarize_vars <- function(lyt, var_labels = vars, nested = TRUE, ..., - na.rm = TRUE, + na.rm = TRUE, # nolint na_level = NA_character_, show_labels = "default", table_names = vars, @@ -717,7 +734,8 @@ summarize_vars <- function(lyt, afun = a_summary, nested = nested, extra_args = list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na.rm = na.rm, na_level = na_level, ... + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, + na.rm = na.rm, na_level = na_level, ... ), inclNAs = TRUE, show_labels = show_labels, diff --git a/R/utils.R b/R/utils.R index 5b9e27142f..d8c80a063c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -283,6 +283,8 @@ extract_by_name <- function(x, names, x_defaults = NULL) { if (length(which_extract) > 0) { x_fill[which_extract] <- x[which_extract] x_fill <- if (!is.null(x_defaults)) x_fill[names] else x_fill[which_extract] + } else if (!is.null(x_defaults)) { + x_fill <- x_fill[names] } else { x_fill <- NULL } diff --git a/R/utils_factor.R b/R/utils_factor.R index 4fb93b8c34..283ec30f06 100644 --- a/R/utils_factor.R +++ b/R/utils_factor.R @@ -316,22 +316,25 @@ ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_co for (a in names(x[[stat]])) { a <- if (a == "na-level") "NA" else a a_lvl <- paste(stat, a, sep = ".") + a_name <- if (a != "NA" || "NA" %in% names(x[[stat]])) a else "na-level" .stats <- c(.stats, a_lvl) .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) { - which(names(.formats) == stat) - 1 + which(names(x[[stat]]) == if (a != "NA" | "NA" %in% names(x[[stat]])) a else "na-level") + which(names(.formats) == stat) - 1 + which(names(x[[stat]]) == a_name) } else { length(.formats) }) .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) { - which(names(.labels) == stat) - 1 + which(names(x[[stat]]) == if (a != "NA" | "NA" %in% names(x[[stat]])) a else "na-level") + which(names(.labels) == stat) - 1 + which(names(x[[stat]]) == a_name) } else { length(.labels) }) - .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) { - which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == if (a != "NA" | "NA" %in% names(x[[stat]])) a else "na-level") - } else { - length(.indent_mods) - }) + .indent_mods <- append( + .indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) { + which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == a_name) + } else { + length(.indent_mods) + } + ) } } @@ -344,8 +347,8 @@ ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_co list( x = x, .stats = .stats, - .formats = .formats, - .labels = .labels, - .indent_mods = .indent_mods + .formats = .formats[.stats], + .labels = .labels[.stats], + .indent_mods = .indent_mods[.stats] ) } diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd index 670c6cc8df..8beb2ddae6 100644 --- a/man/compare_variables.Rd +++ b/man/compare_variables.Rd @@ -34,10 +34,10 @@ a_compare( x, .N_col, .N_row, - .var, - .df_row, - .ref_group, - .in_ref_col, + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, .stats = NULL, .formats = NULL, .labels = NULL, @@ -181,7 +181,7 @@ Therefore it is always better to manually convert character variables to factors is well defined. } -This function has been deprecated in favor of \code{a_summary()} with argument \code{compare} set to \code{TRUE}. +\code{a_compare()} has been deprecated in favor of \code{a_summary()} with argument \code{compare} set to \code{TRUE}. } \examples{ # `s_compare.numeric` diff --git a/man/summarize_variables.Rd b/man/summarize_variables.Rd index 1f2d0455cd..f54f0b88dd 100644 --- a/man/summarize_variables.Rd +++ b/man/summarize_variables.Rd @@ -60,7 +60,7 @@ a_summary( .var = NULL, .df_row = NULL, .ref_group = NULL, - .in_ref_col = NULL, + .in_ref_col = FALSE, .stats = NULL, .formats = NULL, .labels = NULL, @@ -277,7 +277,10 @@ It is therefore better to always pre-process the dataset such that factors are m created from character variables before passing the dataset to \code{\link[rtables:build_table]{rtables::build_table()}}. } -To use for comparison (with p-value statistic added), parameter \code{compare} must be set to \code{TRUE}. +\itemize{ +\item To use for comparison (with additional p-value statistic), parameter \code{compare} must be set to \code{TRUE}. +\item Ensure that either all \code{NA} values are converted to an explicit \code{NA} level or all \code{NA} values are left as is. +} } \examples{ # `s_summary.numeric` @@ -352,16 +355,22 @@ s_summary(x, denom = "N_row", .N_row = 10L) s_summary(x, denom = "N_col", .N_col = 20L) # summary analysis - compare = FALSE -a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla", .df_row = c()) -a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10, .df_row = c()) -a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, .df_row = c(), verbose = FALSE) -a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10, .df_row = c()) +a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") +a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) +a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) +a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) # comparison analysis - compare = TRUE -a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla", compare = TRUE) -a_summary(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE, compare = TRUE) -a_summary(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE, compare = TRUE) -a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE, compare = TRUE) +a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) +a_summary( + factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE +) +a_summary( + c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE +) +a_summary( + c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE +) ## Fabricated dataset. dta_test <- data.frame( diff --git a/tests/testthat/_snaps/summarize_variables.md b/tests/testthat/_snaps/summarize_variables.md index a7a3db1ea3..6104bd290b 100644 --- a/tests/testthat/_snaps/summarize_variables.md +++ b/tests/testthat/_snaps/summarize_variables.md @@ -920,6 +920,229 @@ [1] 0 +# a_summary works with healthy input. + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 n 10 0 n + 2 Sum 1.3 0 Sum + 3 Mean 0.1 0 Mean + 4 SD 0.8 0 SD + 5 SE 0.2 0 SE + 6 Mean (SD) 0.1 (0.8) 0 Mean (SD) + 7 Mean (SE) 0.1 (0.2) 0 Mean (SE) + 8 Mean 95% CI (-0.43, 0.69) 0 Mean 95% CI + 9 Mean -/+ 1xSE (-0.11, 0.38) 0 Mean -/+ 1xSE + 10 Mean -/+ 1xSD (-0.65, 0.91) 0 Mean -/+ 1xSD + 11 Mean p-value (H0: mean = 0) 0.61 0 Mean p-value (H0: mean = 0) + 12 Median 0.3 0 Median + 13 Median Absolute Deviation -0.0 0 Median Absolute Deviation + 14 Median 95% CI (-0.82, 0.74) 0 Median 95% CI + 15 25% and 75%-ile -0.6 - 0.6 0 25% and 75%-ile + 16 IQR 1.2 0 IQR + 17 Min - Max -0.8 - 1.6 0 Min - Max + 18 CV (%) 590.4 0 CV (%) + 19 Minimum -0.8 0 Minimum + 20 Maximum 1.6 0 Maximum + 21 Median (Min - Max) 0.3 (-0.8 - 1.6) 0 Median (Min - Max) + 22 Geometric Mean NA 0 Geometric Mean + 23 CV % Geometric Mean NA 0 CV % Geometric Mean + +--- + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 n 5 0 n + 2 a 3 0 a + 3 b 1 0 b + 4 c 1 0 c + 5 a 3 (60%) 0 a + 6 b 1 (20%) 0 b + 7 c 1 (20%) 0 c + 8 n_blq 0 0 n_blq + +--- + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 n 4 0 n + 2 A 2 0 A + 3 B 1 0 B + 4 C 1 0 C + 5 A 2 (50%) 0 A + 6 B 1 (25%) 0 B + 7 C 1 (25%) 0 C + 8 n_blq 0 0 n_blq + +--- + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 n 5 0 n + 2 count 3 0 count + 3 count_fraction 3 (60%) 0 count_fraction + 4 n_blq 0 0 n_blq + +# a_summary works with custom input. + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 std. dev 1 3 std. dev + 2 Median 95% CI -0.62 - 1.12 3 Median 95% CI + +--- + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 number of records 5.00 -1 number of records + 2 a 2 5 a + 3 b 1 5 b + 4 c 1 5 c + 5 NA 1 5 NA + 6 a 2 (40%) 0 a + 7 b 1 (20%) 0 b + 8 c 1 (20%) 0 c + 9 NA 1 (20%) 0 NA + 10 n_blq 0 0 n_blq + +# a_summary works with healthy input when compare = TRUE. + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 n 10 0 n + 2 Sum 51.3 0 Sum + 3 Mean 5.1 0 Mean + 4 SD 0.8 0 SD + 5 SE 0.2 0 SE + 6 Mean (SD) 5.1 (0.8) 0 Mean (SD) + 7 Mean (SE) 5.1 (0.2) 0 Mean (SE) + 8 Mean 95% CI (4.57, 5.69) 0 Mean 95% CI + 9 Mean -/+ 1xSE (4.89, 5.38) 0 Mean -/+ 1xSE + 10 Mean -/+ 1xSD (4.35, 5.91) 0 Mean -/+ 1xSD + 11 Mean p-value (H0: mean = 0) 0.00 0 Mean p-value (H0: mean = 0) + 12 Median 5.3 0 Median + 13 Median Absolute Deviation -0.0 0 Median Absolute Deviation + 14 Median 95% CI (4.18, 5.74) 0 Median 95% CI + 15 25% and 75%-ile 4.4 - 5.6 0 25% and 75%-ile + 16 IQR 1.2 0 IQR + 17 Min - Max 4.2 - 6.6 0 Min - Max + 18 CV (%) 15.2 0 CV (%) + 19 Minimum 4.2 0 Minimum + 20 Maximum 6.6 0 Maximum + 21 Median (Min - Max) 5.3 (4.2 - 6.6) 0 Median (Min - Max) + 22 Geometric Mean 5.1 0 Geometric Mean + 23 CV % Geometric Mean 15.2 0 CV % Geometric Mean + 24 p-value (t-test) <0.0001 0 p-value (t-test) + +--- + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 n 5 0 n + 2 a 3 0 a + 3 b 1 0 b + 4 c 1 0 c + 5 a 3 (60%) 0 a + 6 b 1 (20%) 0 b + 7 c 1 (20%) 0 c + 8 n_blq 0 0 n_blq + 9 p-value (chi-squared test) 0.9560 0 p-value (chi-squared test) + +--- + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 n 4 0 n + 2 A 2 0 A + 3 B 1 0 B + 4 C 1 0 C + 5 A 2 (50%) 0 A + 6 B 1 (25%) 0 B + 7 C 1 (25%) 0 C + 8 n_blq 0 0 n_blq + 9 p-value (chi-squared test) 0.9074 0 p-value (chi-squared test) + +--- + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 n 5 0 n + 2 count 3 0 count + 3 count_fraction 3 (60%) 0 count_fraction + 4 n_blq 0 0 n_blq + 5 p-value (chi-squared test) 0.8091 0 p-value (chi-squared test) + +# a_summary works with custom input when compare = TRUE. + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 pvalue <0.0001 3 pvalue + 2 Median 95% CI -0.41 - 1.10 3 Median 95% CI + +--- + + Code + res + Output + RowsVerticalSection (in_rows) object print method: + ---------------------------- + row_name formatted_cell indent_mod row_label + 1 number of records 5.00 -1 number of records + 2 a 2 5 a + 3 b 1 5 b + 4 c 1 5 c + 5 NA 1 5 NA + 6 a 2 (40%) 0 a + 7 b 1 (20%) 0 b + 8 c 1 (20%) 0 c + 9 NA 1 (20%) 0 NA + 10 n_blq 0 0 n_blq + 11 p-value (chi-squared test) 0.8254 0 p-value (chi-squared test) + # `summarize_vars` works with healthy input, default `na.rm = TRUE`. Code diff --git a/tests/testthat/test-summarize_variables.R b/tests/testthat/test-summarize_variables.R index 6e163c6d76..716dad910e 100644 --- a/tests/testthat/test-summarize_variables.R +++ b/tests/testthat/test-summarize_variables.R @@ -160,6 +160,92 @@ testthat::test_that("s_summary works with logical vectors and by if requested do testthat::expect_snapshot(res) }) +testthat::test_that("a_summary works with healthy input.", { + options("width" = 100) + # numeric input + set.seed(1) + result <- a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + # factor input + result <- a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + # character input + result <- a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + # logical input + result <- a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("a_summary works with custom input.", { + options("width" = 100) + result <- a_summary( + rnorm(10), .N_col = 10, .N_row = 20, control_summarize_vars(conf_level = 0.90), .stats = c("sd", "median_ci"), + .formats = c(sd = "xx.", median_ci = "xx.xx - xx.xx"), .labels = c(sd = "std. dev"), .indent_mods = 3L + ) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + result <- a_summary( + factor(c("a", "a", "b", "c", NA)), .N_row = 10, .N_col = 10, .formats = c(n = "xx.xx"), + .labels = c(n = "number of records"), .indent_mods = c(n = -1L, count = 5L), na.rm = FALSE + ) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("a_summary works with healthy input when compare = TRUE.", { + options("width" = 100) + # numeric input + set.seed(1) + result <- a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + # factor input + result <- a_summary( + factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE + ) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + # character input + result <- a_summary(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + # logical input + result <- a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), compare = TRUE) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("a_summary works with custom input when compare = TRUE.", { + options("width" = 100) + result <- a_summary( + rnorm(10), .ref_group = rnorm(20, -5, 1), .N_col = 10, .N_row = 20, control_summarize_vars(conf_level = 0.90), + .stats = c("pval", "median_ci"), .formats = c(median_ci = "xx.xx - xx.xx"), .labels = c(pval = "pvalue"), + .indent_mods = 3L, compare = TRUE + ) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + result <- a_summary( + factor(c("a", "a", "b", "c", NA)), .ref_group = factor(c("a", "a", "b", "c")), .N_row = 10, .N_col = 10, + .formats = c(n = "xx.xx"), .labels = c(n = "number of records"), .indent_mods = c(n = -1L, count = 5L), + na.rm = FALSE, compare = TRUE + ) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + testthat::test_that("`summarize_vars` works with healthy input, default `na.rm = TRUE`.", { dta_test <- data.frame(AVAL = c(1:4, NA, NA)) From 825c36f0a740a82969fb93ce2af4f3a2e878cc2b Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 22 Jun 2023 16:06:17 -0400 Subject: [PATCH 10/33] Improve code --- R/compare_variables.R | 10 ++++--- R/summarize_colvars.R | 9 ++++--- R/summarize_variables.R | 59 +++++++++++++++++------------------------ 3 files changed, 37 insertions(+), 41 deletions(-) diff --git a/R/compare_variables.R b/R/compare_variables.R index 739f4ecdce..3e7939338e 100644 --- a/R/compare_variables.R +++ b/R/compare_variables.R @@ -371,16 +371,18 @@ compare_vars <- function(lyt, .formats = NULL, .labels = NULL, .indent_mods = NULL) { + extra_args <- list(.stats = .stats, na.rm = na.rm, na_level = na_level, compare = TRUE, ...) + if (!is.null(.formats)) extra_args[[".formats"]] <- .formats + if (!is.null(.labels)) extra_args[[".labels"]] <- .labels + if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods + analyze( lyt = lyt, vars = vars, var_labels = var_labels, afun = a_summary, nested = nested, - extra_args = list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, - na.rm = na.rm, na_level = na_level, compare = TRUE, ... - ), + extra_args = extra_args, inclNAs = TRUE, show_labels = show_labels, table_names = table_names, diff --git a/R/summarize_colvars.R b/R/summarize_colvars.R index 8d75b41b0e..7d20568238 100644 --- a/R/summarize_colvars.R +++ b/R/summarize_colvars.R @@ -67,11 +67,14 @@ summarize_colvars <- function(lyt, .formats = NULL, .labels = NULL, .indent_mods = NULL) { + extra_args <- list(.stats = .stats, na_level = na_level, ...) + if (!is.null(.formats)) extra_args[[".formats"]] <- .formats + if (!is.null(.labels)) extra_args[[".labels"]] <- .labels + if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods + analyze_colvars( lyt, afun = a_summary, - extra_args = list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_level = na_level, ... - ) + extra_args = extra_args ) } diff --git a/R/summarize_variables.R b/R/summarize_variables.R index f80c4e0840..1d8b295ca4 100644 --- a/R/summarize_variables.R +++ b/R/summarize_variables.R @@ -544,10 +544,10 @@ a_summary <- function(x, .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, - .stats = NULL, - .formats = NULL, - .labels = NULL, - .indent_mods = NULL, + .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))), + .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")), + .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")), + .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")), na.rm = TRUE, # nolint na_level = NA_character_, compare = FALSE, @@ -560,34 +560,23 @@ a_summary <- function(x, } if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") - x_stats <- if (!compare) { - s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) + if (!compare) { + x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) + .stats <- setdiff(.stats, "pval") + .formats <- .formats[setdiff(names(.formats), "pval")] + .labels <- .labels[setdiff(names(.labels), "pval")] + if (!is.null(names(.indent_mods))) .indent_mods <- .indent_mods[setdiff(names(.indent_mods), "pval")] } else { - s_compare( + x_stats <- s_compare( x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ... ) } - - if (is.null(.stats)) { - .stats <- names(get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats"))) - if (!compare) .stats <- head(.stats, -1) - } - if (is.null(.formats)) { - .formats <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats")) - if (!compare) .formats <- head(.formats, -1) - } if (is.numeric(x)) { - .a_compare_numeric_labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply( - c("mean_ci", "mean_pval", "median_ci", "quantiles"), function(x) attr(x_stats[[x]], "label") - ) - } - if (is.null(.labels)) { - .labels <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels")) - if (!compare) .labels <- head(.labels, -1) - } - if (is.null(.indent_mods)) { - .indent_mods <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents")) - if (!compare) .indent_mods <- head(.indent_mods, -1) + for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) { + if (!i %in% names(.labels) || .labels[[i]] == .a_compare_numeric_labels[[i]]) { + .labels[[i]] <- attr(x_stats[[i]], "label") + } + } } if (length(.indent_mods) == 1 && is.null(names(.indent_mods))) { .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats) @@ -596,13 +585,13 @@ a_summary <- function(x, .stats <- intersect(.stats, names(x_stats)) x_stats <- x_stats[.stats] .formats <- extract_by_name( - .formats, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats")) + .formats, .stats, get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")) ) .labels <- extract_by_name( - .labels, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels")) + .labels, .stats, get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")) ) .indent_mods <- extract_by_name( - .indent_mods, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents")) + .indent_mods, .stats, get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")) ) if (!is.numeric(x) && !is.logical(x)) { @@ -727,16 +716,18 @@ summarize_vars <- function(lyt, .formats = NULL, .labels = NULL, .indent_mods = NULL) { + extra_args <- list(.stats = .stats, na.rm = na.rm, na_level = na_level, ...) + if (!is.null(.formats)) extra_args[[".formats"]] <- .formats + if (!is.null(.labels)) extra_args[[".labels"]] <- .labels + if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods + analyze( lyt = lyt, vars = vars, var_labels = var_labels, afun = a_summary, nested = nested, - extra_args = list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, - na.rm = na.rm, na_level = na_level, ... - ), + extra_args = extra_args, inclNAs = TRUE, show_labels = show_labels, table_names = table_names, From ec7a1a3855b9154a90098337d240c7063fce01f8 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 22 Jun 2023 16:44:04 -0400 Subject: [PATCH 11/33] Update stale snapshots --- .../_snaps/estimate_incidence_rate.md | 4 +-- tests/testthat/_snaps/summarize_coxreg.md | 33 +++++++++++++++++++ tests/testthat/_snaps/summarize_glm_count.md | 16 ++++----- 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/tests/testthat/_snaps/estimate_incidence_rate.md b/tests/testthat/_snaps/estimate_incidence_rate.md index cabf6a1ab4..ba28293ec4 100644 --- a/tests/testthat/_snaps/estimate_incidence_rate.md +++ b/tests/testthat/_snaps/estimate_incidence_rate.md @@ -9,10 +9,10 @@ $conf_type [1] "exact" - $time_unit_input + $input_time_unit [1] "month" - $time_unit_output + $num_pt_year [1] 100 diff --git a/tests/testthat/_snaps/summarize_coxreg.md b/tests/testthat/_snaps/summarize_coxreg.md index 715fd433e2..98b9138061 100644 --- a/tests/testthat/_snaps/summarize_coxreg.md +++ b/tests/testthat/_snaps/summarize_coxreg.md @@ -224,6 +224,39 @@ Age All 1.01 (1.00, 1.02) 0.2486 +# summarize_coxreg works with character covariate in univariate case when interaction = TRUE + + Code + res + Output + n Hazard Ratio 95% CI p-value Interaction p-value + ———————————————————————————————————————————————————————————————————————————————————————————————— + Treatment: + ARM: 2 vs control (ARM: 1) 340 0.64 (0.43, 0.94) 0.0242 + Covariate: + COVAR2 340 0.7759 + F 0.67 (0.36, 1.22) + M 0.60 (0.36, 0.99) + +--- + + Code + res + Output + n Hazard Ratio 95% CI p-value Interaction p-value + ———————————————————————————————————————————————————————————————————————————————————————————————— + Treatment: + ARM: 2 vs control (ARM: 1) 340 0.64 (0.43, 0.94) 0.0242 + Covariate: + A Covariate Label 340 0.9883 + 1 0.63 (0.35, 1.14) + 2 0.58 (0.27, 1.26) + 3 0.55 (0.22, 1.35) + 4 0.69 (0.23, 2.07) + COVAR2 340 0.7759 + F 0.67 (0.36, 1.22) + M 0.60 (0.36, 0.99) + # summarize_coxreg adds the multivariate Cox regression layer to rtables Code diff --git a/tests/testthat/_snaps/summarize_glm_count.md b/tests/testthat/_snaps/summarize_glm_count.md index 81caa9c79f..a2e15c0bd7 100644 --- a/tests/testthat/_snaps/summarize_glm_count.md +++ b/tests/testthat/_snaps/summarize_glm_count.md @@ -68,10 +68,10 @@ Code res Output - rate asymp.LCL asymp.UCL ARM - A: Drug X 9.029759 6.295928 12.95068 A: Drug X - B: Placebo 9.029759 6.295928 12.95068 B: Placebo - C: Combination 9.029759 6.295928 12.95068 C: Combination + rate asymp.LCL asymp.UCL ARM + A: Drug X 3.07 2.202774 4.278651 A: Drug X + B: Placebo 3.07 2.202774 4.278651 B: Placebo + C: Combination 3.07 2.202774 4.278651 C: Combination # s_glm_count works with healthy input @@ -82,12 +82,12 @@ [1] 73 $rate - [1] 10.20612 + [1] 3.486005 attr(,"label") [1] "Adjusted Rate" $rate_ci - [1] 5.627535 18.509855 + [1] 1.983340 6.127155 attr(,"label") [1] "95% CI" @@ -116,12 +116,12 @@ [1] 73 $rate - [1] 10.20612 + [1] 3.486005 attr(,"label") [1] "Adjusted Rate" $rate_ci - [1] 5.627535 18.509855 + [1] 1.983340 6.127155 attr(,"label") [1] "95% CI" From 65572ff6dca9f915380a7eddea48895f2baaa1ac Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 22 Jun 2023 20:47:58 +0000 Subject: [PATCH 12/33] [skip actions] Restyle files --- R/summarize_colvars.R | 2 +- R/summarize_variables.R | 9 ++++++--- R/utils.R | 4 +++- R/utils_factor.R | 3 ++- tests/testthat/test-summarize_variables.R | 15 ++++++++++----- 5 files changed, 22 insertions(+), 11 deletions(-) diff --git a/R/summarize_colvars.R b/R/summarize_colvars.R index ebc9fa638f..7df3304f77 100644 --- a/R/summarize_colvars.R +++ b/R/summarize_colvars.R @@ -75,6 +75,6 @@ summarize_colvars <- function(lyt, analyze_colvars( lyt, afun = a_summary, - extra_args = extra_args + extra_args = extra_args ) } diff --git a/R/summarize_variables.R b/R/summarize_variables.R index 1d8b295ca4..2af5e41fb7 100644 --- a/R/summarize_variables.R +++ b/R/summarize_variables.R @@ -527,13 +527,16 @@ s_summary.logical <- function(x, #' # comparison analysis - compare = TRUE #' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) #' a_summary( -#' factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE +#' factor(c("a", "a", "b", "c", "a")), +#' .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE #' ) #' a_summary( -#' c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE +#' c("A", "B", "A", "C"), +#' .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE #' ) #' a_summary( -#' c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE +#' c(TRUE, FALSE, FALSE, TRUE, TRUE), +#' .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE #' ) #' #' @export diff --git a/R/utils.R b/R/utils.R index 7d1a71cfd2..66f8b3ad51 100644 --- a/R/utils.R +++ b/R/utils.R @@ -248,7 +248,9 @@ combine_vectors <- function(x, y) { #' #' @keywords internal extract_by_name <- function(x, names, x_defaults = NULL) { - if (is.null(x)) return(NULL) + if (is.null(x)) { + return(NULL) + } checkmate::assert_named(x) checkmate::assert_character(names) which_extract <- intersect(names(x), names) diff --git a/R/utils_factor.R b/R/utils_factor.R index 8d0c707336..1e727061c8 100644 --- a/R/utils_factor.R +++ b/R/utils_factor.R @@ -309,7 +309,8 @@ ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_co length(.labels) }) .indent_mods <- append( - .indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) { + .indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), + after = if (stat %in% names(.indent_mods)) { which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == a_name) } else { length(.indent_mods) diff --git a/tests/testthat/test-summarize_variables.R b/tests/testthat/test-summarize_variables.R index 716dad910e..ba22857d26 100644 --- a/tests/testthat/test-summarize_variables.R +++ b/tests/testthat/test-summarize_variables.R @@ -187,14 +187,16 @@ testthat::test_that("a_summary works with healthy input.", { testthat::test_that("a_summary works with custom input.", { options("width" = 100) result <- a_summary( - rnorm(10), .N_col = 10, .N_row = 20, control_summarize_vars(conf_level = 0.90), .stats = c("sd", "median_ci"), + rnorm(10), + .N_col = 10, .N_row = 20, control_summarize_vars(conf_level = 0.90), .stats = c("sd", "median_ci"), .formats = c(sd = "xx.", median_ci = "xx.xx - xx.xx"), .labels = c(sd = "std. dev"), .indent_mods = 3L ) res <- testthat::expect_silent(result) testthat::expect_snapshot(res) result <- a_summary( - factor(c("a", "a", "b", "c", NA)), .N_row = 10, .N_col = 10, .formats = c(n = "xx.xx"), + factor(c("a", "a", "b", "c", NA)), + .N_row = 10, .N_col = 10, .formats = c(n = "xx.xx"), .labels = c(n = "number of records"), .indent_mods = c(n = -1L, count = 5L), na.rm = FALSE ) res <- testthat::expect_silent(result) @@ -211,7 +213,8 @@ testthat::test_that("a_summary works with healthy input when compare = TRUE.", { # factor input result <- a_summary( - factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE + factor(c("a", "a", "b", "c", "a")), + .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE ) res <- testthat::expect_silent(result) testthat::expect_snapshot(res) @@ -230,7 +233,8 @@ testthat::test_that("a_summary works with healthy input when compare = TRUE.", { testthat::test_that("a_summary works with custom input when compare = TRUE.", { options("width" = 100) result <- a_summary( - rnorm(10), .ref_group = rnorm(20, -5, 1), .N_col = 10, .N_row = 20, control_summarize_vars(conf_level = 0.90), + rnorm(10), + .ref_group = rnorm(20, -5, 1), .N_col = 10, .N_row = 20, control_summarize_vars(conf_level = 0.90), .stats = c("pval", "median_ci"), .formats = c(median_ci = "xx.xx - xx.xx"), .labels = c(pval = "pvalue"), .indent_mods = 3L, compare = TRUE ) @@ -238,7 +242,8 @@ testthat::test_that("a_summary works with custom input when compare = TRUE.", { testthat::expect_snapshot(res) result <- a_summary( - factor(c("a", "a", "b", "c", NA)), .ref_group = factor(c("a", "a", "b", "c")), .N_row = 10, .N_col = 10, + factor(c("a", "a", "b", "c", NA)), + .ref_group = factor(c("a", "a", "b", "c")), .N_row = 10, .N_col = 10, .formats = c(n = "xx.xx"), .labels = c(n = "number of records"), .indent_mods = c(n = -1L, count = 5L), na.rm = FALSE, compare = TRUE ) From 2a870c26b93f502c7f9a26ab68668df511b7a5ca Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 22 Jun 2023 17:05:56 -0400 Subject: [PATCH 13/33] Add test, update NEWS --- NEWS.md | 2 ++ R/utils.R | 2 +- tests/testthat/test-utils.R | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 8db55c707f..ded07bab27 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ * Added method for `character` class to `h_coxreg_inter_effect` enabling `character` covariates in `summarize_coxreg`. * Refactored `a_summary` to no longer use helper function `create_afun_summary`. * Refactored `summarize_vars` and `compare_vars` to use refactored `a_summary`. +* Created new helper function `ungroup_stats` to ungroup and format statistics calculated for each level of a factor variable. +* Added new argument `x_defaults` to `extract_by_name` to allow user to specify default values to implement for values in `names` that are not specified in `x`. ### Miscellaneous * Began deprecation of `time_unit_input` and `time_unit_output` arguments and replaced them with the `input_time_unit` and `num_pt_year`, respectively, in `control_incidence_rate`. diff --git a/R/utils.R b/R/utils.R index 66f8b3ad51..037a29acbd 100644 --- a/R/utils.R +++ b/R/utils.R @@ -262,7 +262,7 @@ extract_by_name <- function(x, names, x_defaults = NULL) { if (length(which_extract) > 0) { x_fill[which_extract] <- x[which_extract] x_fill <- if (!is.null(x_defaults)) x_fill[names] else x_fill[which_extract] - } else if (!is.null(x_defaults)) { + } else if (length(intersect(names(x_defaults), names)) > 0) { x_fill <- x_fill[names] } else { x_fill <- NULL diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 87210c6163..feba1d12e9 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -89,6 +89,25 @@ testthat::test_that("extract_by_name returns NULL when there is no overlap", { testthat::expect_snapshot(res) }) +testthat::test_that("extract_by_name x_defaults argument works as expected", { + result <- extract_by_name( + x = c(a = "xx", b = function(x) paste(x, "bla")), + names = c("b", "c"), + x_defaults = c(c = "c_default") + + ) + expected <- c(b = function(x) paste(x, "bla"), c = "c_default") + testthat::expect_identical(result, expected) + + # no overlap + result <- extract_by_name( + x = c(a = "xx", b = "xx.xx"), + c("d", "c"), + c(a = "test") + ) + testthat::expect_identical(result, NULL) +}) + testthat::test_that("aesi_label works as expected for SMQ", { smq01nam <- c("AESI 1", "", NA) smq01sc <- c("NARROW", "", NA) From b66be67495d6b30d325915ecc9b24e8a7d4df644 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 22 Jun 2023 21:08:38 +0000 Subject: [PATCH 14/33] [skip actions] Restyle files --- tests/testthat/test-utils.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index feba1d12e9..b772079d10 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -94,7 +94,6 @@ testthat::test_that("extract_by_name x_defaults argument works as expected", { x = c(a = "xx", b = function(x) paste(x, "bla")), names = c("b", "c"), x_defaults = c(c = "c_default") - ) expected <- c(b = function(x) paste(x, "bla"), c = "c_default") testthat::expect_identical(result, expected) From c23aeab26cda1530c65efadfc7ce5f4c15e58b4a Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 22 Jun 2023 17:30:18 -0400 Subject: [PATCH 15/33] Update deprecated functions, roxygen --- R/compare_variables.R | 16 ++++++++-------- R/summarize_variables.R | 4 ++-- man/compare_variables.Rd | 12 ++++++++---- man/summarize_variables.Rd | 21 ++++++++++++++------- tests/testthat/test-compare_variables.R | 13 +++++++++++++ tests/testthat/test-summarize_variables.R | 6 ++++++ 6 files changed, 51 insertions(+), 21 deletions(-) diff --git a/R/compare_variables.R b/R/compare_variables.R index 3e7939338e..2159c2efed 100644 --- a/R/compare_variables.R +++ b/R/compare_variables.R @@ -262,17 +262,17 @@ a_compare <- function(x, .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, - .stats = NULL, - .formats = NULL, - .labels = NULL, - .indent_mods = NULL, + .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))), + .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")), + .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")), + .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")), na.rm = TRUE, # nolint na_level = NA_character_, ...) { lifecycle::deprecate_warn( - "0.8.2", + "0.8.3", "a_compare()", - "a_summary(compare = TRUE)" + details = "Please use a_summary() with argument `compare` set to TRUE instead." ) a_summary( x = x, @@ -317,9 +317,9 @@ create_afun_compare <- function(.stats = NULL, .labels = NULL, .indent_mods = NULL) { lifecycle::deprecate_stop( - "0.8.2", + "0.8.3", "create_afun_compare()", - "a_summary(compare = TRUE)" + details = "Please use a_summary(compare = TRUE) directly instead." ) } diff --git a/R/summarize_variables.R b/R/summarize_variables.R index 2af5e41fb7..4fa1562205 100644 --- a/R/summarize_variables.R +++ b/R/summarize_variables.R @@ -636,9 +636,9 @@ a_summary <- function(x, #' @export create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) { lifecycle::deprecate_stop( - "0.8.2", + "0.8.3", "create_afun_summary()", - "a_summary()" + details = "Please use a_summary() directly instead." ) } diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd index 8beb2ddae6..7ba393662c 100644 --- a/man/compare_variables.Rd +++ b/man/compare_variables.Rd @@ -38,10 +38,14 @@ a_compare( .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, - .stats = NULL, - .formats = NULL, - .labels = NULL, - .indent_mods = NULL, + .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), + "_labels"))), + .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), + "_formats")), + .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), + "_labels")), + .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), + "_indents")), na.rm = TRUE, na_level = NA_character_, ... diff --git a/man/summarize_variables.Rd b/man/summarize_variables.Rd index f54f0b88dd..cc4fbccc81 100644 --- a/man/summarize_variables.Rd +++ b/man/summarize_variables.Rd @@ -61,10 +61,14 @@ a_summary( .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, - .stats = NULL, - .formats = NULL, - .labels = NULL, - .indent_mods = NULL, + .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), + "_labels"))), + .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), + "_formats")), + .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), + "_labels")), + .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), + "_indents")), na.rm = TRUE, na_level = NA_character_, compare = FALSE, @@ -363,13 +367,16 @@ a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) # comparison analysis - compare = TRUE a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) a_summary( - factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE + factor(c("a", "a", "b", "c", "a")), + .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE ) a_summary( - c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE + c("A", "B", "A", "C"), + .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE ) a_summary( - c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE + c(TRUE, FALSE, FALSE, TRUE, TRUE), + .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE ) ## Fabricated dataset. diff --git a/tests/testthat/test-compare_variables.R b/tests/testthat/test-compare_variables.R index 1c76f86ae3..6979f844b9 100644 --- a/tests/testthat/test-compare_variables.R +++ b/tests/testthat/test-compare_variables.R @@ -124,3 +124,16 @@ testthat::test_that("compare_vars 'na_level' argument works as expected", { res <- testthat::expect_silent(result) testthat::expect_snapshot(res) }) + +# Deprecated functions + +testthat::test_that("create_afun_compare returns error message", { + testthat::expect_error(create_afun_compare()) +}) + +testthat::test_that("a_compare returns correct output and warning message", { + testthat::expect_warning(result <- a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .stats = c("n", "pval"))) + expected <- a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .stats = c("n", "pval"), compare = TRUE) + + testthat::expect_equal(result, expected) +}) diff --git a/tests/testthat/test-summarize_variables.R b/tests/testthat/test-summarize_variables.R index ba22857d26..c926338f17 100644 --- a/tests/testthat/test-summarize_variables.R +++ b/tests/testthat/test-summarize_variables.R @@ -458,3 +458,9 @@ testthat::test_that("summarize_vars 'na_level' argument works as expected", { res <- testthat::expect_silent(result) testthat::expect_snapshot(res) }) + +# Deprecated functions + +testthat::test_that("create_afun_summary returns error message", { + testthat::expect_error(create_afun_summary()) +}) From 11d8696459797c3491130dcbd621fc1396061948 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 22 Jun 2023 17:34:07 -0400 Subject: [PATCH 16/33] Update WORDLIST --- inst/WORDLIST | 3 +++ 1 file changed, 3 insertions(+) diff --git a/inst/WORDLIST b/inst/WORDLIST index dadff44ef3..06c569fd9d 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -15,6 +15,8 @@ Satterthwaite Schouten TLG TLGs +Ungroup +Ungroups biomarker biomarkers coercible @@ -29,4 +31,5 @@ responders subtable subtables unformatted +ungroup unstratified From 93fc5e16f8cc2feb9886161271c698cede9ca0c9 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 22 Jun 2023 17:40:04 -0400 Subject: [PATCH 17/33] Fix checks --- NEWS.md | 2 +- _pkgdown.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index ded07bab27..cc38aa1de8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,7 @@ * Added method for `character` class to `h_coxreg_inter_effect` enabling `character` covariates in `summarize_coxreg`. * Refactored `a_summary` to no longer use helper function `create_afun_summary`. * Refactored `summarize_vars` and `compare_vars` to use refactored `a_summary`. -* Created new helper function `ungroup_stats` to ungroup and format statistics calculated for each level of a factor variable. +* Created new internal helper function `ungroup_stats` to ungroup and format statistics calculated for factor variables. * Added new argument `x_defaults` to `extract_by_name` to allow user to specify default values to implement for values in `names` that are not specified in `x`. ### Miscellaneous diff --git a/_pkgdown.yml b/_pkgdown.yml index b4c818a3cf..fb620ca251 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -177,3 +177,4 @@ reference: desc: Functions that are currently deprecated within `tern`. contents: - pairwise + - starts_with("create_") From 56ccc08c270d53fb6e8f4fcd5337305d8e0d0ed0 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 4 Jul 2023 17:10:45 -0400 Subject: [PATCH 18/33] Refactor, create summary customizing function --- R/analyze_variables.R | 351 +++++++++++++++++++++++++++++------------- 1 file changed, 244 insertions(+), 107 deletions(-) diff --git a/R/analyze_variables.R b/R/analyze_variables.R index 3fc0a98ebb..5ef65ef05b 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -42,14 +42,19 @@ control_summarize_vars <- control_analyze_vars #' #' @return A named `vector` of default statistic formats for the given data type. #' -#' @keywords internal -summary_formats <- function(type = "numeric") { - if (type == "counts") { +#' @examples +#' summary_formats() +#' summary_formats(type = "count", include_pval = TRUE) +#' +#' @export +summary_formats <- function(type = "numeric", include_pval = FALSE) { + fmts <- if (type == "counts") { c( n = "xx.", count = "xx.", count_fraction = format_count_fraction, - n_blq = "xx." + n_blq = "xx.", + pval = "x.xxxx | (<0.0001)" ) } else { c( @@ -75,9 +80,12 @@ summary_formats <- function(type = "numeric") { max = "xx.x", median_range = "xx.x (xx.x - xx.x)", geom_mean = "xx.x", - geom_cv = "xx.x" + geom_cv = "xx.x", + pval = "x.xxxx | (<0.0001)" ) } + if (!include_pval) fmts <- head(fmts, -1) + fmts } #' Label Function for Descriptive Statistics @@ -88,14 +96,19 @@ summary_formats <- function(type = "numeric") { #' #' @return A named `vector` of default statistic labels for the given data type. #' -#' @keywords internal -summary_labels <- function(type = "numeric") { - if (type == "counts") { +#' @examples +#' summary_labels() +#' summary_labels(type = "count", include_pval = TRUE) +#' +#' @export +summary_labels <- function(type = "numeric", include_pval = FALSE) { + lbls <- if (type == "counts") { c( n = "n", count = "count", count_fraction = "count_fraction", - n_blq = "n_blq" + n_blq = "n_blq", + pval = "p-value (chi-squared test)" ) } else { c( @@ -121,9 +134,76 @@ summary_labels <- function(type = "numeric") { max = "Maximum", median_range = "Median (Min - Max)", geom_mean = "Geometric Mean", - geom_cv = "CV % Geometric Mean" + geom_cv = "CV % Geometric Mean", + pval = "p-value (t-test)" + ) + } + if (!include_pval) lbls <- head(lbls, -1) + lbls +} + +#' Set Defaults Settings for Summary Statistics +#' +#' @inheritParams summary_formats +#' @param include_pval (`logical`)\cr whether p-value should be included as a default statistic. +#' @param stats_custom (`named vector` of `character`) vector of statistics to include if not the defaults. This +#' argument overrides `include_pval` and other custom value arguments such that only settings for these statistics +#' will be returned. +#' @param formats_custom (`named vector` of `character`) vector of custom statistics formats to use in place of the +#' defaults defined in [`summary_formats()`]. Names should be a subset of the statistics defined in `stats_custom` (or +#' default statistics if this is `NULL`). +#' @param labels_custom (`named vector` of `character`) vector of custom statistics labels to use in place of the +#' defaults defined in [`summary_labels()`]. Names should be a subset of the statistics defined in `stats_custom` (or +#' default statistics if this is `NULL`). +#' @param indents_custom (`integer` or `named vector` of `integer`) vector of custom indentation modifiers for +#' statistics to use instead of the default of `0L` for all statistics. Names should be a subset of the statistics +#' defined in `stats_custom` (or default statistics if this is `NULL`). Alternatively, the same indentation modifier +#' can be applied to all statistics by setting `indents_custom` to a single integer value. +#' +#' @examples +#' summary_custom() +#' summary_custom(type = "counts", include_pval = TRUE) +#' summary_custom( +#' include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"), +#' labels_custom = c(sd = "Std. Dev."), indents_custom = 3L +#' ) +#' +#' @export +summary_custom <- function(type = "numeric", + include_pval = FALSE, + stats_custom = NULL, + formats_custom = NULL, + labels_custom = NULL, + indents_custom = NULL) { + .formats <- summary_formats(type = type, include_pval = include_pval) + if (is.null(stats_custom)) { + .stats <- names(.formats) + } else { + checkmate::assert_subset( + stats_custom, + c(names(s_summary(if (type == "numeric") numeric() else TRUE)), "pval") ) + .stats <- stats_custom } + .labels <- summary_labels(type = type, include_pval = include_pval) + .indents <- setNames(rep(0L, length(.stats)), .stats) + + if (!is.null(formats_custom)) .formats[names(formats_custom)] <- formats_custom + if (!is.null(labels_custom)) .labels[names(labels_custom)] <- labels_custom + if (!is.null(indents_custom)) { + if (is.null(names(indents_custom)) && length(indents_custom) == 1) { + .indents[names(.indents)] <- indents_custom + } else { + .indents[names(indents_custom)] <- indents_custom + } + } + + list( + stats = .stats, + formats = .formats[.stats], + labels = .labels[.stats], + indents = .indents[.stats] + ) } #' Analyze Variables @@ -502,47 +582,20 @@ s_summary.logical <- function(x, y } -<<<<<<< HEAD:R/summarize_variables.R -.a_compare_numeric_formats <- c(summary_formats(), pval = "x.xxxx | (<0.0001)") -.a_compare_numeric_labels <- c(summary_labels(), pval = "p-value (t-test)") -.a_compare_numeric_indents <- c( - rep(0L, length(.a_compare_numeric_labels)) %>% `names<-`(names(.a_compare_numeric_labels)) -) -.a_compare_counts_formats <- c(summary_formats(type = "counts"), pval = "x.xxxx | (<0.0001)") -.a_compare_counts_labels <- c(summary_labels(type = "counts"), pval = "p-value (chi-squared test)") -.a_compare_counts_indents <- c(rep(0L, length(.a_compare_counts_labels)) %>% `names<-`(names(.a_compare_counts_labels))) + #' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and #' `compare_vars()` and as `cfun` in `summarize_colvars()`. #' #' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics #' (`compare = TRUE` adds `pval` statistic comparing against reference group). -======= -#' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `analyze_vars()`. ->>>>>>> main:R/analyze_variables.R #' #' @return #' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()]. #' -<<<<<<< HEAD:R/summarize_variables.R #' @note #' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`. #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is. -======= -#' @export -a_summary <- function(x, - ..., - .N_row, # nolint - .N_col, # nolint - .var) { - UseMethod("a_summary", x) -} - -.a_summary_numeric_formats <- summary_formats() -.a_summary_numeric_labels <- summary_labels() - -#' @describeIn analyze_variables Formatted analysis function method for `numeric` class. ->>>>>>> main:R/analyze_variables.R #' #' @examples #' # summary analysis - compare = FALSE @@ -567,21 +620,21 @@ a_summary <- function(x, #' ) #' #' @export -a_summary <- function(x, - .N_col, # nolint - .N_row, # nolint - .var = NULL, - .df_row = NULL, - .ref_group = NULL, - .in_ref_col = FALSE, - .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))), - .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")), - .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")), - .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")), - na.rm = TRUE, # nolint - na_level = NA_character_, - compare = FALSE, - ...) { +a_summary_output <- function(x, + .N_col, # nolint + .N_row, # nolint + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, + .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))), + .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")), + .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")), + .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")), + na.rm = TRUE, # nolint + na_level = NA_character_, + compare = FALSE, + ...) { # Remove all-NA rows if (!is.null(.df_row) && ncol(.df_row) > 1) { in_tot_col <- nrow(.df_row) == length(x) @@ -609,10 +662,9 @@ a_summary <- function(x, } } if (length(.indent_mods) == 1 && is.null(names(.indent_mods))) { - .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats) + .indent_mods <- setNames(rep(.indent_mods, length(.stats)), .stats) } -<<<<<<< HEAD:R/summarize_variables.R .stats <- intersect(.stats, names(x_stats)) x_stats <- x_stats[.stats] .formats <- extract_by_name( @@ -643,7 +695,38 @@ a_summary <- function(x, .format_na_strs = na_level ) } -======= + +.a_compare_numeric_formats <- c(summary_formats(), pval = "x.xxxx | (<0.0001)") +.a_compare_numeric_labels <- c(summary_labels(), pval = "p-value (t-test)") +.a_compare_numeric_indents <- c( + rep(0L, length(.a_compare_numeric_labels)) %>% `names<-`(names(.a_compare_numeric_labels)) +) +.a_compare_counts_formats <- c(summary_formats(type = "counts"), pval = "x.xxxx | (<0.0001)") +.a_compare_counts_labels <- c(summary_labels(type = "counts"), pval = "p-value (chi-squared test)") +.a_compare_counts_indents <- c(rep(0L, length(.a_compare_counts_labels)) %>% `names<-`(names(.a_compare_counts_labels))) + + +#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and +#' `compare_vars()` and as `cfun` in `summarize_colvars()`. +#' +#' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics +#' (`compare = TRUE` adds `pval` statistic comparing against reference group). +#' +#' @return +#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()]. +#' +#' @note +#' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`. +#' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is. +#' @export +a_summary <- function(x, + ..., + .N_row, # nolint + .N_col, # nolint + .var) { + UseMethod("a_summary", x) +} + #' @describeIn analyze_variables Formatted analysis function method for `factor` class. #' #' @examples @@ -657,42 +740,115 @@ a_summary <- function(x, #' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) #' #' @export -a_summary.factor <- make_afun( - s_summary.factor, - .formats = .a_summary_counts_formats -) +a_summary.numeric <- function(x, + .N_col, # nolint + .N_row, # nolint + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, + compare = FALSE, + .stats = summary_custom(include_pval = compare)$stats, + .formats = summary_custom(include_pval = compare)$formats, + .labels = summary_custom(include_pval = compare)$labels, + .indent_mods = summary_custom(include_pval = compare)$indents, + na.rm = TRUE, # nolint + na_level = NA_character_, + ...) { + # browser() + # Remove all-NA rows + if (!is.null(.df_row) && ncol(.df_row) > 1) { + in_tot_col <- nrow(.df_row) == length(x) + .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ] + if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]] + } -#' @describeIn analyze_variables Formatted analysis function method for `character` class. -#' -#' @examples -#' # `a_summary.character` -#' afun <- make_afun( -#' getS3method("a_summary", "character"), -#' .ungroup_stats = c("count", "count_fraction") -#' ) -#' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) -#' -#' @export -a_summary.character <- make_afun( - s_summary.character, - .formats = .a_summary_counts_formats -) + x_stats <- if (!compare) { + s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) + } else { + s_compare( + x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ... + ) + } + + format_vals <- summary_custom( + include_pval = compare, + stats_custom = .stats, + formats_custom = .formats, + labels_custom = .labels, + indents_custom = .indent_mods + ) + .stats <- format_vals$stats + .formats <- format_vals$formats + .labels <- format_vals$labels + .indent_mods <- format_vals$indents + + for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) { + if (!i %in% names(.labels) || .labels[[i]] == summary_custom()$labels[[i]]) { + .labels[[i]] <- attr(x_stats[[i]], "label") + } + } + + x_stats <- x_stats[.stats] -#' @describeIn analyze_variables Formatted analysis function method for `logical` class. + in_rows( + .list = x_stats, + .formats = .formats, + .names = .labels, + .labels = .labels, + .indent_mods = .indent_mods, + .format_na_strs = na_level + ) +} + +#' #' @describeIn analyze_variables Formatted analysis function method for `factor` class. +#' #' +#' #' @examples +#' #' # `a_summary.factor` +#' #' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting +#' #' # functions can be applied correctly. +#' #' afun <- make_afun( +#' #' getS3method("a_summary", "factor"), +#' #' .ungroup_stats = c("count", "count_fraction") +#' #' ) +#' #' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) +#' #' +#' #' @export +#' a_summary.factor <- make_afun( +#' s_summary.factor, +#' .formats = .a_summary_counts_formats +#' ) #' -#' @examples -#' # `a_summary.logical` -#' afun <- make_afun( -#' getS3method("a_summary", "logical") +#' #' @describeIn analyze_variables Formatted analysis function method for `character` class. +#' #' +#' #' @examples +#' #' # `a_summary.character` +#' #' afun <- make_afun( +#' #' getS3method("a_summary", "character"), +#' #' .ungroup_stats = c("count", "count_fraction") +#' #' ) +#' #' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) +#' #' +#' #' @export +#' a_summary.character <- make_afun( +#' s_summary.character, +#' .formats = .a_summary_counts_formats #' ) -#' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) #' -#' @export -a_summary.logical <- make_afun( - s_summary.logical, - .formats = .a_summary_counts_formats -) ->>>>>>> main:R/analyze_variables.R +#' #' @describeIn analyze_variables Formatted analysis function method for `logical` class. +#' #' +#' #' @examples +#' #' # `a_summary.logical` +#' #' afun <- make_afun( +#' #' getS3method("a_summary", "logical") +#' #' ) +#' #' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) +#' #' +#' #' @export +#' a_summary.logical <- make_afun( +#' s_summary.logical, +#' .formats = .a_summary_counts_formats +#' ) #' Constructor Function for [analyze_vars()] and [summarize_colvars()] #' @@ -786,9 +942,8 @@ create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) { #' #' build_table(l, df = dta_test) #' -<<<<<<< HEAD:R/summarize_variables.R #' @export -summarize_vars <- function(lyt, +analyze_vars <- function(lyt, vars, var_labels = vars, nested = TRUE, @@ -806,24 +961,6 @@ summarize_vars <- function(lyt, if (!is.null(.formats)) extra_args[[".formats"]] <- .formats if (!is.null(.labels)) extra_args[[".labels"]] <- .labels if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods -======= -#' @export analyze_vars summarize_vars -#' @aliases summarize_vars -analyze_vars <- function(lyt, - vars, - var_labels = vars, - nested = TRUE, - ..., - na_level = NA_character_, - show_labels = "default", - table_names = vars, - section_div = NA_character_, - .stats = c("n", "mean_sd", "median", "range", "count_fraction"), - .formats = NULL, - .labels = NULL, - .indent_mods = NULL) { - afun <- create_afun_summary(.stats, .formats, .labels, .indent_mods) ->>>>>>> main:R/analyze_variables.R analyze( lyt = lyt, From 5987bc9b92b6f7d8dc9f16f6f5c156071964b364 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 4 Jul 2023 17:57:42 -0400 Subject: [PATCH 19/33] temp --- NAMESPACE | 9 +- R/analyze_variables.R | 315 +++++++++++++++++-------- man/analyze_variables.Rd | 490 +++++++++++++++++++++++++++++++++++++++ man/summary_custom.Rd | 49 ++++ man/summary_formats.Rd | 8 +- man/summary_labels.Rd | 8 +- 6 files changed, 777 insertions(+), 102 deletions(-) create mode 100644 man/analyze_variables.Rd create mode 100644 man/summary_custom.Rd diff --git a/NAMESPACE b/NAMESPACE index de60f429eb..d21c817875 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,9 @@ # Generated by roxygen2: do not edit by hand +S3method(a_summary,character) +S3method(a_summary,factor) +S3method(a_summary,logical) +S3method(a_summary,numeric) S3method(as.rtable,data.frame) S3method(h_coxreg_inter_effect,character) S3method(h_coxreg_inter_effect,factor) @@ -33,6 +37,7 @@ export(a_odds_ratio) export(a_proportion) export(a_proportion_diff) export(a_summary) +export(a_summary_output) export(add_rowcounts) export(aesi_label) export(analyze_num_patients) @@ -261,7 +266,9 @@ export(summarize_num_patients) export(summarize_occurrences_by_grade) export(summarize_patients_events_in_cols) export(summarize_patients_exposure_in_cols) -export(summarize_vars) +export(summary_custom) +export(summary_formats) +export(summary_labels) export(surv_time) export(surv_timepoint) export(tabulate_rsp_biomarkers) diff --git a/R/analyze_variables.R b/R/analyze_variables.R index 5ef65ef05b..d60ee8f7c8 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -623,17 +623,17 @@ s_summary.logical <- function(x, a_summary_output <- function(x, .N_col, # nolint .N_row, # nolint - .var = NULL, - .df_row = NULL, - .ref_group = NULL, - .in_ref_col = FALSE, - .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))), - .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")), - .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")), - .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")), - na.rm = TRUE, # nolint - na_level = NA_character_, - compare = FALSE, + .var, + .df_row, + .ref_group, + .in_ref_col, + .stats, + .formats, + .labels, + .indent_mods, + na.rm, # nolint + na_level, + compare, ...) { # Remove all-NA rows if (!is.null(.df_row) && ncol(.df_row) > 1) { @@ -643,41 +643,36 @@ a_summary_output <- function(x, } if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") - if (!compare) { - x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) - .stats <- setdiff(.stats, "pval") - .formats <- .formats[setdiff(names(.formats), "pval")] - .labels <- .labels[setdiff(names(.labels), "pval")] - if (!is.null(names(.indent_mods))) .indent_mods <- .indent_mods[setdiff(names(.indent_mods), "pval")] + x_stats <- if (!compare) { + s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) } else { - x_stats <- s_compare( + s_compare( x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ... ) } + + custom_summary <- summary_custom( + include_pval = compare, + stats_custom = .stats, + formats_custom = .formats, + labels_custom = .labels, + indents_custom = .indent_mods + ) + .stats <- custom_summary$stats + .formats <- custom_summary$formats + .labels <- custom_summary$labels + .indent_mods <- custom_summary$indents + if (is.numeric(x)) { for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) { - if (!i %in% names(.labels) || .labels[[i]] == .a_compare_numeric_labels[[i]]) { + if (!i %in% names(.labels) || .labels[[i]] == summary_custom()$labels[[i]]) { .labels[[i]] <- attr(x_stats[[i]], "label") } } } - if (length(.indent_mods) == 1 && is.null(names(.indent_mods))) { - .indent_mods <- setNames(rep(.indent_mods, length(.stats)), .stats) - } - .stats <- intersect(.stats, names(x_stats)) x_stats <- x_stats[.stats] - .formats <- extract_by_name( - .formats, .stats, get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")) - ) - .labels <- extract_by_name( - .labels, .stats, get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")) - ) - .indent_mods <- extract_by_name( - .indent_mods, .stats, get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")) - ) - - if (!is.numeric(x) && !is.logical(x)) { + if (is.factor(x)) { x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col) x_stats <- x_ungrp[["x"]] .stats <- x_ungrp[[".stats"]] @@ -696,16 +691,6 @@ a_summary_output <- function(x, ) } -.a_compare_numeric_formats <- c(summary_formats(), pval = "x.xxxx | (<0.0001)") -.a_compare_numeric_labels <- c(summary_labels(), pval = "p-value (t-test)") -.a_compare_numeric_indents <- c( - rep(0L, length(.a_compare_numeric_labels)) %>% `names<-`(names(.a_compare_numeric_labels)) -) -.a_compare_counts_formats <- c(summary_formats(type = "counts"), pval = "x.xxxx | (<0.0001)") -.a_compare_counts_labels <- c(summary_labels(type = "counts"), pval = "p-value (chi-squared test)") -.a_compare_counts_indents <- c(rep(0L, length(.a_compare_counts_labels)) %>% `names<-`(names(.a_compare_counts_labels))) - - #' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and #' `compare_vars()` and as `cfun` in `summarize_colvars()`. #' @@ -720,10 +705,13 @@ a_summary_output <- function(x, #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is. #' @export a_summary <- function(x, - ..., - .N_row, # nolint .N_col, # nolint - .var) { + .N_row, # nolint + .var, + .df_row, + .ref_group, + .in_ref_col, + ...) { UseMethod("a_summary", x) } @@ -755,7 +743,6 @@ a_summary.numeric <- function(x, na.rm = TRUE, # nolint na_level = NA_character_, ...) { - # browser() # Remove all-NA rows if (!is.null(.df_row) && ncol(.df_row) > 1) { in_tot_col <- nrow(.df_row) == length(x) @@ -771,17 +758,17 @@ a_summary.numeric <- function(x, ) } - format_vals <- summary_custom( + custom_summary <- summary_custom( include_pval = compare, stats_custom = .stats, formats_custom = .formats, labels_custom = .labels, indents_custom = .indent_mods ) - .stats <- format_vals$stats - .formats <- format_vals$formats - .labels <- format_vals$labels - .indent_mods <- format_vals$indents + .stats <- custom_summary$stats + .formats <- custom_summary$formats + .labels <- custom_summary$labels + .indent_mods <- custom_summary$indents for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) { if (!i %in% names(.labels) || .labels[[i]] == summary_custom()$labels[[i]]) { @@ -790,7 +777,6 @@ a_summary.numeric <- function(x, } x_stats <- x_stats[.stats] - in_rows( .list = x_stats, .formats = .formats, @@ -801,54 +787,189 @@ a_summary.numeric <- function(x, ) } -#' #' @describeIn analyze_variables Formatted analysis function method for `factor` class. -#' #' -#' #' @examples -#' #' # `a_summary.factor` -#' #' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting -#' #' # functions can be applied correctly. -#' #' afun <- make_afun( -#' #' getS3method("a_summary", "factor"), -#' #' .ungroup_stats = c("count", "count_fraction") -#' #' ) -#' #' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) -#' #' -#' #' @export -#' a_summary.factor <- make_afun( -#' s_summary.factor, -#' .formats = .a_summary_counts_formats +#' @describeIn analyze_variables Formatted analysis function method for `factor` class. +#' +#' @examples +#' # `a_summary.factor` +#' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting +#' # functions can be applied correctly. +#' afun <- make_afun( +#' getS3method("a_summary", "factor"), +#' .ungroup_stats = c("count", "count_fraction") #' ) +#' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) +#' +#' @export +a_summary.factor <- function(x, + .N_col, # nolint + .N_row, # nolint + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, + compare = FALSE, + .stats = summary_custom(type = "counts", include_pval = compare)$stats, + .formats = summary_custom(type = "counts", include_pval = compare)$formats, + .labels = summary_custom(type = "counts", include_pval = compare)$labels, + .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, + na.rm = TRUE, # nolint + na_level = NA_character_, + ...) { + # Remove all-NA rows + if (!is.null(.df_row) && ncol(.df_row) > 1) { + in_tot_col <- nrow(.df_row) == length(x) + .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ] + if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]] + } + + if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") + + x_stats <- if (!compare) { + s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) + } else { + s_compare( + x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ... + ) + } + + custom_summary <- summary_custom( + type = "counts", + include_pval = compare, + stats_custom = .stats, + formats_custom = .formats, + labels_custom = .labels, + indents_custom = .indent_mods + ) + .stats <- custom_summary$stats + .formats <- custom_summary$formats + .labels <- custom_summary$labels + .indent_mods <- custom_summary$indents + + x_stats <- x_stats[.stats] + x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col) + x_stats <- x_ungrp[["x"]] + .stats <- x_ungrp[[".stats"]] + .formats <- x_ungrp[[".formats"]] + .labels <- x_ungrp[[".labels"]] + .indent_mods <- x_ungrp[[".indent_mods"]] + + in_rows( + .list = x_stats, + .formats = .formats, + .names = .labels, + .labels = .labels, + .indent_mods = .indent_mods, + .format_na_strs = na_level + ) +} + +#' @describeIn analyze_variables Formatted analysis function method for `character` class. #' -#' #' @describeIn analyze_variables Formatted analysis function method for `character` class. -#' #' -#' #' @examples -#' #' # `a_summary.character` -#' #' afun <- make_afun( -#' #' getS3method("a_summary", "character"), -#' #' .ungroup_stats = c("count", "count_fraction") -#' #' ) -#' #' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) -#' #' -#' #' @export -#' a_summary.character <- make_afun( -#' s_summary.character, -#' .formats = .a_summary_counts_formats +#' @examples +#' # `a_summary.character` +#' afun <- make_afun( +#' getS3method("a_summary", "character"), +#' .ungroup_stats = c("count", "count_fraction") #' ) +#' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) #' -#' #' @describeIn analyze_variables Formatted analysis function method for `logical` class. -#' #' -#' #' @examples -#' #' # `a_summary.logical` -#' #' afun <- make_afun( -#' #' getS3method("a_summary", "logical") -#' #' ) -#' #' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) -#' #' -#' #' @export -#' a_summary.logical <- make_afun( -#' s_summary.logical, -#' .formats = .a_summary_counts_formats +#' @export +a_summary.character <- function(x, + .N_col, # nolint + .N_row, # nolint + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, + compare = FALSE, + .stats = summary_custom(type = "counts", include_pval = compare)$stats, + .formats = summary_custom(type = "counts", include_pval = compare)$formats, + .labels = summary_custom(type = "counts", include_pval = compare)$labels, + .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, + na.rm = TRUE, # nolint + na_level = NA_character_, + ...) { + x <- as.factor(x) + a_summary(x, + .N_col, # nolint + .N_row, # nolint + .var = .var, + .df_row = .df_row, + .ref_group = .ref_group, + .in_ref_col = .in_ref_col, + compare = compare, + .stats = .stats, + .formats = .formats, + .labels = .labels, + .indent_mods = .indent_mods, + na.rm = na.rm, + na_level = na_level, + ...) +} + +#' @describeIn analyze_variables Formatted analysis function method for `logical` class. +#' +#' @examples +#' # `a_summary.logical` +#' afun <- make_afun( +#' getS3method("a_summary", "logical") #' ) +#' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) +#' +#' @export +a_summary.logical <- function(x, + .N_col, # nolint + .N_row, # nolint + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, + compare = FALSE, + .stats = summary_custom(type = "counts", include_pval = compare)$stats, + .formats = summary_custom(type = "counts", include_pval = compare)$formats, + .labels = summary_custom(type = "counts", include_pval = compare)$labels, + .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, + na.rm = TRUE, # nolint + na_level = NA_character_, + ...) { + # Remove all-NA rows + if (!is.null(.df_row) && ncol(.df_row) > 1) { + in_tot_col <- nrow(.df_row) == length(x) + .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ] + if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]] + } + + if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") + x_stats <- if (!compare) { + s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) + } else { + s_compare( + x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ... + ) + } + + custom_summary <- summary_custom( + include_pval = compare, + stats_custom = .stats, + formats_custom = .formats, + labels_custom = .labels, + indents_custom = .indent_mods + ) + .stats <- custom_summary$stats + .formats <- custom_summary$formats + .labels <- custom_summary$labels + .indent_mods <- custom_summary$indents + + x_stats <- x_stats[.stats] + in_rows( + .list = x_stats, + .formats = .formats, + .names = .labels, + .labels = .labels, + .indent_mods = .indent_mods, + .format_na_strs = na_level + ) +} #' Constructor Function for [analyze_vars()] and [summarize_colvars()] #' diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd new file mode 100644 index 0000000000..5ec4af07e4 --- /dev/null +++ b/man/analyze_variables.Rd @@ -0,0 +1,490 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyze_variables.R +\name{analyze_variables} +\alias{analyze_variables} +\alias{s_summary} +\alias{s_summary.numeric} +\alias{s_summary.factor} +\alias{s_summary.character} +\alias{s_summary.logical} +\alias{a_summary.numeric} +\alias{a_summary.factor} +\alias{a_summary.character} +\alias{a_summary.logical} +\alias{analyze_vars} +\title{Analyze Variables} +\usage{ +s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...) + +\method{s_summary}{numeric}( + x, + na.rm = TRUE, + denom, + .N_row, + .N_col, + .var, + control = control_analyze_vars(), + ... +) + +\method{s_summary}{factor}( + x, + na.rm = TRUE, + denom = c("n", "N_row", "N_col"), + .N_row, + .N_col, + ... +) + +\method{s_summary}{character}( + x, + na.rm = TRUE, + denom = c("n", "N_row", "N_col"), + .N_row, + .N_col, + .var, + verbose = TRUE, + ... +) + +\method{s_summary}{logical}( + x, + na.rm = TRUE, + denom = c("n", "N_row", "N_col"), + .N_row, + .N_col, + ... +) + +\method{a_summary}{numeric}( + x, + .N_col, + .N_row, + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, + compare = FALSE, + .stats = summary_custom(include_pval = compare)$stats, + .formats = summary_custom(include_pval = compare)$formats, + .labels = summary_custom(include_pval = compare)$labels, + .indent_mods = summary_custom(include_pval = compare)$indents, + na.rm = TRUE, + na_level = NA_character_, + ... +) + +\method{a_summary}{factor}( + x, + .N_col, + .N_row, + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, + compare = FALSE, + .stats = summary_custom(type = "counts", include_pval = compare)$stats, + .formats = summary_custom(type = "counts", include_pval = compare)$formats, + .labels = summary_custom(type = "counts", include_pval = compare)$labels, + .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, + na.rm = TRUE, + na_level = NA_character_, + ... +) + +\method{a_summary}{character}( + x, + .N_col, + .N_row, + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, + compare = FALSE, + .stats = summary_custom(type = "counts", include_pval = compare)$stats, + .formats = summary_custom(type = "counts", include_pval = compare)$formats, + .labels = summary_custom(type = "counts", include_pval = compare)$labels, + .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, + na.rm = TRUE, + na_level = NA_character_, + ... +) + +\method{a_summary}{logical}( + x, + .N_col, + .N_row, + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, + compare = FALSE, + .stats = summary_custom(type = "counts", include_pval = compare)$stats, + .formats = summary_custom(type = "counts", include_pval = compare)$formats, + .labels = summary_custom(type = "counts", include_pval = compare)$labels, + .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, + na.rm = TRUE, + na_level = NA_character_, + ... +) + +analyze_vars( + lyt, + vars, + var_labels = vars, + nested = TRUE, + ..., + na.rm = TRUE, + na_level = NA_character_, + show_labels = "default", + table_names = vars, + section_div = NA_character_, + .stats = c("n", "mean_sd", "median", "range", "count_fraction"), + .formats = NULL, + .labels = NULL, + .indent_mods = NULL +) +} +\arguments{ +\item{x}{(\code{numeric})\cr vector of numbers we want to analyze.} + +\item{na.rm}{(\code{flag})\cr whether \code{NA} values should be removed from \code{x} prior to analysis.} + +\item{denom}{(\code{string})\cr choice of denominator for proportion. Options are: +\itemize{ +\item \code{n}: number of values in this row and column intersection. +\item \code{N_row}: total number of values in this row across columns. +\item \code{N_col}: total number of values in this column across rows. +}} + +\item{.N_row}{(\code{count})\cr column-wise N (column count) for the full column that is passed by \code{rtables}.} + +\item{.N_col}{(\code{count})\cr row-wise N (row group count) for the group of observations being analyzed +(i.e. with no column-based subsetting) that is passed by \code{rtables}.} + +\item{.var}{(\code{string})\cr single variable name that is passed by \code{rtables} when requested +by a statistics function.} + +\item{...}{arguments passed to \code{s_summary()}.} + +\item{control}{(\code{list})\cr parameters for descriptive statistics details, specified by using +the helper function \code{\link[=control_analyze_vars]{control_analyze_vars()}}. Some possible parameter options are: +\itemize{ +\item \code{conf_level} (\code{proportion})\cr confidence level of the interval for mean and median. +\item \code{quantiles} (\code{numeric})\cr vector of length two to specify the quantiles. +\item \code{quantile_type} (\code{numeric})\cr between 1 and 9 selecting quantile algorithms to be used. +See more about \code{type} in \code{\link[stats:quantile]{stats::quantile()}}. +\item \code{test_mean} (\code{numeric})\cr value to test against the mean under the null hypothesis when calculating p-value. +}} + +\item{verbose}{(\code{logical})\cr Defaults to \code{TRUE}, which prints out warnings and messages. It is mainly used +to print out information about factor casting.} + +\item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.} + +\item{.ref_group}{(\code{data.frame} or \code{vector})\cr the data corresponding to the reference group.} + +\item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.} + +\item{.stats}{(\code{character})\cr statistics to select for the table.} + +\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} + +\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} + +\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector +should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation +for that statistic's row label.} + +\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} + +\item{lyt}{(\code{layout})\cr input layout where analyses will be added to.} + +\item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.} + +\item{var_labels}{(\code{character})\cr character for label.} + +\item{nested}{(\code{flag})\cr whether this layout instruction be applied within the existing layout structure \emph{if +possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split +underneath analyses, which is not allowed.} + +\item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".} + +\item{table_names}{(\code{character})\cr this can be customized in case that the same \code{vars} are analyzed multiple times, +to avoid warnings from \code{rtables}.} + +\item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group +defined by this split instruction, or \code{NA_character_} (the default) for no section divider.} +} +\value{ +\itemize{ +\item \code{s_summary()} returns different statistics depending on the class of \code{x}. +} + +\itemize{ +\item If \code{x} is of class \code{numeric}, returns a \code{list} with the following named \code{numeric} items: +\itemize{ +\item \code{n}: The \code{\link[=length]{length()}} of \code{x}. +\item \code{sum}: The \code{\link[=sum]{sum()}} of \code{x}. +\item \code{mean}: The \code{\link[=mean]{mean()}} of \code{x}. +\item \code{sd}: The \code{\link[stats:sd]{stats::sd()}} of \code{x}. +\item \code{se}: The standard error of \code{x} mean, i.e.: (\code{sd(x) / sqrt(length(x))}). +\item \code{mean_sd}: The \code{\link[=mean]{mean()}} and \code{\link[stats:sd]{stats::sd()}} of \code{x}. +\item \code{mean_se}: The \code{\link[=mean]{mean()}} of \code{x} and its standard error (see above). +\item \code{mean_ci}: The CI for the mean of \code{x} (from \code{\link[=stat_mean_ci]{stat_mean_ci()}}). +\item \code{mean_sei}: The SE interval for the mean of \code{x}, i.e.: (\code{\link[=mean]{mean()}} -/+ \code{\link[stats:sd]{stats::sd()}} / \code{\link[=sqrt]{sqrt()}}). +\item \code{mean_sdi}: The SD interval for the mean of \code{x}, i.e.: (\code{\link[=mean]{mean()}} -/+ \code{\link[stats:sd]{stats::sd()}}). +\item \code{mean_pval}: The two-sided p-value of the mean of \code{x} (from \code{\link[=stat_mean_pval]{stat_mean_pval()}}). +\item \code{median}: The \code{\link[stats:median]{stats::median()}} of \code{x}. +\item \code{mad}: The median absolute deviation of \code{x}, i.e.: (\code{\link[stats:median]{stats::median()}} of \code{xc}, +where \code{xc} = \code{x} - \code{\link[stats:median]{stats::median()}}). +\item \code{median_ci}: The CI for the median of \code{x} (from \code{\link[=stat_median_ci]{stat_median_ci()}}). +\item \code{quantiles}: Two sample quantiles of \code{x} (from \code{\link[stats:quantile]{stats::quantile()}}). +\item \code{iqr}: The \code{\link[stats:IQR]{stats::IQR()}} of \code{x}. +\item \code{range}: The \code{\link[=range_noinf]{range_noinf()}} of \code{x}. +\item \code{min}: The \code{\link[=max]{max()}} of \code{x}. +\item \code{max}: The \code{\link[=min]{min()}} of \code{x}. +\item \code{median_range}: The \code{\link[=median]{median()}} and \code{\link[=range_noinf]{range_noinf()}} of \code{x}. +\item \code{cv}: The coefficient of variation of \code{x}, i.e.: (\code{\link[stats:sd]{stats::sd()}} / \code{\link[=mean]{mean()}} * 100). +\item \code{geom_mean}: The geometric mean of \code{x}, i.e.: (\code{exp(mean(log(x)))}). +\item \code{geom_cv}: The geometric coefficient of variation of \code{x}, i.e.: (\code{sqrt(exp(sd(log(x)) ^ 2) - 1) * 100}). +} +} + +\itemize{ +\item If \code{x} is of class \code{factor} or converted from \code{character}, returns a \code{list} with named \code{numeric} items: +\itemize{ +\item \code{n}: The \code{\link[=length]{length()}} of \code{x}. +\item \code{count}: A list with the number of cases for each level of the factor \code{x}. +\item \code{count_fraction}: Similar to \code{count} but also includes the proportion of cases for each level of the +factor \code{x} relative to the denominator, or \code{NA} if the denominator is zero. +} +} + +\itemize{ +\item If \code{x} is of class \code{logical}, returns a \code{list} with named \code{numeric} items: +\itemize{ +\item \code{n}: The \code{\link[=length]{length()}} of \code{x} (possibly after removing \code{NA}s). +\item \code{count}: Count of \code{TRUE} in \code{x}. +\item \code{count_fraction}: Count and proportion of \code{TRUE} in \code{x} relative to the denominator, or \code{NA} if the +denominator is zero. Note that \code{NA}s in \code{x} are never counted or leading to \code{NA} here. +} +} + +\itemize{ +\item \code{analyze_vars()} returns a layout object suitable for passing to further layouting functions, +or to \code{\link[rtables:build_table]{rtables::build_table()}}. Adding this function to an \code{rtable} layout will add formatted rows containing +the statistics from \code{s_summary()} to the table layout. +} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} + +We use the S3 generic function \code{\link[=s_summary]{s_summary()}} to implement summaries for different \code{x} objects. This +is used as a statistics function in combination with the analyze function \code{\link[=analyze_vars]{analyze_vars()}}. +} +\section{Functions}{ +\itemize{ +\item \code{s_summary()}: S3 generic function to produces a variable summary. + +\item \code{s_summary(numeric)}: Method for \code{numeric} class. + +\item \code{s_summary(factor)}: Method for \code{factor} class. + +\item \code{s_summary(character)}: Method for \code{character} class. This makes an automatic +conversion to factor (with a warning) and then forwards to the method for factors. + +\item \code{s_summary(logical)}: Method for \code{logical} class. + +\item \code{a_summary(numeric)}: Formatted analysis function method for \code{factor} class. + +\item \code{a_summary(factor)}: Formatted analysis function method for \code{factor} class. + +\item \code{a_summary(character)}: Formatted analysis function method for \code{character} class. + +\item \code{a_summary(logical)}: Formatted analysis function method for \code{logical} class. + +\item \code{analyze_vars()}: Layout-creating function which can take statistics function arguments +and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}. + +}} +\note{ +\itemize{ +\item If \code{x} is an empty vector, \code{NA} is returned. This is the expected feature so as to return \code{rcell} content in +\code{rtables} when the intersection of a column and a row delimits an empty data selection. +\item When the \code{mean} function is applied to an empty vector, \code{NA} will be returned instead of \code{NaN}, the latter +being standard behavior in R. +} + +\itemize{ +\item If \code{x} is an empty \code{factor}, a list is still returned for \code{counts} with one element +per factor level. If there are no levels in \code{x}, the function fails. +\item If factor variables contain \code{NA}, these \code{NA} values are excluded by default. To include \code{NA} values +set \code{na.rm = FALSE} and missing values will be displayed as an \code{NA} level. Alternatively, an explicit +factor level can be defined for \code{NA} values during pre-processing via \code{\link[=df_explicit_na]{df_explicit_na()}} - the +default \code{na_level} (\code{""}) will also be excluded when \code{na.rm} is set to \code{TRUE}. +} + +\itemize{ +\item Automatic conversion of character to factor does not guarantee that the table +can be generated correctly. In particular for sparse tables this very likely can fail. +It is therefore better to always pre-process the dataset such that factors are manually +created from character variables before passing the dataset to \code{\link[rtables:build_table]{rtables::build_table()}}. +} + +Deprecation cycle started for \code{summarize_vars} as it is going to renamed into +\code{analyze_vars}. Intention is to reflect better the core underlying \code{rtables} +functions; in this case \code{\link[rtables:analyze]{rtables::analyze()}}. +} +\examples{ +# `s_summary.numeric` + +## Basic usage: empty numeric returns NA-filled items. +s_summary(numeric()) + +## Management of NA values. +x <- c(NA_real_, 1) +s_summary(x, na.rm = TRUE) +s_summary(x, na.rm = FALSE) + +x <- c(NA_real_, 1, 2) +s_summary(x, stats = NULL) + +## Benefits in `rtables` contructions: +require(rtables) +dta_test <- data.frame( + Group = rep(LETTERS[1:3], each = 2), + sub_group = rep(letters[1:2], each = 3), + x = 1:6 +) + +## The summary obtained in with `rtables`: +basic_table() \%>\% + split_cols_by(var = "Group") \%>\% + split_rows_by(var = "sub_group") \%>\% + analyze(vars = "x", afun = s_summary) \%>\% + build_table(df = dta_test) + +## By comparison with `lapply`: +X <- split(dta_test, f = with(dta_test, interaction(Group, sub_group))) +lapply(X, function(x) s_summary(x$x)) + +# `s_summary.factor` + +## Basic usage: +s_summary(factor(c("a", "a", "b", "c", "a"))) +# Empty factor returns NA-filled items. +s_summary(factor(levels = c("a", "b", "c"))) + +## Management of NA values. +x <- factor(c(NA, "Female")) +x <- explicit_na(x) +s_summary(x, na.rm = TRUE) +s_summary(x, na.rm = FALSE) + +## Different denominators. +x <- factor(c("a", "a", "b", "c", "a")) +s_summary(x, denom = "N_row", .N_row = 10L) +s_summary(x, denom = "N_col", .N_col = 20L) + +# `s_summary.character` + +## Basic usage: +s_summary(c("a", "a", "b", "c", "a"), .var = "x", verbose = FALSE) +s_summary(c("a", "a", "b", "c", "a", ""), .var = "x", na.rm = FALSE, verbose = FALSE) + +# `s_summary.logical` + +## Basic usage: +s_summary(c(TRUE, FALSE, TRUE, TRUE)) + +## Management of NA values. +x <- c(NA, TRUE, FALSE) +s_summary(x, na.rm = TRUE) +s_summary(x, na.rm = FALSE) + +## Different denominators. +x <- c(TRUE, FALSE, TRUE, TRUE) +s_summary(x, denom = "N_row", .N_row = 10L) +s_summary(x, denom = "N_col", .N_col = 20L) + +# `a_summary.factor` +# We need to ungroup `count` and `count_fraction` first so that the rtables formatting +# functions can be applied correctly. +afun <- make_afun( + getS3method("a_summary", "factor"), + .ungroup_stats = c("count", "count_fraction") +) +afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) + +# `a_summary.factor` +# We need to ungroup `count` and `count_fraction` first so that the rtables formatting +# functions can be applied correctly. +afun <- make_afun( + getS3method("a_summary", "factor"), + .ungroup_stats = c("count", "count_fraction") +) +afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) + +# `a_summary.character` +afun <- make_afun( + getS3method("a_summary", "character"), + .ungroup_stats = c("count", "count_fraction") +) +afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) + +# `a_summary.logical` +afun <- make_afun( + getS3method("a_summary", "logical") +) +afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) + +## Fabricated dataset. +dta_test <- data.frame( + USUBJID = rep(1:6, each = 3), + PARAMCD = rep("lab", 6 * 3), + AVISIT = rep(paste0("V", 1:3), 6), + ARM = rep(LETTERS[1:3], rep(6, 3)), + AVAL = c(9:1, rep(NA, 9)) +) + +# `analyze_vars()` in `rtables` pipelines +## Default output within a `rtables` pipeline. +l <- basic_table() \%>\% + split_cols_by(var = "ARM") \%>\% + split_rows_by(var = "AVISIT") \%>\% + analyze_vars(vars = "AVAL") + +build_table(l, df = dta_test) + +## Select and format statistics output. +l <- basic_table() \%>\% + split_cols_by(var = "ARM") \%>\% + split_rows_by(var = "AVISIT") \%>\% + analyze_vars( + vars = "AVAL", + .stats = c("n", "mean_sd", "quantiles"), + .formats = c("mean_sd" = "xx.x, xx.x"), + .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3")) + ) + +build_table(l, df = dta_test) + +## Use arguments interpreted by `s_summary`. +l <- basic_table() \%>\% + split_cols_by(var = "ARM") \%>\% + split_rows_by(var = "AVISIT") \%>\% + analyze_vars(vars = "AVAL", na.rm = FALSE) + +build_table(l, df = dta_test) + +## Handle `NA` levels first when summarizing factors. +dta_test$AVISIT <- NA_character_ +dta_test <- df_explicit_na(dta_test) +l <- basic_table() \%>\% + split_cols_by(var = "ARM") \%>\% + analyze_vars(vars = "AVISIT", na.rm = FALSE) + +build_table(l, df = dta_test) + +} diff --git a/man/summary_custom.Rd b/man/summary_custom.Rd new file mode 100644 index 0000000000..ece05515c2 --- /dev/null +++ b/man/summary_custom.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyze_variables.R +\name{summary_custom} +\alias{summary_custom} +\title{Set Defaults Settings for Summary Statistics} +\usage{ +summary_custom( + type = "numeric", + include_pval = FALSE, + stats_custom = NULL, + formats_custom = NULL, + labels_custom = NULL, + indents_custom = NULL +) +} +\arguments{ +\item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.} + +\item{include_pval}{(\code{logical})\cr whether p-value should be included as a default statistic.} + +\item{stats_custom}{(\verb{named vector} of \code{character}) vector of statistics to include if not the defaults. This +argument overrides \code{include_pval} and other custom value arguments such that only settings for these statistics +will be returned.} + +\item{formats_custom}{(\verb{named vector} of \code{character}) vector of custom statistics formats to use in place of the +defaults defined in \code{\link[=summary_formats]{summary_formats()}}. Names should be a subset of the statistics defined in \code{stats_custom} (or +default statistics if this is \code{NULL}).} + +\item{labels_custom}{(\verb{named vector} of \code{character}) vector of custom statistics labels to use in place of the +defaults defined in \code{\link[=summary_labels]{summary_labels()}}. Names should be a subset of the statistics defined in \code{stats_custom} (or +default statistics if this is \code{NULL}).} + +\item{indents_custom}{(\code{integer} or \verb{named vector} of \code{integer}) vector of custom indentation modifiers for +statistics to use instead of the default of \code{0L} for all statistics. Names should be a subset of the statistics +defined in \code{stats_custom} (or default statistics if this is \code{NULL}). Alternatively, the same indentation modifier +can be applied to all statistics by setting \code{indents_custom} to a single integer value.} +} +\description{ +Set Defaults Settings for Summary Statistics +} +\examples{ +summary_custom() +summary_custom(type = "counts", include_pval = TRUE) +summary_custom( + include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"), + labels_custom = c(sd = "Std. Dev."), indents_custom = 3L +) + +} diff --git a/man/summary_formats.Rd b/man/summary_formats.Rd index ffa2da528f..86ea9d925d 100644 --- a/man/summary_formats.Rd +++ b/man/summary_formats.Rd @@ -4,7 +4,7 @@ \alias{summary_formats} \title{Format Function for Descriptive Statistics} \usage{ -summary_formats(type = "numeric") +summary_formats(type = "numeric", include_pval = FALSE) } \arguments{ \item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.} @@ -15,4 +15,8 @@ A named \code{vector} of default statistic formats for the given data type. \description{ Returns format patterns for descriptive statistics. The format is understood by \code{rtables}. } -\keyword{internal} +\examples{ +summary_formats() +summary_formats(type = "count", include_pval = TRUE) + +} diff --git a/man/summary_labels.Rd b/man/summary_labels.Rd index 3566089803..f2306a536c 100644 --- a/man/summary_labels.Rd +++ b/man/summary_labels.Rd @@ -4,7 +4,7 @@ \alias{summary_labels} \title{Label Function for Descriptive Statistics} \usage{ -summary_labels(type = "numeric") +summary_labels(type = "numeric", include_pval = FALSE) } \arguments{ \item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.} @@ -15,4 +15,8 @@ A named \code{vector} of default statistic labels for the given data type. \description{ Returns labels of descriptive statistics for numeric variables. } -\keyword{internal} +\examples{ +summary_labels() +summary_labels(type = "count", include_pval = TRUE) + +} From 8b386988f5122e3c30b2f0ff63421dc81a17a2e1 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 4 Jul 2023 18:18:58 -0400 Subject: [PATCH 20/33] Fix functions, remove custom argument --- R/analyze_variables.R | 257 +++++++++++------------------------- R/utils.R | 19 +-- man/analyze_variables.Rd | 40 +++--- man/extract_by_name.Rd | 7 +- tests/testthat/test-utils.R | 18 --- 5 files changed, 95 insertions(+), 246 deletions(-) diff --git a/R/analyze_variables.R b/R/analyze_variables.R index d60ee8f7c8..cc0868b9cb 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -75,11 +75,12 @@ summary_formats <- function(type = "numeric", include_pval = FALSE) { quantiles = "xx.x - xx.x", iqr = "xx.x", range = "xx.x - xx.x", - cv = "xx.x", min = "xx.x", max = "xx.x", median_range = "xx.x (xx.x - xx.x)", + cv = "xx.x", geom_mean = "xx.x", + geom_mean_ci = "(xx.xx, xx.xx)", geom_cv = "xx.x", pval = "x.xxxx | (<0.0001)" ) @@ -129,11 +130,12 @@ summary_labels <- function(type = "numeric", include_pval = FALSE) { quantiles = "25% and 75%-ile", iqr = "IQR", range = "Min - Max", - cv = "CV (%)", min = "Minimum", max = "Maximum", median_range = "Median (Min - Max)", + cv = "CV (%)", geom_mean = "Geometric Mean", + geom_mean_ci = "Geometric Mean 95% CI", geom_cv = "CV % Geometric Mean", pval = "p-value (t-test)" ) @@ -179,11 +181,7 @@ summary_custom <- function(type = "numeric", if (is.null(stats_custom)) { .stats <- names(.formats) } else { - checkmate::assert_subset( - stats_custom, - c(names(s_summary(if (type == "numeric") numeric() else TRUE)), "pval") - ) - .stats <- stats_custom + .stats <- intersect(stats_custom, names(.formats)) } .labels <- summary_labels(type = type, include_pval = include_pval) .indents <- setNames(rep(0L, length(.stats)), .stats) @@ -597,28 +595,6 @@ s_summary.logical <- function(x, #' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`. #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is. #' -#' @examples -#' # summary analysis - compare = FALSE -#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") -#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) -#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) -#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) -#' -#' # comparison analysis - compare = TRUE -#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) -#' a_summary( -#' factor(c("a", "a", "b", "c", "a")), -#' .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE -#' ) -#' a_summary( -#' c("A", "B", "A", "C"), -#' .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE -#' ) -#' a_summary( -#' c(TRUE, FALSE, FALSE, TRUE, TRUE), -#' .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE -#' ) -#' #' @export a_summary_output <- function(x, .N_col, # nolint @@ -703,6 +679,7 @@ a_summary_output <- function(x, #' @note #' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`. #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is. +#' #' @export a_summary <- function(x, .N_col, # nolint @@ -718,14 +695,8 @@ a_summary <- function(x, #' @describeIn analyze_variables Formatted analysis function method for `factor` class. #' #' @examples -#' # `a_summary.factor` -#' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting -#' # functions can be applied correctly. -#' afun <- make_afun( -#' getS3method("a_summary", "factor"), -#' .ungroup_stats = c("count", "count_fraction") -#' ) -#' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) +#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") +#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) #' #' @export a_summary.numeric <- function(x, @@ -743,61 +714,33 @@ a_summary.numeric <- function(x, na.rm = TRUE, # nolint na_level = NA_character_, ...) { - # Remove all-NA rows - if (!is.null(.df_row) && ncol(.df_row) > 1) { - in_tot_col <- nrow(.df_row) == length(x) - .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ] - if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]] - } - - x_stats <- if (!compare) { - s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) - } else { - s_compare( - x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ... - ) - } - - custom_summary <- summary_custom( - include_pval = compare, - stats_custom = .stats, - formats_custom = .formats, - labels_custom = .labels, - indents_custom = .indent_mods - ) - .stats <- custom_summary$stats - .formats <- custom_summary$formats - .labels <- custom_summary$labels - .indent_mods <- custom_summary$indents - - for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) { - if (!i %in% names(.labels) || .labels[[i]] == summary_custom()$labels[[i]]) { - .labels[[i]] <- attr(x_stats[[i]], "label") - } - } - - x_stats <- x_stats[.stats] - in_rows( - .list = x_stats, + a_summary( + x = x, + .N_col = .N_col, + .N_row = .N_row, + .var = .var, + .df_row = .df_row, + .ref_group = .ref_group, + .in_ref_col = .in_ref_col, + compare = compare, + .stats = .stats, .formats = .formats, - .names = .labels, .labels = .labels, .indent_mods = .indent_mods, - .format_na_strs = na_level + na.rm = na.rm, + na_level = na_level, + ... ) } #' @describeIn analyze_variables Formatted analysis function method for `factor` class. #' #' @examples -#' # `a_summary.factor` -#' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting -#' # functions can be applied correctly. -#' afun <- make_afun( -#' getS3method("a_summary", "factor"), -#' .ungroup_stats = c("count", "count_fraction") +#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) +#' a_summary( +#' factor(c("a", "a", "b", "c", "a")), +#' .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE #' ) -#' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) #' #' @export a_summary.factor <- function(x, @@ -815,63 +758,33 @@ a_summary.factor <- function(x, na.rm = TRUE, # nolint na_level = NA_character_, ...) { - # Remove all-NA rows - if (!is.null(.df_row) && ncol(.df_row) > 1) { - in_tot_col <- nrow(.df_row) == length(x) - .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ] - if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]] - } - - if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") - - x_stats <- if (!compare) { - s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) - } else { - s_compare( - x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ... - ) - } - - custom_summary <- summary_custom( - type = "counts", - include_pval = compare, - stats_custom = .stats, - formats_custom = .formats, - labels_custom = .labels, - indents_custom = .indent_mods - ) - .stats <- custom_summary$stats - .formats <- custom_summary$formats - .labels <- custom_summary$labels - .indent_mods <- custom_summary$indents - - x_stats <- x_stats[.stats] - x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col) - x_stats <- x_ungrp[["x"]] - .stats <- x_ungrp[[".stats"]] - .formats <- x_ungrp[[".formats"]] - .labels <- x_ungrp[[".labels"]] - .indent_mods <- x_ungrp[[".indent_mods"]] - - in_rows( - .list = x_stats, + a_summary( + x = x, + .N_col = .N_col, + .N_row = .N_row, + .var = .var, + .df_row = .df_row, + .ref_group = .ref_group, + .in_ref_col = .in_ref_col, + compare = compare, + .stats = .stats, .formats = .formats, - .names = .labels, .labels = .labels, .indent_mods = .indent_mods, - .format_na_strs = na_level + na.rm = na.rm, + na_level = na_level, + ... ) } #' @describeIn analyze_variables Formatted analysis function method for `character` class. #' #' @examples -#' # `a_summary.character` -#' afun <- make_afun( -#' getS3method("a_summary", "character"), -#' .ungroup_stats = c("count", "count_fraction") +#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) +#' a_summary( +#' c("A", "B", "A", "C"), +#' .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE #' ) -#' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) #' #' @export a_summary.character <- function(x, @@ -890,31 +803,33 @@ a_summary.character <- function(x, na_level = NA_character_, ...) { x <- as.factor(x) - a_summary(x, - .N_col, # nolint - .N_row, # nolint - .var = .var, - .df_row = .df_row, - .ref_group = .ref_group, - .in_ref_col = .in_ref_col, - compare = compare, - .stats = .stats, - .formats = .formats, - .labels = .labels, - .indent_mods = .indent_mods, - na.rm = na.rm, - na_level = na_level, - ...) + a_summary( + x = x, + .N_col = .N_col, + .N_row = .N_row, + .var = .var, + .df_row = .df_row, + .ref_group = .ref_group, + .in_ref_col = .in_ref_col, + compare = compare, + .stats = .stats, + .formats = .formats, + .labels = .labels, + .indent_mods = .indent_mods, + na.rm = na.rm, + na_level = na_level, + ... + ) } #' @describeIn analyze_variables Formatted analysis function method for `logical` class. #' #' @examples -#' # `a_summary.logical` -#' afun <- make_afun( -#' getS3method("a_summary", "logical") +#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) +#' a_summary( +#' c(TRUE, FALSE, FALSE, TRUE, TRUE), +#' .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE #' ) -#' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) #' #' @export a_summary.logical <- function(x, @@ -932,42 +847,22 @@ a_summary.logical <- function(x, na.rm = TRUE, # nolint na_level = NA_character_, ...) { - # Remove all-NA rows - if (!is.null(.df_row) && ncol(.df_row) > 1) { - in_tot_col <- nrow(.df_row) == length(x) - .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ] - if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]] - } - - if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") - x_stats <- if (!compare) { - s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) - } else { - s_compare( - x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ... - ) - } - - custom_summary <- summary_custom( - include_pval = compare, - stats_custom = .stats, - formats_custom = .formats, - labels_custom = .labels, - indents_custom = .indent_mods - ) - .stats <- custom_summary$stats - .formats <- custom_summary$formats - .labels <- custom_summary$labels - .indent_mods <- custom_summary$indents - - x_stats <- x_stats[.stats] - in_rows( - .list = x_stats, + a_summary( + x = x, + .N_col = .N_col, + .N_row = .N_row, + .var = .var, + .df_row = .df_row, + .ref_group = .ref_group, + .in_ref_col = .in_ref_col, + compare = compare, + .stats = .stats, .formats = .formats, - .names = .labels, .labels = .labels, .indent_mods = .indent_mods, - .format_na_strs = na_level + na.rm = na.rm, + na_level = na_level, + ... ) } diff --git a/R/utils.R b/R/utils.R index 037a29acbd..a5dcaed550 100644 --- a/R/utils.R +++ b/R/utils.R @@ -236,38 +236,25 @@ combine_vectors <- function(x, y) { #' - If `x` is `NULL`, then still always `NULL` is returned (same as in base function). #' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those #' elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s. -#' - If `x_defaults` is not `NULL`, these values will be used to fill in any values that are in -#' `names` but not in `x`. #' #' @param x (named `vector`)\cr where to extract named elements from. #' @param names (`character`)\cr vector of names to extract. -#' @param x_defaults (named `vector`)\cr named vector of default values to fill in for each value in `names` if no -#' corresponding value in `x` exists. #' #' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`. #' #' @keywords internal -extract_by_name <- function(x, names, x_defaults = NULL) { +extract_by_name <- function(x, names) { if (is.null(x)) { return(NULL) } checkmate::assert_named(x) checkmate::assert_character(names) which_extract <- intersect(names(x), names) - x_fill <- c() - if (!is.null(x_defaults)) { - checkmate::assert_named(x_defaults) - x_fill <- x_defaults - } if (length(which_extract) > 0) { - x_fill[which_extract] <- x[which_extract] - x_fill <- if (!is.null(x_defaults)) x_fill[names] else x_fill[which_extract] - } else if (length(intersect(names(x_defaults), names)) > 0) { - x_fill <- x_fill[names] + x[which_extract] } else { - x_fill <- NULL + NULL } - x_fill } #' Labels for Adverse Event Baskets diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd index 5ec4af07e4..714af59a8d 100644 --- a/man/analyze_variables.Rd +++ b/man/analyze_variables.Rd @@ -408,36 +408,26 @@ x <- c(TRUE, FALSE, TRUE, TRUE) s_summary(x, denom = "N_row", .N_row = 10L) s_summary(x, denom = "N_col", .N_col = 20L) -# `a_summary.factor` -# We need to ungroup `count` and `count_fraction` first so that the rtables formatting -# functions can be applied correctly. -afun <- make_afun( - getS3method("a_summary", "factor"), - .ungroup_stats = c("count", "count_fraction") -) -afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) - -# `a_summary.factor` -# We need to ungroup `count` and `count_fraction` first so that the rtables formatting -# functions can be applied correctly. -afun <- make_afun( - getS3method("a_summary", "factor"), - .ungroup_stats = c("count", "count_fraction") +a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") +a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) + +a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) +a_summary( + factor(c("a", "a", "b", "c", "a")), + .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE ) -afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) -# `a_summary.character` -afun <- make_afun( - getS3method("a_summary", "character"), - .ungroup_stats = c("count", "count_fraction") +a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) +a_summary( + c("A", "B", "A", "C"), + .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE ) -afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) -# `a_summary.logical` -afun <- make_afun( - getS3method("a_summary", "logical") +a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) +a_summary( + c(TRUE, FALSE, FALSE, TRUE, TRUE), + .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE ) -afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) ## Fabricated dataset. dta_test <- data.frame( diff --git a/man/extract_by_name.Rd b/man/extract_by_name.Rd index 186478ec0a..bc328b2a20 100644 --- a/man/extract_by_name.Rd +++ b/man/extract_by_name.Rd @@ -4,15 +4,12 @@ \alias{extract_by_name} \title{Extract Elements by Name} \usage{ -extract_by_name(x, names, x_defaults = NULL) +extract_by_name(x, names) } \arguments{ \item{x}{(named \code{vector})\cr where to extract named elements from.} \item{names}{(\code{character})\cr vector of names to extract.} - -\item{x_defaults}{(named \code{vector})\cr named vector of default values to fill in for each value in \code{names} if no -corresponding value in \code{x} exists.} } \value{ \code{NULL} if \code{x} is \code{NULL}, otherwise the extracted elements from \code{x}. @@ -26,8 +23,6 @@ Differences to the standard \code{[} function are: \item If \code{x} is \code{NULL}, then still always \code{NULL} is returned (same as in base function). \item If \code{x} is not \code{NULL}, then the intersection of its names is made with \code{names} and those elements are returned. That is, \code{names} which don't appear in \code{x} are not returned as \code{NA}s. -\item If \code{x_defaults} is not \code{NULL}, these values will be used to fill in any values that are in -\code{names} but not in \code{x}. } } \keyword{internal} diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index b772079d10..87210c6163 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -89,24 +89,6 @@ testthat::test_that("extract_by_name returns NULL when there is no overlap", { testthat::expect_snapshot(res) }) -testthat::test_that("extract_by_name x_defaults argument works as expected", { - result <- extract_by_name( - x = c(a = "xx", b = function(x) paste(x, "bla")), - names = c("b", "c"), - x_defaults = c(c = "c_default") - ) - expected <- c(b = function(x) paste(x, "bla"), c = "c_default") - testthat::expect_identical(result, expected) - - # no overlap - result <- extract_by_name( - x = c(a = "xx", b = "xx.xx"), - c("d", "c"), - c(a = "test") - ) - testthat::expect_identical(result, NULL) -}) - testthat::test_that("aesi_label works as expected for SMQ", { smq01nam <- c("AESI 1", "", NA) smq01sc <- c("NARROW", "", NA) From 5ce7c64b808c64f7dad26d161acd1259714f620c Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 4 Jul 2023 18:46:57 -0400 Subject: [PATCH 21/33] Fix tests, clean up code --- NEWS.md | 1 - R/analyze_variables.R | 79 ++++++++++++---------- R/compare_variables.R | 12 ---- tests/testthat/_snaps/analyze_variables.md | 24 ++++--- tests/testthat/test-analyze_variables.R | 2 +- 5 files changed, 56 insertions(+), 62 deletions(-) diff --git a/NEWS.md b/NEWS.md index e7f52b7bd3..9262a426af 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,7 +5,6 @@ * Refactored `a_summary` to no longer use helper function `create_afun_summary`. * Refactored `summarize_vars` and `compare_vars` to use refactored `a_summary`. * Created new internal helper function `ungroup_stats` to ungroup and format statistics calculated for factor variables. -* Added new argument `x_defaults` to `extract_by_name` to allow user to specify default values to implement for values in `names` that are not specified in `x`. ### Enhancements * Started deprecation cycle for `summarize_vars` and `control_summarize_vars`. Renamed into `analyze_vars` and `control_analyze_vars` to reflect underlying `rtables` machinery while keeping backward compatibility with aliases. diff --git a/R/analyze_variables.R b/R/analyze_variables.R index cc0868b9cb..21cf753303 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -177,12 +177,10 @@ summary_custom <- function(type = "numeric", formats_custom = NULL, labels_custom = NULL, indents_custom = NULL) { + if ("pval" %in% stats_custom) include_pval <- TRUE + .formats <- summary_formats(type = type, include_pval = include_pval) - if (is.null(stats_custom)) { - .stats <- names(.formats) - } else { - .stats <- intersect(stats_custom, names(.formats)) - } + .stats <- if (is.null(stats_custom)) names(.formats) else intersect(stats_custom, names(.formats)) .labels <- summary_labels(type = type, include_pval = include_pval) .indents <- setNames(rep(0L, length(.stats)), .stats) @@ -603,13 +601,14 @@ a_summary_output <- function(x, .df_row, .ref_group, .in_ref_col, + compare, + type, .stats, .formats, .labels, .indent_mods, na.rm, # nolint na_level, - compare, ...) { # Remove all-NA rows if (!is.null(.df_row) && ncol(.df_row) > 1) { @@ -628,6 +627,7 @@ a_summary_output <- function(x, } custom_summary <- summary_custom( + type = type, include_pval = compare, stats_custom = .stats, formats_custom = .formats, @@ -714,7 +714,7 @@ a_summary.numeric <- function(x, na.rm = TRUE, # nolint na_level = NA_character_, ...) { - a_summary( + a_summary_output( x = x, .N_col = .N_col, .N_row = .N_row, @@ -723,6 +723,7 @@ a_summary.numeric <- function(x, .ref_group = .ref_group, .in_ref_col = .in_ref_col, compare = compare, + type = "numeric", .stats = .stats, .formats = .formats, .labels = .labels, @@ -758,7 +759,7 @@ a_summary.factor <- function(x, na.rm = TRUE, # nolint na_level = NA_character_, ...) { - a_summary( + a_summary_output( x = x, .N_col = .N_col, .N_row = .N_row, @@ -767,6 +768,7 @@ a_summary.factor <- function(x, .ref_group = .ref_group, .in_ref_col = .in_ref_col, compare = compare, + type = "counts", .stats = .stats, .formats = .formats, .labels = .labels, @@ -803,7 +805,8 @@ a_summary.character <- function(x, na_level = NA_character_, ...) { x <- as.factor(x) - a_summary( + .ref_group <- as.factor(.ref_group) + a_summary_output( x = x, .N_col = .N_col, .N_row = .N_row, @@ -812,6 +815,7 @@ a_summary.character <- function(x, .ref_group = .ref_group, .in_ref_col = .in_ref_col, compare = compare, + type = "counts", .stats = .stats, .formats = .formats, .labels = .labels, @@ -833,21 +837,21 @@ a_summary.character <- function(x, #' #' @export a_summary.logical <- function(x, - .N_col, # nolint - .N_row, # nolint - .var = NULL, - .df_row = NULL, - .ref_group = NULL, - .in_ref_col = FALSE, - compare = FALSE, - .stats = summary_custom(type = "counts", include_pval = compare)$stats, - .formats = summary_custom(type = "counts", include_pval = compare)$formats, - .labels = summary_custom(type = "counts", include_pval = compare)$labels, - .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, - na.rm = TRUE, # nolint - na_level = NA_character_, - ...) { - a_summary( + .N_col, # nolint + .N_row, # nolint + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, + compare = FALSE, + .stats = summary_custom(type = "counts", include_pval = compare)$stats, + .formats = summary_custom(type = "counts", include_pval = compare)$formats, + .labels = summary_custom(type = "counts", include_pval = compare)$labels, + .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, + na.rm = TRUE, # nolint + na_level = NA_character_, + ...) { + a_summary_output( x = x, .N_col = .N_col, .N_row = .N_row, @@ -856,6 +860,7 @@ a_summary.logical <- function(x, .ref_group = .ref_group, .in_ref_col = .in_ref_col, compare = compare, + type = "counts", .stats = .stats, .formats = .formats, .labels = .labels, @@ -960,19 +965,19 @@ create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) { #' #' @export analyze_vars <- function(lyt, - vars, - var_labels = vars, - nested = TRUE, - ..., - na.rm = TRUE, # nolint - na_level = NA_character_, - show_labels = "default", - table_names = vars, - section_div = NA_character_, - .stats = c("n", "mean_sd", "median", "range", "count_fraction"), - .formats = NULL, - .labels = NULL, - .indent_mods = NULL) { + vars, + var_labels = vars, + nested = TRUE, + ..., + na.rm = TRUE, # nolint + na_level = NA_character_, + show_labels = "default", + table_names = vars, + section_div = NA_character_, + .stats = c("n", "mean_sd", "median", "range", "count_fraction"), + .formats = NULL, + .labels = NULL, + .indent_mods = NULL) { extra_args <- list(.stats = .stats, na.rm = na.rm, na_level = na_level, ...) if (!is.null(.formats)) extra_args[[".formats"]] <- .formats if (!is.null(.labels)) extra_args[[".labels"]] <- .labels diff --git a/R/compare_variables.R b/R/compare_variables.R index 2c5092c703..cb97788397 100644 --- a/R/compare_variables.R +++ b/R/compare_variables.R @@ -262,12 +262,6 @@ a_compare <- function(x, .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, - .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))), - .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")), - .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")), - .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")), - na.rm = TRUE, # nolint - na_level = NA_character_, ...) { lifecycle::deprecate_warn( "0.8.3", @@ -282,12 +276,6 @@ a_compare <- function(x, .df_row = .df_row, .ref_group = .ref_group, .in_ref_col = .in_ref_col, - .stats = .stats, - .formats = .formats, - .labels = .labels, - .indent_mods = .indent_mods, - na.rm = na.rm, - na_level = na_level, compare = TRUE, ... ) diff --git a/tests/testthat/_snaps/analyze_variables.md b/tests/testthat/_snaps/analyze_variables.md index 587af512d0..78d1afbf16 100644 --- a/tests/testthat/_snaps/analyze_variables.md +++ b/tests/testthat/_snaps/analyze_variables.md @@ -945,12 +945,13 @@ 15 25% and 75%-ile -0.6 - 0.6 0 25% and 75%-ile 16 IQR 1.2 0 IQR 17 Min - Max -0.8 - 1.6 0 Min - Max - 18 CV (%) 590.4 0 CV (%) - 19 Minimum -0.8 0 Minimum - 20 Maximum 1.6 0 Maximum - 21 Median (Min - Max) 0.3 (-0.8 - 1.6) 0 Median (Min - Max) + 18 Minimum -0.8 0 Minimum + 19 Maximum 1.6 0 Maximum + 20 Median (Min - Max) 0.3 (-0.8 - 1.6) 0 Median (Min - Max) + 21 CV (%) 590.4 0 CV (%) 22 Geometric Mean NA 0 Geometric Mean - 23 CV % Geometric Mean NA 0 CV % Geometric Mean + 23 Geometric Mean 95% CI NA 0 Geometric Mean 95% CI + 24 CV % Geometric Mean NA 0 CV % Geometric Mean --- @@ -1054,13 +1055,14 @@ 15 25% and 75%-ile 4.4 - 5.6 0 25% and 75%-ile 16 IQR 1.2 0 IQR 17 Min - Max 4.2 - 6.6 0 Min - Max - 18 CV (%) 15.2 0 CV (%) - 19 Minimum 4.2 0 Minimum - 20 Maximum 6.6 0 Maximum - 21 Median (Min - Max) 5.3 (4.2 - 6.6) 0 Median (Min - Max) + 18 Minimum 4.2 0 Minimum + 19 Maximum 6.6 0 Maximum + 20 Median (Min - Max) 5.3 (4.2 - 6.6) 0 Median (Min - Max) + 21 CV (%) 15.2 0 CV (%) 22 Geometric Mean 5.1 0 Geometric Mean - 23 CV % Geometric Mean 15.2 0 CV % Geometric Mean - 24 p-value (t-test) <0.0001 0 p-value (t-test) + 23 Geometric Mean 95% CI (4.56, 5.66) 0 Geometric Mean 95% CI + 24 CV % Geometric Mean 15.2 0 CV % Geometric Mean + 25 p-value (t-test) <0.0001 0 p-value (t-test) --- diff --git a/tests/testthat/test-analyze_variables.R b/tests/testthat/test-analyze_variables.R index 4c2bf85f8a..b1a0590f5d 100644 --- a/tests/testthat/test-analyze_variables.R +++ b/tests/testthat/test-analyze_variables.R @@ -364,7 +364,7 @@ testthat::test_that("`analyze_vars` works with character input and gives the sam l <- basic_table() %>% analyze_vars(vars = "foo") - testthat::expect_warning(result <- build_table(l, dta)) + result <- build_table(l, dta) dta_factor <- dta %>% dplyr::mutate(foo = factor(foo)) From 9af071099edf54e438aba17fe110454e98c64360 Mon Sep 17 00:00:00 2001 From: "27856297+dependabot-preview[bot]@users.noreply.github.com" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Tue, 4 Jul 2023 22:51:06 +0000 Subject: [PATCH 22/33] [skip actions] Roxygen Man Pages Auto Update --- man/compare_variables.Rd | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd index 7ba393662c..b9c7117358 100644 --- a/man/compare_variables.Rd +++ b/man/compare_variables.Rd @@ -38,16 +38,6 @@ a_compare( .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, - .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), - "_labels"))), - .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), - "_formats")), - .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), - "_labels")), - .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), - "_indents")), - na.rm = TRUE, - na_level = NA_character_, ... ) @@ -95,18 +85,6 @@ to print out information about factor casting. Defaults to \code{TRUE}.} \item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.} -\item{.stats}{(\code{character})\cr statistics to select for the table.} - -\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} - -\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} - -\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector -should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation -for that statistic's row label.} - -\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} - \item{lyt}{(\code{layout})\cr input layout where analyses will be added to.} \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.} @@ -117,6 +95,8 @@ for that statistic's row label.} possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split underneath analyses, which is not allowed.} +\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} + \item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".} \item{table_names}{(\code{character})\cr this can be customized in case that the same \code{vars} are analyzed multiple times, @@ -124,6 +104,16 @@ to avoid warnings from \code{rtables}.} \item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group defined by this split instruction, or \code{NA_character_} (the default) for no section divider.} + +\item{.stats}{(\code{character})\cr statistics to select for the table.} + +\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} + +\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} + +\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector +should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation +for that statistic's row label.} } \value{ \itemize{ From a4bcb31c09e4f466c3a40b463cffc356c79b3db5 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 4 Jul 2023 20:31:21 -0400 Subject: [PATCH 23/33] Fix docs --- NAMESPACE | 4 +- R/analyze_variables.R | 231 ++++++++------------- R/utils_factor.R | 15 +- man/analyze_variables.Rd | 102 +++++---- man/compare_variables.Rd | 34 ++- man/summary_custom.Rd | 49 ----- man/summary_formats.Rd | 22 -- man/summary_labels.Rd | 22 -- man/summary_stats.Rd | 90 ++++++++ man/ungroup_stats.Rd | 14 +- tests/testthat/_snaps/analyze_variables.md | 198 +++++++++++++++++- tests/testthat/test-analyze_variables.R | 98 ++++++++- 12 files changed, 552 insertions(+), 327 deletions(-) delete mode 100644 man/summary_custom.Rd delete mode 100644 man/summary_formats.Rd delete mode 100644 man/summary_labels.Rd create mode 100644 man/summary_stats.Rd diff --git a/NAMESPACE b/NAMESPACE index d21c817875..2717dc01a0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,6 @@ # Generated by roxygen2: do not edit by hand -S3method(a_summary,character) -S3method(a_summary,factor) -S3method(a_summary,logical) +S3method(a_summary,default) S3method(a_summary,numeric) S3method(as.rtable,data.frame) S3method(h_coxreg_inter_effect,character) diff --git a/R/analyze_variables.R b/R/analyze_variables.R index 21cf753303..8539f09bf9 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -34,13 +34,23 @@ control_analyze_vars <- function(conf_level = 0.95, control_summarize_vars <- control_analyze_vars -#' Format Function for Descriptive Statistics +#' Summary Statistic Settings Functions #' -#' Returns format patterns for descriptive statistics. The format is understood by `rtables`. +#' @description `r lifecycle::badge("stable")` +#' +#' Functions to retrieve default settings for summary statistics and customize these settings. #' -#' @param type (`string`)\cr choice of a summary data type. Only `counts` and `numeric` types are currently supported. +#' @param type (`character`)\cr choice of summary data type. Only `counts` and `numeric` types are currently supported. +#' @param include_pval (`logical`)\cr whether p-value should be included as a default statistic. #' -#' @return A named `vector` of default statistic formats for the given data type. +#' @name summary_stats +NULL + +#' @describeIn summary_stats Function to retrieve default formats for summary statistics. Returns format patterns for +#' descriptive statistics which are understood by `rtables`. +#' +#' @return +#' * `summary_formats` returns a named `vector` of default statistic formats for the given data type. #' #' @examples #' summary_formats() @@ -89,13 +99,11 @@ summary_formats <- function(type = "numeric", include_pval = FALSE) { fmts } -#' Label Function for Descriptive Statistics -#' -#' Returns labels of descriptive statistics for numeric variables. -#' -#' @param type (`string`)\cr choice of a summary data type. Only `counts` and `numeric` types are currently supported. +#' @describeIn summary_stats Function to retrieve default labels for summary statistics. Returns labels of descriptive +#' statistics which are understood by `rtables`. #' -#' @return A named `vector` of default statistic labels for the given data type. +#' @return +#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type. #' #' @examples #' summary_labels() @@ -144,30 +152,33 @@ summary_labels <- function(type = "numeric", include_pval = FALSE) { lbls } -#' Set Defaults Settings for Summary Statistics +#' @describeIn summary_stats Function to configure settings for default or custom summary statistics for a given data +#' type. In addition to selecting a custom subset of statistics, the user can also set custom formats, labels, and +#' indent modifiers for any of these statistics. #' -#' @inheritParams summary_formats -#' @param include_pval (`logical`)\cr whether p-value should be included as a default statistic. -#' @param stats_custom (`named vector` of `character`) vector of statistics to include if not the defaults. This +#' @param stats_custom (`named vector` of `character`)\cr vector of statistics to include if not the defaults. This #' argument overrides `include_pval` and other custom value arguments such that only settings for these statistics #' will be returned. -#' @param formats_custom (`named vector` of `character`) vector of custom statistics formats to use in place of the +#' @param formats_custom (`named vector` of `character`)\cr vector of custom statistics formats to use in place of the #' defaults defined in [`summary_formats()`]. Names should be a subset of the statistics defined in `stats_custom` (or #' default statistics if this is `NULL`). -#' @param labels_custom (`named vector` of `character`) vector of custom statistics labels to use in place of the +#' @param labels_custom (`named vector` of `character`)\cr vector of custom statistics labels to use in place of the #' defaults defined in [`summary_labels()`]. Names should be a subset of the statistics defined in `stats_custom` (or #' default statistics if this is `NULL`). -#' @param indents_custom (`integer` or `named vector` of `integer`) vector of custom indentation modifiers for +#' @param indent_mods_custom (`integer` or `named vector` of `integer`)\cr vector of custom indentation modifiers for #' statistics to use instead of the default of `0L` for all statistics. Names should be a subset of the statistics #' defined in `stats_custom` (or default statistics if this is `NULL`). Alternatively, the same indentation modifier -#' can be applied to all statistics by setting `indents_custom` to a single integer value. +#' can be applied to all statistics by setting `indent_mods_custom` to a single integer value. +#' +#' @return +#' * `summary_custom` returns a `list` of 4 named elements: `stats`, `formats`, `labels`, and `indent_mods`. #' #' @examples #' summary_custom() #' summary_custom(type = "counts", include_pval = TRUE) #' summary_custom( #' include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"), -#' labels_custom = c(sd = "Std. Dev."), indents_custom = 3L +#' labels_custom = c(sd = "Std. Dev."), indent_mods_custom = 3L #' ) #' #' @export @@ -176,21 +187,21 @@ summary_custom <- function(type = "numeric", stats_custom = NULL, formats_custom = NULL, labels_custom = NULL, - indents_custom = NULL) { + indent_mods_custom = NULL) { if ("pval" %in% stats_custom) include_pval <- TRUE .formats <- summary_formats(type = type, include_pval = include_pval) .stats <- if (is.null(stats_custom)) names(.formats) else intersect(stats_custom, names(.formats)) .labels <- summary_labels(type = type, include_pval = include_pval) - .indents <- setNames(rep(0L, length(.stats)), .stats) + .indent_mods <- setNames(rep(0L, length(.stats)), .stats) if (!is.null(formats_custom)) .formats[names(formats_custom)] <- formats_custom if (!is.null(labels_custom)) .labels[names(labels_custom)] <- labels_custom - if (!is.null(indents_custom)) { - if (is.null(names(indents_custom)) && length(indents_custom) == 1) { - .indents[names(.indents)] <- indents_custom + if (!is.null(indent_mods_custom)) { + if (is.null(names(indent_mods_custom)) && length(indent_mods_custom) == 1) { + .indent_mods[names(.indent_mods)] <- indent_mods_custom } else { - .indents[names(indents_custom)] <- indents_custom + .indent_mods[names(indent_mods_custom)] <- indent_mods_custom } } @@ -198,7 +209,7 @@ summary_custom <- function(type = "numeric", stats = .stats, formats = .formats[.stats], labels = .labels[.stats], - indents = .indents[.stats] + indent_mods = .indent_mods[.stats] ) } @@ -499,6 +510,7 @@ s_summary.factor <- function(x, #' s_summary(c("a", "a", "b", "c", "a", ""), .var = "x", na.rm = FALSE, verbose = FALSE) #' #' @export + s_summary.character <- function(x, na.rm = TRUE, # nolint denom = c("n", "N_row", "N_col"), @@ -578,21 +590,23 @@ s_summary.logical <- function(x, y } - - -#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and -#' `compare_vars()` and as `cfun` in `summarize_colvars()`. +#' @describeIn analyze_variables Formatted analysis helper function which is used within `a_summary`. #' #' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics #' (`compare = TRUE` adds `pval` statistic comparing against reference group). +#' @param type (`character`)\cr type of statistics to calculate given `x`. If `x` is numeric `type` should be +#' `"numeric"`, otherwise type should be `"counts"`. #' #' @return -#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()]. +#' * `a_summary_output()` returns the corresponding list with formatted [rtables::CellValue()]. #' #' @note #' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`. #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is. #' +#' @examples +#' a_summary_output() +#' #' @export a_summary_output <- function(x, .N_col, # nolint @@ -632,12 +646,12 @@ a_summary_output <- function(x, stats_custom = .stats, formats_custom = .formats, labels_custom = .labels, - indents_custom = .indent_mods + indent_mods_custom = .indent_mods ) .stats <- custom_summary$stats .formats <- custom_summary$formats .labels <- custom_summary$labels - .indent_mods <- custom_summary$indents + .indent_mods <- custom_summary$indent_mods if (is.numeric(x)) { for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) { @@ -648,7 +662,7 @@ a_summary_output <- function(x, } x_stats <- x_stats[.stats] - if (is.factor(x)) { + if (is.factor(x) || is.character(x)) { x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col) x_stats <- x_ungrp[["x"]] .stats <- x_ungrp[[".stats"]] @@ -667,12 +681,9 @@ a_summary_output <- function(x, ) } -#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and +#' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and #' `compare_vars()` and as `cfun` in `summarize_colvars()`. #' -#' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics -#' (`compare = TRUE` adds `pval` statistic comparing against reference group). -#' #' @return #' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()]. #' @@ -692,49 +703,9 @@ a_summary <- function(x, UseMethod("a_summary", x) } -#' @describeIn analyze_variables Formatted analysis function method for `factor` class. +#' @describeIn analyze_variables Formatted analysis function `default` method for non-numeric classes. #' -#' @examples -#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") -#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) -#' -#' @export -a_summary.numeric <- function(x, - .N_col, # nolint - .N_row, # nolint - .var = NULL, - .df_row = NULL, - .ref_group = NULL, - .in_ref_col = FALSE, - compare = FALSE, - .stats = summary_custom(include_pval = compare)$stats, - .formats = summary_custom(include_pval = compare)$formats, - .labels = summary_custom(include_pval = compare)$labels, - .indent_mods = summary_custom(include_pval = compare)$indents, - na.rm = TRUE, # nolint - na_level = NA_character_, - ...) { - a_summary_output( - x = x, - .N_col = .N_col, - .N_row = .N_row, - .var = .var, - .df_row = .df_row, - .ref_group = .ref_group, - .in_ref_col = .in_ref_col, - compare = compare, - type = "numeric", - .stats = .stats, - .formats = .formats, - .labels = .labels, - .indent_mods = .indent_mods, - na.rm = na.rm, - na_level = na_level, - ... - ) -} - -#' @describeIn analyze_variables Formatted analysis function method for `factor` class. +#' @method a_summary default #' #' @examples #' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) @@ -743,69 +714,34 @@ a_summary.numeric <- function(x, #' .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE #' ) #' -#' @export -a_summary.factor <- function(x, - .N_col, # nolint - .N_row, # nolint - .var = NULL, - .df_row = NULL, - .ref_group = NULL, - .in_ref_col = FALSE, - compare = FALSE, - .stats = summary_custom(type = "counts", include_pval = compare)$stats, - .formats = summary_custom(type = "counts", include_pval = compare)$formats, - .labels = summary_custom(type = "counts", include_pval = compare)$labels, - .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, - na.rm = TRUE, # nolint - na_level = NA_character_, - ...) { - a_summary_output( - x = x, - .N_col = .N_col, - .N_row = .N_row, - .var = .var, - .df_row = .df_row, - .ref_group = .ref_group, - .in_ref_col = .in_ref_col, - compare = compare, - type = "counts", - .stats = .stats, - .formats = .formats, - .labels = .labels, - .indent_mods = .indent_mods, - na.rm = na.rm, - na_level = na_level, - ... - ) -} - -#' @describeIn analyze_variables Formatted analysis function method for `character` class. -#' -#' @examples #' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) #' a_summary( #' c("A", "B", "A", "C"), #' .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE #' ) #' +#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) +#' a_summary( +#' c(TRUE, FALSE, FALSE, TRUE, TRUE), +#' .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE +#' ) +#' #' @export -a_summary.character <- function(x, - .N_col, # nolint - .N_row, # nolint - .var = NULL, - .df_row = NULL, - .ref_group = NULL, - .in_ref_col = FALSE, - compare = FALSE, - .stats = summary_custom(type = "counts", include_pval = compare)$stats, - .formats = summary_custom(type = "counts", include_pval = compare)$formats, - .labels = summary_custom(type = "counts", include_pval = compare)$labels, - .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, - na.rm = TRUE, # nolint - na_level = NA_character_, - ...) { - x <- as.factor(x) - .ref_group <- as.factor(.ref_group) +a_summary.default <- function(x, + .N_col, # nolint + .N_row, # nolint + .var = NULL, + .df_row = NULL, + .ref_group = NULL, + .in_ref_col = FALSE, + compare = FALSE, + .stats = summary_custom(type = "counts", include_pval = compare)$stats, + .formats = summary_custom(type = "counts", include_pval = compare)$formats, + .labels = summary_custom(type = "counts", include_pval = compare)$labels, + .indent_mods = summary_custom(type = "counts", include_pval = compare)$indent_mods, + na.rm = TRUE, # nolint + na_level = NA_character_, + ...) { a_summary_output( x = x, .N_col = .N_col, @@ -826,17 +762,16 @@ a_summary.character <- function(x, ) } -#' @describeIn analyze_variables Formatted analysis function method for `logical` class. +#' @describeIn analyze_variables Formatted analysis function method for `numeric` class. +#' +#' @method a_summary numeric #' #' @examples -#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) -#' a_summary( -#' c(TRUE, FALSE, FALSE, TRUE, TRUE), -#' .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE -#' ) +#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") +#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) #' #' @export -a_summary.logical <- function(x, +a_summary.numeric <- function(x, .N_col, # nolint .N_row, # nolint .var = NULL, @@ -844,10 +779,10 @@ a_summary.logical <- function(x, .ref_group = NULL, .in_ref_col = FALSE, compare = FALSE, - .stats = summary_custom(type = "counts", include_pval = compare)$stats, - .formats = summary_custom(type = "counts", include_pval = compare)$formats, - .labels = summary_custom(type = "counts", include_pval = compare)$labels, - .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, + .stats = summary_custom(include_pval = compare)$stats, + .formats = summary_custom(include_pval = compare)$formats, + .labels = summary_custom(include_pval = compare)$labels, + .indent_mods = summary_custom(include_pval = compare)$indent_mods, na.rm = TRUE, # nolint na_level = NA_character_, ...) { @@ -860,7 +795,7 @@ a_summary.logical <- function(x, .ref_group = .ref_group, .in_ref_col = .in_ref_col, compare = compare, - type = "counts", + type = "numeric", .stats = .stats, .formats = .formats, .labels = .labels, diff --git a/R/utils_factor.R b/R/utils_factor.R index 1e727061c8..85b5c7677f 100644 --- a/R/utils_factor.R +++ b/R/utils_factor.R @@ -284,15 +284,24 @@ fct_collapse_only <- function(.f, ..., .na_level = "") { #' `character()` if current column is a reference column. Used within [`a_summary()`]. #' #' @inheritParams argument_convention +#' @param x (`named list` of `numeric`)\cr list of numeric statistics containing the statistics to ungroup. +#' @param which_stats (`vector` of `character`)\cr which statistics should be ungrouped. #' #' @return A `list` with modified elements `x`, `.stats`, `.formats`, `.labels`, and `.indent_mods`. #' #' @seealso [a_summary()] which uses this function internally. #' #' @keywords internal -ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_col = FALSE) { - checkmate::assert_true(!is.numeric(x) & !is.logical(x)) - for (stat in c("count", "count_fraction")) { +ungroup_stats <- function(x, + .stats, + .formats, + .labels, + .indent_mods, + .in_ref_col = FALSE, + which_stats = c("count", "count_fraction")) { + checkmate::assert_list(x) + + for (stat in which_stats) { for (a in names(x[[stat]])) { a <- if (a == "na-level") "NA" else a a_lvl <- paste(stat, a, sep = ".") diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd index 714af59a8d..a09d5e2962 100644 --- a/man/analyze_variables.Rd +++ b/man/analyze_variables.Rd @@ -7,10 +7,10 @@ \alias{s_summary.factor} \alias{s_summary.character} \alias{s_summary.logical} +\alias{a_summary_output} +\alias{a_summary} +\alias{a_summary.default} \alias{a_summary.numeric} -\alias{a_summary.factor} -\alias{a_summary.character} -\alias{a_summary.logical} \alias{analyze_vars} \title{Analyze Variables} \usage{ @@ -56,43 +56,28 @@ s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...) ... ) -\method{a_summary}{numeric}( +a_summary_output( x, .N_col, .N_row, - .var = NULL, - .df_row = NULL, - .ref_group = NULL, - .in_ref_col = FALSE, - compare = FALSE, - .stats = summary_custom(include_pval = compare)$stats, - .formats = summary_custom(include_pval = compare)$formats, - .labels = summary_custom(include_pval = compare)$labels, - .indent_mods = summary_custom(include_pval = compare)$indents, - na.rm = TRUE, - na_level = NA_character_, + .var, + .df_row, + .ref_group, + .in_ref_col, + compare, + type, + .stats, + .formats, + .labels, + .indent_mods, + na.rm, + na_level, ... ) -\method{a_summary}{factor}( - x, - .N_col, - .N_row, - .var = NULL, - .df_row = NULL, - .ref_group = NULL, - .in_ref_col = FALSE, - compare = FALSE, - .stats = summary_custom(type = "counts", include_pval = compare)$stats, - .formats = summary_custom(type = "counts", include_pval = compare)$formats, - .labels = summary_custom(type = "counts", include_pval = compare)$labels, - .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, - na.rm = TRUE, - na_level = NA_character_, - ... -) +a_summary(x, .N_col, .N_row, .var, .df_row, .ref_group, .in_ref_col, ...) -\method{a_summary}{character}( +\method{a_summary}{default}( x, .N_col, .N_row, @@ -104,13 +89,13 @@ s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...) .stats = summary_custom(type = "counts", include_pval = compare)$stats, .formats = summary_custom(type = "counts", include_pval = compare)$formats, .labels = summary_custom(type = "counts", include_pval = compare)$labels, - .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, + .indent_mods = summary_custom(type = "counts", include_pval = compare)$indent_mods, na.rm = TRUE, na_level = NA_character_, ... ) -\method{a_summary}{logical}( +\method{a_summary}{numeric}( x, .N_col, .N_row, @@ -119,10 +104,10 @@ s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...) .ref_group = NULL, .in_ref_col = FALSE, compare = FALSE, - .stats = summary_custom(type = "counts", include_pval = compare)$stats, - .formats = summary_custom(type = "counts", include_pval = compare)$formats, - .labels = summary_custom(type = "counts", include_pval = compare)$labels, - .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents, + .stats = summary_custom(include_pval = compare)$stats, + .formats = summary_custom(include_pval = compare)$formats, + .labels = summary_custom(include_pval = compare)$labels, + .indent_mods = summary_custom(include_pval = compare)$indent_mods, na.rm = TRUE, na_level = NA_character_, ... @@ -186,6 +171,12 @@ to print out information about factor casting.} \item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.} +\item{compare}{(\code{logical})\cr Whether comparison statistics should be analyzed instead of summary statistics +(\code{compare = TRUE} adds \code{pval} statistic comparing against reference group).} + +\item{type}{(\code{character})\cr type of statistics to calculate given \code{x}. If \code{x} is numeric \code{type} should be +\code{"numeric"}, otherwise type should be \code{"counts"}.} + \item{.stats}{(\code{character})\cr statistics to select for the table.} \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} @@ -271,6 +262,14 @@ denominator is zero. Note that \code{NA}s in \code{x} are never counted or leadi } } +\itemize{ +\item \code{a_summary_output()} returns the corresponding list with formatted \code{\link[rtables:CellValue]{rtables::CellValue()}}. +} + +\itemize{ +\item \code{a_summary()} returns the corresponding list with formatted \code{\link[rtables:CellValue]{rtables::CellValue()}}. +} + \itemize{ \item \code{analyze_vars()} returns a layout object suitable for passing to further layouting functions, or to \code{\link[rtables:build_table]{rtables::build_table()}}. Adding this function to an \code{rtable} layout will add formatted rows containing @@ -296,13 +295,14 @@ conversion to factor (with a warning) and then forwards to the method for factor \item \code{s_summary(logical)}: Method for \code{logical} class. -\item \code{a_summary(numeric)}: Formatted analysis function method for \code{factor} class. +\item \code{a_summary_output()}: Formatted analysis helper function which is used within \code{a_summary}. -\item \code{a_summary(factor)}: Formatted analysis function method for \code{factor} class. +\item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{summarize_vars()} and +\code{compare_vars()} and as \code{cfun} in \code{summarize_colvars()}. -\item \code{a_summary(character)}: Formatted analysis function method for \code{character} class. +\item \code{a_summary(default)}: Formatted analysis function \code{default} method for non-numeric classes. -\item \code{a_summary(logical)}: Formatted analysis function method for \code{logical} class. +\item \code{a_summary(numeric)}: Formatted analysis function method for \code{numeric} class. \item \code{analyze_vars()}: Layout-creating function which can take statistics function arguments and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}. @@ -332,6 +332,16 @@ It is therefore better to always pre-process the dataset such that factors are m created from character variables before passing the dataset to \code{\link[rtables:build_table]{rtables::build_table()}}. } +\itemize{ +\item To use for comparison (with additional p-value statistic), parameter \code{compare} must be set to \code{TRUE}. +\item Ensure that either all \code{NA} values are converted to an explicit \code{NA} level or all \code{NA} values are left as is. +} + +\itemize{ +\item To use for comparison (with additional p-value statistic), parameter \code{compare} must be set to \code{TRUE}. +\item Ensure that either all \code{NA} values are converted to an explicit \code{NA} level or all \code{NA} values are left as is. +} + Deprecation cycle started for \code{summarize_vars} as it is going to renamed into \code{analyze_vars}. Intention is to reflect better the core underlying \code{rtables} functions; in this case \code{\link[rtables:analyze]{rtables::analyze()}}. @@ -408,8 +418,7 @@ x <- c(TRUE, FALSE, TRUE, TRUE) s_summary(x, denom = "N_row", .N_row = 10L) s_summary(x, denom = "N_col", .N_col = 20L) -a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") -a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) +a_summary_output() a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) a_summary( @@ -429,6 +438,9 @@ a_summary( .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE ) +a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") +a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) + ## Fabricated dataset. dta_test <- data.frame( USUBJID = rep(1:6, each = 3), diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd index 7ba393662c..b9c7117358 100644 --- a/man/compare_variables.Rd +++ b/man/compare_variables.Rd @@ -38,16 +38,6 @@ a_compare( .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, - .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), - "_labels"))), - .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), - "_formats")), - .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), - "_labels")), - .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), - "_indents")), - na.rm = TRUE, - na_level = NA_character_, ... ) @@ -95,18 +85,6 @@ to print out information about factor casting. Defaults to \code{TRUE}.} \item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.} -\item{.stats}{(\code{character})\cr statistics to select for the table.} - -\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} - -\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} - -\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector -should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation -for that statistic's row label.} - -\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} - \item{lyt}{(\code{layout})\cr input layout where analyses will be added to.} \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.} @@ -117,6 +95,8 @@ for that statistic's row label.} possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split underneath analyses, which is not allowed.} +\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} + \item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".} \item{table_names}{(\code{character})\cr this can be customized in case that the same \code{vars} are analyzed multiple times, @@ -124,6 +104,16 @@ to avoid warnings from \code{rtables}.} \item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group defined by this split instruction, or \code{NA_character_} (the default) for no section divider.} + +\item{.stats}{(\code{character})\cr statistics to select for the table.} + +\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} + +\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} + +\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector +should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation +for that statistic's row label.} } \value{ \itemize{ diff --git a/man/summary_custom.Rd b/man/summary_custom.Rd deleted file mode 100644 index ece05515c2..0000000000 --- a/man/summary_custom.Rd +++ /dev/null @@ -1,49 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/analyze_variables.R -\name{summary_custom} -\alias{summary_custom} -\title{Set Defaults Settings for Summary Statistics} -\usage{ -summary_custom( - type = "numeric", - include_pval = FALSE, - stats_custom = NULL, - formats_custom = NULL, - labels_custom = NULL, - indents_custom = NULL -) -} -\arguments{ -\item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.} - -\item{include_pval}{(\code{logical})\cr whether p-value should be included as a default statistic.} - -\item{stats_custom}{(\verb{named vector} of \code{character}) vector of statistics to include if not the defaults. This -argument overrides \code{include_pval} and other custom value arguments such that only settings for these statistics -will be returned.} - -\item{formats_custom}{(\verb{named vector} of \code{character}) vector of custom statistics formats to use in place of the -defaults defined in \code{\link[=summary_formats]{summary_formats()}}. Names should be a subset of the statistics defined in \code{stats_custom} (or -default statistics if this is \code{NULL}).} - -\item{labels_custom}{(\verb{named vector} of \code{character}) vector of custom statistics labels to use in place of the -defaults defined in \code{\link[=summary_labels]{summary_labels()}}. Names should be a subset of the statistics defined in \code{stats_custom} (or -default statistics if this is \code{NULL}).} - -\item{indents_custom}{(\code{integer} or \verb{named vector} of \code{integer}) vector of custom indentation modifiers for -statistics to use instead of the default of \code{0L} for all statistics. Names should be a subset of the statistics -defined in \code{stats_custom} (or default statistics if this is \code{NULL}). Alternatively, the same indentation modifier -can be applied to all statistics by setting \code{indents_custom} to a single integer value.} -} -\description{ -Set Defaults Settings for Summary Statistics -} -\examples{ -summary_custom() -summary_custom(type = "counts", include_pval = TRUE) -summary_custom( - include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"), - labels_custom = c(sd = "Std. Dev."), indents_custom = 3L -) - -} diff --git a/man/summary_formats.Rd b/man/summary_formats.Rd deleted file mode 100644 index 86ea9d925d..0000000000 --- a/man/summary_formats.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/analyze_variables.R -\name{summary_formats} -\alias{summary_formats} -\title{Format Function for Descriptive Statistics} -\usage{ -summary_formats(type = "numeric", include_pval = FALSE) -} -\arguments{ -\item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.} -} -\value{ -A named \code{vector} of default statistic formats for the given data type. -} -\description{ -Returns format patterns for descriptive statistics. The format is understood by \code{rtables}. -} -\examples{ -summary_formats() -summary_formats(type = "count", include_pval = TRUE) - -} diff --git a/man/summary_labels.Rd b/man/summary_labels.Rd deleted file mode 100644 index f2306a536c..0000000000 --- a/man/summary_labels.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/analyze_variables.R -\name{summary_labels} -\alias{summary_labels} -\title{Label Function for Descriptive Statistics} -\usage{ -summary_labels(type = "numeric", include_pval = FALSE) -} -\arguments{ -\item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.} -} -\value{ -A named \code{vector} of default statistic labels for the given data type. -} -\description{ -Returns labels of descriptive statistics for numeric variables. -} -\examples{ -summary_labels() -summary_labels(type = "count", include_pval = TRUE) - -} diff --git a/man/summary_stats.Rd b/man/summary_stats.Rd new file mode 100644 index 0000000000..1821410347 --- /dev/null +++ b/man/summary_stats.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyze_variables.R +\name{summary_stats} +\alias{summary_stats} +\alias{summary_formats} +\alias{summary_labels} +\alias{summary_custom} +\title{Summary Statistic Settings Functions} +\usage{ +summary_formats(type = "numeric", include_pval = FALSE) + +summary_labels(type = "numeric", include_pval = FALSE) + +summary_custom( + type = "numeric", + include_pval = FALSE, + stats_custom = NULL, + formats_custom = NULL, + labels_custom = NULL, + indent_mods_custom = NULL +) +} +\arguments{ +\item{type}{(\code{character})\cr choice of summary data type. Only \code{counts} and \code{numeric} types are currently supported.} + +\item{include_pval}{(\code{logical})\cr whether p-value should be included as a default statistic.} + +\item{stats_custom}{(\verb{named vector} of \code{character})\cr vector of statistics to include if not the defaults. This +argument overrides \code{include_pval} and other custom value arguments such that only settings for these statistics +will be returned.} + +\item{formats_custom}{(\verb{named vector} of \code{character})\cr vector of custom statistics formats to use in place of the +defaults defined in \code{\link[=summary_formats]{summary_formats()}}. Names should be a subset of the statistics defined in \code{stats_custom} (or +default statistics if this is \code{NULL}).} + +\item{labels_custom}{(\verb{named vector} of \code{character})\cr vector of custom statistics labels to use in place of the +defaults defined in \code{\link[=summary_labels]{summary_labels()}}. Names should be a subset of the statistics defined in \code{stats_custom} (or +default statistics if this is \code{NULL}).} + +\item{indent_mods_custom}{(\code{integer} or \verb{named vector} of \code{integer})\cr vector of custom indentation modifiers for +statistics to use instead of the default of \code{0L} for all statistics. Names should be a subset of the statistics +defined in \code{stats_custom} (or default statistics if this is \code{NULL}). Alternatively, the same indentation modifier +can be applied to all statistics by setting \code{indent_mods_custom} to a single integer value.} +} +\value{ +\itemize{ +\item \code{summary_formats} returns a named \code{vector} of default statistic formats for the given data type. +} + +\itemize{ +\item \code{summary_labels} returns a named \code{vector} of default statistic labels for the given data type. +} + +\itemize{ +\item \code{summary_custom} returns a \code{list} of 4 named elements: \code{stats}, \code{formats}, \code{labels}, and \code{indent_mods}. +} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} + +Functions to retrieve default settings for summary statistics and customize these settings. +} +\section{Functions}{ +\itemize{ +\item \code{summary_formats()}: Function to retrieve default formats for summary statistics. Returns format patterns for +descriptive statistics which are understood by \code{rtables}. + +\item \code{summary_labels()}: Function to retrieve default labels for summary statistics. Returns labels of descriptive +statistics which are understood by \code{rtables}. + +\item \code{summary_custom()}: Function to configure settings for default or custom summary statistics for a given data +type. In addition to selecting a custom subset of statistics, the user can also set custom formats, labels, and +indent modifiers for any of these statistics. + +}} +\examples{ +summary_formats() +summary_formats(type = "count", include_pval = TRUE) + +summary_labels() +summary_labels(type = "count", include_pval = TRUE) + +summary_custom() +summary_custom(type = "counts", include_pval = TRUE) +summary_custom( + include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"), + labels_custom = c(sd = "Std. Dev."), indent_mods_custom = 3L +) + +} diff --git a/man/ungroup_stats.Rd b/man/ungroup_stats.Rd index c9dffda957..b427561695 100644 --- a/man/ungroup_stats.Rd +++ b/man/ungroup_stats.Rd @@ -4,10 +4,18 @@ \alias{ungroup_stats} \title{Ungroup and Format Non-Numeric Statistics} \usage{ -ungroup_stats(x, .stats, .formats, .labels, .indent_mods, .in_ref_col = FALSE) +ungroup_stats( + x, + .stats, + .formats, + .labels, + .indent_mods, + .in_ref_col = FALSE, + which_stats = c("count", "count_fraction") +) } \arguments{ -\item{x}{(\code{numeric})\cr vector of numbers we want to analyze.} +\item{x}{(\verb{named list} of \code{numeric})\cr list of numeric statistics containing the statistics to ungroup.} \item{.stats}{(\code{character})\cr statistics to select for the table.} @@ -19,6 +27,8 @@ ungroup_stats(x, .stats, .formats, .labels, .indent_mods, .in_ref_col = FALSE) unmodified default behavior. Can be negative.} \item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.} + +\item{which_stats}{(\code{vector} of \code{character})\cr which statistics should be ungrouped.} } \value{ A \code{list} with modified elements \code{x}, \code{.stats}, \code{.formats}, \code{.labels}, and \code{.indent_mods}. diff --git a/tests/testthat/_snaps/analyze_variables.md b/tests/testthat/_snaps/analyze_variables.md index 78d1afbf16..fa1cb362c3 100644 --- a/tests/testthat/_snaps/analyze_variables.md +++ b/tests/testthat/_snaps/analyze_variables.md @@ -16,6 +16,202 @@ [1] 0 +# summary_formats works as expected + + Code + res + Output + n sum mean + "xx." "xx.x" "xx.x" + sd se mean_sd + "xx.x" "xx.x" "xx.x (xx.x)" + mean_se mean_ci mean_sei + "xx.x (xx.x)" "(xx.xx, xx.xx)" "(xx.xx, xx.xx)" + mean_sdi mean_pval median + "(xx.xx, xx.xx)" "xx.xx" "xx.x" + mad median_ci quantiles + "xx.x" "(xx.xx, xx.xx)" "xx.x - xx.x" + iqr range min + "xx.x" "xx.x - xx.x" "xx.x" + max median_range cv + "xx.x" "xx.x (xx.x - xx.x)" "xx.x" + geom_mean geom_mean_ci geom_cv + "xx.x" "(xx.xx, xx.xx)" "xx.x" + +--- + + Code + res + Output + $n + [1] "xx." + + $count + [1] "xx." + + $count_fraction + function(x, ...) { + attr(x, "label") <- NULL + + if (any(is.na(x))) { + return("NA") + } + + checkmate::assert_vector(x) + checkmate::assert_integerish(x[1]) + assert_proportion_value(x[2], include_boundaries = TRUE) + + result <- if (x[1] == 0) { + "0" + } else { + paste0(x[1], " (", round(x[2] * 100, 1), "%)") + } + + return(result) + } + + + $n_blq + [1] "xx." + + $pval + [1] "x.xxxx | (<0.0001)" + + +# summary_labels works as expected + + Code + res + Output + n sum + "n" "Sum" + mean sd + "Mean" "SD" + se mean_sd + "SE" "Mean (SD)" + mean_se mean_ci + "Mean (SE)" "Mean 95% CI" + mean_sei mean_sdi + "Mean -/+ 1xSE" "Mean -/+ 1xSD" + mean_pval median + "Mean p-value (H0: mean = 0)" "Median" + mad median_ci + "Median Absolute Deviation" "Median 95% CI" + quantiles iqr + "25% and 75%-ile" "IQR" + range min + "Min - Max" "Minimum" + max median_range + "Maximum" "Median (Min - Max)" + cv geom_mean + "CV (%)" "Geometric Mean" + geom_mean_ci geom_cv + "Geometric Mean 95% CI" "CV % Geometric Mean" + +--- + + Code + res + Output + n count + "n" "count" + count_fraction n_blq + "count_fraction" "n_blq" + pval + "p-value (chi-squared test)" + +# summary_custom works as expected + + Code + res + Output + $stats + [1] "n" "sum" "mean" "sd" "se" + [6] "mean_sd" "mean_se" "mean_ci" "mean_sei" "mean_sdi" + [11] "mean_pval" "median" "mad" "median_ci" "quantiles" + [16] "iqr" "range" "min" "max" "median_range" + [21] "cv" "geom_mean" "geom_mean_ci" "geom_cv" + + $formats + n sum mean + "xx." "xx.x" "xx.x" + sd se mean_sd + "xx.x" "xx.x" "xx.x (xx.x)" + mean_se mean_ci mean_sei + "xx.x (xx.x)" "(xx.xx, xx.xx)" "(xx.xx, xx.xx)" + mean_sdi mean_pval median + "(xx.xx, xx.xx)" "xx.xx" "xx.x" + mad median_ci quantiles + "xx.x" "(xx.xx, xx.xx)" "xx.x - xx.x" + iqr range min + "xx.x" "xx.x - xx.x" "xx.x" + max median_range cv + "xx.x" "xx.x (xx.x - xx.x)" "xx.x" + geom_mean geom_mean_ci geom_cv + "xx.x" "(xx.xx, xx.xx)" "xx.x" + + $labels + n sum + "n" "Sum" + mean sd + "Mean" "SD" + se mean_sd + "SE" "Mean (SD)" + mean_se mean_ci + "Mean (SE)" "Mean 95% CI" + mean_sei mean_sdi + "Mean -/+ 1xSE" "Mean -/+ 1xSD" + mean_pval median + "Mean p-value (H0: mean = 0)" "Median" + mad median_ci + "Median Absolute Deviation" "Median 95% CI" + quantiles iqr + "25% and 75%-ile" "IQR" + range min + "Min - Max" "Minimum" + max median_range + "Maximum" "Median (Min - Max)" + cv geom_mean + "CV (%)" "Geometric Mean" + geom_mean_ci geom_cv + "Geometric Mean 95% CI" "CV % Geometric Mean" + + $indent_mods + n sum mean sd se mean_sd + 0 0 0 0 0 0 + mean_se mean_ci mean_sei mean_sdi mean_pval median + 0 0 0 0 0 0 + mad median_ci quantiles iqr range min + 0 0 0 0 0 0 + max median_range cv geom_mean geom_mean_ci geom_cv + 0 0 0 0 0 0 + + +--- + + Code + res + Output + $stats + [1] "n" "count" + + $formats + $formats$n + [1] "xx.xx" + + $formats$count + [1] "xx." + + + $labels + n count + "n" "#" + + $indent_mods + n count + 2 2 + + # s_summary return NA for x length 0L Code @@ -920,7 +1116,7 @@ [1] 0 -# a_summary works with healthy input. +# a_summary_output and a_summary work with healthy input. Code res diff --git a/tests/testthat/test-analyze_variables.R b/tests/testthat/test-analyze_variables.R index b1a0590f5d..4e6da3a58b 100644 --- a/tests/testthat/test-analyze_variables.R +++ b/tests/testthat/test-analyze_variables.R @@ -13,6 +13,39 @@ testthat::test_that("control_analyze_vars fails wrong inputs", { testthat::expect_error(control_analyze_vars(conf_level = 95)) }) +testthat::test_that("summary_formats works as expected", { + result <- summary_formats() + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + result <- summary_formats(type = "counts", include_pval = TRUE) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("summary_labels works as expected", { + result <- summary_labels() + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + result <- summary_labels(type = "counts", include_pval = TRUE) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("summary_custom works as expected", { + result <- summary_custom() + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + result <- summary_custom( + type = "counts", stats_custom = c("n", "count"), + formats_custom = c(n = "xx.xx"), labels_custom = c(count = "#"), indent_mods_custom = 2L + ) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + testthat::test_that("s_summary return NA for x length 0L", { x <- numeric() @@ -160,27 +193,72 @@ testthat::test_that("s_summary works with logical vectors and by if requested do testthat::expect_snapshot(res) }) -testthat::test_that("a_summary works with healthy input.", { +testthat::test_that("a_summary_output and a_summary work with healthy input.", { options("width" = 100) - # numeric input + + # numeric input - a_summary_output set.seed(1) - result <- a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") + x <- rnorm(10) + result <- a_summary_output( + x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, + compare = FALSE, type = "numeric", .stats = summary_custom()$stats, .formats = summary_custom()$formats, + .labels = summary_custom()$labels, .indent_mods = summary_custom()$indents, na.rm = TRUE, na_level = NA_character_ + ) + res_out <- testthat::expect_silent(result) + + # numeric input - a_summary + result <- a_summary(x = x, .N_col = 10, .N_row = 20, .var = "bla") res <- testthat::expect_silent(result) + testthat::expect_identical(res_out, res) testthat::expect_snapshot(res) - # factor input - result <- a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) + # factor input - a_summary_output + x <- factor(c("a", "a", "b", "c", "a")) + result <- a_summary_output( + x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, + compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats, + .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels, + .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_ + ) + res_out <- testthat::expect_silent(result) + + # factor input - a_summary + result <- a_summary(x = x, .N_row = 10, .N_col = 10) res <- testthat::expect_silent(result) + testthat::expect_identical(res_out, res) testthat::expect_snapshot(res) - # character input - result <- a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) + # character input - a_summary_output + x <- c("A", "B", "A", "C") + result <- a_summary_output( + x = x, .N_col = 10, .N_row = 10, .var = "x", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, + compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats, + .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels, + .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_, + verbose = FALSE + ) + res_out <- testthat::expect_silent(result) + + # character input - a_summary + result <- a_summary(x = x, .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE) res <- testthat::expect_silent(result) + testthat::expect_identical(res_out, res) testthat::expect_snapshot(res) - # logical input - result <- a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10) + # logical input - a_summary_output + x <- c(TRUE, FALSE, FALSE, TRUE, TRUE) + result <- a_summary_output( + x = x, .N_col = 10, .N_row = 10, .var = NULL, .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, + compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats, + .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels, + .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_ + ) + res_out <- testthat::expect_silent(result) + + # logical input - a_summary + result <- a_summary(x = x, .N_row = 10, .N_col = 10) res <- testthat::expect_silent(result) + testthat::expect_identical(res_out, res) testthat::expect_snapshot(res) }) @@ -364,7 +442,7 @@ testthat::test_that("`analyze_vars` works with character input and gives the sam l <- basic_table() %>% analyze_vars(vars = "foo") - result <- build_table(l, dta) + testthat::expect_warning(result <- build_table(l, dta)) dta_factor <- dta %>% dplyr::mutate(foo = factor(foo)) From a073c27920e506657efd3d07ad6fc6e980535384 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 4 Jul 2023 20:48:39 -0400 Subject: [PATCH 24/33] Fix checks --- R/analyze_functions.R | 2 +- R/analyze_variables.R | 6 +++--- _pkgdown.yml | 1 + man/analyze_functions.Rd | 2 +- man/analyze_variables.Rd | 2 +- man/create_afun_summary.Rd | 2 +- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/R/analyze_functions.R b/R/analyze_functions.R index bc8c364207..20addfd507 100644 --- a/R/analyze_functions.R +++ b/R/analyze_functions.R @@ -32,7 +32,7 @@ #' leverage `analyze_colvars` to have the context split in rows and the analysis #' methods in columns. #' * [summarize_change()] -#' * [analyze_vars()]: formerly known as [summarize_vars()], it was renamed to reflect +#' * [analyze_vars()]: formerly known as `summarize_vars()`, it was renamed to reflect #' core function [rtables::analyze()]. #' * [surv_time()] #' * [surv_timepoint()] diff --git a/R/analyze_variables.R b/R/analyze_variables.R index 8539f09bf9..f807499b23 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -193,7 +193,7 @@ summary_custom <- function(type = "numeric", .formats <- summary_formats(type = type, include_pval = include_pval) .stats <- if (is.null(stats_custom)) names(.formats) else intersect(stats_custom, names(.formats)) .labels <- summary_labels(type = type, include_pval = include_pval) - .indent_mods <- setNames(rep(0L, length(.stats)), .stats) + .indent_mods <- stats::setNames(rep(0L, length(.stats)), .stats) if (!is.null(formats_custom)) .formats[names(formats_custom)] <- formats_custom if (!is.null(labels_custom)) .labels[names(labels_custom)] <- labels_custom @@ -681,7 +681,7 @@ a_summary_output <- function(x, ) } -#' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and +#' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `analyze_vars()` and #' `compare_vars()` and as `cfun` in `summarize_colvars()`. #' #' @return @@ -821,7 +821,7 @@ a_summary.numeric <- function(x, #' #' @note This function has been deprecated in favor of direct implementation of `a_summary()`. #' -#' @seealso [summarize_vars()] +#' @seealso [analyze_vars()] #' #' @export create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) { diff --git a/_pkgdown.yml b/_pkgdown.yml index 7ee0307ab9..228a5a3c77 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -70,6 +70,7 @@ reference: - starts_with("h_") - starts_with("or_") - starts_with("prop_") + - starts_with("summmary_") - -starts_with("h_col_") - -h_content_first_row - -h_data_plot diff --git a/man/analyze_functions.Rd b/man/analyze_functions.Rd index 086a022c4e..14017aabc1 100644 --- a/man/analyze_functions.Rd +++ b/man/analyze_functions.Rd @@ -34,7 +34,7 @@ variables that are split into different columns. In comparison, \link{analyze_co leverage \code{analyze_colvars} to have the context split in rows and the analysis methods in columns. \item \code{\link[=summarize_change]{summarize_change()}} -\item \code{\link[=analyze_vars]{analyze_vars()}}: formerly known as \code{\link[=summarize_vars]{summarize_vars()}}, it was renamed to reflect +\item \code{\link[=analyze_vars]{analyze_vars()}}: formerly known as \code{summarize_vars()}, it was renamed to reflect core function \code{\link[rtables:analyze]{rtables::analyze()}}. \item \code{\link[=surv_time]{surv_time()}} \item \code{\link[=surv_timepoint]{surv_timepoint()}} diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd index a09d5e2962..0a574d5387 100644 --- a/man/analyze_variables.Rd +++ b/man/analyze_variables.Rd @@ -297,7 +297,7 @@ conversion to factor (with a warning) and then forwards to the method for factor \item \code{a_summary_output()}: Formatted analysis helper function which is used within \code{a_summary}. -\item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{summarize_vars()} and +\item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{analyze_vars()} and \code{compare_vars()} and as \code{cfun} in \code{summarize_colvars()}. \item \code{a_summary(default)}: Formatted analysis function \code{default} method for non-numeric classes. diff --git a/man/create_afun_summary.Rd b/man/create_afun_summary.Rd index e97a1e439a..f5bc4e57d6 100644 --- a/man/create_afun_summary.Rd +++ b/man/create_afun_summary.Rd @@ -29,5 +29,5 @@ Constructor function which creates a combined formatted analysis function. This function has been deprecated in favor of direct implementation of \code{a_summary()}. } \seealso{ -\code{\link[=summarize_vars]{summarize_vars()}} +\code{\link[=analyze_vars]{analyze_vars()}} } From adaccca93cb11496692558faa55d02da718d004c Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 4 Jul 2023 20:53:41 -0400 Subject: [PATCH 25/33] Add examples --- R/analyze_variables.R | 17 ++++++++++++++++- man/analyze_variables.Rd | 17 ++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/R/analyze_variables.R b/R/analyze_variables.R index f807499b23..8c1e4154c2 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -605,7 +605,22 @@ s_summary.logical <- function(x, #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is. #' #' @examples -#' a_summary_output() +#' set.seed(1) +#' x <- rnorm(10) +#' a_summary_output( +#' x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, +#' compare = FALSE, type = "numeric", .stats = summary_custom()$stats, .formats = summary_custom()$formats, +#' .labels = summary_custom()$labels, .indent_mods = summary_custom()$indents, +#' na.rm = TRUE, na_level = NA_character_ +#' ) +#' +#' x <- factor(c("a", "a", "b", "c", "a")) +#' a_summary_output( +#' x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, +#' compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats, +#' .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels, +#' .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_ +#' ) #' #' @export a_summary_output <- function(x, diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd index 0a574d5387..26f21fe37e 100644 --- a/man/analyze_variables.Rd +++ b/man/analyze_variables.Rd @@ -418,7 +418,22 @@ x <- c(TRUE, FALSE, TRUE, TRUE) s_summary(x, denom = "N_row", .N_row = 10L) s_summary(x, denom = "N_col", .N_col = 20L) -a_summary_output() +set.seed(1) +x <- rnorm(10) +a_summary_output( + x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, + compare = FALSE, type = "numeric", .stats = summary_custom()$stats, .formats = summary_custom()$formats, + .labels = summary_custom()$labels, .indent_mods = summary_custom()$indents, + na.rm = TRUE, na_level = NA_character_ +) + +x <- factor(c("a", "a", "b", "c", "a")) +a_summary_output( + x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, + compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats, + .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels, + .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_ +) a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) a_summary( From 2953a5d16d6ab5f5f487bc695ae3305eea050603 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 4 Jul 2023 21:03:19 -0400 Subject: [PATCH 26/33] Update WORDLIST --- _pkgdown.yml | 2 +- inst/WORDLIST | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 228a5a3c77..e9fc0e9df1 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -70,7 +70,7 @@ reference: - starts_with("h_") - starts_with("or_") - starts_with("prop_") - - starts_with("summmary_") + - summary_stats - -starts_with("h_col_") - -h_content_first_row - -h_data_plot diff --git a/inst/WORDLIST b/inst/WORDLIST index 06c569fd9d..ad964f9947 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -32,4 +32,5 @@ subtable subtables unformatted ungroup +ungrouped unstratified From 10d47942ab3dcbe7859392d07fafb1adca064053 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 4 Jul 2023 21:16:16 -0400 Subject: [PATCH 27/33] roxygen examples too wide --- R/analyze_variables.R | 19 +++++++++++-------- man/analyze_variables.Rd | 19 +++++++++++-------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/R/analyze_variables.R b/R/analyze_variables.R index 8c1e4154c2..052a2534a6 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -608,18 +608,21 @@ s_summary.logical <- function(x, #' set.seed(1) #' x <- rnorm(10) #' a_summary_output( -#' x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, -#' compare = FALSE, type = "numeric", .stats = summary_custom()$stats, .formats = summary_custom()$formats, -#' .labels = summary_custom()$labels, .indent_mods = summary_custom()$indents, -#' na.rm = TRUE, na_level = NA_character_ +#' x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, +#' .in_ref_col = FALSE, compare = FALSE, type = "numeric", .stats = summary_custom()$stats, +#' .formats = summary_custom()$formats, .labels = summary_custom()$labels, +#' .indent_mods = summary_custom()$indents, na.rm = TRUE, na_level = NA_character_ #' ) #' #' x <- factor(c("a", "a", "b", "c", "a")) #' a_summary_output( -#' x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, -#' compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats, -#' .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels, -#' .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_ +#' x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, +#' .in_ref_col = FALSE, compare = FALSE, type = "counts", +#' .stats = summary_custom(type = "counts")$stats, +#' .formats = summary_custom(type = "counts")$formats, +#' .labels = summary_custom(type = "counts")$labels, +#' .indent_mods = summary_custom(type = "counts")$indents, +#' na.rm = TRUE, na_level = NA_character_ #' ) #' #' @export diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd index 26f21fe37e..24cbc1d250 100644 --- a/man/analyze_variables.Rd +++ b/man/analyze_variables.Rd @@ -421,18 +421,21 @@ s_summary(x, denom = "N_col", .N_col = 20L) set.seed(1) x <- rnorm(10) a_summary_output( - x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, - compare = FALSE, type = "numeric", .stats = summary_custom()$stats, .formats = summary_custom()$formats, - .labels = summary_custom()$labels, .indent_mods = summary_custom()$indents, - na.rm = TRUE, na_level = NA_character_ + x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, + .in_ref_col = FALSE, compare = FALSE, type = "numeric", .stats = summary_custom()$stats, + .formats = summary_custom()$formats, .labels = summary_custom()$labels, + .indent_mods = summary_custom()$indents, na.rm = TRUE, na_level = NA_character_ ) x <- factor(c("a", "a", "b", "c", "a")) a_summary_output( - x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, - compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats, - .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels, - .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_ + x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, + .in_ref_col = FALSE, compare = FALSE, type = "counts", + .stats = summary_custom(type = "counts")$stats, + .formats = summary_custom(type = "counts")$formats, + .labels = summary_custom(type = "counts")$labels, + .indent_mods = summary_custom(type = "counts")$indents, + na.rm = TRUE, na_level = NA_character_ ) a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) From c8b430dc9b01323c0b7eee28db429489316d0c27 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 11 Jul 2023 21:38:06 -0400 Subject: [PATCH 28/33] Separate files for summary funs, internal afun helper --- DESCRIPTION | 1 + NAMESPACE | 1 - R/analyze_variables.R | 283 +++--------------------- R/summary_stats.R | 214 ++++++++++++++++++ man/a_summary_internal.Rd | 47 ++++ man/analyze_variables.Rd | 57 +---- man/control_analyze_vars.Rd | 2 +- man/summary_stats.Rd | 2 +- tests/testthat/test-analyze_variables.R | 18 +- 9 files changed, 302 insertions(+), 323 deletions(-) create mode 100644 R/summary_stats.R create mode 100644 man/a_summary_internal.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 26e6386f03..a1111d59d8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -145,6 +145,7 @@ Collate: 'summarize_glm_count.R' 'summarize_num_patients.R' 'summarize_patients_exposure_in_cols.R' + 'summary_stats.R' 'survival_biomarkers_subgroups.R' 'survival_coxph_pairwise.R' 'survival_duration_subgroups.R' diff --git a/NAMESPACE b/NAMESPACE index 2717dc01a0..5239b12fcb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,7 +35,6 @@ export(a_odds_ratio) export(a_proportion) export(a_proportion_diff) export(a_summary) -export(a_summary_output) export(add_rowcounts) export(aesi_label) export(analyze_num_patients) diff --git a/R/analyze_variables.R b/R/analyze_variables.R index 052a2534a6..5a2eefbd48 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -1,218 +1,3 @@ -#' Control Function for Descriptive Statistics -#' -#' @description `r lifecycle::badge("stable")` -#' -#' Sets a list of parameters for summaries of descriptive statistics. Typically used internally to specify -#' details for [s_summary()]. This function family is mainly used by [analyze_vars()]. -#' -#' @inheritParams argument_convention -#' @param quantiles (`numeric`)\cr of length two to specify the quantiles to calculate. -#' @param quantile_type (`numeric`)\cr between 1 and 9 selecting quantile algorithms to be used. -#' Default is set to 2 as this matches the default quantile algorithm in SAS `proc univariate` set by `QNTLDEF=5`. -#' This differs from R's default. See more about `type` in [stats::quantile()]. -#' @param test_mean (`numeric`)\cr to test against the mean under the null hypothesis when calculating p-value. -#' -#' @note Deprecation cycle started for `control_summarize_vars` as it is going to renamed into -#' `control_analyze_vars`. Intention is to reflect better the core underlying `rtables` -#' functions; in this case [analyze_vars()] wraps [rtables::analyze()]. -#' -#' @return A list of components with the same names as the arguments. -#' -#' @export control_analyze_vars control_summarize_vars -#' @aliases control_summarize_vars -control_analyze_vars <- function(conf_level = 0.95, - quantiles = c(0.25, 0.75), - quantile_type = 2, - test_mean = 0) { - checkmate::assert_vector(quantiles, len = 2) - checkmate::assert_int(quantile_type, lower = 1, upper = 9) - checkmate::assert_numeric(test_mean) - lapply(quantiles, assert_proportion_value) - assert_proportion_value(conf_level) - list(conf_level = conf_level, quantiles = quantiles, quantile_type = quantile_type, test_mean = test_mean) -} - -control_summarize_vars <- control_analyze_vars - -#' Summary Statistic Settings Functions -#' -#' @description `r lifecycle::badge("stable")` -#' -#' Functions to retrieve default settings for summary statistics and customize these settings. -#' -#' @param type (`character`)\cr choice of summary data type. Only `counts` and `numeric` types are currently supported. -#' @param include_pval (`logical`)\cr whether p-value should be included as a default statistic. -#' -#' @name summary_stats -NULL - -#' @describeIn summary_stats Function to retrieve default formats for summary statistics. Returns format patterns for -#' descriptive statistics which are understood by `rtables`. -#' -#' @return -#' * `summary_formats` returns a named `vector` of default statistic formats for the given data type. -#' -#' @examples -#' summary_formats() -#' summary_formats(type = "count", include_pval = TRUE) -#' -#' @export -summary_formats <- function(type = "numeric", include_pval = FALSE) { - fmts <- if (type == "counts") { - c( - n = "xx.", - count = "xx.", - count_fraction = format_count_fraction, - n_blq = "xx.", - pval = "x.xxxx | (<0.0001)" - ) - } else { - c( - n = "xx.", - sum = "xx.x", - mean = "xx.x", - sd = "xx.x", - se = "xx.x", - mean_sd = "xx.x (xx.x)", - mean_se = "xx.x (xx.x)", - mean_ci = "(xx.xx, xx.xx)", - mean_sei = "(xx.xx, xx.xx)", - mean_sdi = "(xx.xx, xx.xx)", - mean_pval = "xx.xx", - median = "xx.x", - mad = "xx.x", - median_ci = "(xx.xx, xx.xx)", - quantiles = "xx.x - xx.x", - iqr = "xx.x", - range = "xx.x - xx.x", - min = "xx.x", - max = "xx.x", - median_range = "xx.x (xx.x - xx.x)", - cv = "xx.x", - geom_mean = "xx.x", - geom_mean_ci = "(xx.xx, xx.xx)", - geom_cv = "xx.x", - pval = "x.xxxx | (<0.0001)" - ) - } - if (!include_pval) fmts <- head(fmts, -1) - fmts -} - -#' @describeIn summary_stats Function to retrieve default labels for summary statistics. Returns labels of descriptive -#' statistics which are understood by `rtables`. -#' -#' @return -#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type. -#' -#' @examples -#' summary_labels() -#' summary_labels(type = "count", include_pval = TRUE) -#' -#' @export -summary_labels <- function(type = "numeric", include_pval = FALSE) { - lbls <- if (type == "counts") { - c( - n = "n", - count = "count", - count_fraction = "count_fraction", - n_blq = "n_blq", - pval = "p-value (chi-squared test)" - ) - } else { - c( - n = "n", - sum = "Sum", - mean = "Mean", - sd = "SD", - se = "SE", - mean_sd = "Mean (SD)", - mean_se = "Mean (SE)", - mean_ci = "Mean 95% CI", - mean_sei = "Mean -/+ 1xSE", - mean_sdi = "Mean -/+ 1xSD", - mean_pval = "Mean p-value (H0: mean = 0)", - median = "Median", - mad = "Median Absolute Deviation", - median_ci = "Median 95% CI", - quantiles = "25% and 75%-ile", - iqr = "IQR", - range = "Min - Max", - min = "Minimum", - max = "Maximum", - median_range = "Median (Min - Max)", - cv = "CV (%)", - geom_mean = "Geometric Mean", - geom_mean_ci = "Geometric Mean 95% CI", - geom_cv = "CV % Geometric Mean", - pval = "p-value (t-test)" - ) - } - if (!include_pval) lbls <- head(lbls, -1) - lbls -} - -#' @describeIn summary_stats Function to configure settings for default or custom summary statistics for a given data -#' type. In addition to selecting a custom subset of statistics, the user can also set custom formats, labels, and -#' indent modifiers for any of these statistics. -#' -#' @param stats_custom (`named vector` of `character`)\cr vector of statistics to include if not the defaults. This -#' argument overrides `include_pval` and other custom value arguments such that only settings for these statistics -#' will be returned. -#' @param formats_custom (`named vector` of `character`)\cr vector of custom statistics formats to use in place of the -#' defaults defined in [`summary_formats()`]. Names should be a subset of the statistics defined in `stats_custom` (or -#' default statistics if this is `NULL`). -#' @param labels_custom (`named vector` of `character`)\cr vector of custom statistics labels to use in place of the -#' defaults defined in [`summary_labels()`]. Names should be a subset of the statistics defined in `stats_custom` (or -#' default statistics if this is `NULL`). -#' @param indent_mods_custom (`integer` or `named vector` of `integer`)\cr vector of custom indentation modifiers for -#' statistics to use instead of the default of `0L` for all statistics. Names should be a subset of the statistics -#' defined in `stats_custom` (or default statistics if this is `NULL`). Alternatively, the same indentation modifier -#' can be applied to all statistics by setting `indent_mods_custom` to a single integer value. -#' -#' @return -#' * `summary_custom` returns a `list` of 4 named elements: `stats`, `formats`, `labels`, and `indent_mods`. -#' -#' @examples -#' summary_custom() -#' summary_custom(type = "counts", include_pval = TRUE) -#' summary_custom( -#' include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"), -#' labels_custom = c(sd = "Std. Dev."), indent_mods_custom = 3L -#' ) -#' -#' @export -summary_custom <- function(type = "numeric", - include_pval = FALSE, - stats_custom = NULL, - formats_custom = NULL, - labels_custom = NULL, - indent_mods_custom = NULL) { - if ("pval" %in% stats_custom) include_pval <- TRUE - - .formats <- summary_formats(type = type, include_pval = include_pval) - .stats <- if (is.null(stats_custom)) names(.formats) else intersect(stats_custom, names(.formats)) - .labels <- summary_labels(type = type, include_pval = include_pval) - .indent_mods <- stats::setNames(rep(0L, length(.stats)), .stats) - - if (!is.null(formats_custom)) .formats[names(formats_custom)] <- formats_custom - if (!is.null(labels_custom)) .labels[names(labels_custom)] <- labels_custom - if (!is.null(indent_mods_custom)) { - if (is.null(names(indent_mods_custom)) && length(indent_mods_custom) == 1) { - .indent_mods[names(.indent_mods)] <- indent_mods_custom - } else { - .indent_mods[names(indent_mods_custom)] <- indent_mods_custom - } - } - - list( - stats = .stats, - formats = .formats[.stats], - labels = .labels[.stats], - indent_mods = .indent_mods[.stats] - ) -} - #' Analyze Variables #' #' @description `r lifecycle::badge("stable")` @@ -590,7 +375,7 @@ s_summary.logical <- function(x, y } -#' @describeIn analyze_variables Formatted analysis helper function which is used within `a_summary`. +#' Helper Function to Create Output Rows for [a_summary()] #' #' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics #' (`compare = TRUE` adds `pval` statistic comparing against reference group). @@ -598,50 +383,29 @@ s_summary.logical <- function(x, #' `"numeric"`, otherwise type should be `"counts"`. #' #' @return -#' * `a_summary_output()` returns the corresponding list with formatted [rtables::CellValue()]. +#' * `a_summary_internal()` returns a corresponding list with formatted [rtables::CellValue()] used within `a_summary`. #' #' @note #' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`. #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is. #' -#' @examples -#' set.seed(1) -#' x <- rnorm(10) -#' a_summary_output( -#' x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, -#' .in_ref_col = FALSE, compare = FALSE, type = "numeric", .stats = summary_custom()$stats, -#' .formats = summary_custom()$formats, .labels = summary_custom()$labels, -#' .indent_mods = summary_custom()$indents, na.rm = TRUE, na_level = NA_character_ -#' ) -#' -#' x <- factor(c("a", "a", "b", "c", "a")) -#' a_summary_output( -#' x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, -#' .in_ref_col = FALSE, compare = FALSE, type = "counts", -#' .stats = summary_custom(type = "counts")$stats, -#' .formats = summary_custom(type = "counts")$formats, -#' .labels = summary_custom(type = "counts")$labels, -#' .indent_mods = summary_custom(type = "counts")$indents, -#' na.rm = TRUE, na_level = NA_character_ -#' ) -#' -#' @export -a_summary_output <- function(x, - .N_col, # nolint - .N_row, # nolint - .var, - .df_row, - .ref_group, - .in_ref_col, - compare, - type, - .stats, - .formats, - .labels, - .indent_mods, - na.rm, # nolint - na_level, - ...) { +#' @keywords internal +a_summary_internal <- function(x, + .N_col, # nolint + .N_row, # nolint + .var, + .df_row, + .ref_group, + .in_ref_col, + compare, + type, + .stats, + .formats, + .labels, + .indent_mods, + na.rm, # nolint + na_level, + ...) { # Remove all-NA rows if (!is.null(.df_row) && ncol(.df_row) > 1) { in_tot_col <- nrow(.df_row) == length(x) @@ -702,6 +466,11 @@ a_summary_output <- function(x, #' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `analyze_vars()` and #' `compare_vars()` and as `cfun` in `summarize_colvars()`. #' +#' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics +#' (`compare = TRUE` adds `pval` statistic comparing against reference group). +#' @param type (`character`)\cr type of statistics to calculate given `x`. If `x` is numeric `type` should be +#' `"numeric"`, otherwise type should be `"counts"`. +#' #' @return #' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()]. #' @@ -760,7 +529,7 @@ a_summary.default <- function(x, na.rm = TRUE, # nolint na_level = NA_character_, ...) { - a_summary_output( + a_summary_internal( x = x, .N_col = .N_col, .N_row = .N_row, @@ -804,7 +573,7 @@ a_summary.numeric <- function(x, na.rm = TRUE, # nolint na_level = NA_character_, ...) { - a_summary_output( + a_summary_internal( x = x, .N_col = .N_col, .N_row = .N_row, diff --git a/R/summary_stats.R b/R/summary_stats.R new file mode 100644 index 0000000000..7fffa4f025 --- /dev/null +++ b/R/summary_stats.R @@ -0,0 +1,214 @@ +#' Summary Statistic Settings Functions +#' +#' @description `r lifecycle::badge("stable")` +#' +#' Functions to retrieve default settings for summary statistics and customize these settings. +#' +#' @param type (`character`)\cr choice of summary data type. Only `counts` and `numeric` types are currently supported. +#' @param include_pval (`logical`)\cr whether p-value should be included as a default statistic. +#' +#' @name summary_stats +NULL + +#' @describeIn summary_stats Function to retrieve default formats for summary statistics. Returns format patterns for +#' descriptive statistics which are understood by `rtables`. +#' +#' @return +#' * `summary_formats` returns a named `vector` of default statistic formats for the given data type. +#' +#' @examples +#' summary_formats() +#' summary_formats(type = "count", include_pval = TRUE) +#' +#' @export +summary_formats <- function(type = "numeric", include_pval = FALSE) { + fmts <- if (type == "counts") { + c( + n = "xx.", + count = "xx.", + count_fraction = format_count_fraction, + n_blq = "xx.", + pval = "x.xxxx | (<0.0001)" + ) + } else { + c( + n = "xx.", + sum = "xx.x", + mean = "xx.x", + sd = "xx.x", + se = "xx.x", + mean_sd = "xx.x (xx.x)", + mean_se = "xx.x (xx.x)", + mean_ci = "(xx.xx, xx.xx)", + mean_sei = "(xx.xx, xx.xx)", + mean_sdi = "(xx.xx, xx.xx)", + mean_pval = "xx.xx", + median = "xx.x", + mad = "xx.x", + median_ci = "(xx.xx, xx.xx)", + quantiles = "xx.x - xx.x", + iqr = "xx.x", + range = "xx.x - xx.x", + min = "xx.x", + max = "xx.x", + median_range = "xx.x (xx.x - xx.x)", + cv = "xx.x", + geom_mean = "xx.x", + geom_mean_ci = "(xx.xx, xx.xx)", + geom_cv = "xx.x", + pval = "x.xxxx | (<0.0001)" + ) + } + if (!include_pval) fmts <- head(fmts, -1) + fmts +} + +#' @describeIn summary_stats Function to retrieve default labels for summary statistics. Returns labels of descriptive +#' statistics which are understood by `rtables`. +#' +#' @return +#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type. +#' +#' @examples +#' summary_labels() +#' summary_labels(type = "count", include_pval = TRUE) +#' +#' @export +summary_labels <- function(type = "numeric", include_pval = FALSE) { + lbls <- if (type == "counts") { + c( + n = "n", + count = "count", + count_fraction = "count_fraction", + n_blq = "n_blq", + pval = "p-value (chi-squared test)" + ) + } else { + c( + n = "n", + sum = "Sum", + mean = "Mean", + sd = "SD", + se = "SE", + mean_sd = "Mean (SD)", + mean_se = "Mean (SE)", + mean_ci = "Mean 95% CI", + mean_sei = "Mean -/+ 1xSE", + mean_sdi = "Mean -/+ 1xSD", + mean_pval = "Mean p-value (H0: mean = 0)", + median = "Median", + mad = "Median Absolute Deviation", + median_ci = "Median 95% CI", + quantiles = "25% and 75%-ile", + iqr = "IQR", + range = "Min - Max", + min = "Minimum", + max = "Maximum", + median_range = "Median (Min - Max)", + cv = "CV (%)", + geom_mean = "Geometric Mean", + geom_mean_ci = "Geometric Mean 95% CI", + geom_cv = "CV % Geometric Mean", + pval = "p-value (t-test)" + ) + } + if (!include_pval) lbls <- head(lbls, -1) + lbls +} + +#' @describeIn summary_stats Function to configure settings for default or custom summary statistics for a given data +#' type. In addition to selecting a custom subset of statistics, the user can also set custom formats, labels, and +#' indent modifiers for any of these statistics. +#' +#' @param stats_custom (`named vector` of `character`)\cr vector of statistics to include if not the defaults. This +#' argument overrides `include_pval` and other custom value arguments such that only settings for these statistics +#' will be returned. +#' @param formats_custom (`named vector` of `character`)\cr vector of custom statistics formats to use in place of the +#' defaults defined in [`summary_formats()`]. Names should be a subset of the statistics defined in `stats_custom` (or +#' default statistics if this is `NULL`). +#' @param labels_custom (`named vector` of `character`)\cr vector of custom statistics labels to use in place of the +#' defaults defined in [`summary_labels()`]. Names should be a subset of the statistics defined in `stats_custom` (or +#' default statistics if this is `NULL`). +#' @param indent_mods_custom (`integer` or `named vector` of `integer`)\cr vector of custom indentation modifiers for +#' statistics to use instead of the default of `0L` for all statistics. Names should be a subset of the statistics +#' defined in `stats_custom` (or default statistics if this is `NULL`). Alternatively, the same indentation modifier +#' can be applied to all statistics by setting `indent_mods_custom` to a single integer value. +#' +#' @return +#' * `summary_custom` returns a `list` of 4 named elements: `stats`, `formats`, `labels`, and `indent_mods`. +#' +#' @examples +#' summary_custom() +#' summary_custom(type = "counts", include_pval = TRUE) +#' summary_custom( +#' include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"), +#' labels_custom = c(sd = "Std. Dev."), indent_mods_custom = 3L +#' ) +#' +#' @export +summary_custom <- function(type = "numeric", + include_pval = FALSE, + stats_custom = NULL, + formats_custom = NULL, + labels_custom = NULL, + indent_mods_custom = NULL) { + if ("pval" %in% stats_custom) include_pval <- TRUE + + .formats <- summary_formats(type = type, include_pval = include_pval) + .stats <- if (is.null(stats_custom)) names(.formats) else intersect(stats_custom, names(.formats)) + .labels <- summary_labels(type = type, include_pval = include_pval) + .indent_mods <- stats::setNames(rep(0L, length(.stats)), .stats) + + if (!is.null(formats_custom)) .formats[names(formats_custom)] <- formats_custom + if (!is.null(labels_custom)) .labels[names(labels_custom)] <- labels_custom + if (!is.null(indent_mods_custom)) { + if (is.null(names(indent_mods_custom)) && length(indent_mods_custom) == 1) { + .indent_mods[names(.indent_mods)] <- indent_mods_custom + } else { + .indent_mods[names(indent_mods_custom)] <- indent_mods_custom + } + } + + list( + stats = .stats, + formats = .formats[.stats], + labels = .labels[.stats], + indent_mods = .indent_mods[.stats] + ) +} + +#' Control Function for Descriptive Statistics +#' +#' @description `r lifecycle::badge("stable")` +#' +#' Sets a list of parameters for summaries of descriptive statistics. Typically used internally to specify +#' details for [s_summary()]. This function family is mainly used by [analyze_vars()]. +#' +#' @inheritParams argument_convention +#' @param quantiles (`numeric`)\cr of length two to specify the quantiles to calculate. +#' @param quantile_type (`numeric`)\cr between 1 and 9 selecting quantile algorithms to be used. +#' Default is set to 2 as this matches the default quantile algorithm in SAS `proc univariate` set by `QNTLDEF=5`. +#' This differs from R's default. See more about `type` in [stats::quantile()]. +#' @param test_mean (`numeric`)\cr to test against the mean under the null hypothesis when calculating p-value. +#' +#' @note Deprecation cycle started for `control_summarize_vars` as it is going to renamed into +#' `control_analyze_vars`. Intention is to reflect better the core underlying `rtables` +#' functions; in this case [analyze_vars()] wraps [rtables::analyze()]. +#' +#' @return A list of components with the same names as the arguments. +#' +#' @export control_analyze_vars control_summarize_vars +#' @aliases control_summarize_vars +control_analyze_vars <- function(conf_level = 0.95, + quantiles = c(0.25, 0.75), + quantile_type = 2, + test_mean = 0) { + checkmate::assert_vector(quantiles, len = 2) + checkmate::assert_int(quantile_type, lower = 1, upper = 9) + checkmate::assert_numeric(test_mean) + lapply(quantiles, assert_proportion_value) + assert_proportion_value(conf_level) + list(conf_level = conf_level, quantiles = quantiles, quantile_type = quantile_type, test_mean = test_mean) +} + +control_summarize_vars <- control_analyze_vars diff --git a/man/a_summary_internal.Rd b/man/a_summary_internal.Rd new file mode 100644 index 0000000000..63ea602afd --- /dev/null +++ b/man/a_summary_internal.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyze_variables.R +\name{a_summary_internal} +\alias{a_summary_internal} +\title{Helper Function to Create Output Rows for \code{\link[=a_summary]{a_summary()}}} +\usage{ +a_summary_internal( + x, + .N_col, + .N_row, + .var, + .df_row, + .ref_group, + .in_ref_col, + compare, + type, + .stats, + .formats, + .labels, + .indent_mods, + na.rm, + na_level, + ... +) +} +\arguments{ +\item{compare}{(\code{logical})\cr Whether comparison statistics should be analyzed instead of summary statistics +(\code{compare = TRUE} adds \code{pval} statistic comparing against reference group).} + +\item{type}{(\code{character})\cr type of statistics to calculate given \code{x}. If \code{x} is numeric \code{type} should be +\code{"numeric"}, otherwise type should be \code{"counts"}.} +} +\value{ +\itemize{ +\item \code{a_summary_internal()} returns a corresponding list with formatted \code{\link[rtables:CellValue]{rtables::CellValue()}} used within \code{a_summary}. +} +} +\description{ +Helper Function to Create Output Rows for \code{\link[=a_summary]{a_summary()}} +} +\note{ +\itemize{ +\item To use for comparison (with additional p-value statistic), parameter \code{compare} must be set to \code{TRUE}. +\item Ensure that either all \code{NA} values are converted to an explicit \code{NA} level or all \code{NA} values are left as is. +} +} +\keyword{internal} diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd index 24cbc1d250..38ae596a19 100644 --- a/man/analyze_variables.Rd +++ b/man/analyze_variables.Rd @@ -7,7 +7,6 @@ \alias{s_summary.factor} \alias{s_summary.character} \alias{s_summary.logical} -\alias{a_summary_output} \alias{a_summary} \alias{a_summary.default} \alias{a_summary.numeric} @@ -56,25 +55,6 @@ s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...) ... ) -a_summary_output( - x, - .N_col, - .N_row, - .var, - .df_row, - .ref_group, - .in_ref_col, - compare, - type, - .stats, - .formats, - .labels, - .indent_mods, - na.rm, - na_level, - ... -) - a_summary(x, .N_col, .N_row, .var, .df_row, .ref_group, .in_ref_col, ...) \method{a_summary}{default}( @@ -174,9 +154,6 @@ to print out information about factor casting.} \item{compare}{(\code{logical})\cr Whether comparison statistics should be analyzed instead of summary statistics (\code{compare = TRUE} adds \code{pval} statistic comparing against reference group).} -\item{type}{(\code{character})\cr type of statistics to calculate given \code{x}. If \code{x} is numeric \code{type} should be -\code{"numeric"}, otherwise type should be \code{"counts"}.} - \item{.stats}{(\code{character})\cr statistics to select for the table.} \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} @@ -206,6 +183,9 @@ to avoid warnings from \code{rtables}.} \item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group defined by this split instruction, or \code{NA_character_} (the default) for no section divider.} + +\item{type}{(\code{character})\cr type of statistics to calculate given \code{x}. If \code{x} is numeric \code{type} should be +\code{"numeric"}, otherwise type should be \code{"counts"}.} } \value{ \itemize{ @@ -262,10 +242,6 @@ denominator is zero. Note that \code{NA}s in \code{x} are never counted or leadi } } -\itemize{ -\item \code{a_summary_output()} returns the corresponding list with formatted \code{\link[rtables:CellValue]{rtables::CellValue()}}. -} - \itemize{ \item \code{a_summary()} returns the corresponding list with formatted \code{\link[rtables:CellValue]{rtables::CellValue()}}. } @@ -295,8 +271,6 @@ conversion to factor (with a warning) and then forwards to the method for factor \item \code{s_summary(logical)}: Method for \code{logical} class. -\item \code{a_summary_output()}: Formatted analysis helper function which is used within \code{a_summary}. - \item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{analyze_vars()} and \code{compare_vars()} and as \code{cfun} in \code{summarize_colvars()}. @@ -337,11 +311,6 @@ created from character variables before passing the dataset to \code{\link[rtabl \item Ensure that either all \code{NA} values are converted to an explicit \code{NA} level or all \code{NA} values are left as is. } -\itemize{ -\item To use for comparison (with additional p-value statistic), parameter \code{compare} must be set to \code{TRUE}. -\item Ensure that either all \code{NA} values are converted to an explicit \code{NA} level or all \code{NA} values are left as is. -} - Deprecation cycle started for \code{summarize_vars} as it is going to renamed into \code{analyze_vars}. Intention is to reflect better the core underlying \code{rtables} functions; in this case \code{\link[rtables:analyze]{rtables::analyze()}}. @@ -418,26 +387,6 @@ x <- c(TRUE, FALSE, TRUE, TRUE) s_summary(x, denom = "N_row", .N_row = 10L) s_summary(x, denom = "N_col", .N_col = 20L) -set.seed(1) -x <- rnorm(10) -a_summary_output( - x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, - .in_ref_col = FALSE, compare = FALSE, type = "numeric", .stats = summary_custom()$stats, - .formats = summary_custom()$formats, .labels = summary_custom()$labels, - .indent_mods = summary_custom()$indents, na.rm = TRUE, na_level = NA_character_ -) - -x <- factor(c("a", "a", "b", "c", "a")) -a_summary_output( - x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, - .in_ref_col = FALSE, compare = FALSE, type = "counts", - .stats = summary_custom(type = "counts")$stats, - .formats = summary_custom(type = "counts")$formats, - .labels = summary_custom(type = "counts")$labels, - .indent_mods = summary_custom(type = "counts")$indents, - na.rm = TRUE, na_level = NA_character_ -) - a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10) a_summary( factor(c("a", "a", "b", "c", "a")), diff --git a/man/control_analyze_vars.Rd b/man/control_analyze_vars.Rd index 825166f5ec..d9a335674c 100644 --- a/man/control_analyze_vars.Rd +++ b/man/control_analyze_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/analyze_variables.R +% Please edit documentation in R/summary_stats.R \name{control_analyze_vars} \alias{control_analyze_vars} \alias{control_summarize_vars} diff --git a/man/summary_stats.Rd b/man/summary_stats.Rd index 1821410347..441f7b2299 100644 --- a/man/summary_stats.Rd +++ b/man/summary_stats.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/analyze_variables.R +% Please edit documentation in R/summary_stats.R \name{summary_stats} \alias{summary_stats} \alias{summary_formats} diff --git a/tests/testthat/test-analyze_variables.R b/tests/testthat/test-analyze_variables.R index 4e6da3a58b..016d44c786 100644 --- a/tests/testthat/test-analyze_variables.R +++ b/tests/testthat/test-analyze_variables.R @@ -193,13 +193,13 @@ testthat::test_that("s_summary works with logical vectors and by if requested do testthat::expect_snapshot(res) }) -testthat::test_that("a_summary_output and a_summary work with healthy input.", { +testthat::test_that("a_summary_internal and a_summary work with healthy input.", { options("width" = 100) - # numeric input - a_summary_output + # numeric input - a_summary_internal set.seed(1) x <- rnorm(10) - result <- a_summary_output( + result <- a_summary_internal( x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, compare = FALSE, type = "numeric", .stats = summary_custom()$stats, .formats = summary_custom()$formats, .labels = summary_custom()$labels, .indent_mods = summary_custom()$indents, na.rm = TRUE, na_level = NA_character_ @@ -212,9 +212,9 @@ testthat::test_that("a_summary_output and a_summary work with healthy input.", { testthat::expect_identical(res_out, res) testthat::expect_snapshot(res) - # factor input - a_summary_output + # factor input - a_summary_internal x <- factor(c("a", "a", "b", "c", "a")) - result <- a_summary_output( + result <- a_summary_internal( x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats, .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels, @@ -228,9 +228,9 @@ testthat::test_that("a_summary_output and a_summary work with healthy input.", { testthat::expect_identical(res_out, res) testthat::expect_snapshot(res) - # character input - a_summary_output + # character input - a_summary_internal x <- c("A", "B", "A", "C") - result <- a_summary_output( + result <- a_summary_internal( x = x, .N_col = 10, .N_row = 10, .var = "x", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats, .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels, @@ -245,9 +245,9 @@ testthat::test_that("a_summary_output and a_summary work with healthy input.", { testthat::expect_identical(res_out, res) testthat::expect_snapshot(res) - # logical input - a_summary_output + # logical input - a_summary_internal x <- c(TRUE, FALSE, FALSE, TRUE, TRUE) - result <- a_summary_output( + result <- a_summary_internal( x = x, .N_col = 10, .N_row = 10, .var = NULL, .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE, compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats, .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels, From 28b803dc6c2f8e4f788c18246d7fd9cd7b1d9028 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 14 Jul 2023 20:18:15 -0400 Subject: [PATCH 29/33] Clean up a_summary and ungroup_stats, export a_compare --- NAMESPACE | 1 + R/analyze_variables.R | 21 ++++---- R/compare_variables.R | 6 ++- R/utils_factor.R | 62 +++++++--------------- man/compare_variables.Rd | 4 +- man/ungroup_stats.Rd | 22 ++------ tests/testthat/_snaps/analyze_variables.md | 2 +- 7 files changed, 42 insertions(+), 76 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5239b12fcb..5e9df15f52 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ S3method(tidy,glm) S3method(tidy,step) S3method(tidy,summary.coxph) export(CombinationFunction) +export(a_compare) export(a_count_occurrences) export(a_count_occurrences_by_grade) export(a_count_patients_sum_exposure) diff --git a/R/analyze_variables.R b/R/analyze_variables.R index 5a2eefbd48..12d3d7c67f 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -406,14 +406,9 @@ a_summary_internal <- function(x, na.rm, # nolint na_level, ...) { - # Remove all-NA rows - if (!is.null(.df_row) && ncol(.df_row) > 1) { - in_tot_col <- nrow(.df_row) == length(x) - .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ] - if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]] - } + # If one col has NA vals, must add NA row to other cols (using placeholder lvl `fill-na-level`) + if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "fill-na-level") - if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level") x_stats <- if (!compare) { s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...) } else { @@ -422,6 +417,7 @@ a_summary_internal <- function(x, ) } + # Fill in with formatting defaults if needed custom_summary <- summary_custom( type = type, include_pval = compare, @@ -434,7 +430,9 @@ a_summary_internal <- function(x, .formats <- custom_summary$formats .labels <- custom_summary$labels .indent_mods <- custom_summary$indent_mods + x_stats <- x_stats[.stats] + # Check for custom labels from control_analyze_vars if (is.numeric(x)) { for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) { if (!i %in% names(.labels) || .labels[[i]] == summary_custom()$labels[[i]]) { @@ -443,14 +441,15 @@ a_summary_internal <- function(x, } } - x_stats <- x_stats[.stats] if (is.factor(x) || is.character(x)) { - x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col) + # Ungroup statistics with values for each level of x + x_ungrp <- ungroup_stats(x_stats, .formats, .labels, .indent_mods, .in_ref_col) x_stats <- x_ungrp[["x"]] - .stats <- x_ungrp[[".stats"]] .formats <- x_ungrp[[".formats"]] - .labels <- x_ungrp[[".labels"]] + .labels <- gsub("fill-na-level", "NA", x_ungrp[[".labels"]]) .indent_mods <- x_ungrp[[".indent_mods"]] + + if (.in_ref_col && "pval" %in% names(x_stats)) x_stats[["pval"]] <- character() } in_rows( diff --git a/R/compare_variables.R b/R/compare_variables.R index cb97788397..52bba6bf3d 100644 --- a/R/compare_variables.R +++ b/R/compare_variables.R @@ -254,7 +254,11 @@ s_compare.logical <- function(x, #' #' @note `a_compare()` has been deprecated in favor of `a_summary()` with argument `compare` set to `TRUE`. #' -#' @keywords internal +#' @examples +#' # `a_compare` deprecated - use `a_summary()` instead +#' a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .stats = c("n", "pval")) +#' +#' @export a_compare <- function(x, .N_col, # nolint .N_row, # nolint diff --git a/R/utils_factor.R b/R/utils_factor.R index 85b5c7677f..9e2a8d79d5 100644 --- a/R/utils_factor.R +++ b/R/utils_factor.R @@ -277,68 +277,42 @@ fct_collapse_only <- function(.f, ..., .na_level = "") { do.call(forcats::fct_relevel, args = c(list(.f = x), as.list(new_lvls))) } -#' Ungroup and Format Non-Numeric Statistics +#' Ungroup Non-Numeric Statistics #' -#' Ungroups grouped non-numeric statistics (`count` and `count_fraction`) within input vectors `.stats`, `.formats`, -#' `.labels`, and `.indent_mods`. Formats explicit `NA` level in each input vector and changes p-value to -#' `character()` if current column is a reference column. Used within [`a_summary()`]. +#' Ungroups grouped non-numeric statistics within input vectors `.formats`, `.labels`, and `.indent_mods`. #' #' @inheritParams argument_convention #' @param x (`named list` of `numeric`)\cr list of numeric statistics containing the statistics to ungroup. -#' @param which_stats (`vector` of `character`)\cr which statistics should be ungrouped. #' -#' @return A `list` with modified elements `x`, `.stats`, `.formats`, `.labels`, and `.indent_mods`. +#' @return A `list` with modified elements `x`, `.formats`, `.labels`, and `.indent_mods`. #' #' @seealso [a_summary()] which uses this function internally. #' #' @keywords internal ungroup_stats <- function(x, - .stats, .formats, .labels, .indent_mods, - .in_ref_col = FALSE, - which_stats = c("count", "count_fraction")) { + .in_ref_col = FALSE) { checkmate::assert_list(x) - - for (stat in which_stats) { - for (a in names(x[[stat]])) { - a <- if (a == "na-level") "NA" else a - a_lvl <- paste(stat, a, sep = ".") - a_name <- if (a != "NA" || "NA" %in% names(x[[stat]])) a else "na-level" - .stats <- c(.stats, a_lvl) - .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) { - which(names(.formats) == stat) - 1 + which(names(x[[stat]]) == a_name) - } else { - length(.formats) - }) - .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) { - which(names(.labels) == stat) - 1 + which(names(x[[stat]]) == a_name) - } else { - length(.labels) - }) - .indent_mods <- append( - .indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), - after = if (stat %in% names(.indent_mods)) { - which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == a_name) - } else { - length(.indent_mods) - } - ) - } - } - - if (.in_ref_col && "pval" %in% names(x)) x[["pval"]] <- "pvalue" x <- unlist(x, recursive = FALSE) - if (.in_ref_col && "pval" %in% names(x)) x[["pval"]] <- character() - names(x) <- gsub("na-level", "NA", names(x)) .stats <- names(x) + # Ungroup stats + .formats <- lapply(.stats, function(x) { + .formats[[if (!grepl("\\.", x)) x else regmatches(x, regexpr("\\.", x), invert = TRUE)[[1]][1]]] + }) + .indent_mods <- sapply(.stats, function(x) { + .indent_mods[[if (!grepl("\\.", x)) x else regmatches(x, regexpr("\\.", x), invert = TRUE)[[1]][1]]] + }) + .labels <- sapply(.stats, function(x) { + if (!grepl("\\.", x)) .labels[[x]] else regmatches(x, regexpr("\\.", x), invert = TRUE)[[1]][2] + }) + list( x = x, - .stats = .stats, - .formats = .formats[.stats], - .labels = .labels[.stats], - .indent_mods = .indent_mods[.stats] + .formats = .formats, + .labels = .labels, + .indent_mods = .indent_mods ) } diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd index b9c7117358..e2c6ed3457 100644 --- a/man/compare_variables.Rd +++ b/man/compare_variables.Rd @@ -234,6 +234,9 @@ y <- c(NA, NA, NA, NA, FALSE) s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE) s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE) +# `a_compare` deprecated - use `a_summary()` instead +a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .stats = c("n", "pval")) + # `compare_vars()` in `rtables` pipelines ## Default output within a `rtables` pipeline. @@ -259,4 +262,3 @@ Relevant constructor function \code{\link[=create_afun_compare]{create_afun_comp to compute a summary within \code{s_compare()}, and \code{\link[=a_compare]{a_compare()}} which is used (with \code{compare = TRUE}) as the analysis function for \code{compare_vars()}. } -\keyword{internal} diff --git a/man/ungroup_stats.Rd b/man/ungroup_stats.Rd index b427561695..8e2d0ec447 100644 --- a/man/ungroup_stats.Rd +++ b/man/ungroup_stats.Rd @@ -2,23 +2,13 @@ % Please edit documentation in R/utils_factor.R \name{ungroup_stats} \alias{ungroup_stats} -\title{Ungroup and Format Non-Numeric Statistics} +\title{Ungroup Non-Numeric Statistics} \usage{ -ungroup_stats( - x, - .stats, - .formats, - .labels, - .indent_mods, - .in_ref_col = FALSE, - which_stats = c("count", "count_fraction") -) +ungroup_stats(x, .formats, .labels, .indent_mods, .in_ref_col = FALSE) } \arguments{ \item{x}{(\verb{named list} of \code{numeric})\cr list of numeric statistics containing the statistics to ungroup.} -\item{.stats}{(\code{character})\cr statistics to select for the table.} - \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.} \item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} @@ -27,16 +17,12 @@ ungroup_stats( unmodified default behavior. Can be negative.} \item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.} - -\item{which_stats}{(\code{vector} of \code{character})\cr which statistics should be ungrouped.} } \value{ -A \code{list} with modified elements \code{x}, \code{.stats}, \code{.formats}, \code{.labels}, and \code{.indent_mods}. +A \code{list} with modified elements \code{x}, \code{.formats}, \code{.labels}, and \code{.indent_mods}. } \description{ -Ungroups grouped non-numeric statistics (\code{count} and \code{count_fraction}) within input vectors \code{.stats}, \code{.formats}, -\code{.labels}, and \code{.indent_mods}. Formats explicit \code{NA} level in each input vector and changes p-value to -\code{character()} if current column is a reference column. Used within \code{\link[=a_summary]{a_summary()}}. +Ungroups grouped non-numeric statistics within input vectors \code{.formats}, \code{.labels}, and \code{.indent_mods}. } \seealso{ \code{\link[=a_summary]{a_summary()}} which uses this function internally. diff --git a/tests/testthat/_snaps/analyze_variables.md b/tests/testthat/_snaps/analyze_variables.md index fa1cb362c3..705312763c 100644 --- a/tests/testthat/_snaps/analyze_variables.md +++ b/tests/testthat/_snaps/analyze_variables.md @@ -1116,7 +1116,7 @@ [1] 0 -# a_summary_output and a_summary work with healthy input. +# a_summary_internal and a_summary work with healthy input. Code res From d0e341c97a8497818d4ecdb70cd5eacbf07458fc Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 14 Jul 2023 20:23:00 -0400 Subject: [PATCH 30/33] Update NEWS --- NEWS.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index 0f6cbca9c2..b089e2d17b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,19 +2,19 @@ ### Enhancements * Added method for `character` class to `h_coxreg_inter_effect` enabling `character` covariates in `summarize_coxreg`. -* Refactored `a_summary` to no longer use helper function `create_afun_summary`. -* Refactored `summarize_vars` and `compare_vars` to use refactored `a_summary`. -* Created new internal helper function `ungroup_stats` to ungroup and format statistics calculated for factor variables. * Started deprecation cycle for `summarize_vars` and `control_summarize_vars`. Renamed into `analyze_vars` and `control_analyze_vars` to reflect underlying `rtables` machinery while keeping backward compatibility with aliases. * Added `ylim` argument to `g_km` to allow the user to set custom limits for the y-axis. * Added assertion to `g_km` which checks whether there is one arm present in the data when `annot_coxph` is true. * Added `flag_labels` argument to `s_count_patients_with_flags` to enable more label handling options in `count_patients_by_flags`. +* Refactored `a_summary` to no longer use helper function `create_afun_summary`. +* Refactored `summarize_vars` and `compare_vars` to use refactored `a_summary`. +* Created new internal helper functions `ungroup_stats` to ungroup statistics calculated for factor variables, and `a_summary_internal` to perform calculations for `a_summary`. ### Miscellaneous * Began deprecation of `time_unit_input` and `time_unit_output` arguments and replaced them with the `input_time_unit` and `num_pt_year`, respectively, in `control_incidence_rate`. +* Removed deprecated `pairwise` function. * Deprecated `a_compare` and replaced it with `a_summary` with argument `compare = TRUE`. * Deprecated helper functions `create_afun_summary` and `create_afun_compare` which are no longer used by `a_summary` and `a_compare` respectively. -* Removed deprecated `pairwise` function. ### Bug Fixes * Fixed long double assertion check in `sum(weights)` for `M1mac` installation. From 62c7990c3e4995473a8a6e35e7a23d69a2e5cb29 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Sat, 15 Jul 2023 15:22:34 -0400 Subject: [PATCH 31/33] Fix tests --- R/analyze_variables.R | 4 +--- R/utils_factor.R | 9 ++++++--- man/ungroup_stats.Rd | 6 ++---- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/R/analyze_variables.R b/R/analyze_variables.R index 12d3d7c67f..6d2dce3ba2 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -443,13 +443,11 @@ a_summary_internal <- function(x, if (is.factor(x) || is.character(x)) { # Ungroup statistics with values for each level of x - x_ungrp <- ungroup_stats(x_stats, .formats, .labels, .indent_mods, .in_ref_col) + x_ungrp <- ungroup_stats(x_stats, .formats, .labels, .indent_mods) x_stats <- x_ungrp[["x"]] .formats <- x_ungrp[[".formats"]] .labels <- gsub("fill-na-level", "NA", x_ungrp[[".labels"]]) .indent_mods <- x_ungrp[[".indent_mods"]] - - if (.in_ref_col && "pval" %in% names(x_stats)) x_stats[["pval"]] <- character() } in_rows( diff --git a/R/utils_factor.R b/R/utils_factor.R index 9e2a8d79d5..aab2e52184 100644 --- a/R/utils_factor.R +++ b/R/utils_factor.R @@ -286,16 +286,19 @@ fct_collapse_only <- function(.f, ..., .na_level = "") { #' #' @return A `list` with modified elements `x`, `.formats`, `.labels`, and `.indent_mods`. #' -#' @seealso [a_summary()] which uses this function internally. +#' @seealso [a_summary_internal()] which uses this function internally. #' #' @keywords internal ungroup_stats <- function(x, .formats, .labels, - .indent_mods, - .in_ref_col = FALSE) { + .indent_mods) { checkmate::assert_list(x) + empty_pval <- "pval" %in% names(x) && length(x[["pval"]]) == 0 x <- unlist(x, recursive = FALSE) + + # If p-value is empty it is removed by unlist and needs to be re-added + if (empty_pval) x[["pval"]] <- character() .stats <- names(x) # Ungroup stats diff --git a/man/ungroup_stats.Rd b/man/ungroup_stats.Rd index 8e2d0ec447..fb68d1cce6 100644 --- a/man/ungroup_stats.Rd +++ b/man/ungroup_stats.Rd @@ -4,7 +4,7 @@ \alias{ungroup_stats} \title{Ungroup Non-Numeric Statistics} \usage{ -ungroup_stats(x, .formats, .labels, .indent_mods, .in_ref_col = FALSE) +ungroup_stats(x, .formats, .labels, .indent_mods) } \arguments{ \item{x}{(\verb{named list} of \code{numeric})\cr list of numeric statistics containing the statistics to ungroup.} @@ -15,8 +15,6 @@ ungroup_stats(x, .formats, .labels, .indent_mods, .in_ref_col = FALSE) \item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the unmodified default behavior. Can be negative.} - -\item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.} } \value{ A \code{list} with modified elements \code{x}, \code{.formats}, \code{.labels}, and \code{.indent_mods}. @@ -25,6 +23,6 @@ A \code{list} with modified elements \code{x}, \code{.formats}, \code{.labels}, Ungroups grouped non-numeric statistics within input vectors \code{.formats}, \code{.labels}, and \code{.indent_mods}. } \seealso{ -\code{\link[=a_summary]{a_summary()}} which uses this function internally. +\code{\link[=a_summary_internal]{a_summary_internal()}} which uses this function internally. } \keyword{internal} From 87b738ad2037c56b1ee4abab1fb5d6acccd3af7c Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 21 Jul 2023 15:43:12 -0400 Subject: [PATCH 32/33] Organize tests --- tests/testthat/_snaps/analyze_variables.md | 214 --------------------- tests/testthat/_snaps/summary_stats.md | 214 +++++++++++++++++++++ tests/testthat/test-analyze_variables.R | 48 ----- tests/testthat/test-summary_stats.R | 47 +++++ 4 files changed, 261 insertions(+), 262 deletions(-) create mode 100644 tests/testthat/_snaps/summary_stats.md create mode 100644 tests/testthat/test-summary_stats.R diff --git a/tests/testthat/_snaps/analyze_variables.md b/tests/testthat/_snaps/analyze_variables.md index 705312763c..4fe6f5d8ed 100644 --- a/tests/testthat/_snaps/analyze_variables.md +++ b/tests/testthat/_snaps/analyze_variables.md @@ -1,217 +1,3 @@ -# control_analyze_vars works with customized parameters - - Code - res - Output - $conf_level - [1] 0.9 - - $quantiles - [1] 0.1 0.9 - - $quantile_type - [1] 2 - - $test_mean - [1] 0 - - -# summary_formats works as expected - - Code - res - Output - n sum mean - "xx." "xx.x" "xx.x" - sd se mean_sd - "xx.x" "xx.x" "xx.x (xx.x)" - mean_se mean_ci mean_sei - "xx.x (xx.x)" "(xx.xx, xx.xx)" "(xx.xx, xx.xx)" - mean_sdi mean_pval median - "(xx.xx, xx.xx)" "xx.xx" "xx.x" - mad median_ci quantiles - "xx.x" "(xx.xx, xx.xx)" "xx.x - xx.x" - iqr range min - "xx.x" "xx.x - xx.x" "xx.x" - max median_range cv - "xx.x" "xx.x (xx.x - xx.x)" "xx.x" - geom_mean geom_mean_ci geom_cv - "xx.x" "(xx.xx, xx.xx)" "xx.x" - ---- - - Code - res - Output - $n - [1] "xx." - - $count - [1] "xx." - - $count_fraction - function(x, ...) { - attr(x, "label") <- NULL - - if (any(is.na(x))) { - return("NA") - } - - checkmate::assert_vector(x) - checkmate::assert_integerish(x[1]) - assert_proportion_value(x[2], include_boundaries = TRUE) - - result <- if (x[1] == 0) { - "0" - } else { - paste0(x[1], " (", round(x[2] * 100, 1), "%)") - } - - return(result) - } - - - $n_blq - [1] "xx." - - $pval - [1] "x.xxxx | (<0.0001)" - - -# summary_labels works as expected - - Code - res - Output - n sum - "n" "Sum" - mean sd - "Mean" "SD" - se mean_sd - "SE" "Mean (SD)" - mean_se mean_ci - "Mean (SE)" "Mean 95% CI" - mean_sei mean_sdi - "Mean -/+ 1xSE" "Mean -/+ 1xSD" - mean_pval median - "Mean p-value (H0: mean = 0)" "Median" - mad median_ci - "Median Absolute Deviation" "Median 95% CI" - quantiles iqr - "25% and 75%-ile" "IQR" - range min - "Min - Max" "Minimum" - max median_range - "Maximum" "Median (Min - Max)" - cv geom_mean - "CV (%)" "Geometric Mean" - geom_mean_ci geom_cv - "Geometric Mean 95% CI" "CV % Geometric Mean" - ---- - - Code - res - Output - n count - "n" "count" - count_fraction n_blq - "count_fraction" "n_blq" - pval - "p-value (chi-squared test)" - -# summary_custom works as expected - - Code - res - Output - $stats - [1] "n" "sum" "mean" "sd" "se" - [6] "mean_sd" "mean_se" "mean_ci" "mean_sei" "mean_sdi" - [11] "mean_pval" "median" "mad" "median_ci" "quantiles" - [16] "iqr" "range" "min" "max" "median_range" - [21] "cv" "geom_mean" "geom_mean_ci" "geom_cv" - - $formats - n sum mean - "xx." "xx.x" "xx.x" - sd se mean_sd - "xx.x" "xx.x" "xx.x (xx.x)" - mean_se mean_ci mean_sei - "xx.x (xx.x)" "(xx.xx, xx.xx)" "(xx.xx, xx.xx)" - mean_sdi mean_pval median - "(xx.xx, xx.xx)" "xx.xx" "xx.x" - mad median_ci quantiles - "xx.x" "(xx.xx, xx.xx)" "xx.x - xx.x" - iqr range min - "xx.x" "xx.x - xx.x" "xx.x" - max median_range cv - "xx.x" "xx.x (xx.x - xx.x)" "xx.x" - geom_mean geom_mean_ci geom_cv - "xx.x" "(xx.xx, xx.xx)" "xx.x" - - $labels - n sum - "n" "Sum" - mean sd - "Mean" "SD" - se mean_sd - "SE" "Mean (SD)" - mean_se mean_ci - "Mean (SE)" "Mean 95% CI" - mean_sei mean_sdi - "Mean -/+ 1xSE" "Mean -/+ 1xSD" - mean_pval median - "Mean p-value (H0: mean = 0)" "Median" - mad median_ci - "Median Absolute Deviation" "Median 95% CI" - quantiles iqr - "25% and 75%-ile" "IQR" - range min - "Min - Max" "Minimum" - max median_range - "Maximum" "Median (Min - Max)" - cv geom_mean - "CV (%)" "Geometric Mean" - geom_mean_ci geom_cv - "Geometric Mean 95% CI" "CV % Geometric Mean" - - $indent_mods - n sum mean sd se mean_sd - 0 0 0 0 0 0 - mean_se mean_ci mean_sei mean_sdi mean_pval median - 0 0 0 0 0 0 - mad median_ci quantiles iqr range min - 0 0 0 0 0 0 - max median_range cv geom_mean geom_mean_ci geom_cv - 0 0 0 0 0 0 - - ---- - - Code - res - Output - $stats - [1] "n" "count" - - $formats - $formats$n - [1] "xx.xx" - - $formats$count - [1] "xx." - - - $labels - n count - "n" "#" - - $indent_mods - n count - 2 2 - - # s_summary return NA for x length 0L Code diff --git a/tests/testthat/_snaps/summary_stats.md b/tests/testthat/_snaps/summary_stats.md new file mode 100644 index 0000000000..4dd4633f57 --- /dev/null +++ b/tests/testthat/_snaps/summary_stats.md @@ -0,0 +1,214 @@ +# summary_formats works as expected + + Code + res + Output + n sum mean + "xx." "xx.x" "xx.x" + sd se mean_sd + "xx.x" "xx.x" "xx.x (xx.x)" + mean_se mean_ci mean_sei + "xx.x (xx.x)" "(xx.xx, xx.xx)" "(xx.xx, xx.xx)" + mean_sdi mean_pval median + "(xx.xx, xx.xx)" "xx.xx" "xx.x" + mad median_ci quantiles + "xx.x" "(xx.xx, xx.xx)" "xx.x - xx.x" + iqr range min + "xx.x" "xx.x - xx.x" "xx.x" + max median_range cv + "xx.x" "xx.x (xx.x - xx.x)" "xx.x" + geom_mean geom_mean_ci geom_cv + "xx.x" "(xx.xx, xx.xx)" "xx.x" + +--- + + Code + res + Output + $n + [1] "xx." + + $count + [1] "xx." + + $count_fraction + function(x, ...) { + attr(x, "label") <- NULL + + if (any(is.na(x))) { + return("NA") + } + + checkmate::assert_vector(x) + checkmate::assert_integerish(x[1]) + assert_proportion_value(x[2], include_boundaries = TRUE) + + result <- if (x[1] == 0) { + "0" + } else { + paste0(x[1], " (", round(x[2] * 100, 1), "%)") + } + + return(result) + } + + + $n_blq + [1] "xx." + + $pval + [1] "x.xxxx | (<0.0001)" + + +# summary_labels works as expected + + Code + res + Output + n sum + "n" "Sum" + mean sd + "Mean" "SD" + se mean_sd + "SE" "Mean (SD)" + mean_se mean_ci + "Mean (SE)" "Mean 95% CI" + mean_sei mean_sdi + "Mean -/+ 1xSE" "Mean -/+ 1xSD" + mean_pval median + "Mean p-value (H0: mean = 0)" "Median" + mad median_ci + "Median Absolute Deviation" "Median 95% CI" + quantiles iqr + "25% and 75%-ile" "IQR" + range min + "Min - Max" "Minimum" + max median_range + "Maximum" "Median (Min - Max)" + cv geom_mean + "CV (%)" "Geometric Mean" + geom_mean_ci geom_cv + "Geometric Mean 95% CI" "CV % Geometric Mean" + +--- + + Code + res + Output + n count + "n" "count" + count_fraction n_blq + "count_fraction" "n_blq" + pval + "p-value (chi-squared test)" + +# summary_custom works as expected + + Code + res + Output + $stats + [1] "n" "sum" "mean" "sd" "se" + [6] "mean_sd" "mean_se" "mean_ci" "mean_sei" "mean_sdi" + [11] "mean_pval" "median" "mad" "median_ci" "quantiles" + [16] "iqr" "range" "min" "max" "median_range" + [21] "cv" "geom_mean" "geom_mean_ci" "geom_cv" + + $formats + n sum mean + "xx." "xx.x" "xx.x" + sd se mean_sd + "xx.x" "xx.x" "xx.x (xx.x)" + mean_se mean_ci mean_sei + "xx.x (xx.x)" "(xx.xx, xx.xx)" "(xx.xx, xx.xx)" + mean_sdi mean_pval median + "(xx.xx, xx.xx)" "xx.xx" "xx.x" + mad median_ci quantiles + "xx.x" "(xx.xx, xx.xx)" "xx.x - xx.x" + iqr range min + "xx.x" "xx.x - xx.x" "xx.x" + max median_range cv + "xx.x" "xx.x (xx.x - xx.x)" "xx.x" + geom_mean geom_mean_ci geom_cv + "xx.x" "(xx.xx, xx.xx)" "xx.x" + + $labels + n sum + "n" "Sum" + mean sd + "Mean" "SD" + se mean_sd + "SE" "Mean (SD)" + mean_se mean_ci + "Mean (SE)" "Mean 95% CI" + mean_sei mean_sdi + "Mean -/+ 1xSE" "Mean -/+ 1xSD" + mean_pval median + "Mean p-value (H0: mean = 0)" "Median" + mad median_ci + "Median Absolute Deviation" "Median 95% CI" + quantiles iqr + "25% and 75%-ile" "IQR" + range min + "Min - Max" "Minimum" + max median_range + "Maximum" "Median (Min - Max)" + cv geom_mean + "CV (%)" "Geometric Mean" + geom_mean_ci geom_cv + "Geometric Mean 95% CI" "CV % Geometric Mean" + + $indent_mods + n sum mean sd se mean_sd + 0 0 0 0 0 0 + mean_se mean_ci mean_sei mean_sdi mean_pval median + 0 0 0 0 0 0 + mad median_ci quantiles iqr range min + 0 0 0 0 0 0 + max median_range cv geom_mean geom_mean_ci geom_cv + 0 0 0 0 0 0 + + +--- + + Code + res + Output + $stats + [1] "n" "count" + + $formats + $formats$n + [1] "xx.xx" + + $formats$count + [1] "xx." + + + $labels + n count + "n" "#" + + $indent_mods + n count + 2 2 + + +# control_analyze_vars works with customized parameters + + Code + res + Output + $conf_level + [1] 0.9 + + $quantiles + [1] 0.1 0.9 + + $quantile_type + [1] 2 + + $test_mean + [1] 0 + + diff --git a/tests/testthat/test-analyze_variables.R b/tests/testthat/test-analyze_variables.R index 016d44c786..70d44f0914 100644 --- a/tests/testthat/test-analyze_variables.R +++ b/tests/testthat/test-analyze_variables.R @@ -1,51 +1,3 @@ -testthat::test_that("control_analyze_vars works with customized parameters", { - result <- control_analyze_vars( - conf_level = 0.9, - quantiles = c(0.1, 0.9) - ) - - res <- testthat::expect_silent(result) - testthat::expect_snapshot(res) -}) - -testthat::test_that("control_analyze_vars fails wrong inputs", { - testthat::expect_error(control_analyze_vars(quantiles = c(25, 75))) - testthat::expect_error(control_analyze_vars(conf_level = 95)) -}) - -testthat::test_that("summary_formats works as expected", { - result <- summary_formats() - res <- testthat::expect_silent(result) - testthat::expect_snapshot(res) - - result <- summary_formats(type = "counts", include_pval = TRUE) - res <- testthat::expect_silent(result) - testthat::expect_snapshot(res) -}) - -testthat::test_that("summary_labels works as expected", { - result <- summary_labels() - res <- testthat::expect_silent(result) - testthat::expect_snapshot(res) - - result <- summary_labels(type = "counts", include_pval = TRUE) - res <- testthat::expect_silent(result) - testthat::expect_snapshot(res) -}) - -testthat::test_that("summary_custom works as expected", { - result <- summary_custom() - res <- testthat::expect_silent(result) - testthat::expect_snapshot(res) - - result <- summary_custom( - type = "counts", stats_custom = c("n", "count"), - formats_custom = c(n = "xx.xx"), labels_custom = c(count = "#"), indent_mods_custom = 2L - ) - res <- testthat::expect_silent(result) - testthat::expect_snapshot(res) -}) - testthat::test_that("s_summary return NA for x length 0L", { x <- numeric() diff --git a/tests/testthat/test-summary_stats.R b/tests/testthat/test-summary_stats.R new file mode 100644 index 0000000000..281de0be1f --- /dev/null +++ b/tests/testthat/test-summary_stats.R @@ -0,0 +1,47 @@ +testthat::test_that("summary_formats works as expected", { + result <- summary_formats() + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + result <- summary_formats(type = "counts", include_pval = TRUE) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("summary_labels works as expected", { + result <- summary_labels() + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + result <- summary_labels(type = "counts", include_pval = TRUE) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("summary_custom works as expected", { + result <- summary_custom() + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + result <- summary_custom( + type = "counts", stats_custom = c("n", "count"), + formats_custom = c(n = "xx.xx"), labels_custom = c(count = "#"), indent_mods_custom = 2L + ) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("control_analyze_vars works with customized parameters", { + result <- control_analyze_vars( + conf_level = 0.9, + quantiles = c(0.1, 0.9) + ) + + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +}) + +testthat::test_that("control_analyze_vars fails wrong inputs", { + testthat::expect_error(control_analyze_vars(quantiles = c(25, 75))) + testthat::expect_error(control_analyze_vars(conf_level = 95)) +}) From 3d990980f85b1d6f37bb1afad26544099bd9ab56 Mon Sep 17 00:00:00 2001 From: "27856297+dependabot-preview[bot]@users.noreply.github.com" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Fri, 4 Aug 2023 10:20:22 +0000 Subject: [PATCH 33/33] [skip actions] Roxygen Man Pages Auto Update --- man/compare_variables.Rd | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd index de6271c6a2..076db930d3 100644 --- a/man/compare_variables.Rd +++ b/man/compare_variables.Rd @@ -78,10 +78,11 @@ by a statistics function.} \item{verbose}{(\code{logical})\cr Whether warnings and messages should be printed. Mainly used to print out information about factor casting. Defaults to \code{TRUE}.} -\item{.N_col}{(\code{count})\cr row-wise N (row group count) for the group of observations being analyzed -(i.e. with no column-based subsetting) that is passed by \code{rtables}.} +\item{.N_col}{(\code{integer})\cr column-wise N (column count) for the full column being analyzed that is typically +passed by \code{rtables}.} -\item{.N_row}{(\code{count})\cr column-wise N (column count) for the full column that is passed by \code{rtables}.} +\item{.N_row}{(\code{integer})\cr row-wise N (row group count) for the group of observations being analyzed +(i.e. with no column-based subsetting) that is typically passed by \code{rtables}.} \item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.}