From 9aa635808a7c8dd7038c7208870ff69a45d38c9f Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Wed, 14 Jun 2023 19:13:13 -0400
Subject: [PATCH 01/33] Refactor a_summary, deprecate create_afun_summary

---
 NAMESPACE                                    |   4 -
 R/summarize_colvars.R                        |   9 +-
 R/summarize_variables.R                      | 307 +++++++------------
 R/utils.R                                    |  21 +-
 man/create_afun_summary.Rd                   |  34 --
 man/extract_by_name.Rd                       |   7 +-
 man/summarize_colvars.Rd                     |   3 +
 man/summarize_variables.Rd                   | 107 ++-----
 man/summary_formats.Rd                       |   2 +-
 man/summary_labels.Rd                        |   7 +-
 tests/testthat/_snaps/summarize_variables.md |  41 ---
 tests/testthat/test-summarize_variables.R    |  27 --
 12 files changed, 176 insertions(+), 393 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 778c6f17da..8c20ce6c27 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,10 +4,6 @@ S3method(a_compare,character)
 S3method(a_compare,factor)
 S3method(a_compare,logical)
 S3method(a_compare,numeric)
-S3method(a_summary,character)
-S3method(a_summary,factor)
-S3method(a_summary,logical)
-S3method(a_summary,numeric)
 S3method(as.rtable,data.frame)
 S3method(h_coxreg_inter_effect,factor)
 S3method(h_coxreg_inter_effect,numeric)
diff --git a/R/summarize_colvars.R b/R/summarize_colvars.R
index b3386ebbc1..8d75b41b0e 100644
--- a/R/summarize_colvars.R
+++ b/R/summarize_colvars.R
@@ -62,15 +62,16 @@
 #' @export
 summarize_colvars <- function(lyt,
                               ...,
+                              na_level = NA_character_,
                               .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
                               .formats = NULL,
                               .labels = NULL,
                               .indent_mods = NULL) {
-  afun <- create_afun_summary(.stats, .formats, .labels, .indent_mods)
-
   analyze_colvars(
     lyt,
-    afun = afun,
-    extra_args = list(...)
+    afun = a_summary, # statistic selection and formatting arguments reach a_summary() through extra_args
+    extra_args = list(
+      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_level = na_level, ...
+    )
   )
 }
diff --git a/R/summarize_variables.R b/R/summarize_variables.R
index 9d82632d8c..3c25f7bc4c 100644
--- a/R/summarize_variables.R
+++ b/R/summarize_variables.R
@@ -22,14 +22,14 @@ control_summarize_vars <- function(conf_level = 0.95,
   checkmate::assert_vector(quantiles, len = 2)
   checkmate::assert_int(quantile_type, lower = 1, upper = 9)
   checkmate::assert_numeric(test_mean)
-  nullo <- lapply(quantiles, assert_proportion_value)
+  lapply(quantiles, assert_proportion_value)
   assert_proportion_value(conf_level)
   list(conf_level = conf_level, quantiles = quantiles, quantile_type = quantile_type, test_mean = test_mean)
 }
 
 #' Format Function for Descriptive Statistics
 #'
-#' Returns format patterns for descriptive statistics. The format is understood by the `rtables`.
+#' Returns format patterns for descriptive statistics. The format is understood by `rtables`.
 #'
 #' @param type (`string`)\cr choice of a summary data type. Only `counts` and `numeric` types are currently supported.
 #'
@@ -77,29 +77,46 @@ summary_formats <- function(type = "numeric") {
 #'
 #' Returns labels of descriptive statistics for numeric variables.
 #'
-#' @return A named `vector` of default statistic labels.
+#' @param type (`string`)\cr choice of a summary data type. Only `counts` and `numeric` types are currently supported.
+#'
+#' @return A named `vector` of default statistic labels for the given data type.
 #'
 #' @keywords internal
-summary_labels <- function() {
-  c(
-    mean = "Mean",
-    sum = "Sum",
-    sd = "SD",
-    se = "SE",
-    mean_sd = "Mean (SD)",
-    mean_se = "Mean (SE)",
-    median = "Median",
-    mad = "Median Absolute Deviation",
-    iqr = "IQR",
-    range = "Min - Max",
-    median_range = "Median (Min - Max)",
-    cv = "CV (%)",
-    min = "Minimum",
-    max = "Maximum",
-    geom_mean = "Geometric Mean",
-    geom_cv = "CV % Geometric Mean",
-    n = "n"
-  )
+summary_labels <- function(type = "numeric") {
+  if (type == "counts") {
+    c(
+      n = "n",
+      count = "count",
+      count_fraction = "count_fraction",
+      n_blq = "n_blq"
+    )
+  } else {
+    c(
+      n = "n",
+      sum = "Sum",
+      mean = "Mean",
+      sd = "SD",
+      se = "SE",
+      mean_sd = "Mean (SD)",
+      mean_se = "Mean (SE)",
+      mean_ci = "Mean 95% CI",
+      mean_sei = "Mean -/+ 1xSE",
+      mean_sdi = "Mean -/+ 1xSD",
+      mean_pval = "Mean p-value (H0: mean = 0)",
+      median = "Median",
+      mad = "Median Absolute Deviation",
+      median_ci = "Median 95% CI",
+      quantiles = "25% and 75%-ile",
+      iqr = "IQR",
+      range = "Min - Max",
+      cv = "CV (%)",
+      min = "Minimum",
+      max = "Maximum",
+      median_range = "Median (Min - Max)",
+      geom_mean = "Geometric Mean",
+      geom_cv = "CV % Geometric Mean"
+    )
+  }
 }
 
 #' Summarize Variables
@@ -408,9 +425,9 @@ s_summary.character <- function(x,
                                 verbose = TRUE,
                                 ...) {
   if (na.rm) {
-    y <- as_factor_keep_attributes(x, x_name = .var, verbose = verbose)
+    y <- as_factor_keep_attributes(x, verbose = verbose)
   } else {
-    y <- as_factor_keep_attributes(x, x_name = .var, verbose = verbose, na_level = "NA")
+    y <- as_factor_keep_attributes(x, verbose = verbose, na_level = "NA")
   }
 
   s_summary(
@@ -478,86 +495,83 @@ s_summary.logical <- function(x,
   y
 }
 
-#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()`.
-#'
-#' @return
-#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
-#'
-#' @export
-a_summary <- function(x,
-                      ...,
-                      .N_row, # nolint
-                      .N_col, # nolint
-                      .var) {
-  UseMethod("a_summary", x)
-}
-
 .a_summary_numeric_formats <- summary_formats()
 .a_summary_numeric_labels <- summary_labels()
-
-#' @describeIn summarize_variables Formatted analysis function method for `numeric` class.
-#'
-#' @examples
-#' # `a_summary.numeric`
-#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
-#'
-#' @export
-a_summary.numeric <- make_afun(
-  s_summary.numeric,
-  .formats = .a_summary_numeric_formats,
-  .labels = .a_summary_numeric_labels
-)
-
+.a_summary_numeric_indent_mods <- rep(0L, length(summary_labels())) %>% `names<-`(names(.a_summary_numeric_labels))
 .a_summary_counts_formats <- summary_formats(type = "counts")
+.a_summary_counts_labels <- summary_labels(type = "counts")
+.a_summary_counts_indent_mods <- rep(0L, length(.a_summary_counts_labels)) %>% `names<-`(names(.a_summary_counts_labels))
 
-#' @describeIn summarize_variables Formatted analysis function method for `factor` class.
+#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and
+#'   `summarize_colvars()`.
 #'
-#' @examples
-#' # `a_summary.factor`
-#' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting
-#' # functions can be applied correctly.
-#' afun <- make_afun(
-#'   getS3method("a_summary", "factor"),
-#'   .ungroup_stats = c("count", "count_fraction")
-#' )
-#' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
-#'
-#' @export
-a_summary.factor <- make_afun(
-  s_summary.factor,
-  .formats = .a_summary_counts_formats
-)
-
-#' @describeIn summarize_variables Formatted analysis function method for `character` class.
+#' @return
+#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
 #'
 #' @examples
-#' # `a_summary.character`
-#' afun <- make_afun(
-#'   getS3method("a_summary", "character"),
-#'   .ungroup_stats = c("count", "count_fraction")
-#' )
-#' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
+#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
+#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
+#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
 #'
 #' @export
-a_summary.character <- make_afun(
-  s_summary.character,
-  .formats = .a_summary_counts_formats
-)
+a_summary <- function(x,
+                      .N_col, # nolint
+                      .N_row, # nolint
+                      .stats = NULL,
+                      .formats = NULL,
+                      .labels = NULL,
+                      .indent_mods = NULL,
+                      na_level = NA_character_,
+                      ...) {
+  num <- is.numeric(x) # numeric input uses the numeric defaults, every other type the counts defaults
+  default_formats <- if (num) .a_summary_numeric_formats else .a_summary_counts_formats
+  default_labels <- if (num) .a_summary_numeric_labels else .a_summary_counts_labels
+  default_indents <- if (num) .a_summary_numeric_indent_mods else .a_summary_counts_indent_mods
+  user_labels <- names(.labels) # remembered so that caller-supplied labels are never overwritten below
+  if (is.null(.stats)) .stats <- names(default_formats)
+  if (is.null(.formats)) .formats <- default_formats
+  if (is.null(.labels)) .labels <- default_labels
+  # A single unnamed indent modifier is applied to every requested statistic.
+  if (length(.indent_mods) == 1 && is.null(names(.indent_mods))) {
+    .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats)
+  }
+  x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, ...)
+  if (num) { # labels attached by s_summary() replace the defaults, but not labels given by the caller
+    for (stat in setdiff(c("mean_ci", "mean_pval", "median_ci", "quantiles"), user_labels)) {
+      stat_label <- attr(x_stats[[stat]], "label")
+      if (!is.null(stat_label)) .labels[stat] <- stat_label
+    }
+  }
+  .stats <- intersect(.stats, names(x_stats))
+  x_stats <- x_stats[.stats]
+  if (!num && !is.logical(x)) {
+    # Give each level its own row; format/indent come from the parent statistic, else from the defaults.
+    for (stat in c("count", "count_fraction")) {
+      for (a in names(x_stats[[stat]])) {
+        a_lvl <- paste(stat, a, sep = ".")
+        .formats[a_lvl] <- if (stat %in% names(.formats)) .formats[stat] else default_formats[stat]
+        .labels[a_lvl] <- a
+        if (!is.null(.indent_mods)) {
+          .indent_mods[a_lvl] <- if (stat %in% names(.indent_mods)) .indent_mods[stat] else 0L
+        }
+      }
+    }

-#' @describeIn summarize_variables Formatted analysis function method for `logical` class.
-#'
-#' @examples
-#' # `a_summary.logical`
-#' afun <- make_afun(
-#'   getS3method("a_summary", "logical")
-#' )
-#' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
-#'
-#' @export
-a_summary.logical <- make_afun(
-  s_summary.logical,
-  .formats = .a_summary_counts_formats
-)
+    x_stats <- unlist(x_stats, recursive = FALSE)
+    .stats <- names(x_stats)
+  }
+  .formats_x <- extract_by_name(.formats, .stats, default_formats)
+  .labels_x <- extract_by_name(.labels, .stats, default_labels)
+  .indent_mods_x <- extract_by_name(.indent_mods, .stats, default_indents)
+  in_rows(
+    .list = x_stats,
+    .formats = .formats_x,
+    .names = .labels_x, .labels = .labels_x,
+    .indent_mods = .indent_mods_x,
+    .format_na_strs = na_level
+  )
+}
 
 #' Constructor Function for [summarize_vars()] and [summarize_colvars()]
 #'
@@ -572,97 +586,13 @@ a_summary.logical <- make_afun(
 #'
 #' @return Combined formatted analysis function for use in [summarize_vars()].
 #'
-#' @note Since [a_summary()] is generic and we want customization of the formatting arguments
-#'   via [rtables::make_afun()], we need to create another temporary generic function, with
-#'   corresponding customized methods. Then in order for the methods to be found,
-#'   we need to wrap them in a combined `afun`. Since this is required by two layout creating
-#'   functions (and possibly others in the future), we provide a constructor that does this:
-#'   [create_afun_summary()].
-#'
-#' @examples
-#' # `create_afun_summary()` to create combined `afun`
-#'
-#' afun <- create_afun_summary(
-#'   .stats = NULL,
-#'   .formats = c(median = "xx."),
-#'   .labels = c(median = "My median"),
-#'   .indent_mods = c(median = 1L)
-#' )
-#' ## Fabricated dataset.
-#' dta_test <- data.frame(
-#'   USUBJID = rep(1:6, each = 3),
-#'   PARAMCD = rep("lab", 6 * 3),
-#'   AVISIT  = rep(paste0("V", 1:3), 6),
-#'   ARM     = rep(LETTERS[1:3], rep(6, 3)),
-#'   AVAL    = c(9:1, rep(NA, 9))
-#' )
-#'
-#' l <- basic_table() %>%
-#'   split_cols_by(var = "ARM") %>%
-#'   split_rows_by(var = "AVISIT") %>%
-#'   analyze(vars = "AVAL", afun = afun)
-#'
-#' build_table(l, df = dta_test)
-#'
 #' @export
 create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
-  function(x,
-           ...,
-           .N_row, # nolint
-           .N_col, # nolint
-           .var) {
-    afun <- function(x, ...) {
-      UseMethod("afun", x)
-    }
-
-    numeric_stats <- afun_selected_stats(
-      .stats,
-      all_stats = names(.a_summary_numeric_formats)
-    )
-    afun.numeric <- make_afun( # nolint
-      a_summary.numeric,
-      .stats = numeric_stats,
-      .formats = extract_by_name(.formats, numeric_stats),
-      .labels = extract_by_name(.labels, numeric_stats),
-      .indent_mods = extract_by_name(.indent_mods, numeric_stats)
-    )
-
-    factor_stats <- afun_selected_stats(.stats, c("n", "count", "count_fraction"))
-    ungroup_stats <- afun_selected_stats(.stats, c("count", "count_fraction"))
-    afun.factor <- make_afun( # nolint
-      a_summary.factor,
-      .stats = factor_stats,
-      .formats = extract_by_name(.formats, factor_stats),
-      .labels = extract_by_name(.labels, factor_stats),
-      .indent_mods = extract_by_name(.indent_mods, factor_stats),
-      .ungroup_stats = ungroup_stats
-    )
-
-    afun.character <- make_afun( # nolint
-      a_summary.character,
-      .stats = factor_stats,
-      .formats = extract_by_name(.formats, factor_stats),
-      .labels = extract_by_name(.labels, factor_stats),
-      .indent_mods = extract_by_name(.indent_mods, factor_stats),
-      .ungroup_stats = ungroup_stats
-    )
-
-    afun.logical <- make_afun( # nolint
-      a_summary.logical,
-      .stats = factor_stats,
-      .formats = extract_by_name(.formats, factor_stats),
-      .labels = extract_by_name(.labels, factor_stats),
-      .indent_mods = extract_by_name(.indent_mods, factor_stats)
-    )
-
-    afun(
-      x = x,
-      ...,
-      .N_row = .N_row,
-      .N_col = .N_col,
-      .var = .var
-    )
-  }
+  lifecycle::deprecate_stop(
+    "0.8.2",
+    "create_afun_summary()",
+    "a_summary()"
+  )
 }
 
 #' @describeIn summarize_variables Layout-creating function which can take statistics function arguments
@@ -745,16 +675,15 @@ summarize_vars <- function(lyt,
                            .formats = NULL,
                            .labels = NULL,
                            .indent_mods = NULL) {
-  afun <- create_afun_summary(.stats, .formats, .labels, .indent_mods)
-
   analyze(
     lyt = lyt,
     vars = vars,
     var_labels = var_labels,
-    afun = afun,
+    afun = a_summary,
     nested = nested,
-    extra_args = list(...),
-    na_str = na_level,
+    extra_args = list(
+      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_level = na_level, ...
+    ),
     inclNAs = TRUE,
     show_labels = show_labels,
     table_names = table_names,
diff --git a/R/utils.R b/R/utils.R
index fa25eedad2..3a1cbf3f33 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -259,25 +259,34 @@ combine_vectors <- function(x, y) {
 #' - If `x` is `NULL`, then still always `NULL` is returned (same as in base function).
 #' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those
 #'   elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s.
+#' - If `x_defaults` is not `NULL`, these values will be used to fill in any values that are in
+#'   `names` but not in `x`.
 #'
 #' @param x (named `vector`)\cr where to extract named elements from.
 #' @param names (`character`)\cr vector of names to extract.
+#' @param x_defaults (named `vector`)\cr named vector of default values to fill in for each value in `names` if no
+#'   corresponding value in `x` exists.
 #'
 #' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`.
 #'
 #' @keywords internal
-extract_by_name <- function(x, names) {
-  if (is.null(x)) {
-    return(NULL)
-  }
+extract_by_name <- function(x, names, x_defaults = NULL) {
+  if (is.null(x)) return(NULL)
   checkmate::assert_named(x)
   checkmate::assert_character(names)
   which_extract <- intersect(names(x), names)
+  x_fill <- c() # starts empty, or from the defaults; entries found in `x` override them below
+  if (!is.null(x_defaults)) {
+    checkmate::assert_named(x_defaults)
+    x_fill <- x_defaults
+  }
   if (length(which_extract) > 0) {
-    x[which_extract]
+    x_fill[which_extract] <- x[which_extract]
+    x_fill <- x_fill[intersect(names, names(x_fill))] # requested names only, never `NA` for unknown names
   } else {
-    NULL
+    x_fill <- NULL # no requested name is present in `x`: `NULL` is returned even if defaults were given
   }
+  x_fill
 }
 
 #' Labels for Adverse Event Baskets
diff --git a/man/create_afun_summary.Rd b/man/create_afun_summary.Rd
index 69c3086158..e7df14a6d2 100644
--- a/man/create_afun_summary.Rd
+++ b/man/create_afun_summary.Rd
@@ -25,37 +25,3 @@ Combined formatted analysis function for use in \code{\link[=summarize_vars]{sum
 
 Constructor function which creates a combined formatted analysis function.
 }
-\note{
-Since \code{\link[=a_summary]{a_summary()}} is generic and we want customization of the formatting arguments
-via \code{\link[rtables:make_afun]{rtables::make_afun()}}, we need to create another temporary generic function, with
-corresponding customized methods. Then in order for the methods to be found,
-we need to wrap them in a combined \code{afun}. Since this is required by two layout creating
-functions (and possibly others in the future), we provide a constructor that does this:
-\code{\link[=create_afun_summary]{create_afun_summary()}}.
-}
-\examples{
-# `create_afun_summary()` to create combined `afun`
-
-afun <- create_afun_summary(
-  .stats = NULL,
-  .formats = c(median = "xx."),
-  .labels = c(median = "My median"),
-  .indent_mods = c(median = 1L)
-)
-## Fabricated dataset.
-dta_test <- data.frame(
-  USUBJID = rep(1:6, each = 3),
-  PARAMCD = rep("lab", 6 * 3),
-  AVISIT  = rep(paste0("V", 1:3), 6),
-  ARM     = rep(LETTERS[1:3], rep(6, 3)),
-  AVAL    = c(9:1, rep(NA, 9))
-)
-
-l <- basic_table() \%>\%
-  split_cols_by(var = "ARM") \%>\%
-  split_rows_by(var = "AVISIT") \%>\%
-  analyze(vars = "AVAL", afun = afun)
-
-build_table(l, df = dta_test)
-
-}
diff --git a/man/extract_by_name.Rd b/man/extract_by_name.Rd
index bc328b2a20..186478ec0a 100644
--- a/man/extract_by_name.Rd
+++ b/man/extract_by_name.Rd
@@ -4,12 +4,15 @@
 \alias{extract_by_name}
 \title{Extract Elements by Name}
 \usage{
-extract_by_name(x, names)
+extract_by_name(x, names, x_defaults = NULL)
 }
 \arguments{
 \item{x}{(named \code{vector})\cr where to extract named elements from.}
 
 \item{names}{(\code{character})\cr vector of names to extract.}
+
+\item{x_defaults}{(named \code{vector})\cr named vector of default values to fill in for each value in \code{names} if no
+corresponding value in \code{x} exists.}
 }
 \value{
 \code{NULL} if \code{x} is \code{NULL}, otherwise the extracted elements from \code{x}.
@@ -23,6 +26,8 @@ Differences to the standard \code{[} function are:
 \item If \code{x} is \code{NULL}, then still always \code{NULL} is returned (same as in base function).
 \item If \code{x} is not \code{NULL}, then the intersection of its names is made with \code{names} and those
 elements are returned. That is, \code{names} which don't appear in \code{x} are not returned as \code{NA}s.
+\item If \code{x_defaults} is not \code{NULL}, these values will be used to fill in any values that are in
+\code{names} but not in \code{x}.
 }
 }
 \keyword{internal}
diff --git a/man/summarize_colvars.Rd b/man/summarize_colvars.Rd
index 005cfae98f..d278f80b11 100644
--- a/man/summarize_colvars.Rd
+++ b/man/summarize_colvars.Rd
@@ -7,6 +7,7 @@
 summarize_colvars(
   lyt,
   ...,
+  na_level = NA_character_,
   .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
   .formats = NULL,
   .labels = NULL,
@@ -18,6 +19,8 @@ summarize_colvars(
 
 \item{...}{arguments passed to \code{s_summary()}.}
 
+\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.}
+
 \item{.stats}{(\code{character})\cr statistics to select for the table.}
 
 \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
diff --git a/man/summarize_variables.Rd b/man/summarize_variables.Rd
index f8aa8e3934..f0b624a61c 100644
--- a/man/summarize_variables.Rd
+++ b/man/summarize_variables.Rd
@@ -8,10 +8,6 @@
 \alias{s_summary.character}
 \alias{s_summary.logical}
 \alias{a_summary}
-\alias{a_summary.numeric}
-\alias{a_summary.factor}
-\alias{a_summary.character}
-\alias{a_summary.logical}
 \alias{summarize_vars}
 \title{Summarize Variables}
 \usage{
@@ -57,45 +53,15 @@ s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...)
   ...
 )
 
-a_summary(x, ..., .N_row, .N_col, .var)
-
-\method{a_summary}{numeric}(
-  x,
-  na.rm = TRUE,
-  denom,
-  .N_row,
-  .N_col,
-  .var,
-  control = control_summarize_vars(),
-  ...
-)
-
-\method{a_summary}{factor}(
-  x,
-  na.rm = TRUE,
-  denom = c("n", "N_row", "N_col"),
-  .N_row,
-  .N_col,
-  ...
-)
-
-\method{a_summary}{character}(
+a_summary(
   x,
-  na.rm = TRUE,
-  denom = c("n", "N_row", "N_col"),
-  .N_row,
   .N_col,
-  .var,
-  verbose = TRUE,
-  ...
-)
-
-\method{a_summary}{logical}(
-  x,
-  na.rm = TRUE,
-  denom = c("n", "N_row", "N_col"),
   .N_row,
-  .N_col,
+  .stats = NULL,
+  .formats = NULL,
+  .labels = NULL,
+  .indent_mods = NULL,
+  na_level = NA_character_,
   ...
 )
 
@@ -150,6 +116,18 @@ See more about \code{type} in \code{\link[stats:quantile]{stats::quantile()}}.
 \item{verbose}{(\code{logical})\cr Defaults to \code{TRUE}, which prints out warnings and messages. It is mainly used
 to print out information about factor casting.}
 
+\item{.stats}{(\code{character})\cr statistics to select for the table.}
+
+\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
+
+\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).}
+
+\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector
+should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation
+for that statistic's row label.}
+
+\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.}
+
 \item{lyt}{(\code{layout})\cr input layout where analyses will be added to.}
 
 \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.}
@@ -160,8 +138,6 @@ to print out information about factor casting.}
 possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split
 underneath analyses, which is not allowed.}
 
-\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.}
-
 \item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".}
 
 \item{table_names}{(\code{character})\cr this can be customized in case that the same \code{vars} are analyzed multiple times,
@@ -169,16 +145,6 @@ to avoid warnings from \code{rtables}.}
 
 \item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group
 defined by this split instruction, or \code{NA_character_} (the default) for no section divider.}
-
-\item{.stats}{(\code{character})\cr statistics to select for the table.}
-
-\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
-
-\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).}
-
-\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector
-should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation
-for that statistic's row label.}
 }
 \value{
 \itemize{
@@ -264,15 +230,8 @@ conversion to factor (with a warning) and then forwards to the method for factor
 
 \item \code{s_summary(logical)}: Method for \code{logical} class.
 
-\item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{summarize_vars()}.
-
-\item \code{a_summary(numeric)}: Formatted analysis function method for \code{numeric} class.
-
-\item \code{a_summary(factor)}: Formatted analysis function method for \code{factor} class.
-
-\item \code{a_summary(character)}: Formatted analysis function method for \code{character} class.
-
-\item \code{a_summary(logical)}: Formatted analysis function method for \code{logical} class.
+\item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{summarize_vars()} and
+\code{summarize_colvars()}.
 
 \item \code{summarize_vars()}: Layout-creating function which can take statistics function arguments
 and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}.
@@ -374,30 +333,10 @@ x <- c(TRUE, FALSE, TRUE, TRUE)
 s_summary(x, denom = "N_row", .N_row = 10L)
 s_summary(x, denom = "N_col", .N_col = 20L)
 
-# `a_summary.numeric`
 a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
-
-# `a_summary.factor`
-# We need to ungroup `count` and `count_fraction` first so that the rtables formatting
-# functions can be applied correctly.
-afun <- make_afun(
-  getS3method("a_summary", "factor"),
-  .ungroup_stats = c("count", "count_fraction")
-)
-afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
-
-# `a_summary.character`
-afun <- make_afun(
-  getS3method("a_summary", "character"),
-  .ungroup_stats = c("count", "count_fraction")
-)
-afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
-
-# `a_summary.logical`
-afun <- make_afun(
-  getS3method("a_summary", "logical")
-)
-afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
+a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
+a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
 
 ## Fabricated dataset.
 dta_test <- data.frame(
diff --git a/man/summary_formats.Rd b/man/summary_formats.Rd
index 690994c351..b22355fa50 100644
--- a/man/summary_formats.Rd
+++ b/man/summary_formats.Rd
@@ -13,6 +13,6 @@ summary_formats(type = "numeric")
 A named \code{vector} of default statistic formats for the given data type.
 }
 \description{
-Returns format patterns for descriptive statistics. The format is understood by the \code{rtables}.
+Returns format patterns for descriptive statistics. The format is understood by \code{rtables}.
 }
 \keyword{internal}
diff --git a/man/summary_labels.Rd b/man/summary_labels.Rd
index a7cc88596a..b30caa625c 100644
--- a/man/summary_labels.Rd
+++ b/man/summary_labels.Rd
@@ -4,10 +4,13 @@
 \alias{summary_labels}
 \title{Label Function for Descriptive Statistics}
 \usage{
-summary_labels()
+summary_labels(type = "numeric")
+}
+\arguments{
+\item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.}
 }
 \value{
-A named \code{vector} of default statistic labels.
+A named \code{vector} of default statistic labels for the given data type.
 }
 \description{
 Returns labels of descriptive statistics for numeric variables.
diff --git a/tests/testthat/_snaps/summarize_variables.md b/tests/testthat/_snaps/summarize_variables.md
index 66a86bcb1f..a7a3db1ea3 100644
--- a/tests/testthat/_snaps/summarize_variables.md
+++ b/tests/testthat/_snaps/summarize_variables.md
@@ -920,47 +920,6 @@
       [1] 0
       
 
-# create_afun_summary creates an `afun` that works
-
-    Code
-      res
-    Output
-                               A              B          C    
-      ————————————————————————————————————————————————————————
-      V1                                                      
-        AVAL                                                  
-          n                    2              1          0    
-            My median          8              3          NA   
-          Min - Max        6.0 - 9.0      3.0 - 3.0      NA   
-          Mean 95% CI   (-11.56, 26.56)      NA          NA   
-        ARM                                                   
-          n                    2              2          2    
-          A                2 (100%)           0          0    
-          B                    0          2 (100%)       0    
-          C                    0              0       2 (100%)
-      V2                                                      
-        AVAL                                                  
-          n                    2              1          0    
-            My median          6              2          NA   
-          Min - Max        5.0 - 8.0      2.0 - 2.0      NA   
-          Mean 95% CI   (-12.56, 25.56)      NA          NA   
-        ARM                                                   
-          n                    2              2          2    
-          A                2 (100%)           0          0    
-          B                    0          2 (100%)       0    
-          C                    0              0       2 (100%)
-      V3                                                      
-        AVAL                                                  
-          n                    2              1          0    
-            My median          6              1          NA   
-          Min - Max        4.0 - 7.0      1.0 - 1.0      NA   
-          Mean 95% CI   (-13.56, 24.56)      NA          NA   
-        ARM                                                   
-          n                    2              2          2    
-          A                2 (100%)           0          0    
-          B                    0          2 (100%)       0    
-          C                    0              0       2 (100%)
-
 # `summarize_vars` works with healthy input, default `na.rm = TRUE`.
 
     Code
diff --git a/tests/testthat/test-summarize_variables.R b/tests/testthat/test-summarize_variables.R
index c98635e7dc..6e163c6d76 100644
--- a/tests/testthat/test-summarize_variables.R
+++ b/tests/testthat/test-summarize_variables.R
@@ -160,33 +160,6 @@ testthat::test_that("s_summary works with logical vectors and by if requested do
   testthat::expect_snapshot(res)
 })
 
-testthat::test_that("create_afun_summary creates an `afun` that works", {
-  afun <- create_afun_summary(
-    .stats = c("n", "count_fraction", "median", "range", "mean_ci"),
-    .formats = c(median = "xx."),
-    .labels = c(median = "My median"),
-    .indent_mods = c(median = 1L)
-  )
-  dta_test <- data.frame(
-    USUBJID = rep(1:6, each = 3),
-    PARAMCD = rep("lab", 6 * 3),
-    AVISIT = rep(paste0("V", 1:3), 6),
-    ARM = rep(LETTERS[1:3], rep(6, 3)),
-    AVAL = c(9:1, rep(NA, 9)),
-    stringsAsFactors = TRUE
-  )
-
-  l <- basic_table() %>%
-    split_cols_by(var = "ARM") %>%
-    split_rows_by(var = "AVISIT") %>%
-    analyze(vars = c("AVAL", "ARM"), afun = afun)
-
-  result <- build_table(l, df = dta_test)
-
-  res <- testthat::expect_silent(result)
-  testthat::expect_snapshot(res)
-})
-
 testthat::test_that("`summarize_vars` works with healthy input, default `na.rm = TRUE`.", {
   dta_test <- data.frame(AVAL = c(1:4, NA, NA))
 

From c0596583de14ce12c1343bf07c6712a18b21cc07 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Wed, 14 Jun 2023 20:32:18 -0400
Subject: [PATCH 02/33] Refactor a_compare, deprecate create_afun_compare

---
 NAMESPACE                  |   4 -
 R/compare_variables.R      | 284 +++++++++++--------------------------
 R/summarize_variables.R    |   8 +-
 man/compare_variables.Rd   | 103 +++++---------
 man/create_afun_compare.Rd |  36 +----
 man/create_afun_summary.Rd |   8 +-
 6 files changed, 130 insertions(+), 313 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 8c20ce6c27..35026811bf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,9 +1,5 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(a_compare,character)
-S3method(a_compare,factor)
-S3method(a_compare,logical)
-S3method(a_compare,numeric)
 S3method(as.rtable,data.frame)
 S3method(h_coxreg_inter_effect,factor)
 S3method(h_coxreg_inter_effect,numeric)
diff --git a/R/compare_variables.R b/R/compare_variables.R
index 4ce447f9bc..246b72634d 100644
--- a/R/compare_variables.R
+++ b/R/compare_variables.R
@@ -244,119 +244,90 @@ s_compare.logical <- function(x,
   y
 }
 
+.a_compare_numeric_formats <- c(.a_summary_numeric_formats, pval = "x.xxxx | (<0.0001)")
+.a_compare_numeric_labels <- c(.a_summary_numeric_labels, pval = "p-value (t-test)")
+.a_compare_numeric_indent_mods <- c(.a_summary_numeric_indent_mods, pval = 0L)
+.a_compare_counts_formats <- c(.a_summary_counts_formats, pval = "x.xxxx | (<0.0001)")
+.a_compare_counts_labels <- c(.a_summary_counts_labels, pval = "p-value (chi-squared test)")
+.a_compare_counts_indent_mods <- c(.a_summary_counts_indent_mods, pval = 0L)
+
 #' @describeIn compare_variables Formatted analysis function which is used as `afun`
 #'   in `compare_vars()`.
 #'
 #' @return
 #' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()].
 #'
+#' @examples
+#' a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla")
+#' a_compare(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE)
+#' a_compare(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE)
+#' a_compare(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE)
+#'
 #' @export
 a_compare <- function(x,
+                      .N_col,
+                      .N_row,
                       .ref_group,
                       .in_ref_col,
-                      ...,
-                      .var) {
-  UseMethod("a_compare", x)
-}
-
-#' @describeIn compare_variables Formatted analysis function method for `numeric` class.
-#'
-#' @examples
-#' # `a_compare.numeric`
-#' a_compare(
-#'   rnorm(10, 5, 1),
-#'   .ref_group = rnorm(20, -5, 1),
-#'   .in_ref_col = FALSE,
-#'   .var = "bla"
-#' )
-#'
-#' @export
-a_compare.numeric <- make_afun(
-  s_compare.numeric,
-  .formats = c(
-    .a_summary_numeric_formats,
-    pval = "x.xxxx | (<0.0001)"
-  ),
-  .labels = c(
-    .a_summary_numeric_labels,
-    pval = "p-value (t-test)"
-  ),
-  .null_ref_cells = FALSE
-)
-
-.a_compare_counts_formats <- c(
-  .a_summary_counts_formats,
-  pval = "x.xxxx | (<0.0001)"
-)
-
-.a_compare_counts_labels <- c(
-  pval = "p-value (chi-squared test)"
-)
-
-#' @describeIn compare_variables Formatted analysis function method for `factor` class.
-#'
-#' @examples
-#' # `a_compare.factor`
-#' # We need to ungroup `count` and `count_fraction` first so that the `rtables` formatting
-#' # functions can be applied correctly.
-#' afun <- make_afun(
-#'   getS3method("a_compare", "factor"),
-#'   .ungroup_stats = c("count", "count_fraction")
-#' )
-#' x <- factor(c("a", "a", "b", "c", "a"))
-#' y <- factor(c("a", "a", "b", "c"))
-#' afun(x, .ref_group = y, .in_ref_col = FALSE)
-#'
-#' @export
-a_compare.factor <- make_afun(
-  s_compare.factor,
-  .formats = .a_compare_counts_formats,
-  .labels = .a_compare_counts_labels,
-  .null_ref_cells = FALSE
-)
-
-#' @describeIn compare_variables Formatted analysis function method for `character` class.
-#'
-#' @examples
-#' # `a_compare.character`
-#' afun <- make_afun(
-#'   getS3method("a_compare", "character"),
-#'   .ungroup_stats = c("count", "count_fraction")
-#' )
-#' x <- c("A", "B", "A", "C")
-#' y <- c("B", "A", "C")
-#' afun(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
-#'
-#' @export
-a_compare.character <- make_afun(
-  s_compare.character,
-  .formats = .a_compare_counts_formats,
-  .labels = .a_compare_counts_labels,
-  .null_ref_cells = FALSE
-)
+                      .stats = NULL,
+                      .formats = NULL,
+                      .labels = NULL,
+                      .indent_mods = NULL,
+                      na_level = NA_character_,
+                      ...) {
+  # browser()
+  if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats)
+  if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats
+  if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels
+  if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) {
+    .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats)
+  }
+  x_stats <- s_compare(x = x, .N_col = .N_col, .N_row = .N_row, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...)
+  if (is.numeric(x)) {
+    .labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply(
+      c("mean_ci", "mean_pval", "median_ci", "quantiles"),
+      function(x) attr(x_stats[[x]], "label")
+    )
+  }
+  .stats <- intersect(.stats, names(x_stats))
+  x_stats <- x_stats[.stats]
+  # browser()
+  if (!is.numeric(x) && !is.logical(x)) {
+    for (stat in c("count", "count_fraction")) {
+      for (a in names(x_stats[[stat]])) {
+        a_lvl <- paste(stat, a, sep = ".")
+        .stats <- c(.stats, a_lvl)
+        .formats[a_lvl] <- .formats[stat]
+        .labels[a_lvl] <- a
+        .indent_mods[a_lvl] <- .indent_mods[stat]
+      }
+    }
+    if (.in_ref_col) x_stats[["pval"]] <- "pvalue"
+    x_stats <- unlist(x_stats, recursive = FALSE)
+    if (.in_ref_col) x_stats[["pval"]] <- character()
+    .stats <- names(x_stats)
+  }
+  .formats_x <- extract_by_name(
+    .formats, .stats, if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats
+  )
+  .labels_x <- extract_by_name(.labels, .stats, if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels)
+  .indent_mods_x <- extract_by_name(
+    .indent_mods, .stats, if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods
+  )
 
-#' @describeIn compare_variables Formatted analysis function method for `logical` class.
-#'
-#' @examples
-#' # `a_compare.logical`
-#' afun <- make_afun(
-#'   getS3method("a_compare", "logical")
-#' )
-#' x <- c(TRUE, FALSE, FALSE, TRUE, TRUE)
-#' y <- c(TRUE, FALSE)
-#' afun(x, .ref_group = y, .in_ref_col = FALSE)
-#'
-#' @export
-a_compare.logical <- make_afun(
-  s_compare.logical,
-  .formats = .a_compare_counts_formats,
-  .labels = .a_compare_counts_labels,
-  .null_ref_cells = FALSE
-)
+  in_rows(
+    .list = x_stats,
+    .formats = .formats_x,
+    .names = .labels_x,
+    .labels = .labels_x,
+    .indent_mods = .indent_mods_x,
+    .format_na_strs = na_level
+  )
+}
 
 #' Constructor Function for [compare_vars()]
 #'
-#' @description `r lifecycle::badge("stable")`
+#' @description `r lifecycle::badge("deprecated")`
 #'
 #' Constructor function which creates a combined formatted analysis function.
 #'
@@ -367,110 +338,20 @@ a_compare.logical <- make_afun(
 #'
 #' @return Combined formatted analysis function for use in [compare_vars()].
 #'
-#' @note Since [a_compare()] is generic and we want customization of the formatting arguments
-#'   via [rtables::make_afun()], we need to create another temporary generic function, with
-#'   corresponding customized methods. Then in order for the methods to be found,
-#'   we need to wrap them in a combined `afun`. Since this is required by two layout creating
-#'   functions (and possibly others in the future), we provide a constructor that does this:
-#'   [create_afun_compare()].
+#' @note This function has been deprecated in favor of direct implementation of `a_compare()`.
 #'
 #' @seealso [compare_vars()]
 #'
-#' @examples
-#' # `create_afun_compare()` to create combined `afun`
-#'
-#' afun <- create_afun_compare(
-#'   .stats = c("n", "count_fraction", "mean_sd", "pval"),
-#'   .indent_mods = c(pval = 1L)
-#' )
-#'
-#' lyt <- basic_table() %>%
-#'   split_cols_by("ARMCD", ref_group = "ARM A") %>%
-#'   analyze(
-#'     "AGE",
-#'     afun = afun,
-#'     show_labels = "visible"
-#'   )
-#' build_table(lyt, df = tern_ex_adsl)
-#'
-#' lyt <- basic_table() %>%
-#'   split_cols_by("ARMCD", ref_group = "ARM A") %>%
-#'   analyze(
-#'     "SEX",
-#'     afun = afun,
-#'     show_labels = "visible"
-#'   )
-#' build_table(lyt, df = tern_ex_adsl)
-#'
 #' @export
 create_afun_compare <- function(.stats = NULL,
                                 .formats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
-  function(x,
-           .ref_group,
-           .in_ref_col,
-           ...,
-           .var) {
-    afun <- function(x, ...) {
-      UseMethod("afun", x)
-    }
-
-    numeric_stats <- afun_selected_stats(
-      .stats,
-      all_stats = c(names(.a_summary_numeric_formats), "pval")
-    )
-    afun.numeric <- make_afun( # nolint
-      a_compare.numeric,
-      .stats = numeric_stats,
-      .formats = extract_by_name(.formats, numeric_stats),
-      .labels = extract_by_name(.labels, numeric_stats),
-      .indent_mods = extract_by_name(.indent_mods, numeric_stats),
-      .null_ref_cells = FALSE
-    )
-
-    factor_stats <- afun_selected_stats(
-      .stats,
-      all_stats = names(.a_compare_counts_formats)
-    )
-    ungroup_stats <- afun_selected_stats(.stats, c("count", "count_fraction"))
-    afun.factor <- make_afun( # nolint
-      a_compare.factor,
-      .stats = factor_stats,
-      .formats = extract_by_name(.formats, factor_stats),
-      .labels = extract_by_name(.labels, factor_stats),
-      .indent_mods = extract_by_name(.indent_mods, factor_stats),
-      .ungroup_stats = ungroup_stats,
-      .null_ref_cells = FALSE
-    )
-
-    afun.character <- make_afun( # nolint
-      a_compare.character,
-      .stats = factor_stats,
-      .formats = extract_by_name(.formats, factor_stats),
-      .labels = extract_by_name(.labels, factor_stats),
-      .indent_mods = extract_by_name(.indent_mods, factor_stats),
-      .ungroup_stats = ungroup_stats,
-      .null_ref_cells = FALSE
-    )
-
-    afun.logical <- make_afun( # nolint
-      a_compare.logical,
-      .stats = factor_stats,
-      .formats = extract_by_name(.formats, factor_stats),
-      .labels = extract_by_name(.labels, factor_stats),
-      .indent_mods = extract_by_name(.indent_mods, factor_stats),
-      .null_ref_cells = FALSE
-    )
-
-    afun(
-      x = x,
-      .ref_group = .ref_group,
-      .in_ref_col = .in_ref_col,
-      ...,
-      .var = .var
-    )
-  }
+  lifecycle::deprecate_stop(
+    "0.8.2",
+    "create_afun_compare()",
+    "a_compare()"
+  )
 }
 
 #' @describeIn compare_variables Layout-creating function which can take statistics function arguments
@@ -515,22 +396,23 @@ compare_vars <- function(lyt,
                          na_level = NA_character_,
                          show_labels = "default",
                          table_names = vars,
+                         section_div = NA_character_,
                          .stats = c("n", "mean_sd", "count_fraction", "pval"),
                          .formats = NULL,
                          .labels = NULL,
                          .indent_mods = NULL) {
-  afun <- create_afun_compare(.stats, .formats, .labels, .indent_mods)
-
   analyze(
     lyt = lyt,
     vars = vars,
     var_labels = var_labels,
-    afun = afun,
+    afun = a_compare,
     nested = nested,
-    extra_args = list(...),
-    na_str = na_level,
+    extra_args = list(
+      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_level = na_level, ...
+    ),
     inclNAs = TRUE,
     show_labels = show_labels,
-    table_names = table_names
+    table_names = table_names,
+    section_div = section_div
   )
 }
diff --git a/R/summarize_variables.R b/R/summarize_variables.R
index 3c25f7bc4c..e5f8861571 100644
--- a/R/summarize_variables.R
+++ b/R/summarize_variables.R
@@ -558,7 +558,7 @@ a_summary <- function(x,
   .formats_x <- extract_by_name(
     .formats, .stats, if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats
   )
-  .labels_x <- extract_by_name(.labels, .stats, if (is.numeric(x)) .a_summary_numeric_labels)
+  .labels_x <- extract_by_name(.labels, .stats, if (is.numeric(x)) .a_summary_numeric_labels else .a_summary_counts_labels)
   .indent_mods_x <- extract_by_name(
     .indent_mods, .stats, if (is.numeric(x)) .a_summary_numeric_indent_mods else .a_summary_counts_indent_mods
   )
@@ -575,7 +575,7 @@ a_summary <- function(x,
 
 #' Constructor Function for [summarize_vars()] and [summarize_colvars()]
 #'
-#' @description `r lifecycle::badge("stable")`
+#' @description `r lifecycle::badge("deprecated")`
 #'
 #' Constructor function which creates a combined formatted analysis function.
 #'
@@ -586,6 +586,10 @@ a_summary <- function(x,
 #'
 #' @return Combined formatted analysis function for use in [summarize_vars()].
 #'
+#' @note This function has been deprecated in favor of direct implementation of `a_summary()`.
+#'
+#' @seealso [summarize_vars()]
+#'
 #' @export
 create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
   lifecycle::deprecate_stop(
diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd
index 2626f42de8..b61c7e8f00 100644
--- a/man/compare_variables.Rd
+++ b/man/compare_variables.Rd
@@ -8,10 +8,6 @@
 \alias{s_compare.character}
 \alias{s_compare.logical}
 \alias{a_compare}
-\alias{a_compare.numeric}
-\alias{a_compare.factor}
-\alias{a_compare.character}
-\alias{a_compare.logical}
 \alias{compare_vars}
 \title{Compare Variables Between Groups}
 \usage{
@@ -34,25 +30,20 @@ s_compare(x, .ref_group, .in_ref_col, ...)
 
 \method{s_compare}{logical}(x, .ref_group, .in_ref_col, na.rm = TRUE, denom = "n", ...)
 
-a_compare(x, .ref_group, .in_ref_col, ..., .var)
-
-\method{a_compare}{numeric}(x, .ref_group, .in_ref_col, ...)
-
-\method{a_compare}{factor}(x, .ref_group, .in_ref_col, denom = "n", na.rm = TRUE, ...)
-
-\method{a_compare}{character}(
+a_compare(
   x,
+  .N_col,
+  .N_row,
   .ref_group,
   .in_ref_col,
-  denom = "n",
-  na.rm = TRUE,
-  .var,
-  verbose = TRUE,
+  .stats = NULL,
+  .formats = NULL,
+  .labels = NULL,
+  .indent_mods = NULL,
+  na_level = NA_character_,
   ...
 )
 
-\method{a_compare}{logical}(x, .ref_group, .in_ref_col, na.rm = TRUE, denom = "n", ...)
-
 compare_vars(
   lyt,
   vars,
@@ -62,6 +53,7 @@ compare_vars(
   na_level = NA_character_,
   show_labels = "default",
   table_names = vars,
+  section_div = NA_character_,
   .stats = c("n", "mean_sd", "count_fraction", "pval"),
   .formats = NULL,
   .labels = NULL,
@@ -88,6 +80,23 @@ by a statistics function.}
 \item{verbose}{(\code{logical})\cr Whether warnings and messages should be printed. Mainly used
 to print out information about factor casting. Defaults to \code{TRUE}.}
 
+\item{.N_col}{(\code{count})\cr column-wise N (column count) for the full column that is passed by \code{rtables}.}
+
+\item{.N_row}{(\code{count})\cr row-wise N (row group count) for the group of observations being analyzed
+(i.e. with no column-based subsetting) that is passed by \code{rtables}.}
+
+\item{.stats}{(\code{character})\cr statistics to select for the table.}
+
+\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
+
+\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).}
+
+\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector
+should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation
+for that statistic's row label.}
+
+\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.}
+
 \item{lyt}{(\code{layout})\cr input layout where analyses will be added to.}
 
 \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.}
@@ -98,22 +107,13 @@ to print out information about factor casting. Defaults to \code{TRUE}.}
 possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split
 underneath analyses, which is not allowed.}
 
-\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.}
-
 \item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".}
 
 \item{table_names}{(\code{character})\cr this can be customized in case that the same \code{vars} are analyzed multiple times,
 to avoid warnings from \code{rtables}.}
 
-\item{.stats}{(\code{character})\cr statistics to select for the table.}
-
-\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
-
-\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).}
-
-\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector
-should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation
-for that statistic's row label.}
+\item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group
+defined by this split instruction, or \code{NA_character_} (the default) for no section divider.}
 }
 \value{
 \itemize{
@@ -154,14 +154,6 @@ is used. If missing values are not removed, then they are counted as \code{FALSE
 \item \code{a_compare()}: Formatted analysis function which is used as \code{afun}
 in \code{compare_vars()}.
 
-\item \code{a_compare(numeric)}: Formatted analysis function method for \code{numeric} class.
-
-\item \code{a_compare(factor)}: Formatted analysis function method for \code{factor} class.
-
-\item \code{a_compare(character)}: Formatted analysis function method for \code{character} class.
-
-\item \code{a_compare(logical)}: Formatted analysis function method for \code{logical} class.
-
 \item \code{compare_vars()}: Layout-creating function which can take statistics function arguments
 and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}.
 
@@ -240,41 +232,10 @@ y <- c(NA, NA, NA, NA, FALSE)
 s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
 s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
 
-# `a_compare.numeric`
-a_compare(
-  rnorm(10, 5, 1),
-  .ref_group = rnorm(20, -5, 1),
-  .in_ref_col = FALSE,
-  .var = "bla"
-)
-
-# `a_compare.factor`
-# We need to ungroup `count` and `count_fraction` first so that the `rtables` formatting
-# functions can be applied correctly.
-afun <- make_afun(
-  getS3method("a_compare", "factor"),
-  .ungroup_stats = c("count", "count_fraction")
-)
-x <- factor(c("a", "a", "b", "c", "a"))
-y <- factor(c("a", "a", "b", "c"))
-afun(x, .ref_group = y, .in_ref_col = FALSE)
-
-# `a_compare.character`
-afun <- make_afun(
-  getS3method("a_compare", "character"),
-  .ungroup_stats = c("count", "count_fraction")
-)
-x <- c("A", "B", "A", "C")
-y <- c("B", "A", "C")
-afun(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
-
-# `a_compare.logical`
-afun <- make_afun(
-  getS3method("a_compare", "logical")
-)
-x <- c(TRUE, FALSE, FALSE, TRUE, TRUE)
-y <- c(TRUE, FALSE)
-afun(x, .ref_group = y, .in_ref_col = FALSE)
+a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla")
+a_compare(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE)
+a_compare(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE)
+a_compare(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE)
 
 # `compare_vars()` in `rtables` pipelines
 
diff --git a/man/create_afun_compare.Rd b/man/create_afun_compare.Rd
index a82994f874..14f71af618 100644
--- a/man/create_afun_compare.Rd
+++ b/man/create_afun_compare.Rd
@@ -26,44 +26,12 @@ for that statistic's row label.}
 Combined formatted analysis function for use in \code{\link[=compare_vars]{compare_vars()}}.
 }
 \description{
-\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
 
 Constructor function which creates a combined formatted analysis function.
 }
 \note{
-Since \code{\link[=a_compare]{a_compare()}} is generic and we want customization of the formatting arguments
-via \code{\link[rtables:make_afun]{rtables::make_afun()}}, we need to create another temporary generic function, with
-corresponding customized methods. Then in order for the methods to be found,
-we need to wrap them in a combined \code{afun}. Since this is required by two layout creating
-functions (and possibly others in the future), we provide a constructor that does this:
-\code{\link[=create_afun_compare]{create_afun_compare()}}.
-}
-\examples{
-# `create_afun_compare()` to create combined `afun`
-
-afun <- create_afun_compare(
-  .stats = c("n", "count_fraction", "mean_sd", "pval"),
-  .indent_mods = c(pval = 1L)
-)
-
-lyt <- basic_table() \%>\%
-  split_cols_by("ARMCD", ref_group = "ARM A") \%>\%
-  analyze(
-    "AGE",
-    afun = afun,
-    show_labels = "visible"
-  )
-build_table(lyt, df = tern_ex_adsl)
-
-lyt <- basic_table() \%>\%
-  split_cols_by("ARMCD", ref_group = "ARM A") \%>\%
-  analyze(
-    "SEX",
-    afun = afun,
-    show_labels = "visible"
-  )
-build_table(lyt, df = tern_ex_adsl)
-
+This function has been deprecated in favor of direct implementation of \code{a_compare()}.
 }
 \seealso{
 \code{\link[=compare_vars]{compare_vars()}}
diff --git a/man/create_afun_summary.Rd b/man/create_afun_summary.Rd
index e7df14a6d2..0c22e921ad 100644
--- a/man/create_afun_summary.Rd
+++ b/man/create_afun_summary.Rd
@@ -21,7 +21,13 @@ for that statistic's row label.}
 Combined formatted analysis function for use in \code{\link[=summarize_vars]{summarize_vars()}}.
 }
 \description{
-\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
 
 Constructor function which creates a combined formatted analysis function.
 }
+\note{
+This function has been deprecated in favor of direct implementation of \code{a_summary()}.
+}
+\seealso{
+\code{\link[=summarize_vars]{summarize_vars()}}
+}

From 6df46b958f8b723360514b6a0c8cc7c18410df1c Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Wed, 14 Jun 2023 21:17:51 -0400
Subject: [PATCH 03/33] Update pkgdown, remove browser calls

---
 R/compare_variables.R   | 2 --
 R/summarize_variables.R | 2 --
 _pkgdown.yml            | 1 -
 3 files changed, 5 deletions(-)

diff --git a/R/compare_variables.R b/R/compare_variables.R
index 246b72634d..1e1af32c3d 100644
--- a/R/compare_variables.R
+++ b/R/compare_variables.R
@@ -275,7 +275,6 @@ a_compare <- function(x,
                       .indent_mods = NULL,
                       na_level = NA_character_,
                       ...) {
-  # browser()
   if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats)
   if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats
   if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels
@@ -291,7 +290,6 @@ a_compare <- function(x,
   }
   .stats <- intersect(.stats, names(x_stats))
   x_stats <- x_stats[.stats]
-  # browser()
   if (!is.numeric(x) && !is.logical(x)) {
     for (stat in c("count", "count_fraction")) {
       for (a in names(x_stats[[stat]])) {
diff --git a/R/summarize_variables.R b/R/summarize_variables.R
index e5f8861571..17f36a6adb 100644
--- a/R/summarize_variables.R
+++ b/R/summarize_variables.R
@@ -524,7 +524,6 @@ a_summary <- function(x,
                       .indent_mods = NULL,
                       na_level = NA_character_,
                       ...) {
-  # browser()
   if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats)
   if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats
   if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_summary_numeric_labels else .a_summary_counts_labels
@@ -540,7 +539,6 @@ a_summary <- function(x,
   }
   .stats <- intersect(.stats, names(x_stats))
   x_stats <- x_stats[.stats]
-  # browser()
   if (!is.numeric(x) && !is.logical(x)) {
     for (stat in c("count", "count_fraction")) {
       for (a in names(x_stats[[stat]])) {
diff --git a/_pkgdown.yml b/_pkgdown.yml
index aa1bc386aa..d905a742e9 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -67,7 +67,6 @@ reference:
   - title: Analysis Helper Functions
     desc: These functions are useful in defining an analysis.
     contents:
-      - starts_with("create_", internal = TRUE)
       - starts_with("h_")
       - starts_with("or_")
       - starts_with("prop_")

From cf35bcef13d76ddb79b8300e4eca08157fe1259d Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Wed, 14 Jun 2023 21:26:45 -0400
Subject: [PATCH 04/33] Fix extract_by_name

---
 R/utils.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/utils.R b/R/utils.R
index 3a1cbf3f33..9a93fbc52d 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -282,7 +282,7 @@ extract_by_name <- function(x, names, x_defaults = NULL) {
   }
   if (length(which_extract) > 0) {
     x_fill[which_extract] <- x[which_extract]
-    x_fill <- x_fill[names]
+    x_fill <- x_fill[which_extract]
   } else {
     x_fill <- NULL
   }

From c3329062172d4eabd1a0ec09ede6a53a002801fd Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Thu, 15 Jun 2023 17:15:05 -0400
Subject: [PATCH 05/33] Fix NA handling

---
 R/compare_variables.R   | 34 +++++++++++++++++++++++++++-------
 R/summarize_variables.R | 32 +++++++++++++++++++++++++++-----
 R/utils.R               |  2 +-
 3 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/R/compare_variables.R b/R/compare_variables.R
index 1e1af32c3d..2f0470afa4 100644
--- a/R/compare_variables.R
+++ b/R/compare_variables.R
@@ -267,42 +267,62 @@ s_compare.logical <- function(x,
 a_compare <- function(x,
                       .N_col,
                       .N_row,
+                      .var,
+                      .df_row,
                       .ref_group,
                       .in_ref_col,
                       .stats = NULL,
                       .formats = NULL,
                       .labels = NULL,
                       .indent_mods = NULL,
+                      na.rm = TRUE,
                       na_level = NA_character_,
                       ...) {
-  if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats)
+  if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels)
   if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats
   if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels
+  if (is.null(.indent_mods)) .indent_mods <- if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods
   if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) {
     .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats)
   }
-  x_stats <- s_compare(x = x, .N_col = .N_col, .N_row = .N_row, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...)
+  if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
+  x_stats <- s_compare(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...)
   if (is.numeric(x)) {
     .labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply(
       c("mean_ci", "mean_pval", "median_ci", "quantiles"),
       function(x) attr(x_stats[[x]], "label")
     )
   }
+  # browser()
   .stats <- intersect(.stats, names(x_stats))
   x_stats <- x_stats[.stats]
   if (!is.numeric(x) && !is.logical(x)) {
     for (stat in c("count", "count_fraction")) {
       for (a in names(x_stats[[stat]])) {
+        a <- if (a == "na-level") "NA" else a
         a_lvl <- paste(stat, a, sep = ".")
         .stats <- c(.stats, a_lvl)
-        .formats[a_lvl] <- .formats[stat]
-        .labels[a_lvl] <- a
-        .indent_mods[a_lvl] <- .indent_mods[stat]
+        .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) {
+          which(names(.formats) == stat) - 1 + which(names(x_stats[[stat]]) == a)
+        } else {
+          length(.formats)
+        })
+        .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) {
+          which(names(.labels) == stat) - 1 + which(names(x_stats[[stat]]) == a)
+        } else {
+          length(.labels)
+        })
+        .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) {
+          which(names(.indent_mods) == stat) - 1 + which(names(x_stats[[stat]]) == a)
+        } else {
+          length(.indent_mods)
+        })
       }
     }
     if (.in_ref_col) x_stats[["pval"]] <- "pvalue"
     x_stats <- unlist(x_stats, recursive = FALSE)
     if (.in_ref_col) x_stats[["pval"]] <- character()
+    names(x_stats) <- gsub("na-level", "NA", names(x_stats))
     .stats <- names(x_stats)
   }
   .formats_x <- extract_by_name(
@@ -312,7 +332,6 @@ a_compare <- function(x,
   .indent_mods_x <- extract_by_name(
     .indent_mods, .stats, if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods
   )
-
   in_rows(
     .list = x_stats,
     .formats = .formats_x,
@@ -391,6 +410,7 @@ compare_vars <- function(lyt,
                          var_labels = vars,
                          nested = TRUE,
                          ...,
+                         na.rm = TRUE,
                          na_level = NA_character_,
                          show_labels = "default",
                          table_names = vars,
@@ -406,7 +426,7 @@ compare_vars <- function(lyt,
     afun = a_compare,
     nested = nested,
     extra_args = list(
-      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_level = na_level, ...
+      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na.rm = na.rm, na_level = na_level, ...
     ),
     inclNAs = TRUE,
     show_labels = show_labels,
diff --git a/R/summarize_variables.R b/R/summarize_variables.R
index 17f36a6adb..37d22eb8b0 100644
--- a/R/summarize_variables.R
+++ b/R/summarize_variables.R
@@ -518,19 +518,26 @@ s_summary.logical <- function(x,
 a_summary <- function(x,
                       .N_col,
                       .N_row,
+                      .var,
+                      .df_row,
                       .stats = NULL,
                       .formats = NULL,
                       .labels = NULL,
                       .indent_mods = NULL,
+                      na.rm = TRUE,
                       na_level = NA_character_,
                       ...) {
   if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats)
   if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats
   if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_summary_numeric_labels else .a_summary_counts_labels
+  if (is.null(.indent_mods)) {
+    .indent_mods <- if (is.numeric(x)) .a_summary_numeric_indent_mods else .a_summary_counts_indent_mods
+  }
   if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) {
     .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats)
   }
-  x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, ...)
+  if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
+  x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
   if (is.numeric(x)) {
     .labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply(
       c("mean_ci", "mean_pval", "median_ci", "quantiles"),
@@ -542,15 +549,29 @@ a_summary <- function(x,
   if (!is.numeric(x) && !is.logical(x)) {
     for (stat in c("count", "count_fraction")) {
       for (a in names(x_stats[[stat]])) {
+        a <- if (a == "na-level") "NA" else a
         a_lvl <- paste(stat, a, sep = ".")
         .stats <- c(.stats, a_lvl)
-        .formats[a_lvl] <- .formats[stat]
-        .labels[a_lvl] <- a
-        .indent_mods[a_lvl] <- .indent_mods[stat]
+        .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) {
+          which(names(.formats) == stat) - 1 + which(names(x_stats[[stat]]) == a)
+        } else {
+          length(.formats)
+        })
+        .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) {
+          which(names(.labels) == stat) - 1 + which(names(x_stats[[stat]]) == a)
+        } else {
+          length(.labels)
+        })
+        .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) {
+          which(names(.indent_mods) == stat) - 1 + which(names(x_stats[[stat]]) == a)
+        } else {
+          length(.indent_mods)
+        })
       }
     }
 
     x_stats <- unlist(x_stats, recursive = FALSE)
+    names(x_stats) <- gsub("na-level", "NA", names(x_stats))
     .stats <- names(x_stats)
   }
   .formats_x <- extract_by_name(
@@ -669,6 +690,7 @@ summarize_vars <- function(lyt,
                            var_labels = vars,
                            nested = TRUE,
                            ...,
+                           na.rm = TRUE,
                            na_level = NA_character_,
                            show_labels = "default",
                            table_names = vars,
@@ -684,7 +706,7 @@ summarize_vars <- function(lyt,
     afun = a_summary,
     nested = nested,
     extra_args = list(
-      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_level = na_level, ...
+      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na.rm = na.rm, na_level = na_level, ...
     ),
     inclNAs = TRUE,
     show_labels = show_labels,
diff --git a/R/utils.R b/R/utils.R
index 9a93fbc52d..5b9e27142f 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -282,7 +282,7 @@ extract_by_name <- function(x, names, x_defaults = NULL) {
   }
   if (length(which_extract) > 0) {
     x_fill[which_extract] <- x[which_extract]
-    x_fill <- x_fill[which_extract]
+    x_fill <- if (!is.null(x_defaults)) x_fill[names] else x_fill[which_extract]
   } else {
     x_fill <- NULL
   }

From f2cd0628c6b215ba449829d5cbdb458d48c921fe Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Thu, 15 Jun 2023 18:28:38 -0400
Subject: [PATCH 06/33] Modularize

---
 R/compare_variables.R      | 39 +++++++---------------------
 R/summarize_variables.R    | 32 +++++------------------
 R/utils_factor.R           | 53 ++++++++++++++++++++++++++++++++++++++
 man/compare_variables.Rd   |  6 +++++
 man/summarize_variables.Rd |  6 +++++
 man/ungroup_stats.Rd       | 34 ++++++++++++++++++++++++
 6 files changed, 115 insertions(+), 55 deletions(-)
 create mode 100644 man/ungroup_stats.Rd

diff --git a/R/compare_variables.R b/R/compare_variables.R
index 2f0470afa4..5aa334c8a1 100644
--- a/R/compare_variables.R
+++ b/R/compare_variables.R
@@ -281,7 +281,9 @@ a_compare <- function(x,
   if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels)
   if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats
   if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels
-  if (is.null(.indent_mods)) .indent_mods <- if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods
+  if (is.null(.indent_mods)) {
+    .indent_mods <- if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods
+  }
   if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) {
     .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats)
   }
@@ -293,37 +295,15 @@ a_compare <- function(x,
       function(x) attr(x_stats[[x]], "label")
     )
   }
-  # browser()
   .stats <- intersect(.stats, names(x_stats))
   x_stats <- x_stats[.stats]
   if (!is.numeric(x) && !is.logical(x)) {
-    for (stat in c("count", "count_fraction")) {
-      for (a in names(x_stats[[stat]])) {
-        a <- if (a == "na-level") "NA" else a
-        a_lvl <- paste(stat, a, sep = ".")
-        .stats <- c(.stats, a_lvl)
-        .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) {
-          which(names(.formats) == stat) - 1 + which(names(x_stats[[stat]]) == a)
-        } else {
-          length(.formats)
-        })
-        .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) {
-          which(names(.labels) == stat) - 1 + which(names(x_stats[[stat]]) == a)
-        } else {
-          length(.labels)
-        })
-        .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) {
-          which(names(.indent_mods) == stat) - 1 + which(names(x_stats[[stat]]) == a)
-        } else {
-          length(.indent_mods)
-        })
-      }
-    }
-    if (.in_ref_col) x_stats[["pval"]] <- "pvalue"
-    x_stats <- unlist(x_stats, recursive = FALSE)
-    if (.in_ref_col) x_stats[["pval"]] <- character()
-    names(x_stats) <- gsub("na-level", "NA", names(x_stats))
-    .stats <- names(x_stats)
+    x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col)
+    x_stats <- x_ungrp[["x"]]
+    .stats <- x_ungrp[[".stats"]]
+    .formats <- x_ungrp[[".formats"]]
+    .labels <- x_ungrp[[".labels"]]
+    .indent_mods <- x_ungrp[[".indent_mods"]]
   }
   .formats_x <- extract_by_name(
     .formats, .stats, if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats
@@ -332,6 +312,7 @@ a_compare <- function(x,
   .indent_mods_x <- extract_by_name(
     .indent_mods, .stats, if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods
   )
+
   in_rows(
     .list = x_stats,
     .formats = .formats_x,
diff --git a/R/summarize_variables.R b/R/summarize_variables.R
index 37d22eb8b0..d45650f24e 100644
--- a/R/summarize_variables.R
+++ b/R/summarize_variables.R
@@ -547,32 +547,12 @@ a_summary <- function(x,
   .stats <- intersect(.stats, names(x_stats))
   x_stats <- x_stats[.stats]
   if (!is.numeric(x) && !is.logical(x)) {
-    for (stat in c("count", "count_fraction")) {
-      for (a in names(x_stats[[stat]])) {
-        a <- if (a == "na-level") "NA" else a
-        a_lvl <- paste(stat, a, sep = ".")
-        .stats <- c(.stats, a_lvl)
-        .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) {
-          which(names(.formats) == stat) - 1 + which(names(x_stats[[stat]]) == a)
-        } else {
-          length(.formats)
-        })
-        .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) {
-          which(names(.labels) == stat) - 1 + which(names(x_stats[[stat]]) == a)
-        } else {
-          length(.labels)
-        })
-        .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) {
-          which(names(.indent_mods) == stat) - 1 + which(names(x_stats[[stat]]) == a)
-        } else {
-          length(.indent_mods)
-        })
-      }
-    }
-
-    x_stats <- unlist(x_stats, recursive = FALSE)
-    names(x_stats) <- gsub("na-level", "NA", names(x_stats))
-    .stats <- names(x_stats)
+    x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods)
+    x_stats <- x_ungrp[["x"]]
+    .stats <- x_ungrp[[".stats"]]
+    .formats <- x_ungrp[[".formats"]]
+    .labels <- x_ungrp[[".labels"]]
+    .indent_mods <- x_ungrp[[".indent_mods"]]
   }
   .formats_x <- extract_by_name(
     .formats, .stats, if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats
diff --git a/R/utils_factor.R b/R/utils_factor.R
index 151512fec8..8e5888ac22 100644
--- a/R/utils_factor.R
+++ b/R/utils_factor.R
@@ -296,3 +296,56 @@ fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") {
   x <- forcats::fct_collapse(.f, ..., other_level = .na_level)
   do.call(forcats::fct_relevel, args = c(list(.f = x), as.list(new_lvls)))
 }
+
+#' Ungroup and Format Non-Numeric Statistics
+#'
+#' Ungroups grouped non-numeric statistics (`count` and `count_fraction`) within input vectors `.stats`, `.formats`,
+#' `.labels`, and `.indent_mods`. Formats explicit `NA` level in each input vector and changes p-value to
+#' `character()` if current column is a reference column. Used within `a_summary()` and `a_compare()`.
+#'
+#' @inheritParams argument_convention
+#'
+#' @return A `list` with modified elements `x`, `.stats`, `.formats`, `.labels`, and `.indent_mods`.
+#'
+#' @seealso [a_summary()] and [a_compare()] which use this function internally.
+#'
+#' @keywords internal
+ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_col = FALSE) {
+  checkmate::assert_true(!is.numeric(x) && !is.logical(x))
+  for (stat in c("count", "count_fraction")) {
+    for (a in names(x[[stat]])) {
+      a_pos <- which(names(x[[stat]]) == a) # position under the original name, before "na-level" is relabelled
+      a <- if (a == "na-level") "NA" else a
+      a_lvl <- paste(stat, a, sep = ".")
+      .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) {
+        which(names(.formats) == stat) - 1 + a_pos
+      } else {
+        length(.formats)
+      })
+      .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) {
+        which(names(.labels) == stat) - 1 + a_pos
+      } else {
+        length(.labels)
+      })
+      .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) {
+        which(names(.indent_mods) == stat) - 1 + a_pos
+      } else {
+        length(.indent_mods)
+      })
+    }
+  }
+
+  if (.in_ref_col && "pval" %in% names(x)) x[["pval"]] <- "pvalue" # placeholder so the slot survives unlist()
+  x <- unlist(x, recursive = FALSE)
+  if (.in_ref_col && "pval" %in% names(x)) x[["pval"]] <- character() # reference column shows no p-value
+  names(x) <- gsub("na-level", "NA", names(x))
+  .stats <- names(x) # the flattened names are the definitive statistic list
+
+  list(
+    x = x,
+    .stats = .stats,
+    .formats = .formats,
+    .labels = .labels,
+    .indent_mods = .indent_mods
+  )
+}
diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd
index b61c7e8f00..13818ae632 100644
--- a/man/compare_variables.Rd
+++ b/man/compare_variables.Rd
@@ -34,12 +34,15 @@ a_compare(
   x,
   .N_col,
   .N_row,
+  .var,
+  .df_row,
   .ref_group,
   .in_ref_col,
   .stats = NULL,
   .formats = NULL,
   .labels = NULL,
   .indent_mods = NULL,
+  na.rm = TRUE,
   na_level = NA_character_,
   ...
 )
@@ -50,6 +53,7 @@ compare_vars(
   var_labels = vars,
   nested = TRUE,
   ...,
+  na.rm = TRUE,
   na_level = NA_character_,
   show_labels = "default",
   table_names = vars,
@@ -85,6 +89,8 @@ to print out information about factor casting. Defaults to \code{TRUE}.}
 
 \item{.N_row}{(\code{count})\cr column-wise N (column count) for the full column that is passed by \code{rtables}.}
 
+\item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.}
+
 \item{.stats}{(\code{character})\cr statistics to select for the table.}
 
 \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
diff --git a/man/summarize_variables.Rd b/man/summarize_variables.Rd
index f0b624a61c..b06ec1aeeb 100644
--- a/man/summarize_variables.Rd
+++ b/man/summarize_variables.Rd
@@ -57,10 +57,13 @@ a_summary(
   x,
   .N_col,
   .N_row,
+  .var,
+  .df_row,
   .stats = NULL,
   .formats = NULL,
   .labels = NULL,
   .indent_mods = NULL,
+  na.rm = TRUE,
   na_level = NA_character_,
   ...
 )
@@ -71,6 +74,7 @@ summarize_vars(
   var_labels = vars,
   nested = TRUE,
   ...,
+  na.rm = TRUE,
   na_level = NA_character_,
   show_labels = "default",
   table_names = vars,
@@ -116,6 +120,8 @@ See more about \code{type} in \code{\link[stats:quantile]{stats::quantile()}}.
 \item{verbose}{(\code{logical})\cr Defaults to \code{TRUE}, which prints out warnings and messages. It is mainly used
 to print out information about factor casting.}
 
+\item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.}
+
 \item{.stats}{(\code{character})\cr statistics to select for the table.}
 
 \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
diff --git a/man/ungroup_stats.Rd b/man/ungroup_stats.Rd
new file mode 100644
index 0000000000..47bedd3df1
--- /dev/null
+++ b/man/ungroup_stats.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils_factor.R
+\name{ungroup_stats}
+\alias{ungroup_stats}
+\title{Ungroup and Format Non-Numeric Statistics}
+\usage{
+ungroup_stats(x, .stats, .formats, .labels, .indent_mods, .in_ref_col = FALSE)
+}
+\arguments{
+\item{x}{(\code{numeric})\cr vector of numbers we want to analyze.}
+
+\item{.stats}{(\code{character})\cr statistics to select for the table.}
+
+\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
+
+\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).}
+
+\item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the
+unmodified default behavior. Can be negative.}
+
+\item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.}
+}
+\value{
+A \code{list} with modified elements \code{x}, \code{.stats}, \code{.formats}, \code{.labels}, and \code{.indent_mods}.
+}
+\description{
+Ungroups grouped non-numeric statistics (\code{count} and \code{count_fraction}) within input vectors \code{.stats}, \code{.formats},
+\code{.labels}, and \code{.indent_mods}. Formats explicit \code{NA} level in each input vector and changes p-value to
+\code{character()} if current column is a reference column. Used within \code{a_summary()} and \code{a_compare()}.
+}
+\seealso{
+\code{\link[=a_summary]{a_summary()}} and \code{\link[=a_compare]{a_compare()}} which use this function internally.
+}
+\keyword{internal}

From 6e89c201b6996217a4358e7803844c788512b3a2 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Thu, 15 Jun 2023 20:23:05 -0400
Subject: [PATCH 07/33] Remove duplication (a_compare)

---
 NAMESPACE                  |  1 -
 R/compare_variables.R      | 79 +++++++------------------------
 R/summarize_variables.R    | 96 ++++++++++++++++++++++++--------------
 R/utils_factor.R           |  4 +-
 man/compare_variables.Rd   | 13 +++---
 man/create_afun_compare.Rd |  3 +-
 man/summarize_variables.Rd | 43 +++++++++++------
 man/ungroup_stats.Rd       |  4 +-
 8 files changed, 118 insertions(+), 125 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 35026811bf..bfadcbd091 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -20,7 +20,6 @@ S3method(tidy,glm)
 S3method(tidy,step)
 S3method(tidy,summary.coxph)
 export(CombinationFunction)
-export(a_compare)
 export(a_count_occurrences)
 export(a_count_occurrences_by_grade)
 export(a_count_patients_with_event)
diff --git a/R/compare_variables.R b/R/compare_variables.R
index 5aa334c8a1..8d8f6e0072 100644
--- a/R/compare_variables.R
+++ b/R/compare_variables.R
@@ -21,8 +21,9 @@
 #' * For `compare_vars()`, the column split must define a reference group via `ref_group` so that the comparison
 #'   is well defined.
 #'
-#' @seealso Relevant constructor function [create_afun_compare()], and [s_summary()] which is used internally
-#'   to compute a summary within `s_compare()`.
+#' @seealso Relevant constructor function [create_afun_compare()], [s_summary()] which is used internally
+#'   to compute a summary within `s_compare()`, and [a_summary()] which is used (with `compare = TRUE`) as the analysis
+#'   function for `compare_vars()`.
 #'
 #' @name compare_variables
 #' @include summarize_variables.R
@@ -177,8 +178,8 @@ s_compare.character <- function(x,
                                 .var,
                                 verbose = TRUE,
                                 ...) {
-  x <- as_factor_keep_attributes(x, x_name = .var, verbose = verbose)
-  .ref_group <- as_factor_keep_attributes(.ref_group, x_name = .var, verbose = verbose)
+  x <- as_factor_keep_attributes(x, verbose = verbose)
+  .ref_group <- as_factor_keep_attributes(.ref_group, verbose = verbose)
   s_compare(
     x = x,
     .ref_group = .ref_group,
@@ -244,26 +245,15 @@ s_compare.logical <- function(x,
   y
 }
 
-.a_compare_numeric_formats <- c(.a_summary_numeric_formats, pval = "x.xxxx | (<0.0001)")
-.a_compare_numeric_labels <- c(.a_summary_numeric_labels, pval = "p-value (t-test)")
-.a_compare_numeric_indent_mods <- c(.a_summary_numeric_indent_mods, pval = 0L)
-.a_compare_counts_formats <- c(.a_summary_counts_formats, pval = "x.xxxx | (<0.0001)")
-.a_compare_counts_labels <- c(.a_summary_counts_labels, pval = "p-value (chi-squared test)")
-.a_compare_counts_indent_mods <- c(.a_summary_counts_indent_mods, pval = 0L)
-
 #' @describeIn compare_variables Formatted analysis function which is used as `afun`
 #'   in `compare_vars()`.
 #'
 #' @return
 #' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()].
 #'
-#' @examples
-#' a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla")
-#' a_compare(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE)
-#' a_compare(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE)
-#' a_compare(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE)
+#' @note This function has been deprecated in favor of `a_summary()` with argument `compare` set to `TRUE`.
 #'
-#' @export
+#' @keywords internal
 a_compare <- function(x,
                       .N_col,
                       .N_row,
@@ -278,48 +268,10 @@ a_compare <- function(x,
                       na.rm = TRUE,
                       na_level = NA_character_,
                       ...) {
-  if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels)
-  if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats
-  if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels
-  if (is.null(.indent_mods)) {
-    .indent_mods <- if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods
-  }
-  if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) {
-    .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats)
-  }
-  if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
-  x_stats <- s_compare(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...)
-  if (is.numeric(x)) {
-    .labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply(
-      c("mean_ci", "mean_pval", "median_ci", "quantiles"),
-      function(x) attr(x_stats[[x]], "label")
-    )
-  }
-  .stats <- intersect(.stats, names(x_stats))
-  x_stats <- x_stats[.stats]
-  if (!is.numeric(x) && !is.logical(x)) {
-    x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col)
-    x_stats <- x_ungrp[["x"]]
-    .stats <- x_ungrp[[".stats"]]
-    .formats <- x_ungrp[[".formats"]]
-    .labels <- x_ungrp[[".labels"]]
-    .indent_mods <- x_ungrp[[".indent_mods"]]
-  }
-  .formats_x <- extract_by_name(
-    .formats, .stats, if (is.numeric(x)) .a_compare_numeric_formats else .a_compare_counts_formats
-  )
-  .labels_x <- extract_by_name(.labels, .stats, if (is.numeric(x)) .a_compare_numeric_labels else .a_compare_counts_labels)
-  .indent_mods_x <- extract_by_name(
-    .indent_mods, .stats, if (is.numeric(x)) .a_compare_numeric_indent_mods else .a_compare_counts_indent_mods
-  )
-
-  in_rows(
-    .list = x_stats,
-    .formats = .formats_x,
-    .names = .labels_x,
-    .labels = .labels_x,
-    .indent_mods = .indent_mods_x,
-    .format_na_strs = na_level
+  lifecycle::deprecate_stop(
+    "0.8.2",
+    "a_compare()",
+    "a_summary(compare = TRUE)"
   )
 }
 
@@ -336,7 +288,8 @@ a_compare <- function(x,
 #'
 #' @return Combined formatted analysis function for use in [compare_vars()].
 #'
-#' @note This function has been deprecated in favor of direct implementation of `a_compare()`.
+#' @note This function has been deprecated in favor of direct implementation of `a_summary()` with argument `compare`
+#'   set to `TRUE`.
 #'
 #' @seealso [compare_vars()]
 #'
@@ -348,7 +301,7 @@ create_afun_compare <- function(.stats = NULL,
   lifecycle::deprecate_stop(
     "0.8.2",
     "create_afun_compare()",
-    "a_compare()"
+    "a_summary(compare = TRUE)"
   )
 }
 
@@ -404,10 +357,10 @@ compare_vars <- function(lyt,
     lyt = lyt,
     vars = vars,
     var_labels = var_labels,
-    afun = a_compare,
+    afun = a_summary,
     nested = nested,
     extra_args = list(
-      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na.rm = na.rm, na_level = na_level, ...
+      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na.rm = na.rm, na_level = na_level, compare = TRUE, ...
     ),
     inclNAs = TRUE,
     show_labels = show_labels,
diff --git a/R/summarize_variables.R b/R/summarize_variables.R
index d45650f24e..6aace87616 100644
--- a/R/summarize_variables.R
+++ b/R/summarize_variables.R
@@ -495,59 +495,89 @@ s_summary.logical <- function(x,
   y
 }
 
-.a_summary_numeric_formats <- summary_formats()
-.a_summary_numeric_labels <- summary_labels()
-.a_summary_numeric_indent_mods <- rep(0L, length(summary_labels())) %>% `names<-`(names(.a_summary_numeric_labels))
-.a_summary_counts_formats <- summary_formats(type = "counts")
-.a_summary_counts_labels <- summary_labels(type = "counts")
-.a_summary_counts_indent_mods <- rep(0L, length(.a_summary_counts_labels)) %>% `names<-`(names(.a_summary_counts_labels))
+.a_compare_numeric_formats <- c(summary_formats(), pval = "x.xxxx | (<0.0001)")
+.a_compare_numeric_labels <- c(summary_labels(), pval = "p-value (t-test)")
+.a_compare_numeric_indents <- c(rep(0L, length(.a_compare_numeric_labels)) %>% `names<-`(names(.a_compare_numeric_labels)))
+.a_compare_counts_formats <- c(summary_formats(type = "counts"), pval = "x.xxxx | (<0.0001)")
+.a_compare_counts_labels <- c(summary_labels(type = "counts"), pval = "p-value (chi-squared test)")
+.a_compare_counts_indents <- c(rep(0L, length(.a_compare_counts_labels)) %>% `names<-`(names(.a_compare_counts_labels)))
 
 #' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and
-#'   `cfun` in `summarize_colvars()`.
+#'   `compare_vars()` and as `cfun` in `summarize_colvars()`.
+#'
+#' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics
+#'   (`compare = TRUE` adds `pval` statistic comparing against reference group).
 #'
 #' @return
 #' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
 #'
+#' @note To use for comparison (with p-value statistic added), parameter `compare` must be set to `TRUE`.
+#'
 #' @examples
-#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
-#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
-#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
-#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
+#' # summary analysis - compare = FALSE
+#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla", .df_row = c())
+#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10, .df_row = c())
+#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, .df_row = c(), verbose = FALSE)
+#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10, .df_row = c())
+#'
+#' # comparison analysis - compare = TRUE
+#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla", compare = TRUE)
+#' a_summary(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE, compare = TRUE)
+#' a_summary(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE, compare = TRUE)
+#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE, compare = TRUE)
 #'
 #' @export
 a_summary <- function(x,
                       .N_col,
                       .N_row,
-                      .var,
-                      .df_row,
+                      .var = NULL,
+                      .df_row = NULL,
+                      .ref_group = NULL,
+                      .in_ref_col = NULL,
                       .stats = NULL,
                       .formats = NULL,
                       .labels = NULL,
                       .indent_mods = NULL,
                       na.rm = TRUE,
                       na_level = NA_character_,
+                      compare = FALSE,
                       ...) {
-  if (is.null(.stats)) .stats <- names(if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats)
-  if (is.null(.formats)) .formats <- if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats
-  if (is.null(.labels)) .labels <- if (is.numeric(x)) .a_summary_numeric_labels else .a_summary_counts_labels
+  x_stats <- if (!compare) {
+    s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
+  } else {
+    s_compare(
+      x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...
+    )
+  }
+
+  if (is.null(.stats)) {
+    .stats <- names(get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats")))
+    if (!compare) .stats <- head(.stats, -1)
+  }
+  if (is.null(.formats)) {
+    .formats <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats"))
+    if (!compare) .formats <- head(.formats, -1)
+  }
+  if (is.null(.labels)) {
+    if (is.numeric(x)) .a_compare_numeric_labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply(
+      c("mean_ci", "mean_pval", "median_ci", "quantiles"), function(x) attr(x_stats[[x]], "label")
+    )
+    .labels <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels"))
+    if (!compare) .labels <- head(.labels, -1)
+  }
   if (is.null(.indent_mods)) {
-    .indent_mods <- if (is.numeric(x)) .a_summary_numeric_indent_mods else .a_summary_counts_indent_mods
+    .indent_mods <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents"))
+    if (!compare) .indent_mods <- head(.indent_mods, -1)
   }
   if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) {
     .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats)
   }
   if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
-  x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
-  if (is.numeric(x)) {
-    .labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply(
-      c("mean_ci", "mean_pval", "median_ci", "quantiles"),
-      function(x) attr(x_stats[[x]], "label")
-    )
-  }
+
   .stats <- intersect(.stats, names(x_stats))
   x_stats <- x_stats[.stats]
   if (!is.numeric(x) && !is.logical(x)) {
-    x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods)
+    x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, if (compare) .in_ref_col else FALSE)
     x_stats <- x_ungrp[["x"]]
     .stats <- x_ungrp[[".stats"]]
     .formats <- x_ungrp[[".formats"]]
@@ -555,11 +585,11 @@ a_summary <- function(x,
     .indent_mods <- x_ungrp[[".indent_mods"]]
   }
   .formats_x <- extract_by_name(
-    .formats, .stats, if (is.numeric(x)) .a_summary_numeric_formats else .a_summary_counts_formats
+    .formats, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats"))
   )
-  .labels_x <- extract_by_name(.labels, .stats, if (is.numeric(x)) .a_summary_numeric_labels else .a_summary_counts_labels)
+  .labels_x <- extract_by_name(.labels, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels")))
   .indent_mods_x <- extract_by_name(
-    .indent_mods, .stats, if (is.numeric(x)) .a_summary_numeric_indent_mods else .a_summary_counts_indent_mods
+    .indent_mods, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents"))
   )
 
   in_rows(
@@ -641,8 +671,7 @@ create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
 #'     .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3"))
 #'   )
 #'
-#' results <- build_table(l, df = dta_test)
-#' as_html(results)
+#' build_table(l, df = dta_test)
 #'
 #' ## Use arguments interpreted by `s_summary`.
 #' l <- basic_table() %>%
@@ -650,7 +679,7 @@ create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
 #'   split_rows_by(var = "AVISIT") %>%
 #'   summarize_vars(vars = "AVAL", na.rm = FALSE)
 #'
-#' results <- build_table(l, df = dta_test)
+#' build_table(l, df = dta_test)
 #'
 #' ## Handle `NA` levels first when summarizing factors.
 #' dta_test$AVISIT <- NA_character_
@@ -659,10 +688,7 @@ create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
 #'   split_cols_by(var = "ARM") %>%
 #'   summarize_vars(vars = "AVISIT", na.rm = FALSE)
 #'
-#' results <- build_table(l, df = dta_test)
-#' \dontrun{
-#' Viewer(results)
-#' }
+#' build_table(l, df = dta_test)
 #'
 #' @export
 summarize_vars <- function(lyt,
diff --git a/R/utils_factor.R b/R/utils_factor.R
index 8e5888ac22..0ddfaf5db7 100644
--- a/R/utils_factor.R
+++ b/R/utils_factor.R
@@ -301,13 +301,13 @@ fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") {
 #'
 #' Ungroups grouped non-numeric statistics (`count` and `count_fraction`) within input vectors `.stats`, `.formats`,
 #' `.labels`, and `.indent_mods`. Formats explicit `NA` level in each input vector and changes p-value to
-#' `character()` if current column is a reference column. Used within `a_summary()` and `a_compare()`.
+#' `character()` if current column is a reference column. Used within [`a_summary()`].
 #'
 #' @inheritParams argument_convention
 #'
 #' @return A `list` with modified elements `x`, `.stats`, `.formats`, `.labels`, and `.indent_mods`.
 #'
-#' @seealso [a_summary()] and [a_compare()] which use this function internally.
+#' @seealso [a_summary()] which uses this function internally.
 #'
 #' @keywords internal
 ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_col = FALSE) {
diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd
index 13818ae632..670c6cc8df 100644
--- a/man/compare_variables.Rd
+++ b/man/compare_variables.Rd
@@ -180,6 +180,8 @@ Therefore it is always better to manually convert character variables to factors
 \item For \code{compare_vars()}, the column split must define a reference group via \code{ref_group} so that the comparison
 is well defined.
 }
+
+This function has been deprecated in favor of \code{a_summary()} with argument \code{compare} set to \code{TRUE}.
 }
 \examples{
 # `s_compare.numeric`
@@ -238,11 +240,6 @@ y <- c(NA, NA, NA, NA, FALSE)
 s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
 s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
 
-a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla")
-a_compare(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE)
-a_compare(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE)
-a_compare(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE)
-
 # `compare_vars()` in `rtables` pipelines
 
 ## Default output within a `rtables` pipeline.
@@ -264,6 +261,8 @@ build_table(lyt, df = tern_ex_adsl)
 
 }
 \seealso{
-Relevant constructor function \code{\link[=create_afun_compare]{create_afun_compare()}}, and \code{\link[=s_summary]{s_summary()}} which is used internally
-to compute a summary within \code{s_compare()}.
+Relevant constructor function \code{\link[=create_afun_compare]{create_afun_compare()}}, \code{\link[=s_summary]{s_summary()}} which is used internally
+to compute a summary within \code{s_compare()}, and \code{\link[=a_summary]{a_summary()}} which is used (with \code{compare = TRUE}) as the analysis
+function for \code{compare_vars()}.
 }
+\keyword{internal}
diff --git a/man/create_afun_compare.Rd b/man/create_afun_compare.Rd
index 14f71af618..cea746b451 100644
--- a/man/create_afun_compare.Rd
+++ b/man/create_afun_compare.Rd
@@ -31,7 +31,8 @@ Combined formatted analysis function for use in \code{\link[=compare_vars]{compa
 Constructor function which creates a combined formatted analysis function.
 }
 \note{
-This function has been deprecated in favor of direct implementation of \code{a_compare()}.
+This function has been deprecated in favor of direct implementation of \code{a_summary()} with argument \code{compare}
+set to \code{TRUE}.
 }
 \seealso{
 \code{\link[=compare_vars]{compare_vars()}}
diff --git a/man/summarize_variables.Rd b/man/summarize_variables.Rd
index b06ec1aeeb..1f2d0455cd 100644
--- a/man/summarize_variables.Rd
+++ b/man/summarize_variables.Rd
@@ -57,14 +57,17 @@ a_summary(
   x,
   .N_col,
   .N_row,
-  .var,
-  .df_row,
+  .var = NULL,
+  .df_row = NULL,
+  .ref_group = NULL,
+  .in_ref_col = NULL,
   .stats = NULL,
   .formats = NULL,
   .labels = NULL,
   .indent_mods = NULL,
   na.rm = TRUE,
   na_level = NA_character_,
+  compare = FALSE,
   ...
 )
 
@@ -122,6 +125,10 @@ to print out information about factor casting.}
 
 \item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.}
 
+\item{.ref_group}{(\code{data.frame} or \code{vector})\cr the data corresponding to the reference group.}
+
+\item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.}
+
 \item{.stats}{(\code{character})\cr statistics to select for the table.}
 
 \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
@@ -134,6 +141,9 @@ for that statistic's row label.}
 
 \item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.}
 
+\item{compare}{(\code{logical})\cr Whether comparison statistics should be analyzed instead of summary statistics
+(\code{compare = TRUE} adds \code{pval} statistic comparing against reference group).}
+
 \item{lyt}{(\code{layout})\cr input layout where analyses will be added to.}
 
 \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.}
@@ -237,7 +247,7 @@ conversion to factor (with a warning) and then forwards to the method for factor
 \item \code{s_summary(logical)}: Method for \code{logical} class.
 
 \item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{summarize_vars()} and
-\code{cfun} in \code{summarize_colvars()}.
+\code{compare_vars()} and as \code{cfun} in \code{summarize_colvars()}.
 
 \item \code{summarize_vars()}: Layout-creating function which can take statistics function arguments
 and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}.
@@ -266,6 +276,8 @@ can be generated correctly. In particular for sparse tables this very likely can
 It is therefore better to always pre-process the dataset such that factors are manually
 created from character variables before passing the dataset to \code{\link[rtables:build_table]{rtables::build_table()}}.
 }
+
+To use for comparison (with p-value statistic added), parameter \code{compare} must be set to \code{TRUE}.
 }
 \examples{
 # `s_summary.numeric`
@@ -339,10 +351,17 @@ x <- c(TRUE, FALSE, TRUE, TRUE)
 s_summary(x, denom = "N_row", .N_row = 10L)
 s_summary(x, denom = "N_col", .N_col = 20L)
 
-a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
-a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
-a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
-a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
+# summary analysis - compare = FALSE
+a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla", .df_row = c())
+a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10, .df_row = c())
+a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, .df_row = c(), verbose = FALSE)
+a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10, .df_row = c())
+
+# comparison analysis - compare = TRUE
+a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla", compare = TRUE)
+a_summary(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE, compare = TRUE)
+a_summary(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE, compare = TRUE)
+a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE, compare = TRUE)
 
 ## Fabricated dataset.
 dta_test <- data.frame(
@@ -373,8 +392,7 @@ l <- basic_table() \%>\%
     .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3"))
   )
 
-results <- build_table(l, df = dta_test)
-as_html(results)
+build_table(l, df = dta_test)
 
 ## Use arguments interpreted by `s_summary`.
 l <- basic_table() \%>\%
@@ -382,7 +400,7 @@ l <- basic_table() \%>\%
   split_rows_by(var = "AVISIT") \%>\%
   summarize_vars(vars = "AVAL", na.rm = FALSE)
 
-results <- build_table(l, df = dta_test)
+build_table(l, df = dta_test)
 
 ## Handle `NA` levels first when summarizing factors.
 dta_test$AVISIT <- NA_character_
@@ -391,9 +409,6 @@ l <- basic_table() \%>\%
   split_cols_by(var = "ARM") \%>\%
   summarize_vars(vars = "AVISIT", na.rm = FALSE)
 
-results <- build_table(l, df = dta_test)
-\dontrun{
-Viewer(results)
-}
+build_table(l, df = dta_test)
 
 }
diff --git a/man/ungroup_stats.Rd b/man/ungroup_stats.Rd
index 47bedd3df1..c9dffda957 100644
--- a/man/ungroup_stats.Rd
+++ b/man/ungroup_stats.Rd
@@ -26,9 +26,9 @@ A \code{list} with modified elements \code{x}, \code{.stats}, \code{.formats}, \
 \description{
 Ungroups grouped non-numeric statistics (\code{count} and \code{count_fraction}) within input vectors \code{.stats}, \code{.formats},
 \code{.labels}, and \code{.indent_mods}. Formats explicit \code{NA} level in each input vector and changes p-value to
-\code{character()} if current column is a reference column. Used within \code{a_summary()} and \code{a_compare()}.
+\code{character()} if current column is a reference column. Used within \code{\link[=a_summary]{a_summary()}}.
 }
 \seealso{
-\code{\link[=a_summary]{a_summary()}} and \code{\link[=a_compare]{a_compare()}} which use this function internally.
+\code{\link[=a_summary]{a_summary()}} which uses this function internally.
 }
 \keyword{internal}

From c0c783aab225d09c4fb012bd7eeabb0de771c962 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 16 Jun 2023 13:54:59 -0400
Subject: [PATCH 08/33] Add workaround for all-NA rows

---
 R/summarize_variables.R | 7 ++++++-
 R/utils_factor.R        | 6 +++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/R/summarize_variables.R b/R/summarize_variables.R
index 6aace87616..1622dfd7a8 100644
--- a/R/summarize_variables.R
+++ b/R/summarize_variables.R
@@ -542,6 +542,12 @@ a_summary <- function(x,
                       na_level = NA_character_,
                       compare = FALSE,
                       ...) {
+  # Remove all-NA rows
+  in_tot_col <- nrow(.df_row) == length(x)
+  .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ]
+  if (in_tot_col && !identical(.df_row[[.var]], x)) x <-  .df_row[[.var]]
+
+  if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
   x_stats <- if (!compare) {
     s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
   } else {
@@ -572,7 +578,6 @@ a_summary <- function(x,
   if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) {
     .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats)
   }
-  if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
 
   .stats <- intersect(.stats, names(x_stats))
   x_stats <- x_stats[.stats]
diff --git a/R/utils_factor.R b/R/utils_factor.R
index 0ddfaf5db7..4fb93b8c34 100644
--- a/R/utils_factor.R
+++ b/R/utils_factor.R
@@ -318,17 +318,17 @@ ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_co
       a_lvl <- paste(stat, a, sep = ".")
       .stats <- c(.stats, a_lvl)
       .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) {
-        which(names(.formats) == stat) - 1 + which(names(x[[stat]]) == a)
+        which(names(.formats) == stat) - 1 + which(names(x[[stat]]) == if (a != "NA" | "NA" %in% names(x[[stat]])) a else "na-level")
       } else {
         length(.formats)
       })
       .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) {
-        which(names(.labels) == stat) - 1 + which(names(x[[stat]]) == a)
+        which(names(.labels) == stat) - 1 + which(names(x[[stat]]) == if (a != "NA" | "NA" %in% names(x[[stat]])) a else "na-level")
       } else {
         length(.labels)
       })
       .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) {
-        which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == a)
+        which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == if (a != "NA" | "NA" %in% names(x[[stat]])) a else "na-level")
       } else {
         length(.indent_mods)
       })

From eb626293950b8cb9bc4fe5de63c1a91b484be625 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Thu, 22 Jun 2023 15:21:06 -0400
Subject: [PATCH 09/33] Add tests, fix lint, update NEWS

---
 NEWS.md                                      |   8 +-
 R/compare_variables.R                        |  41 +++-
 R/summarize_variables.R                      |  86 ++++---
 R/utils.R                                    |   2 +
 R/utils_factor.R                             |  23 +-
 man/compare_variables.Rd                     |  10 +-
 man/summarize_variables.Rd                   |  29 ++-
 tests/testthat/_snaps/summarize_variables.md | 223 +++++++++++++++++++
 tests/testthat/test-summarize_variables.R    |  86 +++++++
 9 files changed, 437 insertions(+), 71 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 0da41e0802..177d666b9a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,7 @@
 # tern 0.8.2.9014
 
+### Breaking Changes
+
 ### Enhancements
 * Added explicit zero counts to `g_km` plot "at risk" annotation tables.
 * Added a flag for total level split in `analyze_patients_exposure_in_cols`.
@@ -7,6 +9,8 @@
 * Updated `summarize_coxreg` to print covariates in data rows for univariate Cox regression with no interactions and content rows otherwise.
 * Removed "baseline status" text from `d_count_abnormal_by_baseline` labels.
 * Improved default sizing of annotation tables in `g_km` and added dynamic scaling of the `surv_med` and `coxph` annotation tables, with customization via the `width_annots` argument.
+* Refactored `a_summary` to no longer use helper function `create_afun_summary`.
+* Refactored `summarize_vars` and `compare_vars` to use refactored `a_summary`.
 
 ### Bug Fixes
 * Fixed bug in `split_text_grob` preventing titles and footnotes from being properly formatted and printed by `decorate_grob`.
@@ -15,7 +19,9 @@
 
 ### Miscellaneous
 * Updated README to include installation instructions for CRAN.
-* Began deprecation of `indent_mod` argument and replace it with the `.indent_mods` argument in `summarize_num_patients` and `analyze_num_patients`.
+* Began deprecation of `indent_mod` argument and replaced it with the `.indent_mods` argument in `summarize_num_patients` and `analyze_num_patients`.
+* Deprecated `a_compare` and replaced it with `a_summary` with argument `compare = TRUE`.
+* Deprecated helper functions `create_afun_summary` and `create_afun_compare` which are no longer used by `a_summary` and `a_compare` respectively.
 
 # tern 0.8.2
 
diff --git a/R/compare_variables.R b/R/compare_variables.R
index 8d8f6e0072..739f4ecdce 100644
--- a/R/compare_variables.R
+++ b/R/compare_variables.R
@@ -128,6 +128,7 @@ s_compare.factor <- function(x,
     .ref_group <- .ref_group %>% explicit_na(label = "NA")
   }
 
+  if ("NA" %in% levels(x)) levels(.ref_group) <- c(levels(.ref_group), "NA")
   checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2)
 
   y$pval <- if (!.in_ref_col && length(x) > 0 && length(.ref_group) > 0) {
@@ -251,28 +252,45 @@ s_compare.logical <- function(x,
 #' @return
 #' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()].
 #'
-#' @note This function has been deprecated in favor of `a_summary()` with argument `compare` set to `TRUE`.
+#' @note `a_compare()` has been deprecated in favor of `a_summary()` with argument `compare` set to `TRUE`.
 #'
 #' @keywords internal
 a_compare <- function(x,
-                      .N_col,
-                      .N_row,
-                      .var,
-                      .df_row,
-                      .ref_group,
-                      .in_ref_col,
+                      .N_col, # nolint
+                      .N_row, # nolint
+                      .var = NULL,
+                      .df_row = NULL,
+                      .ref_group = NULL,
+                      .in_ref_col = FALSE,
                       .stats = NULL,
                       .formats = NULL,
                       .labels = NULL,
                       .indent_mods = NULL,
-                      na.rm = TRUE,
+                      na.rm = TRUE, # nolint
                       na_level = NA_character_,
                       ...) {
-  lifecycle::deprecate_stop(
+  lifecycle::deprecate_warn(
     "0.8.2",
     "a_compare()",
     "a_summary(compare = TRUE)"
   )
+  a_summary(
+    x = x,
+    .N_col = .N_col,
+    .N_row = .N_row,
+    .var = .var,
+    .df_row = .df_row,
+    .ref_group = .ref_group,
+    .in_ref_col = .in_ref_col,
+    .stats = .stats,
+    .formats = .formats,
+    .labels = .labels,
+    .indent_mods = .indent_mods,
+    na.rm = na.rm,
+    na_level = na_level,
+    compare = TRUE,
+    ...
+  )
 }
 
 #' Constructor Function for [compare_vars()]
@@ -344,7 +362,7 @@ compare_vars <- function(lyt,
                          var_labels = vars,
                          nested = TRUE,
                          ...,
-                         na.rm = TRUE,
+                         na.rm = TRUE, # nolint
                          na_level = NA_character_,
                          show_labels = "default",
                          table_names = vars,
@@ -360,7 +378,8 @@ compare_vars <- function(lyt,
     afun = a_summary,
     nested = nested,
     extra_args = list(
-      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na.rm = na.rm, na_level = na_level, compare = TRUE, ...
+      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods,
+      na.rm = na.rm, na_level = na_level, compare = TRUE, ...
     ),
     inclNAs = TRUE,
     show_labels = show_labels,
diff --git a/R/summarize_variables.R b/R/summarize_variables.R
index 1622dfd7a8..f80c4e0840 100644
--- a/R/summarize_variables.R
+++ b/R/summarize_variables.R
@@ -497,7 +497,9 @@ s_summary.logical <- function(x,
 
 .a_compare_numeric_formats <- c(summary_formats(), pval = "x.xxxx | (<0.0001)")
 .a_compare_numeric_labels <- c(summary_labels(), pval = "p-value (t-test)")
-.a_compare_numeric_indents <- c(rep(0L, length(.a_compare_numeric_labels)) %>% `names<-`(names(.a_compare_numeric_labels)))
+.a_compare_numeric_indents <- c(
+  rep(0L, length(.a_compare_numeric_labels)) %>% `names<-`(names(.a_compare_numeric_labels))
+)
 .a_compare_counts_formats <- c(summary_formats(type = "counts"), pval = "x.xxxx | (<0.0001)")
 .a_compare_counts_labels <- c(summary_labels(type = "counts"), pval = "p-value (chi-squared test)")
 .a_compare_counts_indents <- c(rep(0L, length(.a_compare_counts_labels)) %>% `names<-`(names(.a_compare_counts_labels)))
@@ -511,41 +513,51 @@ s_summary.logical <- function(x,
 #' @return
 #' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
 #'
-#' @note To use for comparison (with p-value statistic added), parameter `compare` must be set to `TRUE`.
+#' @note
+#' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`.
+#' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is.
 #'
 #' @examples
 #' # summary analysis - compare = FALSE
-#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla", .df_row = c())
-#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10, .df_row = c())
-#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, .df_row = c(), verbose = FALSE)
-#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10, .df_row = c())
+#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
+#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
+#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
 #'
 #' # comparison analysis - compare = TRUE
-#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla", compare = TRUE)
-#' a_summary(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE, compare = TRUE)
-#' a_summary(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE, compare = TRUE)
-#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE, compare = TRUE)
+#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
+#' a_summary(
+#'   factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
+#' )
+#' a_summary(
+#'   c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE
+#' )
+#' a_summary(
+#'   c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
+#' )
 #'
 #' @export
 a_summary <- function(x,
-                      .N_col,
-                      .N_row,
+                      .N_col, # nolint
+                      .N_row, # nolint
                       .var = NULL,
                       .df_row = NULL,
                       .ref_group = NULL,
-                      .in_ref_col = NULL,
+                      .in_ref_col = FALSE,
                       .stats = NULL,
                       .formats = NULL,
                       .labels = NULL,
                       .indent_mods = NULL,
-                      na.rm = TRUE,
+                      na.rm = TRUE, # nolint
                       na_level = NA_character_,
                       compare = FALSE,
                       ...) {
   # Remove all-NA rows
-  in_tot_col <- nrow(.df_row) == length(x)
-  .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ]
-  if (in_tot_col && !identical(.df_row[[.var]], x)) x <-  .df_row[[.var]]
+  if (!is.null(.df_row) && ncol(.df_row) > 1) {
+    in_tot_col <- nrow(.df_row) == length(x)
+    .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ]
+    if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]]
+  }
 
   if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
   x_stats <- if (!compare) {
@@ -564,10 +576,12 @@ a_summary <- function(x,
     .formats <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats"))
     if (!compare) .formats <- head(.formats, -1)
   }
-  if (is.null(.labels)) {
-    if (is.numeric(x)) .a_compare_numeric_labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply(
+  if (is.numeric(x)) {
+    .a_compare_numeric_labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply(
       c("mean_ci", "mean_pval", "median_ci", "quantiles"), function(x) attr(x_stats[[x]], "label")
     )
+  }
+  if (is.null(.labels)) {
     .labels <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels"))
     if (!compare) .labels <- head(.labels, -1)
   }
@@ -575,34 +589,37 @@ a_summary <- function(x,
     .indent_mods <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents"))
     if (!compare) .indent_mods <- head(.indent_mods, -1)
   }
-  if (length(.indent_mods) == 1 & is.null(names(.indent_mods))) {
+  if (length(.indent_mods) == 1 && is.null(names(.indent_mods))) {
     .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats)
   }
 
   .stats <- intersect(.stats, names(x_stats))
   x_stats <- x_stats[.stats]
+  .formats <- extract_by_name(
+    .formats, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats"))
+  )
+  .labels <- extract_by_name(
+    .labels, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels"))
+  )
+  .indent_mods <- extract_by_name(
+    .indent_mods, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents"))
+  )
+
   if (!is.numeric(x) && !is.logical(x)) {
-    x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, if (compare) .in_ref_col else FALSE)
+    x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col)
     x_stats <- x_ungrp[["x"]]
     .stats <- x_ungrp[[".stats"]]
     .formats <- x_ungrp[[".formats"]]
     .labels <- x_ungrp[[".labels"]]
     .indent_mods <- x_ungrp[[".indent_mods"]]
   }
-  .formats_x <- extract_by_name(
-    .formats, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats"))
-  )
-  .labels_x <- extract_by_name(.labels, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels")))
-  .indent_mods_x <- extract_by_name(
-    .indent_mods, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents"))
-  )
 
   in_rows(
     .list = x_stats,
-    .formats = .formats_x,
-    .names = .labels_x,
-    .labels = .labels_x,
-    .indent_mods = .indent_mods_x,
+    .formats = .formats,
+    .names = .labels,
+    .labels = .labels,
+    .indent_mods = .indent_mods,
     .format_na_strs = na_level
   )
 }
@@ -701,7 +718,7 @@ summarize_vars <- function(lyt,
                            var_labels = vars,
                            nested = TRUE,
                            ...,
-                           na.rm = TRUE,
+                           na.rm = TRUE, # nolint
                            na_level = NA_character_,
                            show_labels = "default",
                            table_names = vars,
@@ -717,7 +734,8 @@ summarize_vars <- function(lyt,
     afun = a_summary,
     nested = nested,
     extra_args = list(
-      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na.rm = na.rm, na_level = na_level, ...
+      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods,
+      na.rm = na.rm, na_level = na_level, ...
     ),
     inclNAs = TRUE,
     show_labels = show_labels,
diff --git a/R/utils.R b/R/utils.R
index 5b9e27142f..d8c80a063c 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -283,6 +283,8 @@ extract_by_name <- function(x, names, x_defaults = NULL) {
   if (length(which_extract) > 0) {
     x_fill[which_extract] <- x[which_extract]
     x_fill <- if (!is.null(x_defaults)) x_fill[names] else x_fill[which_extract]
+  } else if (!is.null(x_defaults)) {
+    x_fill <- x_fill[names]
   } else {
     x_fill <- NULL
   }
diff --git a/R/utils_factor.R b/R/utils_factor.R
index 4fb93b8c34..283ec30f06 100644
--- a/R/utils_factor.R
+++ b/R/utils_factor.R
@@ -316,22 +316,25 @@ ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_co
     for (a in names(x[[stat]])) {
       a <- if (a == "na-level") "NA" else a
       a_lvl <- paste(stat, a, sep = ".")
+      a_name <- if (a != "NA" || "NA" %in% names(x[[stat]])) a else "na-level"
       .stats <- c(.stats, a_lvl)
       .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) {
-        which(names(.formats) == stat) - 1 + which(names(x[[stat]]) == if (a != "NA" | "NA" %in% names(x[[stat]])) a else "na-level")
+        which(names(.formats) == stat) - 1 + which(names(x[[stat]]) == a_name)
       } else {
         length(.formats)
       })
       .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) {
-        which(names(.labels) == stat) - 1 + which(names(x[[stat]]) == if (a != "NA" | "NA" %in% names(x[[stat]])) a else "na-level")
+        which(names(.labels) == stat) - 1 + which(names(x[[stat]]) == a_name)
       } else {
         length(.labels)
       })
-      .indent_mods <- append(.indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) {
-        which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == if (a != "NA" | "NA" %in% names(x[[stat]])) a else "na-level")
-      } else {
-        length(.indent_mods)
-      })
+      .indent_mods <- append(
+        .indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) {
+          which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == a_name)
+        } else {
+          length(.indent_mods)
+        }
+      )
     }
   }
 
@@ -344,8 +347,8 @@ ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_co
   list(
     x = x,
     .stats = .stats,
-    .formats = .formats,
-    .labels = .labels,
-    .indent_mods = .indent_mods
+    .formats = .formats[.stats],
+    .labels = .labels[.stats],
+    .indent_mods = .indent_mods[.stats]
   )
 }
diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd
index 670c6cc8df..8beb2ddae6 100644
--- a/man/compare_variables.Rd
+++ b/man/compare_variables.Rd
@@ -34,10 +34,10 @@ a_compare(
   x,
   .N_col,
   .N_row,
-  .var,
-  .df_row,
-  .ref_group,
-  .in_ref_col,
+  .var = NULL,
+  .df_row = NULL,
+  .ref_group = NULL,
+  .in_ref_col = FALSE,
   .stats = NULL,
   .formats = NULL,
   .labels = NULL,
@@ -181,7 +181,7 @@ Therefore it is always better to manually convert character variables to factors
 is well defined.
 }
 
-This function has been deprecated in favor of \code{a_summary()} with argument \code{compare} set to \code{TRUE}.
+\code{a_compare()} has been deprecated in favor of \code{a_summary()} with argument \code{compare} set to \code{TRUE}.
 }
 \examples{
 # `s_compare.numeric`
diff --git a/man/summarize_variables.Rd b/man/summarize_variables.Rd
index 1f2d0455cd..f54f0b88dd 100644
--- a/man/summarize_variables.Rd
+++ b/man/summarize_variables.Rd
@@ -60,7 +60,7 @@ a_summary(
   .var = NULL,
   .df_row = NULL,
   .ref_group = NULL,
-  .in_ref_col = NULL,
+  .in_ref_col = FALSE,
   .stats = NULL,
   .formats = NULL,
   .labels = NULL,
@@ -277,7 +277,10 @@ It is therefore better to always pre-process the dataset such that factors are m
 created from character variables before passing the dataset to \code{\link[rtables:build_table]{rtables::build_table()}}.
 }
 
-To use for comparison (with p-value statistic added), parameter \code{compare} must be set to \code{TRUE}.
+\itemize{
+\item To use for comparison (with additional p-value statistic), parameter \code{compare} must be set to \code{TRUE}.
+\item Ensure that either all \code{NA} values are converted to an explicit \code{NA} level or all \code{NA} values are left as is.
+}
 }
 \examples{
 # `s_summary.numeric`
@@ -352,16 +355,22 @@ s_summary(x, denom = "N_row", .N_row = 10L)
 s_summary(x, denom = "N_col", .N_col = 20L)
 
 # summary analysis - compare = FALSE
-a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla", .df_row = c())
-a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10, .df_row = c())
-a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, .df_row = c(), verbose = FALSE)
-a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10, .df_row = c())
+a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
+a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
+a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
 
 # comparison analysis - compare = TRUE
-a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .in_ref_col = FALSE, .var = "bla", compare = TRUE)
-a_summary(factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), .in_ref_col = FALSE, compare = TRUE)
-a_summary(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .in_ref_col = FALSE, .var = "x", verbose = FALSE, compare = TRUE)
-a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = FALSE, compare = TRUE)
+a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
+a_summary(
+  factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
+)
+a_summary(
+  c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE
+)
+a_summary(
+  c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
+)
 
 ## Fabricated dataset.
 dta_test <- data.frame(
diff --git a/tests/testthat/_snaps/summarize_variables.md b/tests/testthat/_snaps/summarize_variables.md
index a7a3db1ea3..6104bd290b 100644
--- a/tests/testthat/_snaps/summarize_variables.md
+++ b/tests/testthat/_snaps/summarize_variables.md
@@ -920,6 +920,229 @@
       [1] 0
       
 
+# a_summary works with healthy input.
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+                            row_name   formatted_cell indent_mod                   row_label
+      1                            n               10          0                           n
+      2                          Sum              1.3          0                         Sum
+      3                         Mean              0.1          0                        Mean
+      4                           SD              0.8          0                          SD
+      5                           SE              0.2          0                          SE
+      6                    Mean (SD)        0.1 (0.8)          0                   Mean (SD)
+      7                    Mean (SE)        0.1 (0.2)          0                   Mean (SE)
+      8                  Mean 95% CI    (-0.43, 0.69)          0                 Mean 95% CI
+      9                Mean -/+ 1xSE    (-0.11, 0.38)          0               Mean -/+ 1xSE
+      10               Mean -/+ 1xSD    (-0.65, 0.91)          0               Mean -/+ 1xSD
+      11 Mean p-value (H0: mean = 0)             0.61          0 Mean p-value (H0: mean = 0)
+      12                      Median              0.3          0                      Median
+      13   Median Absolute Deviation             -0.0          0   Median Absolute Deviation
+      14               Median 95% CI    (-0.82, 0.74)          0               Median 95% CI
+      15             25% and 75%-ile       -0.6 - 0.6          0             25% and 75%-ile
+      16                         IQR              1.2          0                         IQR
+      17                   Min - Max       -0.8 - 1.6          0                   Min - Max
+      18                      CV (%)            590.4          0                      CV (%)
+      19                     Minimum             -0.8          0                     Minimum
+      20                     Maximum              1.6          0                     Maximum
+      21          Median (Min - Max) 0.3 (-0.8 - 1.6)          0          Median (Min - Max)
+      22              Geometric Mean               NA          0              Geometric Mean
+      23         CV % Geometric Mean               NA          0         CV % Geometric Mean
+
+---
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+        row_name formatted_cell indent_mod row_label
+      1        n              5          0         n
+      2        a              3          0         a
+      3        b              1          0         b
+      4        c              1          0         c
+      5        a        3 (60%)          0         a
+      6        b        1 (20%)          0         b
+      7        c        1 (20%)          0         c
+      8    n_blq              0          0     n_blq
+
+---
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+        row_name formatted_cell indent_mod row_label
+      1        n              4          0         n
+      2        A              2          0         A
+      3        B              1          0         B
+      4        C              1          0         C
+      5        A        2 (50%)          0         A
+      6        B        1 (25%)          0         B
+      7        C        1 (25%)          0         C
+      8    n_blq              0          0     n_blq
+
+---
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+              row_name formatted_cell indent_mod      row_label
+      1              n              5          0              n
+      2          count              3          0          count
+      3 count_fraction        3 (60%)          0 count_fraction
+      4          n_blq              0          0          n_blq
+
+# a_summary works with custom input.
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+             row_name formatted_cell indent_mod     row_label
+      1      std. dev              1          3      std. dev
+      2 Median 95% CI   -0.62 - 1.12          3 Median 95% CI
+
+---
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+                  row_name formatted_cell indent_mod         row_label
+      1  number of records           5.00         -1 number of records
+      2                  a              2          5                 a
+      3                  b              1          5                 b
+      4                  c              1          5                 c
+      5                 NA              1          5                NA
+      6                  a        2 (40%)          0                 a
+      7                  b        1 (20%)          0                 b
+      8                  c        1 (20%)          0                 c
+      9                 NA        1 (20%)          0                NA
+      10             n_blq              0          0             n_blq
+
+# a_summary works with healthy input when compare = TRUE.
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+                            row_name  formatted_cell indent_mod                   row_label
+      1                            n              10          0                           n
+      2                          Sum            51.3          0                         Sum
+      3                         Mean             5.1          0                        Mean
+      4                           SD             0.8          0                          SD
+      5                           SE             0.2          0                          SE
+      6                    Mean (SD)       5.1 (0.8)          0                   Mean (SD)
+      7                    Mean (SE)       5.1 (0.2)          0                   Mean (SE)
+      8                  Mean 95% CI    (4.57, 5.69)          0                 Mean 95% CI
+      9                Mean -/+ 1xSE    (4.89, 5.38)          0               Mean -/+ 1xSE
+      10               Mean -/+ 1xSD    (4.35, 5.91)          0               Mean -/+ 1xSD
+      11 Mean p-value (H0: mean = 0)            0.00          0 Mean p-value (H0: mean = 0)
+      12                      Median             5.3          0                      Median
+      13   Median Absolute Deviation            -0.0          0   Median Absolute Deviation
+      14               Median 95% CI    (4.18, 5.74)          0               Median 95% CI
+      15             25% and 75%-ile       4.4 - 5.6          0             25% and 75%-ile
+      16                         IQR             1.2          0                         IQR
+      17                   Min - Max       4.2 - 6.6          0                   Min - Max
+      18                      CV (%)            15.2          0                      CV (%)
+      19                     Minimum             4.2          0                     Minimum
+      20                     Maximum             6.6          0                     Maximum
+      21          Median (Min - Max) 5.3 (4.2 - 6.6)          0          Median (Min - Max)
+      22              Geometric Mean             5.1          0              Geometric Mean
+      23         CV % Geometric Mean            15.2          0         CV % Geometric Mean
+      24            p-value (t-test)         <0.0001          0            p-value (t-test)
+
+---
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+                          row_name formatted_cell indent_mod                  row_label
+      1                          n              5          0                          n
+      2                          a              3          0                          a
+      3                          b              1          0                          b
+      4                          c              1          0                          c
+      5                          a        3 (60%)          0                          a
+      6                          b        1 (20%)          0                          b
+      7                          c        1 (20%)          0                          c
+      8                      n_blq              0          0                      n_blq
+      9 p-value (chi-squared test)         0.9560          0 p-value (chi-squared test)
+
+---
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+                          row_name formatted_cell indent_mod                  row_label
+      1                          n              4          0                          n
+      2                          A              2          0                          A
+      3                          B              1          0                          B
+      4                          C              1          0                          C
+      5                          A        2 (50%)          0                          A
+      6                          B        1 (25%)          0                          B
+      7                          C        1 (25%)          0                          C
+      8                      n_blq              0          0                      n_blq
+      9 p-value (chi-squared test)         0.9074          0 p-value (chi-squared test)
+
+---
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+                          row_name formatted_cell indent_mod                  row_label
+      1                          n              5          0                          n
+      2                      count              3          0                      count
+      3             count_fraction        3 (60%)          0             count_fraction
+      4                      n_blq              0          0                      n_blq
+      5 p-value (chi-squared test)         0.8091          0 p-value (chi-squared test)
+
+# a_summary works with custom input when compare = TRUE.
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+             row_name formatted_cell indent_mod     row_label
+      1        pvalue        <0.0001          3        pvalue
+      2 Median 95% CI   -0.41 - 1.10          3 Median 95% CI
+
+---
+
+    Code
+      res
+    Output
+      RowsVerticalSection (in_rows) object print method:
+      ----------------------------
+                           row_name formatted_cell indent_mod                  row_label
+      1           number of records           5.00         -1          number of records
+      2                           a              2          5                          a
+      3                           b              1          5                          b
+      4                           c              1          5                          c
+      5                          NA              1          5                         NA
+      6                           a        2 (40%)          0                          a
+      7                           b        1 (20%)          0                          b
+      8                           c        1 (20%)          0                          c
+      9                          NA        1 (20%)          0                         NA
+      10                      n_blq              0          0                      n_blq
+      11 p-value (chi-squared test)         0.8254          0 p-value (chi-squared test)
+
 # `summarize_vars` works with healthy input, default `na.rm = TRUE`.
 
     Code
diff --git a/tests/testthat/test-summarize_variables.R b/tests/testthat/test-summarize_variables.R
index 6e163c6d76..716dad910e 100644
--- a/tests/testthat/test-summarize_variables.R
+++ b/tests/testthat/test-summarize_variables.R
@@ -160,6 +160,92 @@ testthat::test_that("s_summary works with logical vectors and by if requested do
   testthat::expect_snapshot(res)
 })
 
+testthat::test_that("a_summary works with healthy input.", {
+  options("width" = 100)
+  # numeric input
+  set.seed(1)
+  result <- a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  # factor input
+  result <- a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  # character input
+  result <- a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  # logical input
+  result <- a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})
+
+testthat::test_that("a_summary works with custom input.", {
+  options("width" = 100)
+  result <- a_summary(
+    rnorm(10), .N_col = 10, .N_row = 20, control_summarize_vars(conf_level = 0.90), .stats = c("sd", "median_ci"),
+    .formats = c(sd = "xx.", median_ci = "xx.xx - xx.xx"), .labels = c(sd = "std. dev"), .indent_mods = 3L
+  )
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  result <- a_summary(
+    factor(c("a", "a", "b", "c", NA)), .N_row = 10, .N_col = 10, .formats = c(n = "xx.xx"),
+    .labels = c(n = "number of records"), .indent_mods = c(n = -1L, count = 5L), na.rm = FALSE
+  )
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})
+
+testthat::test_that("a_summary works with healthy input when compare = TRUE.", {
+  options("width" = 100)
+  # numeric input
+  set.seed(1)
+  result <- a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  # factor input
+  result <- a_summary(
+    factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
+  )
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  # character input
+  result <- a_summary(c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE)
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  # logical input
+  result <- a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), compare = TRUE)
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})
+
+testthat::test_that("a_summary works with custom input when compare = TRUE.", {
+  options("width" = 100)
+  result <- a_summary(
+    rnorm(10), .ref_group = rnorm(20, -5, 1), .N_col = 10, .N_row = 20, control_summarize_vars(conf_level = 0.90),
+    .stats = c("pval", "median_ci"), .formats = c(median_ci = "xx.xx - xx.xx"), .labels = c(pval = "pvalue"),
+    .indent_mods = 3L, compare = TRUE
+  )
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  result <- a_summary(
+    factor(c("a", "a", "b", "c", NA)), .ref_group = factor(c("a", "a", "b", "c")), .N_row = 10, .N_col = 10,
+    .formats = c(n = "xx.xx"), .labels = c(n = "number of records"), .indent_mods = c(n = -1L, count = 5L),
+    na.rm = FALSE, compare = TRUE
+  )
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})
+
 testthat::test_that("`summarize_vars` works with healthy input, default `na.rm = TRUE`.", {
   dta_test <- data.frame(AVAL = c(1:4, NA, NA))
 

From 825c36f0a740a82969fb93ce2af4f3a2e878cc2b Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Thu, 22 Jun 2023 16:06:17 -0400
Subject: [PATCH 10/33] Set a_summary defaults in signature; pass only non-NULL args

---
 R/compare_variables.R   | 10 ++++---
 R/summarize_colvars.R   |  9 ++++---
 R/summarize_variables.R | 59 +++++++++++++++++------------------------
 3 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/R/compare_variables.R b/R/compare_variables.R
index 739f4ecdce..3e7939338e 100644
--- a/R/compare_variables.R
+++ b/R/compare_variables.R
@@ -371,16 +371,18 @@ compare_vars <- function(lyt,
                          .formats = NULL,
                          .labels = NULL,
                          .indent_mods = NULL) {
+  extra_args <- list(.stats = .stats, na.rm = na.rm, na_level = na_level, compare = TRUE, ...)
+  if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
+  if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
+  if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods
+
   analyze(
     lyt = lyt,
     vars = vars,
     var_labels = var_labels,
     afun = a_summary,
     nested = nested,
-    extra_args = list(
-      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods,
-      na.rm = na.rm, na_level = na_level, compare = TRUE, ...
-    ),
+    extra_args = extra_args,
     inclNAs = TRUE,
     show_labels = show_labels,
     table_names = table_names,
diff --git a/R/summarize_colvars.R b/R/summarize_colvars.R
index 8d75b41b0e..7d20568238 100644
--- a/R/summarize_colvars.R
+++ b/R/summarize_colvars.R
@@ -67,11 +67,14 @@ summarize_colvars <- function(lyt,
                               .formats = NULL,
                               .labels = NULL,
                               .indent_mods = NULL) {
+  extra_args <- list(.stats = .stats, na_level = na_level, ...)
+  if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
+  if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
+  if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods
+
   analyze_colvars(
     lyt,
     afun = a_summary,
-    extra_args =  list(
-      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_level = na_level, ...
-    )
+    extra_args =  extra_args
   )
 }
diff --git a/R/summarize_variables.R b/R/summarize_variables.R
index f80c4e0840..1d8b295ca4 100644
--- a/R/summarize_variables.R
+++ b/R/summarize_variables.R
@@ -544,10 +544,10 @@ a_summary <- function(x,
                       .df_row = NULL,
                       .ref_group = NULL,
                       .in_ref_col = FALSE,
-                      .stats = NULL,
-                      .formats = NULL,
-                      .labels = NULL,
-                      .indent_mods = NULL,
+                      .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))),
+                      .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")),
+                      .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")),
+                      .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")),
                       na.rm = TRUE, # nolint
                       na_level = NA_character_,
                       compare = FALSE,
@@ -560,34 +560,23 @@ a_summary <- function(x,
   }
 
   if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
-  x_stats <- if (!compare) {
-    s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
+  if (!compare) {
+    x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
+    .stats <- setdiff(.stats, "pval")
+    .formats <- .formats[setdiff(names(.formats), "pval")]
+    .labels <- .labels[setdiff(names(.labels), "pval")]
+    if (!is.null(names(.indent_mods))) .indent_mods <- .indent_mods[setdiff(names(.indent_mods), "pval")]
   } else {
-    s_compare(
+    x_stats <- s_compare(
       x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...
     )
   }
-
-  if (is.null(.stats)) {
-    .stats <- names(get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats")))
-    if (!compare) .stats <- head(.stats, -1)
-  }
-  if (is.null(.formats)) {
-    .formats <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats"))
-    if (!compare) .formats <- head(.formats, -1)
-  }
   if (is.numeric(x)) {
-    .a_compare_numeric_labels[c("mean_ci", "mean_pval", "median_ci", "quantiles")] <- sapply(
-      c("mean_ci", "mean_pval", "median_ci", "quantiles"), function(x) attr(x_stats[[x]], "label")
-    )
-  }
-  if (is.null(.labels)) {
-    .labels <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels"))
-    if (!compare) .labels <- head(.labels, -1)
-  }
-  if (is.null(.indent_mods)) {
-    .indent_mods <- get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents"))
-    if (!compare) .indent_mods <- head(.indent_mods, -1)
+    for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) {
+      if (!i %in% names(.labels) || .labels[[i]] == .a_compare_numeric_labels[[i]]) {
+        .labels[[i]] <- attr(x_stats[[i]], "label")
+      }
+    }
   }
   if (length(.indent_mods) == 1 && is.null(names(.indent_mods))) {
     .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats)
@@ -596,13 +585,13 @@ a_summary <- function(x,
   .stats <- intersect(.stats, names(x_stats))
   x_stats <- x_stats[.stats]
   .formats <- extract_by_name(
-    .formats, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_formats"))
+    .formats, .stats, get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats"))
   )
   .labels <- extract_by_name(
-    .labels, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_labels"))
+    .labels, .stats, get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))
   )
   .indent_mods <- extract_by_name(
-    .indent_mods, .stats, get(paste0(".a_compare_", if (is.numeric(x)) "numeric" else "counts", "_indents"))
+    .indent_mods, .stats, get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents"))
   )
 
   if (!is.numeric(x) && !is.logical(x)) {
@@ -727,16 +716,18 @@ summarize_vars <- function(lyt,
                            .formats = NULL,
                            .labels = NULL,
                            .indent_mods = NULL) {
+  extra_args <- list(.stats = .stats, na.rm = na.rm, na_level = na_level, ...)
+  if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
+  if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
+  if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods
+
   analyze(
     lyt = lyt,
     vars = vars,
     var_labels = var_labels,
     afun = a_summary,
     nested = nested,
-    extra_args = list(
-      .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods,
-      na.rm = na.rm, na_level = na_level, ...
-    ),
+    extra_args = extra_args,
     inclNAs = TRUE,
     show_labels = show_labels,
     table_names = table_names,

From ec7a1a3855b9154a90098337d240c7063fce01f8 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Thu, 22 Jun 2023 16:44:04 -0400
Subject: [PATCH 11/33] Update stale snapshots

---
 .../_snaps/estimate_incidence_rate.md         |  4 +--
 tests/testthat/_snaps/summarize_coxreg.md     | 33 +++++++++++++++++++
 tests/testthat/_snaps/summarize_glm_count.md  | 16 ++++-----
 3 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/tests/testthat/_snaps/estimate_incidence_rate.md b/tests/testthat/_snaps/estimate_incidence_rate.md
index cabf6a1ab4..ba28293ec4 100644
--- a/tests/testthat/_snaps/estimate_incidence_rate.md
+++ b/tests/testthat/_snaps/estimate_incidence_rate.md
@@ -9,10 +9,10 @@
       $conf_type
       [1] "exact"
       
-      $time_unit_input
+      $input_time_unit
       [1] "month"
       
-      $time_unit_output
+      $num_pt_year
       [1] 100
       
 
diff --git a/tests/testthat/_snaps/summarize_coxreg.md b/tests/testthat/_snaps/summarize_coxreg.md
index 715fd433e2..98b9138061 100644
--- a/tests/testthat/_snaps/summarize_coxreg.md
+++ b/tests/testthat/_snaps/summarize_coxreg.md
@@ -224,6 +224,39 @@
         Age                                                                      
           All                                   1.01       (1.00, 1.02)   0.2486 
 
+# summarize_coxreg works with character covariate in univariate case when interaction = TRUE
+
+    Code
+      res
+    Output
+                                      n    Hazard Ratio      95% CI      p-value   Interaction p-value
+      ————————————————————————————————————————————————————————————————————————————————————————————————
+      Treatment:                                                                                      
+        ARM: 2 vs control (ARM: 1)   340       0.64       (0.43, 0.94)   0.0242                       
+      Covariate:                                                                                      
+        COVAR2                       340                                                 0.7759       
+          F                                    0.67       (0.36, 1.22)                                
+          M                                    0.60       (0.36, 0.99)                                
+
+---
+
+    Code
+      res
+    Output
+                                      n    Hazard Ratio      95% CI      p-value   Interaction p-value
+      ————————————————————————————————————————————————————————————————————————————————————————————————
+      Treatment:                                                                                      
+        ARM: 2 vs control (ARM: 1)   340       0.64       (0.43, 0.94)   0.0242                       
+      Covariate:                                                                                      
+        A Covariate Label            340                                                 0.9883       
+          1                                    0.63       (0.35, 1.14)                                
+          2                                    0.58       (0.27, 1.26)                                
+          3                                    0.55       (0.22, 1.35)                                
+          4                                    0.69       (0.23, 2.07)                                
+        COVAR2                       340                                                 0.7759       
+          F                                    0.67       (0.36, 1.22)                                
+          M                                    0.60       (0.36, 0.99)                                
+
 # summarize_coxreg adds the multivariate Cox regression layer to rtables
 
     Code
diff --git a/tests/testthat/_snaps/summarize_glm_count.md b/tests/testthat/_snaps/summarize_glm_count.md
index 81caa9c79f..a2e15c0bd7 100644
--- a/tests/testthat/_snaps/summarize_glm_count.md
+++ b/tests/testthat/_snaps/summarize_glm_count.md
@@ -68,10 +68,10 @@
     Code
       res
     Output
-                         rate asymp.LCL asymp.UCL            ARM
-      A: Drug X      9.029759  6.295928  12.95068      A: Drug X
-      B: Placebo     9.029759  6.295928  12.95068     B: Placebo
-      C: Combination 9.029759  6.295928  12.95068 C: Combination
+                     rate asymp.LCL asymp.UCL            ARM
+      A: Drug X      3.07  2.202774  4.278651      A: Drug X
+      B: Placebo     3.07  2.202774  4.278651     B: Placebo
+      C: Combination 3.07  2.202774  4.278651 C: Combination
 
 # s_glm_count works with healthy input
 
@@ -82,12 +82,12 @@
       [1] 73
       
       $rate
-      [1] 10.20612
+      [1] 3.486005
       attr(,"label")
       [1] "Adjusted Rate"
       
       $rate_ci
-      [1]  5.627535 18.509855
+      [1] 1.983340 6.127155
       attr(,"label")
       [1] "95% CI"
       
@@ -116,12 +116,12 @@
       [1] 73
       
       $rate
-      [1] 10.20612
+      [1] 3.486005
       attr(,"label")
       [1] "Adjusted Rate"
       
       $rate_ci
-      [1]  5.627535 18.509855
+      [1] 1.983340 6.127155
       attr(,"label")
       [1] "95% CI"
       

From 65572ff6dca9f915380a7eddea48895f2baaa1ac Mon Sep 17 00:00:00 2001
From: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 22 Jun 2023 20:47:58 +0000
Subject: [PATCH 12/33] [skip actions] Restyle files

---
 R/summarize_colvars.R                     |  2 +-
 R/summarize_variables.R                   |  9 ++++++---
 R/utils.R                                 |  4 +++-
 R/utils_factor.R                          |  3 ++-
 tests/testthat/test-summarize_variables.R | 15 ++++++++++-----
 5 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/R/summarize_colvars.R b/R/summarize_colvars.R
index ebc9fa638f..7df3304f77 100644
--- a/R/summarize_colvars.R
+++ b/R/summarize_colvars.R
@@ -75,6 +75,6 @@ summarize_colvars <- function(lyt,
   analyze_colvars(
     lyt,
     afun = a_summary,
-    extra_args =  extra_args
+    extra_args = extra_args
   )
 }
diff --git a/R/summarize_variables.R b/R/summarize_variables.R
index 1d8b295ca4..2af5e41fb7 100644
--- a/R/summarize_variables.R
+++ b/R/summarize_variables.R
@@ -527,13 +527,16 @@ s_summary.logical <- function(x,
 #' # comparison analysis - compare = TRUE
 #' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
 #' a_summary(
-#'   factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
+#'   factor(c("a", "a", "b", "c", "a")),
+#'   .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
 #' )
 #' a_summary(
-#'   c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE
+#'   c("A", "B", "A", "C"),
+#'   .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE
 #' )
 #' a_summary(
-#'   c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
+#'   c(TRUE, FALSE, FALSE, TRUE, TRUE),
+#'   .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
 #' )
 #'
 #' @export
diff --git a/R/utils.R b/R/utils.R
index 7d1a71cfd2..66f8b3ad51 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -248,7 +248,9 @@ combine_vectors <- function(x, y) {
 #'
 #' @keywords internal
 extract_by_name <- function(x, names, x_defaults = NULL) {
-  if (is.null(x)) return(NULL)
+  if (is.null(x)) {
+    return(NULL)
+  }
   checkmate::assert_named(x)
   checkmate::assert_character(names)
   which_extract <- intersect(names(x), names)
diff --git a/R/utils_factor.R b/R/utils_factor.R
index 8d0c707336..1e727061c8 100644
--- a/R/utils_factor.R
+++ b/R/utils_factor.R
@@ -309,7 +309,8 @@ ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_co
         length(.labels)
       })
       .indent_mods <- append(
-        .indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.indent_mods)) {
+        .indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl),
+        after = if (stat %in% names(.indent_mods)) {
           which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == a_name)
         } else {
           length(.indent_mods)
diff --git a/tests/testthat/test-summarize_variables.R b/tests/testthat/test-summarize_variables.R
index 716dad910e..ba22857d26 100644
--- a/tests/testthat/test-summarize_variables.R
+++ b/tests/testthat/test-summarize_variables.R
@@ -187,14 +187,16 @@ testthat::test_that("a_summary works with healthy input.", {
 testthat::test_that("a_summary works with custom input.", {
   options("width" = 100)
   result <- a_summary(
-    rnorm(10), .N_col = 10, .N_row = 20, control_summarize_vars(conf_level = 0.90), .stats = c("sd", "median_ci"),
+    rnorm(10),
+    .N_col = 10, .N_row = 20, control_summarize_vars(conf_level = 0.90), .stats = c("sd", "median_ci"),
     .formats = c(sd = "xx.", median_ci = "xx.xx - xx.xx"), .labels = c(sd = "std. dev"), .indent_mods = 3L
   )
   res <- testthat::expect_silent(result)
   testthat::expect_snapshot(res)
 
   result <- a_summary(
-    factor(c("a", "a", "b", "c", NA)), .N_row = 10, .N_col = 10, .formats = c(n = "xx.xx"),
+    factor(c("a", "a", "b", "c", NA)),
+    .N_row = 10, .N_col = 10, .formats = c(n = "xx.xx"),
     .labels = c(n = "number of records"), .indent_mods = c(n = -1L, count = 5L), na.rm = FALSE
   )
   res <- testthat::expect_silent(result)
@@ -211,7 +213,8 @@ testthat::test_that("a_summary works with healthy input when compare = TRUE.", {
 
   # factor input
   result <- a_summary(
-    factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
+    factor(c("a", "a", "b", "c", "a")),
+    .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
   )
   res <- testthat::expect_silent(result)
   testthat::expect_snapshot(res)
@@ -230,7 +233,8 @@ testthat::test_that("a_summary works with healthy input when compare = TRUE.", {
 testthat::test_that("a_summary works with custom input when compare = TRUE.", {
   options("width" = 100)
   result <- a_summary(
-    rnorm(10), .ref_group = rnorm(20, -5, 1), .N_col = 10, .N_row = 20, control_summarize_vars(conf_level = 0.90),
+    rnorm(10),
+    .ref_group = rnorm(20, -5, 1), .N_col = 10, .N_row = 20, control_summarize_vars(conf_level = 0.90),
     .stats = c("pval", "median_ci"), .formats = c(median_ci = "xx.xx - xx.xx"), .labels = c(pval = "pvalue"),
     .indent_mods = 3L, compare = TRUE
   )
@@ -238,7 +242,8 @@ testthat::test_that("a_summary works with custom input when compare = TRUE.", {
   testthat::expect_snapshot(res)
 
   result <- a_summary(
-    factor(c("a", "a", "b", "c", NA)), .ref_group = factor(c("a", "a", "b", "c")), .N_row = 10, .N_col = 10,
+    factor(c("a", "a", "b", "c", NA)),
+    .ref_group = factor(c("a", "a", "b", "c")), .N_row = 10, .N_col = 10,
     .formats = c(n = "xx.xx"), .labels = c(n = "number of records"), .indent_mods = c(n = -1L, count = 5L),
     na.rm = FALSE, compare = TRUE
   )

From 2a870c26b93f502c7f9a26ab68668df511b7a5ca Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Thu, 22 Jun 2023 17:05:56 -0400
Subject: [PATCH 13/33] Add test, update NEWS

---
 NEWS.md                     |  2 ++
 R/utils.R                   |  2 +-
 tests/testthat/test-utils.R | 19 +++++++++++++++++++
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 8db55c707f..ded07bab27 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,8 @@
 * Added method for `character` class to `h_coxreg_inter_effect` enabling `character` covariates in `summarize_coxreg`.
 * Refactored `a_summary` to no longer use helper function `create_afun_summary`. 
 * Refactored `summarize_vars` and `compare_vars` to use refactored `a_summary`.
+* Created new helper function `ungroup_stats` to ungroup and format statistics calculated for each level of a factor variable.
+* Added new argument `x_defaults` to `extract_by_name` to allow user to specify default values to implement for values in `names` that are not specified in `x`.
 
 ### Miscellaneous
 * Began deprecation of `time_unit_input` and `time_unit_output` arguments and replaced them with the `input_time_unit` and `num_pt_year`, respectively, in `control_incidence_rate`.
diff --git a/R/utils.R b/R/utils.R
index 66f8b3ad51..037a29acbd 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -262,7 +262,7 @@ extract_by_name <- function(x, names, x_defaults = NULL) {
   if (length(which_extract) > 0) {
     x_fill[which_extract] <- x[which_extract]
     x_fill <- if (!is.null(x_defaults)) x_fill[names] else x_fill[which_extract]
-  } else if (!is.null(x_defaults)) {
+  } else if (length(intersect(names(x_defaults), names)) > 0) {
     x_fill <- x_fill[names]
   } else {
     x_fill <- NULL
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 87210c6163..feba1d12e9 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -89,6 +89,25 @@ testthat::test_that("extract_by_name returns NULL when there is no overlap", {
   testthat::expect_snapshot(res)
 })
 
+testthat::test_that("extract_by_name x_defaults argument works as expected", {
+  result <- extract_by_name(
+    x = c(a = "xx", b = function(x) paste(x, "bla")),
+    names = c("b", "c"),
+    x_defaults = c(c = "c_default")
+
+  )
+  expected <- c(b = function(x) paste(x, "bla"), c = "c_default")
+  testthat::expect_identical(result, expected)
+
+  # no overlap
+  result <- extract_by_name(
+    x = c(a = "xx", b = "xx.xx"),
+    c("d", "c"),
+    c(a = "test")
+  )
+  testthat::expect_identical(result, NULL)
+})
+
 testthat::test_that("aesi_label works as expected for SMQ", {
   smq01nam <- c("AESI 1", "", NA)
   smq01sc <- c("NARROW", "", NA)

From b66be67495d6b30d325915ecc9b24e8a7d4df644 Mon Sep 17 00:00:00 2001
From: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 22 Jun 2023 21:08:38 +0000
Subject: [PATCH 14/33] [skip actions] Restyle files

---
 tests/testthat/test-utils.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index feba1d12e9..b772079d10 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -94,7 +94,6 @@ testthat::test_that("extract_by_name x_defaults argument works as expected", {
     x = c(a = "xx", b = function(x) paste(x, "bla")),
     names = c("b", "c"),
     x_defaults = c(c = "c_default")
-
   )
   expected <- c(b = function(x) paste(x, "bla"), c = "c_default")
   testthat::expect_identical(result, expected)

From c23aeab26cda1530c65efadfc7ce5f4c15e58b4a Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Thu, 22 Jun 2023 17:30:18 -0400
Subject: [PATCH 15/33] Update deprecated functions, roxygen

---
 R/compare_variables.R                     | 16 ++++++++--------
 R/summarize_variables.R                   |  4 ++--
 man/compare_variables.Rd                  | 12 ++++++++----
 man/summarize_variables.Rd                | 21 ++++++++++++++-------
 tests/testthat/test-compare_variables.R   | 13 +++++++++++++
 tests/testthat/test-summarize_variables.R |  6 ++++++
 6 files changed, 51 insertions(+), 21 deletions(-)

diff --git a/R/compare_variables.R b/R/compare_variables.R
index 3e7939338e..2159c2efed 100644
--- a/R/compare_variables.R
+++ b/R/compare_variables.R
@@ -262,17 +262,17 @@ a_compare <- function(x,
                       .df_row = NULL,
                       .ref_group = NULL,
                       .in_ref_col = FALSE,
-                      .stats = NULL,
-                      .formats = NULL,
-                      .labels = NULL,
-                      .indent_mods = NULL,
+                      .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))),
+                      .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")),
+                      .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")),
+                      .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")),
                       na.rm = TRUE, # nolint
                       na_level = NA_character_,
                       ...) {
   lifecycle::deprecate_warn(
-    "0.8.2",
+    "0.8.3",
     "a_compare()",
-    "a_summary(compare = TRUE)"
+    details = "Please use a_summary() with argument `compare` set to TRUE instead."
   )
   a_summary(
     x = x,
@@ -317,9 +317,9 @@ create_afun_compare <- function(.stats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
   lifecycle::deprecate_stop(
-    "0.8.2",
+    "0.8.3",
     "create_afun_compare()",
-    "a_summary(compare = TRUE)"
+    details = "Please use a_summary(compare = TRUE) directly instead."
   )
 }
 
diff --git a/R/summarize_variables.R b/R/summarize_variables.R
index 2af5e41fb7..4fa1562205 100644
--- a/R/summarize_variables.R
+++ b/R/summarize_variables.R
@@ -636,9 +636,9 @@ a_summary <- function(x,
 #' @export
 create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
   lifecycle::deprecate_stop(
-    "0.8.2",
+    "0.8.3",
     "create_afun_summary()",
-    "a_summary()"
+    details = "Please use a_summary() directly instead."
   )
 }
 
diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd
index 8beb2ddae6..7ba393662c 100644
--- a/man/compare_variables.Rd
+++ b/man/compare_variables.Rd
@@ -38,10 +38,14 @@ a_compare(
   .df_row = NULL,
   .ref_group = NULL,
   .in_ref_col = FALSE,
-  .stats = NULL,
-  .formats = NULL,
-  .labels = NULL,
-  .indent_mods = NULL,
+  .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
+    "_labels"))),
+  .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
+    "_formats")),
+  .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
+    "_labels")),
+  .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
+    "_indents")),
   na.rm = TRUE,
   na_level = NA_character_,
   ...
diff --git a/man/summarize_variables.Rd b/man/summarize_variables.Rd
index f54f0b88dd..cc4fbccc81 100644
--- a/man/summarize_variables.Rd
+++ b/man/summarize_variables.Rd
@@ -61,10 +61,14 @@ a_summary(
   .df_row = NULL,
   .ref_group = NULL,
   .in_ref_col = FALSE,
-  .stats = NULL,
-  .formats = NULL,
-  .labels = NULL,
-  .indent_mods = NULL,
+  .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
+    "_labels"))),
+  .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
+    "_formats")),
+  .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
+    "_labels")),
+  .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
+    "_indents")),
   na.rm = TRUE,
   na_level = NA_character_,
   compare = FALSE,
@@ -363,13 +367,16 @@ a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
 # comparison analysis - compare = TRUE
 a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
 a_summary(
-  factor(c("a", "a", "b", "c", "a")), .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
+  factor(c("a", "a", "b", "c", "a")),
+  .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
 )
 a_summary(
-  c("A", "B", "A", "C"), .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE
+  c("A", "B", "A", "C"),
+  .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE
 )
 a_summary(
-  c(TRUE, FALSE, FALSE, TRUE, TRUE), .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
+  c(TRUE, FALSE, FALSE, TRUE, TRUE),
+  .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
 )
 
 ## Fabricated dataset.
diff --git a/tests/testthat/test-compare_variables.R b/tests/testthat/test-compare_variables.R
index 1c76f86ae3..6979f844b9 100644
--- a/tests/testthat/test-compare_variables.R
+++ b/tests/testthat/test-compare_variables.R
@@ -124,3 +124,16 @@ testthat::test_that("compare_vars 'na_level' argument works as expected", {
   res <- testthat::expect_silent(result)
   testthat::expect_snapshot(res)
 })
+
+# Deprecated functions
+
+testthat::test_that("create_afun_compare returns error message", {
+  testthat::expect_error(create_afun_compare())
+})
+
+testthat::test_that("a_compare returns correct output and warning message", {
+  testthat::expect_warning(result <- a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .stats = c("n", "pval")))
+  expected <- a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .stats = c("n", "pval"), compare = TRUE)
+
+  testthat::expect_equal(result, expected)
+})
diff --git a/tests/testthat/test-summarize_variables.R b/tests/testthat/test-summarize_variables.R
index ba22857d26..c926338f17 100644
--- a/tests/testthat/test-summarize_variables.R
+++ b/tests/testthat/test-summarize_variables.R
@@ -458,3 +458,9 @@ testthat::test_that("summarize_vars 'na_level' argument works as expected", {
   res <- testthat::expect_silent(result)
   testthat::expect_snapshot(res)
 })
+
+# Deprecated functions
+
+testthat::test_that("create_afun_summary returns error message", {
+  testthat::expect_error(create_afun_summary())
+})

From 11d8696459797c3491130dcbd621fc1396061948 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Thu, 22 Jun 2023 17:34:07 -0400
Subject: [PATCH 16/33] Update WORDLIST

---
 inst/WORDLIST | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/inst/WORDLIST b/inst/WORDLIST
index dadff44ef3..06c569fd9d 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -15,6 +15,8 @@ Satterthwaite
 Schouten
 TLG
 TLGs
+Ungroup
+Ungroups
 biomarker
 biomarkers
 coercible
@@ -29,4 +31,5 @@ responders
 subtable
 subtables
 unformatted
+ungroup
 unstratified

From 93fc5e16f8cc2feb9886161271c698cede9ca0c9 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Thu, 22 Jun 2023 17:40:04 -0400
Subject: [PATCH 17/33] Fix checks

---
 NEWS.md      | 2 +-
 _pkgdown.yml | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index ded07bab27..cc38aa1de8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,7 +4,7 @@
 * Added method for `character` class to `h_coxreg_inter_effect` enabling `character` covariates in `summarize_coxreg`.
 * Refactored `a_summary` to no longer use helper function `create_afun_summary`. 
 * Refactored `summarize_vars` and `compare_vars` to use refactored `a_summary`.
-* Created new helper function `ungroup_stats` to ungroup and format statistics calculated for each level of a factor variable.
+* Created new internal helper function `ungroup_stats` to ungroup and format statistics calculated for factor variables.
 * Added new argument `x_defaults` to `extract_by_name` to allow user to specify default values to implement for values in `names` that are not specified in `x`.
 
 ### Miscellaneous
diff --git a/_pkgdown.yml b/_pkgdown.yml
index b4c818a3cf..fb620ca251 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -177,3 +177,4 @@ reference:
     desc: Functions that are currently deprecated within `tern`.
     contents:
       - pairwise
+      - starts_with("create_")

From 56ccc08c270d53fb6e8f4fcd5337305d8e0d0ed0 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 4 Jul 2023 17:10:45 -0400
Subject: [PATCH 18/33] Refactor, create summary customizing function

---
 R/analyze_variables.R | 351 +++++++++++++++++++++++++++++-------------
 1 file changed, 244 insertions(+), 107 deletions(-)

diff --git a/R/analyze_variables.R b/R/analyze_variables.R
index 3fc0a98ebb..5ef65ef05b 100644
--- a/R/analyze_variables.R
+++ b/R/analyze_variables.R
@@ -42,14 +42,19 @@ control_summarize_vars <- control_analyze_vars
 #'
 #' @return A named `vector` of default statistic formats for the given data type.
 #'
-#' @keywords internal
-summary_formats <- function(type = "numeric") {
-  if (type == "counts") {
+#' @examples
+#' summary_formats()
+#' summary_formats(type = "counts", include_pval = TRUE)
+#'
+#' @export
+summary_formats <- function(type = "numeric", include_pval = FALSE) {
+  fmts <- if (type == "counts") {
     c(
       n = "xx.",
       count = "xx.",
       count_fraction = format_count_fraction,
-      n_blq = "xx."
+      n_blq = "xx.",
+      pval = "x.xxxx | (<0.0001)"
     )
   } else {
     c(
@@ -75,9 +80,12 @@ summary_formats <- function(type = "numeric") {
       max = "xx.x",
       median_range = "xx.x (xx.x - xx.x)",
       geom_mean = "xx.x",
-      geom_cv = "xx.x"
+      geom_cv = "xx.x",
+      pval = "x.xxxx | (<0.0001)"
     )
   }
+  if (!include_pval) fmts <- head(fmts, -1)
+  fmts
 }
 
 #' Label Function for Descriptive Statistics
@@ -88,14 +96,19 @@ summary_formats <- function(type = "numeric") {
 #'
 #' @return A named `vector` of default statistic labels for the given data type.
 #'
-#' @keywords internal
-summary_labels <- function(type = "numeric") {
-  if (type == "counts") {
+#' @examples
+#' summary_labels()
+#' summary_labels(type = "counts", include_pval = TRUE)
+#'
+#' @export
+summary_labels <- function(type = "numeric", include_pval = FALSE) {
+  lbls <- if (type == "counts") {
     c(
       n = "n",
       count = "count",
       count_fraction = "count_fraction",
-      n_blq = "n_blq"
+      n_blq = "n_blq",
+      pval = "p-value (chi-squared test)"
     )
   } else {
     c(
@@ -121,9 +134,76 @@ summary_labels <- function(type = "numeric") {
       max = "Maximum",
       median_range = "Median (Min - Max)",
       geom_mean = "Geometric Mean",
-      geom_cv = "CV % Geometric Mean"
+      geom_cv = "CV % Geometric Mean",
+      pval = "p-value (t-test)"
+    )
+  }
+  if (!include_pval) lbls <- head(lbls, -1)
+  lbls
+}
+
+#' Set Default Settings for Summary Statistics
+#'
+#' @inheritParams summary_formats
+#' @param include_pval (`logical`)\cr whether p-value should be included as a default statistic.
+#' @param stats_custom (`character`)\cr vector of names of statistics to include if not the defaults. This
+#'   argument overrides `include_pval` and other custom value arguments such that only settings for these statistics
+#'   will be returned.
+#' @param formats_custom (`named vector` of `character`) vector of custom statistics formats to use in place of the
+#'   defaults defined in [`summary_formats()`]. Names should be a subset of the statistics defined in `stats_custom` (or
+#'   default statistics if this is `NULL`).
+#' @param labels_custom (`named vector` of `character`) vector of custom statistics labels to use in place of the
+#'   defaults defined in [`summary_labels()`]. Names should be a subset of the statistics defined in `stats_custom` (or
+#'   default statistics if this is `NULL`).
+#' @param indents_custom (`integer` or `named vector` of `integer`) vector of custom indentation modifiers for
+#'   statistics to use instead of the default of `0L` for all statistics. Names should be a subset of the statistics
+#'   defined in `stats_custom` (or default statistics if this is `NULL`). Alternatively, the same indentation modifier
+#'   can be applied to all statistics by setting `indents_custom` to a single integer value.
+#'
+#' @examples
+#' summary_custom()
+#' summary_custom(type = "counts", include_pval = TRUE)
+#' summary_custom(
+#'   include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"),
+#'   labels_custom = c(sd = "Std. Dev."), indents_custom = 3L
+#' )
+#'
+#' @export
+summary_custom <- function(type = "numeric",
+                           include_pval = FALSE,
+                           stats_custom = NULL,
+                           formats_custom = NULL,
+                           labels_custom = NULL,
+                           indents_custom = NULL) {
+  .formats <- summary_formats(type = type, include_pval = include_pval)
+  if (is.null(stats_custom)) {
+    .stats <- names(.formats)
+  } else {
+    checkmate::assert_subset(
+      stats_custom,
+      c(names(s_summary(if (type == "numeric") numeric() else TRUE)), "pval")
     )
+    .stats <- stats_custom
   }
+  .labels <- summary_labels(type = type, include_pval = include_pval)
+  .indents <- setNames(rep(0L, length(.stats)), .stats)
+
+  if (!is.null(formats_custom)) .formats[names(formats_custom)] <- formats_custom
+  if (!is.null(labels_custom)) .labels[names(labels_custom)] <- labels_custom
+  if (!is.null(indents_custom)) {
+    if (is.null(names(indents_custom)) && length(indents_custom) == 1) {
+      .indents[names(.indents)] <- indents_custom
+    } else {
+      .indents[names(indents_custom)] <- indents_custom
+    }
+  }
+
+  list(
+    stats = .stats,
+    formats = .formats[.stats],
+    labels = .labels[.stats],
+    indents = .indents[.stats]
+  )
 }
 
 #' Analyze Variables
@@ -502,47 +582,20 @@ s_summary.logical <- function(x,
   y
 }
 
-<<<<<<< HEAD:R/summarize_variables.R
-.a_compare_numeric_formats <- c(summary_formats(), pval = "x.xxxx | (<0.0001)")
-.a_compare_numeric_labels <- c(summary_labels(), pval = "p-value (t-test)")
-.a_compare_numeric_indents <- c(
-  rep(0L, length(.a_compare_numeric_labels)) %>% `names<-`(names(.a_compare_numeric_labels))
-)
-.a_compare_counts_formats <- c(summary_formats(type = "counts"), pval = "x.xxxx | (<0.0001)")
-.a_compare_counts_labels <- c(summary_labels(type = "counts"), pval = "p-value (chi-squared test)")
-.a_compare_counts_indents <- c(rep(0L, length(.a_compare_counts_labels)) %>% `names<-`(names(.a_compare_counts_labels)))
+
 
 #' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and
 #'   `compare_vars()` and as `cfun` in `summarize_colvars()`.
 #'
 #' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics
 #'   (`compare = TRUE` adds `pval` statistic comparing against reference group).
-=======
-#' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `analyze_vars()`.
->>>>>>> main:R/analyze_variables.R
 #'
 #' @return
 #' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
 #'
-<<<<<<< HEAD:R/summarize_variables.R
 #' @note
 #' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`.
 #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is.
-=======
-#' @export
-a_summary <- function(x,
-                      ...,
-                      .N_row, # nolint
-                      .N_col, # nolint
-                      .var) {
-  UseMethod("a_summary", x)
-}
-
-.a_summary_numeric_formats <- summary_formats()
-.a_summary_numeric_labels <- summary_labels()
-
-#' @describeIn analyze_variables Formatted analysis function method for `numeric` class.
->>>>>>> main:R/analyze_variables.R
 #'
 #' @examples
 #' # summary analysis - compare = FALSE
@@ -567,21 +620,21 @@ a_summary <- function(x,
 #' )
 #'
 #' @export
-a_summary <- function(x,
-                      .N_col, # nolint
-                      .N_row, # nolint
-                      .var = NULL,
-                      .df_row = NULL,
-                      .ref_group = NULL,
-                      .in_ref_col = FALSE,
-                      .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))),
-                      .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")),
-                      .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")),
-                      .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")),
-                      na.rm = TRUE, # nolint
-                      na_level = NA_character_,
-                      compare = FALSE,
-                      ...) {
+a_summary_output <- function(x,
+                             .N_col, # nolint
+                             .N_row, # nolint
+                             .var = NULL,
+                             .df_row = NULL,
+                             .ref_group = NULL,
+                             .in_ref_col = FALSE,
+                             .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))),
+                             .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")),
+                             .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")),
+                             .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")),
+                             na.rm = TRUE, # nolint
+                             na_level = NA_character_,
+                             compare = FALSE,
+                             ...) {
   # Remove all-NA rows
   if (!is.null(.df_row) && ncol(.df_row) > 1) {
     in_tot_col <- nrow(.df_row) == length(x)
@@ -609,10 +662,9 @@ a_summary <- function(x,
     }
   }
   if (length(.indent_mods) == 1 && is.null(names(.indent_mods))) {
-    .indent_mods <- rep(.indent_mods, length(.stats)) %>% `names<-`(.stats)
+    .indent_mods <- setNames(rep(.indent_mods, length(.stats)), .stats)
   }
 
-<<<<<<< HEAD:R/summarize_variables.R
   .stats <- intersect(.stats, names(x_stats))
   x_stats <- x_stats[.stats]
   .formats <- extract_by_name(
@@ -643,7 +695,38 @@ a_summary <- function(x,
     .format_na_strs = na_level
   )
 }
-=======
+
+.a_compare_numeric_formats <- c(summary_formats(), pval = "x.xxxx | (<0.0001)")
+.a_compare_numeric_labels <- c(summary_labels(), pval = "p-value (t-test)")
+.a_compare_numeric_indents <- c(
+  rep(0L, length(.a_compare_numeric_labels)) %>% `names<-`(names(.a_compare_numeric_labels))
+)
+.a_compare_counts_formats <- c(summary_formats(type = "counts"), pval = "x.xxxx | (<0.0001)")
+.a_compare_counts_labels <- c(summary_labels(type = "counts"), pval = "p-value (chi-squared test)")
+.a_compare_counts_indents <- c(rep(0L, length(.a_compare_counts_labels)) %>% `names<-`(names(.a_compare_counts_labels)))
+
+
+#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and
+#'   `compare_vars()` and as `cfun` in `summarize_colvars()`.
+#'
+#' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics
+#'   (`compare = TRUE` adds `pval` statistic comparing against reference group).
+#'
+#' @return
+#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
+#'
+#' @note
+#' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`.
+#' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is.
+#' @export
+a_summary <- function(x,
+                      ...,
+                      .N_row, # nolint
+                      .N_col, # nolint
+                      .var) {
+  UseMethod("a_summary", x)
+}
+
 #' @describeIn analyze_variables Formatted analysis function method for `factor` class.
 #'
 #' @examples
@@ -657,42 +740,115 @@ a_summary <- function(x,
 #' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
 #'
 #' @export
-a_summary.factor <- make_afun(
-  s_summary.factor,
-  .formats = .a_summary_counts_formats
-)
+a_summary.numeric <- function(x,
+                              .N_col, # nolint
+                              .N_row, # nolint
+                              .var = NULL,
+                              .df_row = NULL,
+                              .ref_group = NULL,
+                              .in_ref_col = FALSE,
+                              compare = FALSE,
+                              .stats = summary_custom(include_pval = compare)$stats,
+                              .formats = summary_custom(include_pval = compare)$formats,
+                              .labels = summary_custom(include_pval = compare)$labels,
+                              .indent_mods = summary_custom(include_pval = compare)$indents,
+                              na.rm = TRUE, # nolint
+                              na_level = NA_character_,
+                              ...) {
+  # browser()
+  # Remove all-NA rows
+  if (!is.null(.df_row) && ncol(.df_row) > 1) {
+    in_tot_col <- nrow(.df_row) == length(x)
+    .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ]
+    if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]]
+  }
 
-#' @describeIn analyze_variables Formatted analysis function method for `character` class.
-#'
-#' @examples
-#' # `a_summary.character`
-#' afun <- make_afun(
-#'   getS3method("a_summary", "character"),
-#'   .ungroup_stats = c("count", "count_fraction")
-#' )
-#' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
-#'
-#' @export
-a_summary.character <- make_afun(
-  s_summary.character,
-  .formats = .a_summary_counts_formats
-)
+  x_stats <- if (!compare) {
+    s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
+  } else {
+    s_compare(
+      x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...
+    )
+  }
+
+  format_vals <- summary_custom(
+    include_pval = compare,
+    stats_custom = .stats,
+    formats_custom = .formats,
+    labels_custom = .labels,
+    indents_custom = .indent_mods
+  )
+  .stats <- format_vals$stats
+  .formats <- format_vals$formats
+  .labels <- format_vals$labels
+  .indent_mods <- format_vals$indents
+
+  for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) {
+    if (!i %in% names(.labels) || .labels[[i]] == summary_custom()$labels[[i]]) {
+      .labels[[i]] <- attr(x_stats[[i]], "label")
+    }
+  }
+
+  x_stats <- x_stats[.stats]
 
-#' @describeIn analyze_variables Formatted analysis function method for `logical` class.
+  in_rows(
+    .list = x_stats,
+    .formats = .formats,
+    .names = .labels,
+    .labels = .labels,
+    .indent_mods = .indent_mods,
+    .format_na_strs = na_level
+  )
+}
+
+#' #' @describeIn analyze_variables Formatted analysis function method for `factor` class.
+#' #'
+#' #' @examples
+#' #' # `a_summary.factor`
+#' #' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting
+#' #' # functions can be applied correctly.
+#' #' afun <- make_afun(
+#' #'   getS3method("a_summary", "factor"),
+#' #'   .ungroup_stats = c("count", "count_fraction")
+#' #' )
+#' #' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+#' #'
+#' #' @export
+#' a_summary.factor <- make_afun(
+#'   s_summary.factor,
+#'   .formats = .a_summary_counts_formats
+#' )
 #'
-#' @examples
-#' # `a_summary.logical`
-#' afun <- make_afun(
-#'   getS3method("a_summary", "logical")
+#' #' @describeIn analyze_variables Formatted analysis function method for `character` class.
+#' #'
+#' #' @examples
+#' #' # `a_summary.character`
+#' #' afun <- make_afun(
+#' #'   getS3method("a_summary", "character"),
+#' #'   .ungroup_stats = c("count", "count_fraction")
+#' #' )
+#' #' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
+#' #'
+#' #' @export
+#' a_summary.character <- make_afun(
+#'   s_summary.character,
+#'   .formats = .a_summary_counts_formats
 #' )
-#' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
 #'
-#' @export
-a_summary.logical <- make_afun(
-  s_summary.logical,
-  .formats = .a_summary_counts_formats
-)
->>>>>>> main:R/analyze_variables.R
+#' #' @describeIn analyze_variables Formatted analysis function method for `logical` class.
+#' #'
+#' #' @examples
+#' #' # `a_summary.logical`
+#' #' afun <- make_afun(
+#' #'   getS3method("a_summary", "logical")
+#' #' )
+#' #' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
+#' #'
+#' #' @export
+#' a_summary.logical <- make_afun(
+#'   s_summary.logical,
+#'   .formats = .a_summary_counts_formats
+#' )
 
 #' Constructor Function for [analyze_vars()] and [summarize_colvars()]
 #'
@@ -786,9 +942,8 @@ create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
 #'
 #' build_table(l, df = dta_test)
 #'
-<<<<<<< HEAD:R/summarize_variables.R
 #' @export
-summarize_vars <- function(lyt,
+analyze_vars <- function(lyt,
                            vars,
                            var_labels = vars,
                            nested = TRUE,
@@ -806,24 +961,6 @@ summarize_vars <- function(lyt,
   if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
   if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
   if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods
-=======
-#' @export analyze_vars summarize_vars
-#' @aliases summarize_vars
-analyze_vars <- function(lyt,
-                         vars,
-                         var_labels = vars,
-                         nested = TRUE,
-                         ...,
-                         na_level = NA_character_,
-                         show_labels = "default",
-                         table_names = vars,
-                         section_div = NA_character_,
-                         .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
-                         .formats = NULL,
-                         .labels = NULL,
-                         .indent_mods = NULL) {
-  afun <- create_afun_summary(.stats, .formats, .labels, .indent_mods)
->>>>>>> main:R/analyze_variables.R
 
   analyze(
     lyt = lyt,

From 5987bc9b92b6f7d8dc9f16f6f5c156071964b364 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 4 Jul 2023 17:57:42 -0400
Subject: [PATCH 19/33] temp

---
 NAMESPACE                |   9 +-
 R/analyze_variables.R    | 315 +++++++++++++++++--------
 man/analyze_variables.Rd | 490 +++++++++++++++++++++++++++++++++++++++
 man/summary_custom.Rd    |  49 ++++
 man/summary_formats.Rd   |   8 +-
 man/summary_labels.Rd    |   8 +-
 6 files changed, 777 insertions(+), 102 deletions(-)
 create mode 100644 man/analyze_variables.Rd
 create mode 100644 man/summary_custom.Rd

diff --git a/NAMESPACE b/NAMESPACE
index de60f429eb..d21c817875 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,9 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(a_summary,character)
+S3method(a_summary,factor)
+S3method(a_summary,logical)
+S3method(a_summary,numeric)
 S3method(as.rtable,data.frame)
 S3method(h_coxreg_inter_effect,character)
 S3method(h_coxreg_inter_effect,factor)
@@ -33,6 +37,7 @@ export(a_odds_ratio)
 export(a_proportion)
 export(a_proportion_diff)
 export(a_summary)
+export(a_summary_output)
 export(add_rowcounts)
 export(aesi_label)
 export(analyze_num_patients)
@@ -261,7 +266,9 @@ export(summarize_num_patients)
 export(summarize_occurrences_by_grade)
 export(summarize_patients_events_in_cols)
 export(summarize_patients_exposure_in_cols)
-export(summarize_vars)
+export(summary_custom)
+export(summary_formats)
+export(summary_labels)
 export(surv_time)
 export(surv_timepoint)
 export(tabulate_rsp_biomarkers)
diff --git a/R/analyze_variables.R b/R/analyze_variables.R
index 5ef65ef05b..d60ee8f7c8 100644
--- a/R/analyze_variables.R
+++ b/R/analyze_variables.R
@@ -623,17 +623,17 @@ s_summary.logical <- function(x,
 a_summary_output <- function(x,
                              .N_col, # nolint
                              .N_row, # nolint
-                             .var = NULL,
-                             .df_row = NULL,
-                             .ref_group = NULL,
-                             .in_ref_col = FALSE,
-                             .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))),
-                             .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")),
-                             .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")),
-                             .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")),
-                             na.rm = TRUE, # nolint
-                             na_level = NA_character_,
-                             compare = FALSE,
+                             .var,
+                             .df_row,
+                             .ref_group,
+                             .in_ref_col,
+                             .stats,
+                             .formats,
+                             .labels,
+                             .indent_mods,
+                             na.rm, # nolint
+                             na_level,
+                             compare,
                              ...) {
   # Remove all-NA rows
   if (!is.null(.df_row) && ncol(.df_row) > 1) {
@@ -643,41 +643,36 @@ a_summary_output <- function(x,
   }
 
   if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
-  if (!compare) {
-    x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
-    .stats <- setdiff(.stats, "pval")
-    .formats <- .formats[setdiff(names(.formats), "pval")]
-    .labels <- .labels[setdiff(names(.labels), "pval")]
-    if (!is.null(names(.indent_mods))) .indent_mods <- .indent_mods[setdiff(names(.indent_mods), "pval")]
+  x_stats <- if (!compare) {
+    s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
   } else {
-    x_stats <- s_compare(
+    s_compare(
       x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...
     )
   }
+
+  custom_summary <- summary_custom(
+    include_pval = compare,
+    stats_custom = .stats,
+    formats_custom = .formats,
+    labels_custom = .labels,
+    indents_custom = .indent_mods
+  )
+  .stats <- custom_summary$stats
+  .formats <- custom_summary$formats
+  .labels <- custom_summary$labels
+  .indent_mods <- custom_summary$indents
+
   if (is.numeric(x)) {
     for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) {
-      if (!i %in% names(.labels) || .labels[[i]] == .a_compare_numeric_labels[[i]]) {
+      if (!i %in% names(.labels) || .labels[[i]] == summary_custom()$labels[[i]]) {
         .labels[[i]] <- attr(x_stats[[i]], "label")
       }
     }
   }
-  if (length(.indent_mods) == 1 && is.null(names(.indent_mods))) {
-    .indent_mods <- setNames(rep(.indent_mods, length(.stats)), .stats)
-  }
 
-  .stats <- intersect(.stats, names(x_stats))
   x_stats <- x_stats[.stats]
-  .formats <- extract_by_name(
-    .formats, .stats, get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats"))
-  )
-  .labels <- extract_by_name(
-    .labels, .stats, get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))
-  )
-  .indent_mods <- extract_by_name(
-    .indent_mods, .stats, get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents"))
-  )
-
-  if (!is.numeric(x) && !is.logical(x)) {
+  if (is.factor(x)) {
     x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col)
     x_stats <- x_ungrp[["x"]]
     .stats <- x_ungrp[[".stats"]]
@@ -696,16 +691,6 @@ a_summary_output <- function(x,
   )
 }
 
-.a_compare_numeric_formats <- c(summary_formats(), pval = "x.xxxx | (<0.0001)")
-.a_compare_numeric_labels <- c(summary_labels(), pval = "p-value (t-test)")
-.a_compare_numeric_indents <- c(
-  rep(0L, length(.a_compare_numeric_labels)) %>% `names<-`(names(.a_compare_numeric_labels))
-)
-.a_compare_counts_formats <- c(summary_formats(type = "counts"), pval = "x.xxxx | (<0.0001)")
-.a_compare_counts_labels <- c(summary_labels(type = "counts"), pval = "p-value (chi-squared test)")
-.a_compare_counts_indents <- c(rep(0L, length(.a_compare_counts_labels)) %>% `names<-`(names(.a_compare_counts_labels)))
-
-
 #' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and
 #'   `compare_vars()` and as `cfun` in `summarize_colvars()`.
 #'
@@ -720,10 +705,13 @@ a_summary_output <- function(x,
 #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is.
 #' @export
 a_summary <- function(x,
-                      ...,
-                      .N_row, # nolint
                       .N_col, # nolint
-                      .var) {
+                      .N_row, # nolint
+                      .var,
+                      .df_row,
+                      .ref_group,
+                      .in_ref_col,
+                      ...) {
   UseMethod("a_summary", x)
 }
 
@@ -755,7 +743,6 @@ a_summary.numeric <- function(x,
                               na.rm = TRUE, # nolint
                               na_level = NA_character_,
                               ...) {
-  # browser()
   # Remove all-NA rows
   if (!is.null(.df_row) && ncol(.df_row) > 1) {
     in_tot_col <- nrow(.df_row) == length(x)
@@ -771,17 +758,17 @@ a_summary.numeric <- function(x,
     )
   }
 
-  format_vals <- summary_custom(
+  custom_summary <- summary_custom(
     include_pval = compare,
     stats_custom = .stats,
     formats_custom = .formats,
     labels_custom = .labels,
     indents_custom = .indent_mods
   )
-  .stats <- format_vals$stats
-  .formats <- format_vals$formats
-  .labels <- format_vals$labels
-  .indent_mods <- format_vals$indents
+  .stats <- custom_summary$stats
+  .formats <- custom_summary$formats
+  .labels <- custom_summary$labels
+  .indent_mods <- custom_summary$indents
 
   for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) {
     if (!i %in% names(.labels) || .labels[[i]] == summary_custom()$labels[[i]]) {
@@ -790,7 +777,6 @@ a_summary.numeric <- function(x,
   }
 
   x_stats <- x_stats[.stats]
-
   in_rows(
     .list = x_stats,
     .formats = .formats,
@@ -801,54 +787,189 @@ a_summary.numeric <- function(x,
   )
 }
 
-#' #' @describeIn analyze_variables Formatted analysis function method for `factor` class.
-#' #'
-#' #' @examples
-#' #' # `a_summary.factor`
-#' #' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting
-#' #' # functions can be applied correctly.
-#' #' afun <- make_afun(
-#' #'   getS3method("a_summary", "factor"),
-#' #'   .ungroup_stats = c("count", "count_fraction")
-#' #' )
-#' #' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
-#' #'
-#' #' @export
-#' a_summary.factor <- make_afun(
-#'   s_summary.factor,
-#'   .formats = .a_summary_counts_formats
+#' @describeIn analyze_variables Formatted analysis function method for `factor` class.
+#'
+#' @examples
+#' # `a_summary.factor`
+#' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting
+#' # functions can be applied correctly.
+#' afun <- make_afun(
+#'   getS3method("a_summary", "factor"),
+#'   .ungroup_stats = c("count", "count_fraction")
 #' )
+#' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+#'
+#' @export
+a_summary.factor <- function(x,
+                             .N_col, # nolint
+                             .N_row, # nolint
+                             .var = NULL,
+                             .df_row = NULL,
+                             .ref_group = NULL,
+                             .in_ref_col = FALSE,
+                             compare = FALSE,
+                             .stats = summary_custom(type = "counts", include_pval = compare)$stats,
+                             .formats = summary_custom(type = "counts", include_pval = compare)$formats,
+                             .labels = summary_custom(type = "counts", include_pval = compare)$labels,
+                             .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
+                             na.rm = TRUE, # nolint
+                             na_level = NA_character_,
+                             ...) {
+  # Remove all-NA rows
+  if (!is.null(.df_row) && ncol(.df_row) > 1) {
+    in_tot_col <- nrow(.df_row) == length(x)
+    .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ]
+    if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]]
+  }
+
+  if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
+
+  x_stats <- if (!compare) {
+    s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
+  } else {
+    s_compare(
+      x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...
+    )
+  }
+
+  custom_summary <- summary_custom(
+    type = "counts",
+    include_pval = compare,
+    stats_custom = .stats,
+    formats_custom = .formats,
+    labels_custom = .labels,
+    indents_custom = .indent_mods
+  )
+  .stats <- custom_summary$stats
+  .formats <- custom_summary$formats
+  .labels <- custom_summary$labels
+  .indent_mods <- custom_summary$indents
+
+  x_stats <- x_stats[.stats]
+  x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col)
+  x_stats <- x_ungrp[["x"]]
+  .stats <- x_ungrp[[".stats"]]
+  .formats <- x_ungrp[[".formats"]]
+  .labels <- x_ungrp[[".labels"]]
+  .indent_mods <- x_ungrp[[".indent_mods"]]
+
+  in_rows(
+    .list = x_stats,
+    .formats = .formats,
+    .names = .labels,
+    .labels = .labels,
+    .indent_mods = .indent_mods,
+    .format_na_strs = na_level
+  )
+}
+
+#' @describeIn analyze_variables Formatted analysis function method for `character` class.
 #'
-#' #' @describeIn analyze_variables Formatted analysis function method for `character` class.
-#' #'
-#' #' @examples
-#' #' # `a_summary.character`
-#' #' afun <- make_afun(
-#' #'   getS3method("a_summary", "character"),
-#' #'   .ungroup_stats = c("count", "count_fraction")
-#' #' )
-#' #' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
-#' #'
-#' #' @export
-#' a_summary.character <- make_afun(
-#'   s_summary.character,
-#'   .formats = .a_summary_counts_formats
+#' @examples
+#' # `a_summary.character`
+#' afun <- make_afun(
+#'   getS3method("a_summary", "character"),
+#'   .ungroup_stats = c("count", "count_fraction")
 #' )
+#' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
 #'
-#' #' @describeIn analyze_variables Formatted analysis function method for `logical` class.
-#' #'
-#' #' @examples
-#' #' # `a_summary.logical`
-#' #' afun <- make_afun(
-#' #'   getS3method("a_summary", "logical")
-#' #' )
-#' #' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
-#' #'
-#' #' @export
-#' a_summary.logical <- make_afun(
-#'   s_summary.logical,
-#'   .formats = .a_summary_counts_formats
+#' @export
+a_summary.character <- function(x,
+                                .N_col, # nolint
+                                .N_row, # nolint
+                                .var = NULL,
+                                .df_row = NULL,
+                                .ref_group = NULL,
+                                .in_ref_col = FALSE,
+                                compare = FALSE,
+                                .stats = summary_custom(type = "counts", include_pval = compare)$stats,
+                                .formats = summary_custom(type = "counts", include_pval = compare)$formats,
+                                .labels = summary_custom(type = "counts", include_pval = compare)$labels,
+                                .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
+                                na.rm = TRUE, # nolint
+                                na_level = NA_character_,
+                                ...) {
+  x <- as.factor(x)
+  a_summary(x,
+           .N_col, # nolint
+           .N_row, # nolint
+           .var = .var,
+           .df_row = .df_row,
+           .ref_group = .ref_group,
+           .in_ref_col = .in_ref_col,
+           compare = compare,
+           .stats = .stats,
+           .formats = .formats,
+           .labels = .labels,
+           .indent_mods = .indent_mods,
+           na.rm = na.rm,
+           na_level = na_level,
+           ...)
+}
+
+#' @describeIn analyze_variables Formatted analysis function method for `logical` class.
+#'
+#' @examples
+#' # `a_summary.logical`
+#' afun <- make_afun(
+#'   getS3method("a_summary", "logical")
 #' )
+#' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
+#'
+#' @export
+a_summary.logical <- function(x,
+                             .N_col, # nolint
+                             .N_row, # nolint
+                             .var = NULL,
+                             .df_row = NULL,
+                             .ref_group = NULL,
+                             .in_ref_col = FALSE,
+                             compare = FALSE,
+                             .stats = summary_custom(type = "counts", include_pval = compare)$stats,
+                             .formats = summary_custom(type = "counts", include_pval = compare)$formats,
+                             .labels = summary_custom(type = "counts", include_pval = compare)$labels,
+                             .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
+                             na.rm = TRUE, # nolint
+                             na_level = NA_character_,
+                             ...) {
+  # Remove all-NA rows
+  if (!is.null(.df_row) && ncol(.df_row) > 1) {
+    in_tot_col <- nrow(.df_row) == length(x)
+    .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ]
+    if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]]
+  }
+
+  if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
+  x_stats <- if (!compare) {
+    s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
+  } else {
+    s_compare(
+      x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...
+    )
+  }
+
+  custom_summary <- summary_custom(
+    include_pval = compare,
+    stats_custom = .stats,
+    formats_custom = .formats,
+    labels_custom = .labels,
+    indents_custom = .indent_mods
+  )
+  .stats <- custom_summary$stats
+  .formats <- custom_summary$formats
+  .labels <- custom_summary$labels
+  .indent_mods <- custom_summary$indents
+
+  x_stats <- x_stats[.stats]
+  in_rows(
+    .list = x_stats,
+    .formats = .formats,
+    .names = .labels,
+    .labels = .labels,
+    .indent_mods = .indent_mods,
+    .format_na_strs = na_level
+  )
+}
 
 #' Constructor Function for [analyze_vars()] and [summarize_colvars()]
 #'
diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd
new file mode 100644
index 0000000000..5ec4af07e4
--- /dev/null
+++ b/man/analyze_variables.Rd
@@ -0,0 +1,490 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/analyze_variables.R
+\name{analyze_variables}
+\alias{analyze_variables}
+\alias{s_summary}
+\alias{s_summary.numeric}
+\alias{s_summary.factor}
+\alias{s_summary.character}
+\alias{s_summary.logical}
+\alias{a_summary.numeric}
+\alias{a_summary.factor}
+\alias{a_summary.character}
+\alias{a_summary.logical}
+\alias{analyze_vars}
+\title{Analyze Variables}
+\usage{
+s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...)
+
+\method{s_summary}{numeric}(
+  x,
+  na.rm = TRUE,
+  denom,
+  .N_row,
+  .N_col,
+  .var,
+  control = control_analyze_vars(),
+  ...
+)
+
+\method{s_summary}{factor}(
+  x,
+  na.rm = TRUE,
+  denom = c("n", "N_row", "N_col"),
+  .N_row,
+  .N_col,
+  ...
+)
+
+\method{s_summary}{character}(
+  x,
+  na.rm = TRUE,
+  denom = c("n", "N_row", "N_col"),
+  .N_row,
+  .N_col,
+  .var,
+  verbose = TRUE,
+  ...
+)
+
+\method{s_summary}{logical}(
+  x,
+  na.rm = TRUE,
+  denom = c("n", "N_row", "N_col"),
+  .N_row,
+  .N_col,
+  ...
+)
+
+\method{a_summary}{numeric}(
+  x,
+  .N_col,
+  .N_row,
+  .var = NULL,
+  .df_row = NULL,
+  .ref_group = NULL,
+  .in_ref_col = FALSE,
+  compare = FALSE,
+  .stats = summary_custom(include_pval = compare)$stats,
+  .formats = summary_custom(include_pval = compare)$formats,
+  .labels = summary_custom(include_pval = compare)$labels,
+  .indent_mods = summary_custom(include_pval = compare)$indents,
+  na.rm = TRUE,
+  na_level = NA_character_,
+  ...
+)
+
+\method{a_summary}{factor}(
+  x,
+  .N_col,
+  .N_row,
+  .var = NULL,
+  .df_row = NULL,
+  .ref_group = NULL,
+  .in_ref_col = FALSE,
+  compare = FALSE,
+  .stats = summary_custom(type = "counts", include_pval = compare)$stats,
+  .formats = summary_custom(type = "counts", include_pval = compare)$formats,
+  .labels = summary_custom(type = "counts", include_pval = compare)$labels,
+  .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
+  na.rm = TRUE,
+  na_level = NA_character_,
+  ...
+)
+
+\method{a_summary}{character}(
+  x,
+  .N_col,
+  .N_row,
+  .var = NULL,
+  .df_row = NULL,
+  .ref_group = NULL,
+  .in_ref_col = FALSE,
+  compare = FALSE,
+  .stats = summary_custom(type = "counts", include_pval = compare)$stats,
+  .formats = summary_custom(type = "counts", include_pval = compare)$formats,
+  .labels = summary_custom(type = "counts", include_pval = compare)$labels,
+  .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
+  na.rm = TRUE,
+  na_level = NA_character_,
+  ...
+)
+
+\method{a_summary}{logical}(
+  x,
+  .N_col,
+  .N_row,
+  .var = NULL,
+  .df_row = NULL,
+  .ref_group = NULL,
+  .in_ref_col = FALSE,
+  compare = FALSE,
+  .stats = summary_custom(type = "counts", include_pval = compare)$stats,
+  .formats = summary_custom(type = "counts", include_pval = compare)$formats,
+  .labels = summary_custom(type = "counts", include_pval = compare)$labels,
+  .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
+  na.rm = TRUE,
+  na_level = NA_character_,
+  ...
+)
+
+analyze_vars(
+  lyt,
+  vars,
+  var_labels = vars,
+  nested = TRUE,
+  ...,
+  na.rm = TRUE,
+  na_level = NA_character_,
+  show_labels = "default",
+  table_names = vars,
+  section_div = NA_character_,
+  .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
+  .formats = NULL,
+  .labels = NULL,
+  .indent_mods = NULL
+)
+}
+\arguments{
+\item{x}{(\code{numeric})\cr vector of numbers we want to analyze.}
+
+\item{na.rm}{(\code{flag})\cr whether \code{NA} values should be removed from \code{x} prior to analysis.}
+
+\item{denom}{(\code{string})\cr choice of denominator for proportion. Options are:
+\itemize{
+\item \code{n}: number of values in this row and column intersection.
+\item \code{N_row}: total number of values in this row across columns.
+\item \code{N_col}: total number of values in this column across rows.
+}}
+
+\item{.N_row}{(\code{count})\cr row-wise N (row group count) for the group of observations being analyzed
+(i.e. with no column-based subsetting) that is passed by \code{rtables}.}
+
+\item{.N_col}{(\code{count})\cr column-wise N (column count) for the full column that is passed by \code{rtables}.}
+
+\item{.var}{(\code{string})\cr single variable name that is passed by \code{rtables} when requested
+by a statistics function.}
+
+\item{...}{arguments passed to \code{s_summary()}.}
+
+\item{control}{(\code{list})\cr parameters for descriptive statistics details, specified by using
+the helper function \code{\link[=control_analyze_vars]{control_analyze_vars()}}. Some possible parameter options are:
+\itemize{
+\item \code{conf_level} (\code{proportion})\cr confidence level of the interval for mean and median.
+\item \code{quantiles} (\code{numeric})\cr vector of length two to specify the quantiles.
+\item \code{quantile_type} (\code{numeric})\cr between 1 and 9 selecting quantile algorithms to be used.
+See more about \code{type} in \code{\link[stats:quantile]{stats::quantile()}}.
+\item \code{test_mean} (\code{numeric})\cr value to test against the mean under the null hypothesis when calculating p-value.
+}}
+
+\item{verbose}{(\code{logical})\cr Defaults to \code{TRUE}, which prints out warnings and messages. It is mainly used
+to print out information about factor casting.}
+
+\item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.}
+
+\item{.ref_group}{(\code{data.frame} or \code{vector})\cr the data corresponding to the reference group.}
+
+\item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.}
+
+\item{.stats}{(\code{character})\cr statistics to select for the table.}
+
+\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
+
+\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).}
+
+\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector
+should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation
+for that statistic's row label.}
+
+\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.}
+
+\item{lyt}{(\code{layout})\cr input layout where analyses will be added to.}
+
+\item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.}
+
+\item{var_labels}{(\code{character})\cr character for label.}
+
+\item{nested}{(\code{flag})\cr whether this layout instruction should be applied within the existing layout structure \emph{if
+possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split
+underneath analyses, which is not allowed.}
+
+\item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".}
+
+\item{table_names}{(\code{character})\cr this can be customized in case that the same \code{vars} are analyzed multiple times,
+to avoid warnings from \code{rtables}.}
+
+\item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group
+defined by this split instruction, or \code{NA_character_} (the default) for no section divider.}
+}
+\value{
+\itemize{
+\item \code{s_summary()} returns different statistics depending on the class of \code{x}.
+}
+
+\itemize{
+\item If \code{x} is of class \code{numeric}, returns a \code{list} with the following named \code{numeric} items:
+\itemize{
+\item \code{n}: The \code{\link[=length]{length()}} of \code{x}.
+\item \code{sum}: The \code{\link[=sum]{sum()}} of \code{x}.
+\item \code{mean}: The \code{\link[=mean]{mean()}} of \code{x}.
+\item \code{sd}: The \code{\link[stats:sd]{stats::sd()}} of \code{x}.
+\item \code{se}: The standard error of \code{x} mean, i.e.: (\code{sd(x) / sqrt(length(x))}).
+\item \code{mean_sd}: The \code{\link[=mean]{mean()}} and \code{\link[stats:sd]{stats::sd()}} of \code{x}.
+\item \code{mean_se}: The \code{\link[=mean]{mean()}} of \code{x} and its standard error (see above).
+\item \code{mean_ci}: The CI for the mean of \code{x} (from \code{\link[=stat_mean_ci]{stat_mean_ci()}}).
+\item \code{mean_sei}: The SE interval for the mean of \code{x}, i.e.: (\code{\link[=mean]{mean()}} -/+ \code{\link[stats:sd]{stats::sd()}} / \code{\link[=sqrt]{sqrt()}}).
+\item \code{mean_sdi}: The SD interval for the mean of \code{x}, i.e.: (\code{\link[=mean]{mean()}} -/+ \code{\link[stats:sd]{stats::sd()}}).
+\item \code{mean_pval}: The two-sided p-value of the mean of \code{x} (from \code{\link[=stat_mean_pval]{stat_mean_pval()}}).
+\item \code{median}: The \code{\link[stats:median]{stats::median()}} of \code{x}.
+\item \code{mad}: The median absolute deviation of \code{x}, i.e.: (\code{\link[stats:median]{stats::median()}} of \code{xc},
+where \code{xc} = \code{x} - \code{\link[stats:median]{stats::median()}}).
+\item \code{median_ci}: The CI for the median of \code{x} (from \code{\link[=stat_median_ci]{stat_median_ci()}}).
+\item \code{quantiles}: Two sample quantiles of \code{x} (from \code{\link[stats:quantile]{stats::quantile()}}).
+\item \code{iqr}: The \code{\link[stats:IQR]{stats::IQR()}} of \code{x}.
+\item \code{range}: The \code{\link[=range_noinf]{range_noinf()}} of \code{x}.
+\item \code{min}: The \code{\link[=min]{min()}} of \code{x}.
+\item \code{max}: The \code{\link[=max]{max()}} of \code{x}.
+\item \code{median_range}: The \code{\link[=median]{median()}} and \code{\link[=range_noinf]{range_noinf()}} of \code{x}.
+\item \code{cv}: The coefficient of variation of \code{x}, i.e.: (\code{\link[stats:sd]{stats::sd()}} / \code{\link[=mean]{mean()}} * 100).
+\item \code{geom_mean}: The geometric mean of \code{x}, i.e.: (\code{exp(mean(log(x)))}).
+\item \code{geom_cv}: The geometric coefficient of variation of \code{x}, i.e.: (\code{sqrt(exp(sd(log(x)) ^ 2) - 1) * 100}).
+}
+}
+
+\itemize{
+\item If \code{x} is of class \code{factor} or converted from \code{character}, returns a \code{list} with named \code{numeric} items:
+\itemize{
+\item \code{n}: The \code{\link[=length]{length()}} of \code{x}.
+\item \code{count}: A list with the number of cases for each level of the factor \code{x}.
+\item \code{count_fraction}: Similar to \code{count} but also includes the proportion of cases for each level of the
+factor \code{x} relative to the denominator, or \code{NA} if the denominator is zero.
+}
+}
+
+\itemize{
+\item If \code{x} is of class \code{logical}, returns a \code{list} with named \code{numeric} items:
+\itemize{
+\item \code{n}: The \code{\link[=length]{length()}} of \code{x} (possibly after removing \code{NA}s).
+\item \code{count}: Count of \code{TRUE} in \code{x}.
+\item \code{count_fraction}: Count and proportion of \code{TRUE} in \code{x} relative to the denominator, or \code{NA} if the
+denominator is zero. Note that \code{NA}s in \code{x} are never counted and never lead to \code{NA} here.
+}
+}
+
+\itemize{
+\item \code{analyze_vars()} returns a layout object suitable for passing to further layouting functions,
+or to \code{\link[rtables:build_table]{rtables::build_table()}}. Adding this function to an \code{rtable} layout will add formatted rows containing
+the statistics from \code{s_summary()} to the table layout.
+}
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
+
+We use the S3 generic function \code{\link[=s_summary]{s_summary()}} to implement summaries for different \code{x} objects. This
+is used as a statistics function in combination with the analyze function \code{\link[=analyze_vars]{analyze_vars()}}.
+}
+\section{Functions}{
+\itemize{
+\item \code{s_summary()}: S3 generic function to produce a variable summary.
+
+\item \code{s_summary(numeric)}: Method for \code{numeric} class.
+
+\item \code{s_summary(factor)}: Method for \code{factor} class.
+
+\item \code{s_summary(character)}: Method for \code{character} class. This makes an automatic
+conversion to factor (with a warning) and then forwards to the method for factors.
+
+\item \code{s_summary(logical)}: Method for \code{logical} class.
+
+\item \code{a_summary(numeric)}: Formatted analysis function method for \code{numeric} class.
+
+\item \code{a_summary(factor)}: Formatted analysis function method for \code{factor} class.
+
+\item \code{a_summary(character)}: Formatted analysis function method for \code{character} class.
+
+\item \code{a_summary(logical)}: Formatted analysis function method for \code{logical} class.
+
+\item \code{analyze_vars()}: Layout-creating function which can take statistics function arguments
+and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}.
+
+}}
+\note{
+\itemize{
+\item If \code{x} is an empty vector, \code{NA} is returned. This is the expected feature so as to return \code{rcell} content in
+\code{rtables} when the intersection of a column and a row delimits an empty data selection.
+\item When the \code{mean} function is applied to an empty vector, \code{NA} will be returned instead of \code{NaN}, the latter
+being standard behavior in R.
+}
+
+\itemize{
+\item If \code{x} is an empty \code{factor}, a list is still returned for \code{counts} with one element
+per factor level. If there are no levels in \code{x}, the function fails.
+\item If factor variables contain \code{NA}, these \code{NA} values are excluded by default. To include \code{NA} values
+set \code{na.rm = FALSE} and missing values will be displayed as an \code{NA} level. Alternatively, an explicit
+factor level can be defined for \code{NA} values during pre-processing via \code{\link[=df_explicit_na]{df_explicit_na()}} - the
+default \code{na_level} (\code{"<Missing>"}) will also be excluded when \code{na.rm} is set to \code{TRUE}.
+}
+
+\itemize{
+\item Automatic conversion of character to factor does not guarantee that the table
+can be generated correctly. In particular for sparse tables this very likely can fail.
+It is therefore better to always pre-process the dataset such that factors are manually
+created from character variables before passing the dataset to \code{\link[rtables:build_table]{rtables::build_table()}}.
+}
+
+Deprecation cycle started for \code{summarize_vars} as it is going to be renamed to
+\code{analyze_vars}. Intention is to reflect better the core underlying \code{rtables}
+functions; in this case \code{\link[rtables:analyze]{rtables::analyze()}}.
+}
+\examples{
+# `s_summary.numeric`
+
+## Basic usage: empty numeric returns NA-filled items.
+s_summary(numeric())
+
+## Management of NA values.
+x <- c(NA_real_, 1)
+s_summary(x, na.rm = TRUE)
+s_summary(x, na.rm = FALSE)
+
+x <- c(NA_real_, 1, 2)
+s_summary(x, stats = NULL)
+
+## Benefits in `rtables` constructions:
+require(rtables)
+dta_test <- data.frame(
+  Group = rep(LETTERS[1:3], each = 2),
+  sub_group = rep(letters[1:2], each = 3),
+  x = 1:6
+)
+
+## The summary obtained with `rtables`:
+basic_table() \%>\%
+  split_cols_by(var = "Group") \%>\%
+  split_rows_by(var = "sub_group") \%>\%
+  analyze(vars = "x", afun = s_summary) \%>\%
+  build_table(df = dta_test)
+
+## By comparison with `lapply`:
+X <- split(dta_test, f = with(dta_test, interaction(Group, sub_group)))
+lapply(X, function(x) s_summary(x$x))
+
+# `s_summary.factor`
+
+## Basic usage:
+s_summary(factor(c("a", "a", "b", "c", "a")))
+# Empty factor returns NA-filled items.
+s_summary(factor(levels = c("a", "b", "c")))
+
+## Management of NA values.
+x <- factor(c(NA, "Female"))
+x <- explicit_na(x)
+s_summary(x, na.rm = TRUE)
+s_summary(x, na.rm = FALSE)
+
+## Different denominators.
+x <- factor(c("a", "a", "b", "c", "a"))
+s_summary(x, denom = "N_row", .N_row = 10L)
+s_summary(x, denom = "N_col", .N_col = 20L)
+
+# `s_summary.character`
+
+## Basic usage:
+s_summary(c("a", "a", "b", "c", "a"), .var = "x", verbose = FALSE)
+s_summary(c("a", "a", "b", "c", "a", ""), .var = "x", na.rm = FALSE, verbose = FALSE)
+
+# `s_summary.logical`
+
+## Basic usage:
+s_summary(c(TRUE, FALSE, TRUE, TRUE))
+
+## Management of NA values.
+x <- c(NA, TRUE, FALSE)
+s_summary(x, na.rm = TRUE)
+s_summary(x, na.rm = FALSE)
+
+## Different denominators.
+x <- c(TRUE, FALSE, TRUE, TRUE)
+s_summary(x, denom = "N_row", .N_row = 10L)
+s_summary(x, denom = "N_col", .N_col = 20L)
+
+# `a_summary.factor`
+# We need to ungroup `count` and `count_fraction` first so that the rtables formatting
+# functions can be applied correctly.
+afun <- make_afun(
+  getS3method("a_summary", "factor"),
+  .ungroup_stats = c("count", "count_fraction")
+)
+afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+
+# `a_summary.factor`
+# We need to ungroup `count` and `count_fraction` first so that the rtables formatting
+# functions can be applied correctly.
+afun <- make_afun(
+  getS3method("a_summary", "factor"),
+  .ungroup_stats = c("count", "count_fraction")
+)
+afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+
+# `a_summary.character`
+afun <- make_afun(
+  getS3method("a_summary", "character"),
+  .ungroup_stats = c("count", "count_fraction")
+)
+afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
+
+# `a_summary.logical`
+afun <- make_afun(
+  getS3method("a_summary", "logical")
+)
+afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
+
+## Fabricated dataset.
+dta_test <- data.frame(
+  USUBJID = rep(1:6, each = 3),
+  PARAMCD = rep("lab", 6 * 3),
+  AVISIT  = rep(paste0("V", 1:3), 6),
+  ARM     = rep(LETTERS[1:3], rep(6, 3)),
+  AVAL    = c(9:1, rep(NA, 9))
+)
+
+# `analyze_vars()` in `rtables` pipelines
+## Default output within a `rtables` pipeline.
+l <- basic_table() \%>\%
+  split_cols_by(var = "ARM") \%>\%
+  split_rows_by(var = "AVISIT") \%>\%
+  analyze_vars(vars = "AVAL")
+
+build_table(l, df = dta_test)
+
+## Select and format statistics output.
+l <- basic_table() \%>\%
+  split_cols_by(var = "ARM") \%>\%
+  split_rows_by(var = "AVISIT") \%>\%
+  analyze_vars(
+    vars = "AVAL",
+    .stats = c("n", "mean_sd", "quantiles"),
+    .formats = c("mean_sd" = "xx.x, xx.x"),
+    .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3"))
+  )
+
+build_table(l, df = dta_test)
+
+## Use arguments interpreted by `s_summary`.
+l <- basic_table() \%>\%
+  split_cols_by(var = "ARM") \%>\%
+  split_rows_by(var = "AVISIT") \%>\%
+  analyze_vars(vars = "AVAL", na.rm = FALSE)
+
+build_table(l, df = dta_test)
+
+## Handle `NA` levels first when summarizing factors.
+dta_test$AVISIT <- NA_character_
+dta_test <- df_explicit_na(dta_test)
+l <- basic_table() \%>\%
+  split_cols_by(var = "ARM") \%>\%
+  analyze_vars(vars = "AVISIT", na.rm = FALSE)
+
+build_table(l, df = dta_test)
+
+}
diff --git a/man/summary_custom.Rd b/man/summary_custom.Rd
new file mode 100644
index 0000000000..ece05515c2
--- /dev/null
+++ b/man/summary_custom.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/analyze_variables.R
+\name{summary_custom}
+\alias{summary_custom}
+\title{Set Default Settings for Summary Statistics}
+\usage{
+summary_custom(
+  type = "numeric",
+  include_pval = FALSE,
+  stats_custom = NULL,
+  formats_custom = NULL,
+  labels_custom = NULL,
+  indents_custom = NULL
+)
+}
+\arguments{
+\item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.}
+
+\item{include_pval}{(\code{logical})\cr whether p-value should be included as a default statistic.}
+
+\item{stats_custom}{(\verb{named vector} of \code{character}) vector of statistics to include if not the defaults. This
+argument overrides \code{include_pval} and other custom value arguments such that only settings for these statistics
+will be returned.}
+
+\item{formats_custom}{(\verb{named vector} of \code{character}) vector of custom statistics formats to use in place of the
+defaults defined in \code{\link[=summary_formats]{summary_formats()}}. Names should be a subset of the statistics defined in \code{stats_custom} (or
+default statistics if this is \code{NULL}).}
+
+\item{labels_custom}{(\verb{named vector} of \code{character}) vector of custom statistics labels to use in place of the
+defaults defined in \code{\link[=summary_labels]{summary_labels()}}. Names should be a subset of the statistics defined in \code{stats_custom} (or
+default statistics if this is \code{NULL}).}
+
+\item{indents_custom}{(\code{integer} or \verb{named vector} of \code{integer}) vector of custom indentation modifiers for
+statistics to use instead of the default of \code{0L} for all statistics. Names should be a subset of the statistics
+defined in \code{stats_custom} (or default statistics if this is \code{NULL}). Alternatively, the same indentation modifier
+can be applied to all statistics by setting \code{indents_custom} to a single integer value.}
+}
+\description{
+Set Default Settings for Summary Statistics
+}
+\examples{
+summary_custom()
+summary_custom(type = "counts", include_pval = TRUE)
+summary_custom(
+  include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"),
+  labels_custom = c(sd = "Std. Dev."), indents_custom = 3L
+)
+
+}
diff --git a/man/summary_formats.Rd b/man/summary_formats.Rd
index ffa2da528f..86ea9d925d 100644
--- a/man/summary_formats.Rd
+++ b/man/summary_formats.Rd
@@ -4,7 +4,7 @@
 \alias{summary_formats}
 \title{Format Function for Descriptive Statistics}
 \usage{
-summary_formats(type = "numeric")
+summary_formats(type = "numeric", include_pval = FALSE)
 }
 \arguments{
 \item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.}
@@ -15,4 +15,8 @@ A named \code{vector} of default statistic formats for the given data type.
 \description{
 Returns format patterns for descriptive statistics. The format is understood by \code{rtables}.
 }
-\keyword{internal}
+\examples{
+summary_formats()
+summary_formats(type = "count", include_pval = TRUE)
+
+}
diff --git a/man/summary_labels.Rd b/man/summary_labels.Rd
index 3566089803..f2306a536c 100644
--- a/man/summary_labels.Rd
+++ b/man/summary_labels.Rd
@@ -4,7 +4,7 @@
 \alias{summary_labels}
 \title{Label Function for Descriptive Statistics}
 \usage{
-summary_labels(type = "numeric")
+summary_labels(type = "numeric", include_pval = FALSE)
 }
 \arguments{
 \item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.}
@@ -15,4 +15,8 @@ A named \code{vector} of default statistic labels for the given data type.
 \description{
 Returns labels of descriptive statistics for numeric variables.
 }
-\keyword{internal}
+\examples{
+summary_labels()
+summary_labels(type = "count", include_pval = TRUE)
+
+}

From 8b386988f5122e3c30b2f0ff63421dc81a17a2e1 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 4 Jul 2023 18:18:58 -0400
Subject: [PATCH 20/33] Fix functions, remove custom argument

---
 R/analyze_variables.R       | 257 +++++++++++-------------------------
 R/utils.R                   |  19 +--
 man/analyze_variables.Rd    |  40 +++---
 man/extract_by_name.Rd      |   7 +-
 tests/testthat/test-utils.R |  18 ---
 5 files changed, 95 insertions(+), 246 deletions(-)

diff --git a/R/analyze_variables.R b/R/analyze_variables.R
index d60ee8f7c8..cc0868b9cb 100644
--- a/R/analyze_variables.R
+++ b/R/analyze_variables.R
@@ -75,11 +75,12 @@ summary_formats <- function(type = "numeric", include_pval = FALSE) {
       quantiles = "xx.x - xx.x",
       iqr = "xx.x",
       range = "xx.x - xx.x",
-      cv = "xx.x",
       min = "xx.x",
       max = "xx.x",
       median_range = "xx.x (xx.x - xx.x)",
+      cv = "xx.x",
       geom_mean = "xx.x",
+      geom_mean_ci = "(xx.xx, xx.xx)",
       geom_cv = "xx.x",
       pval = "x.xxxx | (<0.0001)"
     )
@@ -129,11 +130,12 @@ summary_labels <- function(type = "numeric", include_pval = FALSE) {
       quantiles = "25% and 75%-ile",
       iqr = "IQR",
       range = "Min - Max",
-      cv = "CV (%)",
       min = "Minimum",
       max = "Maximum",
       median_range = "Median (Min - Max)",
+      cv = "CV (%)",
       geom_mean = "Geometric Mean",
+      geom_mean_ci = "Geometric Mean 95% CI",
       geom_cv = "CV % Geometric Mean",
       pval = "p-value (t-test)"
     )
@@ -179,11 +181,7 @@ summary_custom <- function(type = "numeric",
   if (is.null(stats_custom)) {
     .stats <- names(.formats)
   } else {
-    checkmate::assert_subset(
-      stats_custom,
-      c(names(s_summary(if (type == "numeric") numeric() else TRUE)), "pval")
-    )
-    .stats <- stats_custom
+    .stats <- intersect(stats_custom, names(.formats))
   }
   .labels <- summary_labels(type = type, include_pval = include_pval)
   .indents <- setNames(rep(0L, length(.stats)), .stats)
@@ -597,28 +595,6 @@ s_summary.logical <- function(x,
 #' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`.
 #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is.
 #'
-#' @examples
-#' # summary analysis - compare = FALSE
-#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
-#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
-#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
-#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
-#'
-#' # comparison analysis - compare = TRUE
-#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
-#' a_summary(
-#'   factor(c("a", "a", "b", "c", "a")),
-#'   .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
-#' )
-#' a_summary(
-#'   c("A", "B", "A", "C"),
-#'   .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE
-#' )
-#' a_summary(
-#'   c(TRUE, FALSE, FALSE, TRUE, TRUE),
-#'   .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
-#' )
-#'
 #' @export
 a_summary_output <- function(x,
                              .N_col, # nolint
@@ -703,6 +679,7 @@ a_summary_output <- function(x,
 #' @note
 #' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`.
 #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is.
+#'
 #' @export
 a_summary <- function(x,
                       .N_col, # nolint
@@ -718,14 +695,8 @@ a_summary <- function(x,
 #' @describeIn analyze_variables Formatted analysis function method for `factor` class.
 #'
 #' @examples
-#' # `a_summary.factor`
-#' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting
-#' # functions can be applied correctly.
-#' afun <- make_afun(
-#'   getS3method("a_summary", "factor"),
-#'   .ungroup_stats = c("count", "count_fraction")
-#' )
-#' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
+#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
 #'
 #' @export
 a_summary.numeric <- function(x,
@@ -743,61 +714,33 @@ a_summary.numeric <- function(x,
                               na.rm = TRUE, # nolint
                               na_level = NA_character_,
                               ...) {
-  # Remove all-NA rows
-  if (!is.null(.df_row) && ncol(.df_row) > 1) {
-    in_tot_col <- nrow(.df_row) == length(x)
-    .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ]
-    if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]]
-  }
-
-  x_stats <- if (!compare) {
-    s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
-  } else {
-    s_compare(
-      x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...
-    )
-  }
-
-  custom_summary <- summary_custom(
-    include_pval = compare,
-    stats_custom = .stats,
-    formats_custom = .formats,
-    labels_custom = .labels,
-    indents_custom = .indent_mods
-  )
-  .stats <- custom_summary$stats
-  .formats <- custom_summary$formats
-  .labels <- custom_summary$labels
-  .indent_mods <- custom_summary$indents
-
-  for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) {
-    if (!i %in% names(.labels) || .labels[[i]] == summary_custom()$labels[[i]]) {
-      .labels[[i]] <- attr(x_stats[[i]], "label")
-    }
-  }
-
-  x_stats <- x_stats[.stats]
-  in_rows(
-    .list = x_stats,
+  a_summary(
+    x = x,
+    .N_col = .N_col,
+    .N_row = .N_row,
+    .var = .var,
+    .df_row = .df_row,
+    .ref_group = .ref_group,
+    .in_ref_col = .in_ref_col,
+    compare = compare,
+    .stats = .stats,
     .formats = .formats,
-    .names = .labels,
     .labels = .labels,
     .indent_mods = .indent_mods,
-    .format_na_strs = na_level
+    na.rm = na.rm,
+    na_level = na_level,
+    ...
   )
 }
 
 #' @describeIn analyze_variables Formatted analysis function method for `factor` class.
 #'
 #' @examples
-#' # `a_summary.factor`
-#' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting
-#' # functions can be applied correctly.
-#' afun <- make_afun(
-#'   getS3method("a_summary", "factor"),
-#'   .ungroup_stats = c("count", "count_fraction")
+#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+#' a_summary(
+#'   factor(c("a", "a", "b", "c", "a")),
+#'   .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
 #' )
-#' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
 #'
 #' @export
 a_summary.factor <- function(x,
@@ -815,63 +758,33 @@ a_summary.factor <- function(x,
                              na.rm = TRUE, # nolint
                              na_level = NA_character_,
                              ...) {
-  # Remove all-NA rows
-  if (!is.null(.df_row) && ncol(.df_row) > 1) {
-    in_tot_col <- nrow(.df_row) == length(x)
-    .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ]
-    if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]]
-  }
-
-  if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
-
-  x_stats <- if (!compare) {
-    s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
-  } else {
-    s_compare(
-      x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...
-    )
-  }
-
-  custom_summary <- summary_custom(
-    type = "counts",
-    include_pval = compare,
-    stats_custom = .stats,
-    formats_custom = .formats,
-    labels_custom = .labels,
-    indents_custom = .indent_mods
-  )
-  .stats <- custom_summary$stats
-  .formats <- custom_summary$formats
-  .labels <- custom_summary$labels
-  .indent_mods <- custom_summary$indents
-
-  x_stats <- x_stats[.stats]
-  x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col)
-  x_stats <- x_ungrp[["x"]]
-  .stats <- x_ungrp[[".stats"]]
-  .formats <- x_ungrp[[".formats"]]
-  .labels <- x_ungrp[[".labels"]]
-  .indent_mods <- x_ungrp[[".indent_mods"]]
-
-  in_rows(
-    .list = x_stats,
+  a_summary(
+    x = x,
+    .N_col = .N_col,
+    .N_row = .N_row,
+    .var = .var,
+    .df_row = .df_row,
+    .ref_group = .ref_group,
+    .in_ref_col = .in_ref_col,
+    compare = compare,
+    .stats = .stats,
     .formats = .formats,
-    .names = .labels,
     .labels = .labels,
     .indent_mods = .indent_mods,
-    .format_na_strs = na_level
+    na.rm = na.rm,
+    na_level = na_level,
+    ...
   )
 }
 
 #' @describeIn analyze_variables Formatted analysis function method for `character` class.
 #'
 #' @examples
-#' # `a_summary.character`
-#' afun <- make_afun(
-#'   getS3method("a_summary", "character"),
-#'   .ungroup_stats = c("count", "count_fraction")
+#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
+#' a_summary(
+#'   c("A", "B", "A", "C"),
+#'   .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE
 #' )
-#' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
 #'
 #' @export
 a_summary.character <- function(x,
@@ -890,31 +803,33 @@ a_summary.character <- function(x,
                                 na_level = NA_character_,
                                 ...) {
   x <- as.factor(x)
-  a_summary(x,
-           .N_col, # nolint
-           .N_row, # nolint
-           .var = .var,
-           .df_row = .df_row,
-           .ref_group = .ref_group,
-           .in_ref_col = .in_ref_col,
-           compare = compare,
-           .stats = .stats,
-           .formats = .formats,
-           .labels = .labels,
-           .indent_mods = .indent_mods,
-           na.rm = na.rm,
-           na_level = na_level,
-           ...)
+  a_summary(
+    x = x,
+    .N_col = .N_col,
+    .N_row = .N_row,
+    .var = .var,
+    .df_row = .df_row,
+    .ref_group = .ref_group,
+    .in_ref_col = .in_ref_col,
+    compare = compare,
+    .stats = .stats,
+    .formats = .formats,
+    .labels = .labels,
+    .indent_mods = .indent_mods,
+    na.rm = na.rm,
+    na_level = na_level,
+    ...
+  )
 }
 
 #' @describeIn analyze_variables Formatted analysis function method for `logical` class.
 #'
 #' @examples
-#' # `a_summary.logical`
-#' afun <- make_afun(
-#'   getS3method("a_summary", "logical")
+#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
+#' a_summary(
+#'   c(TRUE, FALSE, FALSE, TRUE, TRUE),
+#'   .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
 #' )
-#' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
 #'
 #' @export
 a_summary.logical <- function(x,
@@ -932,42 +847,22 @@ a_summary.logical <- function(x,
                              na.rm = TRUE, # nolint
                              na_level = NA_character_,
                              ...) {
-  # Remove all-NA rows
-  if (!is.null(.df_row) && ncol(.df_row) > 1) {
-    in_tot_col <- nrow(.df_row) == length(x)
-    .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ]
-    if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]]
-  }
-
-  if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
-  x_stats <- if (!compare) {
-    s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
-  } else {
-    s_compare(
-      x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...
-    )
-  }
-
-  custom_summary <- summary_custom(
-    include_pval = compare,
-    stats_custom = .stats,
-    formats_custom = .formats,
-    labels_custom = .labels,
-    indents_custom = .indent_mods
-  )
-  .stats <- custom_summary$stats
-  .formats <- custom_summary$formats
-  .labels <- custom_summary$labels
-  .indent_mods <- custom_summary$indents
-
-  x_stats <- x_stats[.stats]
-  in_rows(
-    .list = x_stats,
+  a_summary(
+    x = x,
+    .N_col = .N_col,
+    .N_row = .N_row,
+    .var = .var,
+    .df_row = .df_row,
+    .ref_group = .ref_group,
+    .in_ref_col = .in_ref_col,
+    compare = compare,
+    .stats = .stats,
     .formats = .formats,
-    .names = .labels,
     .labels = .labels,
     .indent_mods = .indent_mods,
-    .format_na_strs = na_level
+    na.rm = na.rm,
+    na_level = na_level,
+    ...
   )
 }
 
diff --git a/R/utils.R b/R/utils.R
index 037a29acbd..a5dcaed550 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -236,38 +236,25 @@ combine_vectors <- function(x, y) {
 #' - If `x` is `NULL`, then still always `NULL` is returned (same as in base function).
 #' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those
 #'   elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s.
-#' - If `x_defaults` is not `NULL`, these values will be used to fill in any values that are in
-#'   `names` but not in `x`.
 #'
 #' @param x (named `vector`)\cr where to extract named elements from.
 #' @param names (`character`)\cr vector of names to extract.
-#' @param x_defaults (named `vector`)\cr named vector of default values to fill in for each value in `names` if no
-#'   corresponding value in `x` exists.
 #'
 #' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`.
 #'
 #' @keywords internal
-extract_by_name <- function(x, names, x_defaults = NULL) {
+extract_by_name <- function(x, names) {
   if (is.null(x)) {
     return(NULL)
   }
   checkmate::assert_named(x)
   checkmate::assert_character(names)
   which_extract <- intersect(names(x), names)
-  x_fill <- c()
-  if (!is.null(x_defaults)) {
-    checkmate::assert_named(x_defaults)
-    x_fill <- x_defaults
-  }
   if (length(which_extract) > 0) {
-    x_fill[which_extract] <- x[which_extract]
-    x_fill <- if (!is.null(x_defaults)) x_fill[names] else x_fill[which_extract]
-  } else if (length(intersect(names(x_defaults), names)) > 0) {
-    x_fill <- x_fill[names]
+    x[which_extract]
   } else {
-    x_fill <- NULL
+    NULL
   }
-  x_fill
 }
 
 #' Labels for Adverse Event Baskets
diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd
index 5ec4af07e4..714af59a8d 100644
--- a/man/analyze_variables.Rd
+++ b/man/analyze_variables.Rd
@@ -408,36 +408,26 @@ x <- c(TRUE, FALSE, TRUE, TRUE)
 s_summary(x, denom = "N_row", .N_row = 10L)
 s_summary(x, denom = "N_col", .N_col = 20L)
 
-# `a_summary.factor`
-# We need to ungroup `count` and `count_fraction` first so that the rtables formatting
-# functions can be applied correctly.
-afun <- make_afun(
-  getS3method("a_summary", "factor"),
-  .ungroup_stats = c("count", "count_fraction")
-)
-afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
-
-# `a_summary.factor`
-# We need to ungroup `count` and `count_fraction` first so that the rtables formatting
-# functions can be applied correctly.
-afun <- make_afun(
-  getS3method("a_summary", "factor"),
-  .ungroup_stats = c("count", "count_fraction")
+a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
+a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
+
+a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+a_summary(
+  factor(c("a", "a", "b", "c", "a")),
+  .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
 )
-afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
 
-# `a_summary.character`
-afun <- make_afun(
-  getS3method("a_summary", "character"),
-  .ungroup_stats = c("count", "count_fraction")
+a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
+a_summary(
+  c("A", "B", "A", "C"),
+  .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE
 )
-afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
 
-# `a_summary.logical`
-afun <- make_afun(
-  getS3method("a_summary", "logical")
+a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
+a_summary(
+  c(TRUE, FALSE, FALSE, TRUE, TRUE),
+  .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
 )
-afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
 
 ## Fabricated dataset.
 dta_test <- data.frame(
diff --git a/man/extract_by_name.Rd b/man/extract_by_name.Rd
index 186478ec0a..bc328b2a20 100644
--- a/man/extract_by_name.Rd
+++ b/man/extract_by_name.Rd
@@ -4,15 +4,12 @@
 \alias{extract_by_name}
 \title{Extract Elements by Name}
 \usage{
-extract_by_name(x, names, x_defaults = NULL)
+extract_by_name(x, names)
 }
 \arguments{
 \item{x}{(named \code{vector})\cr where to extract named elements from.}
 
 \item{names}{(\code{character})\cr vector of names to extract.}
-
-\item{x_defaults}{(named \code{vector})\cr named vector of default values to fill in for each value in \code{names} if no
-corresponding value in \code{x} exists.}
 }
 \value{
 \code{NULL} if \code{x} is \code{NULL}, otherwise the extracted elements from \code{x}.
@@ -26,8 +23,6 @@ Differences to the standard \code{[} function are:
 \item If \code{x} is \code{NULL}, then still always \code{NULL} is returned (same as in base function).
 \item If \code{x} is not \code{NULL}, then the intersection of its names is made with \code{names} and those
 elements are returned. That is, \code{names} which don't appear in \code{x} are not returned as \code{NA}s.
-\item If \code{x_defaults} is not \code{NULL}, these values will be used to fill in any values that are in
-\code{names} but not in \code{x}.
 }
 }
 \keyword{internal}
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index b772079d10..87210c6163 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -89,24 +89,6 @@ testthat::test_that("extract_by_name returns NULL when there is no overlap", {
   testthat::expect_snapshot(res)
 })
 
-testthat::test_that("extract_by_name x_defaults argument works as expected", {
-  result <- extract_by_name(
-    x = c(a = "xx", b = function(x) paste(x, "bla")),
-    names = c("b", "c"),
-    x_defaults = c(c = "c_default")
-  )
-  expected <- c(b = function(x) paste(x, "bla"), c = "c_default")
-  testthat::expect_identical(result, expected)
-
-  # no overlap
-  result <- extract_by_name(
-    x = c(a = "xx", b = "xx.xx"),
-    c("d", "c"),
-    c(a = "test")
-  )
-  testthat::expect_identical(result, NULL)
-})
-
 testthat::test_that("aesi_label works as expected for SMQ", {
   smq01nam <- c("AESI 1", "", NA)
   smq01sc <- c("NARROW", "", NA)

From 5ce7c64b808c64f7dad26d161acd1259714f620c Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 4 Jul 2023 18:46:57 -0400
Subject: [PATCH 21/33] Fix tests, clean up code

---
 NEWS.md                                    |  1 -
 R/analyze_variables.R                      | 79 ++++++++++++----------
 R/compare_variables.R                      | 12 ----
 tests/testthat/_snaps/analyze_variables.md | 24 ++++---
 tests/testthat/test-analyze_variables.R    |  2 +-
 5 files changed, 56 insertions(+), 62 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index e7f52b7bd3..9262a426af 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,7 +5,6 @@
 * Refactored `a_summary` to no longer use helper function `create_afun_summary`. 
 * Refactored `summarize_vars` and `compare_vars` to use refactored `a_summary`.
 * Created new internal helper function `ungroup_stats` to ungroup and format statistics calculated for factor variables.
-* Added new argument `x_defaults` to `extract_by_name` to allow user to specify default values to implement for values in `names` that are not specified in `x`.
 
 ### Enhancements
 * Started deprecation cycle for `summarize_vars` and `control_summarize_vars`. Renamed into `analyze_vars` and `control_analyze_vars` to reflect underlying `rtables` machinery while keeping backward compatibility with aliases.
diff --git a/R/analyze_variables.R b/R/analyze_variables.R
index cc0868b9cb..21cf753303 100644
--- a/R/analyze_variables.R
+++ b/R/analyze_variables.R
@@ -177,12 +177,10 @@ summary_custom <- function(type = "numeric",
                            formats_custom = NULL,
                            labels_custom = NULL,
                            indents_custom = NULL) {
+  if ("pval" %in% stats_custom) include_pval <- TRUE
+
   .formats <- summary_formats(type = type, include_pval = include_pval)
-  if (is.null(stats_custom)) {
-    .stats <- names(.formats)
-  } else {
-    .stats <- intersect(stats_custom, names(.formats))
-  }
+  .stats <- if (is.null(stats_custom)) names(.formats) else intersect(stats_custom, names(.formats))
   .labels <- summary_labels(type = type, include_pval = include_pval)
   .indents <- setNames(rep(0L, length(.stats)), .stats)
 
@@ -603,13 +601,14 @@ a_summary_output <- function(x,
                              .df_row,
                              .ref_group,
                              .in_ref_col,
+                             compare,
+                             type,
                              .stats,
                              .formats,
                              .labels,
                              .indent_mods,
                              na.rm, # nolint
                              na_level,
-                             compare,
                              ...) {
   # Remove all-NA rows
   if (!is.null(.df_row) && ncol(.df_row) > 1) {
@@ -628,6 +627,7 @@ a_summary_output <- function(x,
   }
 
   custom_summary <- summary_custom(
+    type = type,
     include_pval = compare,
     stats_custom = .stats,
     formats_custom = .formats,
@@ -714,7 +714,7 @@ a_summary.numeric <- function(x,
                               na.rm = TRUE, # nolint
                               na_level = NA_character_,
                               ...) {
-  a_summary(
+  a_summary_output(
     x = x,
     .N_col = .N_col,
     .N_row = .N_row,
@@ -723,6 +723,7 @@ a_summary.numeric <- function(x,
     .ref_group = .ref_group,
     .in_ref_col = .in_ref_col,
     compare = compare,
+    type = "numeric",
     .stats = .stats,
     .formats = .formats,
     .labels = .labels,
@@ -758,7 +759,7 @@ a_summary.factor <- function(x,
                              na.rm = TRUE, # nolint
                              na_level = NA_character_,
                              ...) {
-  a_summary(
+  a_summary_output(
     x = x,
     .N_col = .N_col,
     .N_row = .N_row,
@@ -767,6 +768,7 @@ a_summary.factor <- function(x,
     .ref_group = .ref_group,
     .in_ref_col = .in_ref_col,
     compare = compare,
+    type = "counts",
     .stats = .stats,
     .formats = .formats,
     .labels = .labels,
@@ -803,7 +805,8 @@ a_summary.character <- function(x,
                                 na_level = NA_character_,
                                 ...) {
   x <- as.factor(x)
-  a_summary(
+  .ref_group <- as.factor(.ref_group)
+  a_summary_output(
     x = x,
     .N_col = .N_col,
     .N_row = .N_row,
@@ -812,6 +815,7 @@ a_summary.character <- function(x,
     .ref_group = .ref_group,
     .in_ref_col = .in_ref_col,
     compare = compare,
+    type = "counts",
     .stats = .stats,
     .formats = .formats,
     .labels = .labels,
@@ -833,21 +837,21 @@ a_summary.character <- function(x,
 #'
 #' @export
 a_summary.logical <- function(x,
-                             .N_col, # nolint
-                             .N_row, # nolint
-                             .var = NULL,
-                             .df_row = NULL,
-                             .ref_group = NULL,
-                             .in_ref_col = FALSE,
-                             compare = FALSE,
-                             .stats = summary_custom(type = "counts", include_pval = compare)$stats,
-                             .formats = summary_custom(type = "counts", include_pval = compare)$formats,
-                             .labels = summary_custom(type = "counts", include_pval = compare)$labels,
-                             .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
-                             na.rm = TRUE, # nolint
-                             na_level = NA_character_,
-                             ...) {
-  a_summary(
+                              .N_col, # nolint
+                              .N_row, # nolint
+                              .var = NULL,
+                              .df_row = NULL,
+                              .ref_group = NULL,
+                              .in_ref_col = FALSE,
+                              compare = FALSE,
+                              .stats = summary_custom(type = "counts", include_pval = compare)$stats,
+                              .formats = summary_custom(type = "counts", include_pval = compare)$formats,
+                              .labels = summary_custom(type = "counts", include_pval = compare)$labels,
+                              .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
+                              na.rm = TRUE, # nolint
+                              na_level = NA_character_,
+                              ...) {
+  a_summary_output(
     x = x,
     .N_col = .N_col,
     .N_row = .N_row,
@@ -856,6 +860,7 @@ a_summary.logical <- function(x,
     .ref_group = .ref_group,
     .in_ref_col = .in_ref_col,
     compare = compare,
+    type = "counts",
     .stats = .stats,
     .formats = .formats,
     .labels = .labels,
@@ -960,19 +965,19 @@ create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
 #'
 #' @export
 analyze_vars <- function(lyt,
-                           vars,
-                           var_labels = vars,
-                           nested = TRUE,
-                           ...,
-                           na.rm = TRUE, # nolint
-                           na_level = NA_character_,
-                           show_labels = "default",
-                           table_names = vars,
-                           section_div = NA_character_,
-                           .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
-                           .formats = NULL,
-                           .labels = NULL,
-                           .indent_mods = NULL) {
+                         vars,
+                         var_labels = vars,
+                         nested = TRUE,
+                         ...,
+                         na.rm = TRUE, # nolint
+                         na_level = NA_character_,
+                         show_labels = "default",
+                         table_names = vars,
+                         section_div = NA_character_,
+                         .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
+                         .formats = NULL,
+                         .labels = NULL,
+                         .indent_mods = NULL) {
   extra_args <- list(.stats = .stats, na.rm = na.rm, na_level = na_level, ...)
   if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
   if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
diff --git a/R/compare_variables.R b/R/compare_variables.R
index 2c5092c703..cb97788397 100644
--- a/R/compare_variables.R
+++ b/R/compare_variables.R
@@ -262,12 +262,6 @@ a_compare <- function(x,
                       .df_row = NULL,
                       .ref_group = NULL,
                       .in_ref_col = FALSE,
-                      .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels"))),
-                      .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_formats")),
-                      .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_labels")),
-                      .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"), "_indents")),
-                      na.rm = TRUE, # nolint
-                      na_level = NA_character_,
                       ...) {
   lifecycle::deprecate_warn(
     "0.8.3",
@@ -282,12 +276,6 @@ a_compare <- function(x,
     .df_row = .df_row,
     .ref_group = .ref_group,
     .in_ref_col = .in_ref_col,
-    .stats = .stats,
-    .formats = .formats,
-    .labels = .labels,
-    .indent_mods = .indent_mods,
-    na.rm = na.rm,
-    na_level = na_level,
     compare = TRUE,
     ...
   )
diff --git a/tests/testthat/_snaps/analyze_variables.md b/tests/testthat/_snaps/analyze_variables.md
index 587af512d0..78d1afbf16 100644
--- a/tests/testthat/_snaps/analyze_variables.md
+++ b/tests/testthat/_snaps/analyze_variables.md
@@ -945,12 +945,13 @@
       15             25% and 75%-ile       -0.6 - 0.6          0             25% and 75%-ile
       16                         IQR              1.2          0                         IQR
       17                   Min - Max       -0.8 - 1.6          0                   Min - Max
-      18                      CV (%)            590.4          0                      CV (%)
-      19                     Minimum             -0.8          0                     Minimum
-      20                     Maximum              1.6          0                     Maximum
-      21          Median (Min - Max) 0.3 (-0.8 - 1.6)          0          Median (Min - Max)
+      18                     Minimum             -0.8          0                     Minimum
+      19                     Maximum              1.6          0                     Maximum
+      20          Median (Min - Max) 0.3 (-0.8 - 1.6)          0          Median (Min - Max)
+      21                      CV (%)            590.4          0                      CV (%)
       22              Geometric Mean               NA          0              Geometric Mean
-      23         CV % Geometric Mean               NA          0         CV % Geometric Mean
+      23       Geometric Mean 95% CI               NA          0       Geometric Mean 95% CI
+      24         CV % Geometric Mean               NA          0         CV % Geometric Mean
 
 ---
 
@@ -1054,13 +1055,14 @@
       15             25% and 75%-ile       4.4 - 5.6          0             25% and 75%-ile
       16                         IQR             1.2          0                         IQR
       17                   Min - Max       4.2 - 6.6          0                   Min - Max
-      18                      CV (%)            15.2          0                      CV (%)
-      19                     Minimum             4.2          0                     Minimum
-      20                     Maximum             6.6          0                     Maximum
-      21          Median (Min - Max) 5.3 (4.2 - 6.6)          0          Median (Min - Max)
+      18                     Minimum             4.2          0                     Minimum
+      19                     Maximum             6.6          0                     Maximum
+      20          Median (Min - Max) 5.3 (4.2 - 6.6)          0          Median (Min - Max)
+      21                      CV (%)            15.2          0                      CV (%)
       22              Geometric Mean             5.1          0              Geometric Mean
-      23         CV % Geometric Mean            15.2          0         CV % Geometric Mean
-      24            p-value (t-test)         <0.0001          0            p-value (t-test)
+      23       Geometric Mean 95% CI    (4.56, 5.66)          0       Geometric Mean 95% CI
+      24         CV % Geometric Mean            15.2          0         CV % Geometric Mean
+      25            p-value (t-test)         <0.0001          0            p-value (t-test)
 
 ---
 
diff --git a/tests/testthat/test-analyze_variables.R b/tests/testthat/test-analyze_variables.R
index 4c2bf85f8a..b1a0590f5d 100644
--- a/tests/testthat/test-analyze_variables.R
+++ b/tests/testthat/test-analyze_variables.R
@@ -364,7 +364,7 @@ testthat::test_that("`analyze_vars` works with character input and gives the sam
 
   l <- basic_table() %>%
     analyze_vars(vars = "foo")
-  testthat::expect_warning(result <- build_table(l, dta))
+  result <- build_table(l, dta)
 
   dta_factor <- dta %>%
     dplyr::mutate(foo = factor(foo))

From 9af071099edf54e438aba17fe110454e98c64360 Mon Sep 17 00:00:00 2001
From: "27856297+dependabot-preview[bot]@users.noreply.github.com"
 <27856297+dependabot-preview[bot]@users.noreply.github.com>
Date: Tue, 4 Jul 2023 22:51:06 +0000
Subject: [PATCH 22/33] [skip actions] Roxygen Man Pages Auto Update

---
 man/compare_variables.Rd | 34 ++++++++++++----------------------
 1 file changed, 12 insertions(+), 22 deletions(-)

diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd
index 7ba393662c..b9c7117358 100644
--- a/man/compare_variables.Rd
+++ b/man/compare_variables.Rd
@@ -38,16 +38,6 @@ a_compare(
   .df_row = NULL,
   .ref_group = NULL,
   .in_ref_col = FALSE,
-  .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
-    "_labels"))),
-  .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
-    "_formats")),
-  .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
-    "_labels")),
-  .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
-    "_indents")),
-  na.rm = TRUE,
-  na_level = NA_character_,
   ...
 )
 
@@ -95,18 +85,6 @@ to print out information about factor casting. Defaults to \code{TRUE}.}
 
 \item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.}
 
-\item{.stats}{(\code{character})\cr statistics to select for the table.}
-
-\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
-
-\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).}
-
-\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector
-should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation
-for that statistic's row label.}
-
-\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.}
-
 \item{lyt}{(\code{layout})\cr input layout where analyses will be added to.}
 
 \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.}
@@ -117,6 +95,8 @@ for that statistic's row label.}
 possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split
 underneath analyses, which is not allowed.}
 
+\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.}
+
 \item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".}
 
 \item{table_names}{(\code{character})\cr this can be customized in case that the same \code{vars} are analyzed multiple times,
@@ -124,6 +104,16 @@ to avoid warnings from \code{rtables}.}
 
 \item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group
 defined by this split instruction, or \code{NA_character_} (the default) for no section divider.}
+
+\item{.stats}{(\code{character})\cr statistics to select for the table.}
+
+\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
+
+\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).}
+
+\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector
+should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation
+for that statistic's row label.}
 }
 \value{
 \itemize{

From a4bcb31c09e4f466c3a40b463cffc356c79b3db5 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 4 Jul 2023 20:31:21 -0400
Subject: [PATCH 23/33] Fix docs

---
 NAMESPACE                                  |   4 +-
 R/analyze_variables.R                      | 231 ++++++++-------------
 R/utils_factor.R                           |  15 +-
 man/analyze_variables.Rd                   | 102 +++++----
 man/compare_variables.Rd                   |  34 ++-
 man/summary_custom.Rd                      |  49 -----
 man/summary_formats.Rd                     |  22 --
 man/summary_labels.Rd                      |  22 --
 man/summary_stats.Rd                       |  90 ++++++++
 man/ungroup_stats.Rd                       |  14 +-
 tests/testthat/_snaps/analyze_variables.md | 198 +++++++++++++++++-
 tests/testthat/test-analyze_variables.R    |  98 ++++++++-
 12 files changed, 552 insertions(+), 327 deletions(-)
 delete mode 100644 man/summary_custom.Rd
 delete mode 100644 man/summary_formats.Rd
 delete mode 100644 man/summary_labels.Rd
 create mode 100644 man/summary_stats.Rd

diff --git a/NAMESPACE b/NAMESPACE
index d21c817875..2717dc01a0 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,8 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(a_summary,character)
-S3method(a_summary,factor)
-S3method(a_summary,logical)
+S3method(a_summary,default)
 S3method(a_summary,numeric)
 S3method(as.rtable,data.frame)
 S3method(h_coxreg_inter_effect,character)
diff --git a/R/analyze_variables.R b/R/analyze_variables.R
index 21cf753303..8539f09bf9 100644
--- a/R/analyze_variables.R
+++ b/R/analyze_variables.R
@@ -34,13 +34,23 @@ control_analyze_vars <- function(conf_level = 0.95,
 
 control_summarize_vars <- control_analyze_vars
 
-#' Format Function for Descriptive Statistics
+#' Summary Statistic Settings Functions
 #'
-#' Returns format patterns for descriptive statistics. The format is understood by `rtables`.
+#' @description `r lifecycle::badge("stable")`
+#'
+#' Functions to retrieve default settings for summary statistics and customize these settings.
 #'
-#' @param type (`string`)\cr choice of a summary data type. Only `counts` and `numeric` types are currently supported.
+#' @param type (`character`)\cr choice of summary data type. Only `counts` and `numeric` types are currently supported.
+#' @param include_pval (`logical`)\cr whether p-value should be included as a default statistic.
 #'
-#' @return A named `vector` of default statistic formats for the given data type.
+#' @name summary_stats
+NULL
+
+#' @describeIn summary_stats Function to retrieve default formats for summary statistics. Returns format patterns for
+#'   descriptive statistics which are understood by `rtables`.
+#'
+#' @return
+#' * `summary_formats` returns a named `vector` of default statistic formats for the given data type.
 #'
 #' @examples
 #' summary_formats()
@@ -89,13 +99,11 @@ summary_formats <- function(type = "numeric", include_pval = FALSE) {
   fmts
 }
 
-#' Label Function for Descriptive Statistics
-#'
-#' Returns labels of descriptive statistics for numeric variables.
-#'
-#' @param type (`string`)\cr choice of a summary data type. Only `counts` and `numeric` types are currently supported.
+#' @describeIn summary_stats Function to retrieve default labels for summary statistics. Returns labels of descriptive
+#'   statistics which are understood by `rtables`.
 #'
-#' @return A named `vector` of default statistic labels for the given data type.
+#' @return
+#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type.
 #'
 #' @examples
 #' summary_labels()
@@ -144,30 +152,33 @@ summary_labels <- function(type = "numeric", include_pval = FALSE) {
   lbls
 }
 
-#' Set Defaults Settings for Summary Statistics
+#' @describeIn summary_stats Function to configure settings for default or custom summary statistics for a given data
+#'   type. In addition to selecting a custom subset of statistics, the user can also set custom formats, labels, and
+#'   indent modifiers for any of these statistics.
 #'
-#' @inheritParams summary_formats
-#' @param include_pval (`logical`)\cr whether p-value should be included as a default statistic.
-#' @param stats_custom (`named vector` of `character`) vector of statistics to include if not the defaults. This
+#' @param stats_custom (`named vector` of `character`)\cr vector of statistics to include if not the defaults. This
 #'   argument overrides `include_pval` and other custom value arguments such that only settings for these statistics
 #'   will be returned.
-#' @param formats_custom (`named vector` of `character`) vector of custom statistics formats to use in place of the
+#' @param formats_custom (`named vector` of `character`)\cr vector of custom statistics formats to use in place of the
 #'   defaults defined in [`summary_formats()`]. Names should be a subset of the statistics defined in `stats_custom` (or
 #'   default statistics if this is `NULL`).
-#' @param labels_custom (`named vector` of `character`) vector of custom statistics labels to use in place of the
+#' @param labels_custom (`named vector` of `character`)\cr vector of custom statistics labels to use in place of the
 #'   defaults defined in [`summary_labels()`]. Names should be a subset of the statistics defined in `stats_custom` (or
 #'   default statistics if this is `NULL`).
-#' @param indents_custom (`integer` or `named vector` of `integer`) vector of custom indentation modifiers for
+#' @param indent_mods_custom (`integer` or `named vector` of `integer`)\cr vector of custom indentation modifiers for
 #'   statistics to use instead of the default of `0L` for all statistics. Names should be a subset of the statistics
 #'   defined in `stats_custom` (or default statistics if this is `NULL`). Alternatively, the same indentation modifier
-#'   can be applied to all statistics by setting `indents_custom` to a single integer value.
+#'   can be applied to all statistics by setting `indent_mods_custom` to a single integer value.
+#'
+#' @return
+#' * `summary_custom` returns a `list` of 4 named elements: `stats`, `formats`, `labels`, and `indent_mods`.
 #'
 #' @examples
 #' summary_custom()
 #' summary_custom(type = "counts", include_pval = TRUE)
 #' summary_custom(
 #'   include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"),
-#'   labels_custom = c(sd = "Std. Dev."), indents_custom = 3L
+#'   labels_custom = c(sd = "Std. Dev."), indent_mods_custom = 3L
 #' )
 #'
 #' @export
@@ -176,21 +187,21 @@ summary_custom <- function(type = "numeric",
                            stats_custom = NULL,
                            formats_custom = NULL,
                            labels_custom = NULL,
-                           indents_custom = NULL) {
+                           indent_mods_custom = NULL) {
   if ("pval" %in% stats_custom) include_pval <- TRUE
 
   .formats <- summary_formats(type = type, include_pval = include_pval)
   .stats <- if (is.null(stats_custom)) names(.formats) else intersect(stats_custom, names(.formats))
   .labels <- summary_labels(type = type, include_pval = include_pval)
-  .indents <- setNames(rep(0L, length(.stats)), .stats)
+  .indent_mods <- setNames(rep(0L, length(.stats)), .stats)
 
   if (!is.null(formats_custom)) .formats[names(formats_custom)] <- formats_custom
   if (!is.null(labels_custom)) .labels[names(labels_custom)] <- labels_custom
-  if (!is.null(indents_custom)) {
-    if (is.null(names(indents_custom)) && length(indents_custom) == 1) {
-      .indents[names(.indents)] <- indents_custom
+  if (!is.null(indent_mods_custom)) {
+    if (is.null(names(indent_mods_custom)) && length(indent_mods_custom) == 1) {
+      .indent_mods[names(.indent_mods)] <- indent_mods_custom
     } else {
-      .indents[names(indents_custom)] <- indents_custom
+      .indent_mods[names(indent_mods_custom)] <- indent_mods_custom
     }
   }
 
@@ -198,7 +209,7 @@ summary_custom <- function(type = "numeric",
     stats = .stats,
     formats = .formats[.stats],
     labels = .labels[.stats],
-    indents = .indents[.stats]
+    indent_mods = .indent_mods[.stats]
   )
 }
 
@@ -499,6 +510,7 @@ s_summary.factor <- function(x,
 #' s_summary(c("a", "a", "b", "c", "a", ""), .var = "x", na.rm = FALSE, verbose = FALSE)
 #'
 #' @export
+
 s_summary.character <- function(x,
                                 na.rm = TRUE, # nolint
                                 denom = c("n", "N_row", "N_col"),
@@ -578,21 +590,23 @@ s_summary.logical <- function(x,
   y
 }
 
-
-
-#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and
-#'   `compare_vars()` and as `cfun` in `summarize_colvars()`.
+#' @describeIn analyze_variables Formatted analysis helper function which is used within `a_summary`.
 #'
 #' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics
 #'   (`compare = TRUE` adds `pval` statistic comparing against reference group).
+#' @param type (`character`)\cr type of statistics to calculate given `x`. If `x` is numeric `type` should be
+#'   `"numeric"`, otherwise type should be `"counts"`.
 #'
 #' @return
-#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
+#' * `a_summary_output()` returns the corresponding list with formatted [rtables::CellValue()].
 #'
 #' @note
 #' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`.
 #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is.
 #'
+#' @examples
+#' a_summary_output()
+#'
 #' @export
 a_summary_output <- function(x,
                              .N_col, # nolint
@@ -632,12 +646,12 @@ a_summary_output <- function(x,
     stats_custom = .stats,
     formats_custom = .formats,
     labels_custom = .labels,
-    indents_custom = .indent_mods
+    indent_mods_custom = .indent_mods
   )
   .stats <- custom_summary$stats
   .formats <- custom_summary$formats
   .labels <- custom_summary$labels
-  .indent_mods <- custom_summary$indents
+  .indent_mods <- custom_summary$indent_mods
 
   if (is.numeric(x)) {
     for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) {
@@ -648,7 +662,7 @@ a_summary_output <- function(x,
   }
 
   x_stats <- x_stats[.stats]
-  if (is.factor(x)) {
+  if (is.factor(x) || is.character(x)) {
     x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col)
     x_stats <- x_ungrp[["x"]]
     .stats <- x_ungrp[[".stats"]]
@@ -667,12 +681,9 @@ a_summary_output <- function(x,
   )
 }
 
-#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and
+#' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and
 #'   `compare_vars()` and as `cfun` in `summarize_colvars()`.
 #'
-#' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics
-#'   (`compare = TRUE` adds `pval` statistic comparing against reference group).
-#'
 #' @return
 #' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
 #'
@@ -692,49 +703,9 @@ a_summary <- function(x,
   UseMethod("a_summary", x)
 }
 
-#' @describeIn analyze_variables Formatted analysis function method for `factor` class.
+#' @describeIn analyze_variables Formatted analysis function `default` method for non-numeric classes.
 #'
-#' @examples
-#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
-#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
-#'
-#' @export
-a_summary.numeric <- function(x,
-                              .N_col, # nolint
-                              .N_row, # nolint
-                              .var = NULL,
-                              .df_row = NULL,
-                              .ref_group = NULL,
-                              .in_ref_col = FALSE,
-                              compare = FALSE,
-                              .stats = summary_custom(include_pval = compare)$stats,
-                              .formats = summary_custom(include_pval = compare)$formats,
-                              .labels = summary_custom(include_pval = compare)$labels,
-                              .indent_mods = summary_custom(include_pval = compare)$indents,
-                              na.rm = TRUE, # nolint
-                              na_level = NA_character_,
-                              ...) {
-  a_summary_output(
-    x = x,
-    .N_col = .N_col,
-    .N_row = .N_row,
-    .var = .var,
-    .df_row = .df_row,
-    .ref_group = .ref_group,
-    .in_ref_col = .in_ref_col,
-    compare = compare,
-    type = "numeric",
-    .stats = .stats,
-    .formats = .formats,
-    .labels = .labels,
-    .indent_mods = .indent_mods,
-    na.rm = na.rm,
-    na_level = na_level,
-    ...
-  )
-}
-
-#' @describeIn analyze_variables Formatted analysis function method for `factor` class.
+#' @method a_summary default
 #'
 #' @examples
 #' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
@@ -743,69 +714,34 @@ a_summary.numeric <- function(x,
 #'   .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
 #' )
 #'
-#' @export
-a_summary.factor <- function(x,
-                             .N_col, # nolint
-                             .N_row, # nolint
-                             .var = NULL,
-                             .df_row = NULL,
-                             .ref_group = NULL,
-                             .in_ref_col = FALSE,
-                             compare = FALSE,
-                             .stats = summary_custom(type = "counts", include_pval = compare)$stats,
-                             .formats = summary_custom(type = "counts", include_pval = compare)$formats,
-                             .labels = summary_custom(type = "counts", include_pval = compare)$labels,
-                             .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
-                             na.rm = TRUE, # nolint
-                             na_level = NA_character_,
-                             ...) {
-  a_summary_output(
-    x = x,
-    .N_col = .N_col,
-    .N_row = .N_row,
-    .var = .var,
-    .df_row = .df_row,
-    .ref_group = .ref_group,
-    .in_ref_col = .in_ref_col,
-    compare = compare,
-    type = "counts",
-    .stats = .stats,
-    .formats = .formats,
-    .labels = .labels,
-    .indent_mods = .indent_mods,
-    na.rm = na.rm,
-    na_level = na_level,
-    ...
-  )
-}
-
-#' @describeIn analyze_variables Formatted analysis function method for `character` class.
-#'
-#' @examples
 #' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
 #' a_summary(
 #'   c("A", "B", "A", "C"),
 #'   .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE
 #' )
 #'
+#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
+#' a_summary(
+#'   c(TRUE, FALSE, FALSE, TRUE, TRUE),
+#'   .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
+#' )
+#'
 #' @export
-a_summary.character <- function(x,
-                                .N_col, # nolint
-                                .N_row, # nolint
-                                .var = NULL,
-                                .df_row = NULL,
-                                .ref_group = NULL,
-                                .in_ref_col = FALSE,
-                                compare = FALSE,
-                                .stats = summary_custom(type = "counts", include_pval = compare)$stats,
-                                .formats = summary_custom(type = "counts", include_pval = compare)$formats,
-                                .labels = summary_custom(type = "counts", include_pval = compare)$labels,
-                                .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
-                                na.rm = TRUE, # nolint
-                                na_level = NA_character_,
-                                ...) {
-  x <- as.factor(x)
-  .ref_group <- as.factor(.ref_group)
+a_summary.default <- function(x,
+                              .N_col, # nolint
+                              .N_row, # nolint
+                              .var = NULL,
+                              .df_row = NULL,
+                              .ref_group = NULL,
+                              .in_ref_col = FALSE,
+                              compare = FALSE,
+                              .stats = summary_custom(type = "counts", include_pval = compare)$stats,
+                              .formats = summary_custom(type = "counts", include_pval = compare)$formats,
+                              .labels = summary_custom(type = "counts", include_pval = compare)$labels,
+                              .indent_mods = summary_custom(type = "counts", include_pval = compare)$indent_mods,
+                              na.rm = TRUE, # nolint
+                              na_level = NA_character_,
+                              ...) {
   a_summary_output(
     x = x,
     .N_col = .N_col,
@@ -826,17 +762,16 @@ a_summary.character <- function(x,
   )
 }
 
-#' @describeIn analyze_variables Formatted analysis function method for `logical` class.
+#' @describeIn analyze_variables Formatted analysis function method for `numeric` class.
+#'
+#' @method a_summary numeric
 #'
 #' @examples
-#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
-#' a_summary(
-#'   c(TRUE, FALSE, FALSE, TRUE, TRUE),
-#'   .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
-#' )
+#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
+#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
 #'
 #' @export
-a_summary.logical <- function(x,
+a_summary.numeric <- function(x,
                               .N_col, # nolint
                               .N_row, # nolint
                               .var = NULL,
@@ -844,10 +779,10 @@ a_summary.logical <- function(x,
                               .ref_group = NULL,
                               .in_ref_col = FALSE,
                               compare = FALSE,
-                              .stats = summary_custom(type = "counts", include_pval = compare)$stats,
-                              .formats = summary_custom(type = "counts", include_pval = compare)$formats,
-                              .labels = summary_custom(type = "counts", include_pval = compare)$labels,
-                              .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
+                              .stats = summary_custom(include_pval = compare)$stats,
+                              .formats = summary_custom(include_pval = compare)$formats,
+                              .labels = summary_custom(include_pval = compare)$labels,
+                              .indent_mods = summary_custom(include_pval = compare)$indent_mods,
                               na.rm = TRUE, # nolint
                               na_level = NA_character_,
                               ...) {
@@ -860,7 +795,7 @@ a_summary.logical <- function(x,
     .ref_group = .ref_group,
     .in_ref_col = .in_ref_col,
     compare = compare,
-    type = "counts",
+    type = "numeric",
     .stats = .stats,
     .formats = .formats,
     .labels = .labels,
diff --git a/R/utils_factor.R b/R/utils_factor.R
index 1e727061c8..85b5c7677f 100644
--- a/R/utils_factor.R
+++ b/R/utils_factor.R
@@ -284,15 +284,24 @@ fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") {
 #' `character()` if current column is a reference column. Used within [`a_summary()`].
 #'
 #' @inheritParams argument_convention
+#' @param x (`named list` of `numeric`)\cr list of numeric statistics containing the statistics to ungroup.
+#' @param which_stats (`vector` of `character`)\cr which statistics should be ungrouped.
 #'
 #' @return A `list` with modified elements `x`, `.stats`, `.formats`, `.labels`, and `.indent_mods`.
 #'
 #' @seealso [a_summary()] which uses this function internally.
 #'
 #' @keywords internal
-ungroup_stats <- function(x, .stats, .formats, .labels, .indent_mods, .in_ref_col = FALSE) {
-  checkmate::assert_true(!is.numeric(x) & !is.logical(x))
-  for (stat in c("count", "count_fraction")) {
+ungroup_stats <- function(x,
+                          .stats,
+                          .formats,
+                          .labels,
+                          .indent_mods,
+                          .in_ref_col = FALSE,
+                          which_stats = c("count", "count_fraction")) {
+  checkmate::assert_list(x)
+
+  for (stat in which_stats) {
     for (a in names(x[[stat]])) {
       a <- if (a == "na-level") "NA" else a
       a_lvl <- paste(stat, a, sep = ".")
diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd
index 714af59a8d..a09d5e2962 100644
--- a/man/analyze_variables.Rd
+++ b/man/analyze_variables.Rd
@@ -7,10 +7,10 @@
 \alias{s_summary.factor}
 \alias{s_summary.character}
 \alias{s_summary.logical}
+\alias{a_summary_output}
+\alias{a_summary}
+\alias{a_summary.default}
 \alias{a_summary.numeric}
-\alias{a_summary.factor}
-\alias{a_summary.character}
-\alias{a_summary.logical}
 \alias{analyze_vars}
 \title{Analyze Variables}
 \usage{
@@ -56,43 +56,28 @@ s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...)
   ...
 )
 
-\method{a_summary}{numeric}(
+a_summary_output(
   x,
   .N_col,
   .N_row,
-  .var = NULL,
-  .df_row = NULL,
-  .ref_group = NULL,
-  .in_ref_col = FALSE,
-  compare = FALSE,
-  .stats = summary_custom(include_pval = compare)$stats,
-  .formats = summary_custom(include_pval = compare)$formats,
-  .labels = summary_custom(include_pval = compare)$labels,
-  .indent_mods = summary_custom(include_pval = compare)$indents,
-  na.rm = TRUE,
-  na_level = NA_character_,
+  .var,
+  .df_row,
+  .ref_group,
+  .in_ref_col,
+  compare,
+  type,
+  .stats,
+  .formats,
+  .labels,
+  .indent_mods,
+  na.rm,
+  na_level,
   ...
 )
 
-\method{a_summary}{factor}(
-  x,
-  .N_col,
-  .N_row,
-  .var = NULL,
-  .df_row = NULL,
-  .ref_group = NULL,
-  .in_ref_col = FALSE,
-  compare = FALSE,
-  .stats = summary_custom(type = "counts", include_pval = compare)$stats,
-  .formats = summary_custom(type = "counts", include_pval = compare)$formats,
-  .labels = summary_custom(type = "counts", include_pval = compare)$labels,
-  .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
-  na.rm = TRUE,
-  na_level = NA_character_,
-  ...
-)
+a_summary(x, .N_col, .N_row, .var, .df_row, .ref_group, .in_ref_col, ...)
 
-\method{a_summary}{character}(
+\method{a_summary}{default}(
   x,
   .N_col,
   .N_row,
@@ -104,13 +89,13 @@ s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...)
   .stats = summary_custom(type = "counts", include_pval = compare)$stats,
   .formats = summary_custom(type = "counts", include_pval = compare)$formats,
   .labels = summary_custom(type = "counts", include_pval = compare)$labels,
-  .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
+  .indent_mods = summary_custom(type = "counts", include_pval = compare)$indent_mods,
   na.rm = TRUE,
   na_level = NA_character_,
   ...
 )
 
-\method{a_summary}{logical}(
+\method{a_summary}{numeric}(
   x,
   .N_col,
   .N_row,
@@ -119,10 +104,10 @@ s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...)
   .ref_group = NULL,
   .in_ref_col = FALSE,
   compare = FALSE,
-  .stats = summary_custom(type = "counts", include_pval = compare)$stats,
-  .formats = summary_custom(type = "counts", include_pval = compare)$formats,
-  .labels = summary_custom(type = "counts", include_pval = compare)$labels,
-  .indent_mods = summary_custom(type = "counts", include_pval = compare)$indents,
+  .stats = summary_custom(include_pval = compare)$stats,
+  .formats = summary_custom(include_pval = compare)$formats,
+  .labels = summary_custom(include_pval = compare)$labels,
+  .indent_mods = summary_custom(include_pval = compare)$indent_mods,
   na.rm = TRUE,
   na_level = NA_character_,
   ...
@@ -186,6 +171,12 @@ to print out information about factor casting.}
 
 \item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.}
 
+\item{compare}{(\code{logical})\cr Whether comparison statistics should be analyzed instead of summary statistics
+(\code{compare = TRUE} adds \code{pval} statistic comparing against reference group).}
+
+\item{type}{(\code{character})\cr type of statistics to calculate given \code{x}. If \code{x} is numeric \code{type} should be
+\code{"numeric"}, otherwise type should be \code{"counts"}.}
+
 \item{.stats}{(\code{character})\cr statistics to select for the table.}
 
 \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
@@ -271,6 +262,14 @@ denominator is zero. Note that \code{NA}s in \code{x} are never counted or leadi
 }
 }
 
+\itemize{
+\item \code{a_summary_output()} returns the corresponding list with formatted \code{\link[rtables:CellValue]{rtables::CellValue()}}.
+}
+
+\itemize{
+\item \code{a_summary()} returns the corresponding list with formatted \code{\link[rtables:CellValue]{rtables::CellValue()}}.
+}
+
 \itemize{
 \item \code{analyze_vars()} returns a layout object suitable for passing to further layouting functions,
 or to \code{\link[rtables:build_table]{rtables::build_table()}}. Adding this function to an \code{rtable} layout will add formatted rows containing
@@ -296,13 +295,14 @@ conversion to factor (with a warning) and then forwards to the method for factor
 
 \item \code{s_summary(logical)}: Method for \code{logical} class.
 
-\item \code{a_summary(numeric)}: Formatted analysis function method for \code{factor} class.
+\item \code{a_summary_output()}: Formatted analysis helper function which is used within \code{a_summary}.
 
-\item \code{a_summary(factor)}: Formatted analysis function method for \code{factor} class.
+\item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{summarize_vars()} and
+\code{compare_vars()} and as \code{cfun} in \code{summarize_colvars()}.
 
-\item \code{a_summary(character)}: Formatted analysis function method for \code{character} class.
+\item \code{a_summary(default)}: Formatted analysis function \code{default} method for non-numeric classes.
 
-\item \code{a_summary(logical)}: Formatted analysis function method for \code{logical} class.
+\item \code{a_summary(numeric)}: Formatted analysis function method for \code{numeric} class.
 
 \item \code{analyze_vars()}: Layout-creating function which can take statistics function arguments
 and additional format arguments. This function is a wrapper for \code{\link[rtables:analyze]{rtables::analyze()}}.
@@ -332,6 +332,16 @@ It is therefore better to always pre-process the dataset such that factors are m
 created from character variables before passing the dataset to \code{\link[rtables:build_table]{rtables::build_table()}}.
 }
 
+\itemize{
+\item To use for comparison (with additional p-value statistic), parameter \code{compare} must be set to \code{TRUE}.
+\item Ensure that either all \code{NA} values are converted to an explicit \code{NA} level or all \code{NA} values are left as is.
+}
+
+\itemize{
+\item To use for comparison (with additional p-value statistic), parameter \code{compare} must be set to \code{TRUE}.
+\item Ensure that either all \code{NA} values are converted to an explicit \code{NA} level or all \code{NA} values are left as is.
+}
+
 Deprecation cycle started for \code{summarize_vars} as it is going to renamed into
 \code{analyze_vars}. Intention is to reflect better the core underlying \code{rtables}
 functions; in this case \code{\link[rtables:analyze]{rtables::analyze()}}.
@@ -408,8 +418,7 @@ x <- c(TRUE, FALSE, TRUE, TRUE)
 s_summary(x, denom = "N_row", .N_row = 10L)
 s_summary(x, denom = "N_col", .N_col = 20L)
 
-a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
-a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
+a_summary_output()
 
 a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
 a_summary(
@@ -429,6 +438,9 @@ a_summary(
   .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
 )
 
+a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
+a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
+
 ## Fabricated dataset.
 dta_test <- data.frame(
   USUBJID = rep(1:6, each = 3),
diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd
index 7ba393662c..b9c7117358 100644
--- a/man/compare_variables.Rd
+++ b/man/compare_variables.Rd
@@ -38,16 +38,6 @@ a_compare(
   .df_row = NULL,
   .ref_group = NULL,
   .in_ref_col = FALSE,
-  .stats = names(get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
-    "_labels"))),
-  .formats = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
-    "_formats")),
-  .labels = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
-    "_labels")),
-  .indent_mods = get(paste0(".a_compare_", ifelse(is.numeric(x), "numeric", "counts"),
-    "_indents")),
-  na.rm = TRUE,
-  na_level = NA_character_,
   ...
 )
 
@@ -95,18 +85,6 @@ to print out information about factor casting. Defaults to \code{TRUE}.}
 
 \item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.}
 
-\item{.stats}{(\code{character})\cr statistics to select for the table.}
-
-\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
-
-\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).}
-
-\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector
-should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation
-for that statistic's row label.}
-
-\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.}
-
 \item{lyt}{(\code{layout})\cr input layout where analyses will be added to.}
 
 \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.}
@@ -117,6 +95,8 @@ for that statistic's row label.}
 possible} (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split
 underneath analyses, which is not allowed.}
 
+\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.}
+
 \item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".}
 
 \item{table_names}{(\code{character})\cr this can be customized in case that the same \code{vars} are analyzed multiple times,
@@ -124,6 +104,16 @@ to avoid warnings from \code{rtables}.}
 
 \item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group
 defined by this split instruction, or \code{NA_character_} (the default) for no section divider.}
+
+\item{.stats}{(\code{character})\cr statistics to select for the table.}
+
+\item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
+
+\item{.labels}{(named \code{character})\cr labels for the statistics (without indent).}
+
+\item{.indent_mods}{(named \code{vector} of \code{integer})\cr indent modifiers for the labels. Each element of the vector
+should be a name-value pair with name corresponding to a statistic specified in \code{.stats} and value the indentation
+for that statistic's row label.}
 }
 \value{
 \itemize{
diff --git a/man/summary_custom.Rd b/man/summary_custom.Rd
deleted file mode 100644
index ece05515c2..0000000000
--- a/man/summary_custom.Rd
+++ /dev/null
@@ -1,49 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/analyze_variables.R
-\name{summary_custom}
-\alias{summary_custom}
-\title{Set Defaults Settings for Summary Statistics}
-\usage{
-summary_custom(
-  type = "numeric",
-  include_pval = FALSE,
-  stats_custom = NULL,
-  formats_custom = NULL,
-  labels_custom = NULL,
-  indents_custom = NULL
-)
-}
-\arguments{
-\item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.}
-
-\item{include_pval}{(\code{logical})\cr whether p-value should be included as a default statistic.}
-
-\item{stats_custom}{(\verb{named vector} of \code{character}) vector of statistics to include if not the defaults. This
-argument overrides \code{include_pval} and other custom value arguments such that only settings for these statistics
-will be returned.}
-
-\item{formats_custom}{(\verb{named vector} of \code{character}) vector of custom statistics formats to use in place of the
-defaults defined in \code{\link[=summary_formats]{summary_formats()}}. Names should be a subset of the statistics defined in \code{stats_custom} (or
-default statistics if this is \code{NULL}).}
-
-\item{labels_custom}{(\verb{named vector} of \code{character}) vector of custom statistics labels to use in place of the
-defaults defined in \code{\link[=summary_labels]{summary_labels()}}. Names should be a subset of the statistics defined in \code{stats_custom} (or
-default statistics if this is \code{NULL}).}
-
-\item{indents_custom}{(\code{integer} or \verb{named vector} of \code{integer}) vector of custom indentation modifiers for
-statistics to use instead of the default of \code{0L} for all statistics. Names should be a subset of the statistics
-defined in \code{stats_custom} (or default statistics if this is \code{NULL}). Alternatively, the same indentation modifier
-can be applied to all statistics by setting \code{indents_custom} to a single integer value.}
-}
-\description{
-Set Defaults Settings for Summary Statistics
-}
-\examples{
-summary_custom()
-summary_custom(type = "counts", include_pval = TRUE)
-summary_custom(
-  include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"),
-  labels_custom = c(sd = "Std. Dev."), indents_custom = 3L
-)
-
-}
diff --git a/man/summary_formats.Rd b/man/summary_formats.Rd
deleted file mode 100644
index 86ea9d925d..0000000000
--- a/man/summary_formats.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/analyze_variables.R
-\name{summary_formats}
-\alias{summary_formats}
-\title{Format Function for Descriptive Statistics}
-\usage{
-summary_formats(type = "numeric", include_pval = FALSE)
-}
-\arguments{
-\item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.}
-}
-\value{
-A named \code{vector} of default statistic formats for the given data type.
-}
-\description{
-Returns format patterns for descriptive statistics. The format is understood by \code{rtables}.
-}
-\examples{
-summary_formats()
-summary_formats(type = "count", include_pval = TRUE)
-
-}
diff --git a/man/summary_labels.Rd b/man/summary_labels.Rd
deleted file mode 100644
index f2306a536c..0000000000
--- a/man/summary_labels.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/analyze_variables.R
-\name{summary_labels}
-\alias{summary_labels}
-\title{Label Function for Descriptive Statistics}
-\usage{
-summary_labels(type = "numeric", include_pval = FALSE)
-}
-\arguments{
-\item{type}{(\code{string})\cr choice of a summary data type. Only \code{counts} and \code{numeric} types are currently supported.}
-}
-\value{
-A named \code{vector} of default statistic labels for the given data type.
-}
-\description{
-Returns labels of descriptive statistics for numeric variables.
-}
-\examples{
-summary_labels()
-summary_labels(type = "count", include_pval = TRUE)
-
-}
diff --git a/man/summary_stats.Rd b/man/summary_stats.Rd
new file mode 100644
index 0000000000..1821410347
--- /dev/null
+++ b/man/summary_stats.Rd
@@ -0,0 +1,90 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/analyze_variables.R
+\name{summary_stats}
+\alias{summary_stats}
+\alias{summary_formats}
+\alias{summary_labels}
+\alias{summary_custom}
+\title{Summary Statistic Settings Functions}
+\usage{
+summary_formats(type = "numeric", include_pval = FALSE)
+
+summary_labels(type = "numeric", include_pval = FALSE)
+
+summary_custom(
+  type = "numeric",
+  include_pval = FALSE,
+  stats_custom = NULL,
+  formats_custom = NULL,
+  labels_custom = NULL,
+  indent_mods_custom = NULL
+)
+}
+\arguments{
+\item{type}{(\code{character})\cr choice of summary data type. Only \code{counts} and \code{numeric} types are currently supported.}
+
+\item{include_pval}{(\code{logical})\cr whether p-value should be included as a default statistic.}
+
+\item{stats_custom}{(\verb{named vector} of \code{character})\cr vector of statistics to include if not the defaults. This
+argument overrides \code{include_pval} and other custom value arguments such that only settings for these statistics
+will be returned.}
+
+\item{formats_custom}{(\verb{named vector} of \code{character})\cr vector of custom statistics formats to use in place of the
+defaults defined in \code{\link[=summary_formats]{summary_formats()}}. Names should be a subset of the statistics defined in \code{stats_custom} (or
+default statistics if this is \code{NULL}).}
+
+\item{labels_custom}{(\verb{named vector} of \code{character})\cr vector of custom statistics labels to use in place of the
+defaults defined in \code{\link[=summary_labels]{summary_labels()}}. Names should be a subset of the statistics defined in \code{stats_custom} (or
+default statistics if this is \code{NULL}).}
+
+\item{indent_mods_custom}{(\code{integer} or \verb{named vector} of \code{integer})\cr vector of custom indentation modifiers for
+statistics to use instead of the default of \code{0L} for all statistics. Names should be a subset of the statistics
+defined in \code{stats_custom} (or default statistics if this is \code{NULL}). Alternatively, the same indentation modifier
+can be applied to all statistics by setting \code{indent_mods_custom} to a single integer value.}
+}
+\value{
+\itemize{
+\item \code{summary_formats} returns a named \code{vector} of default statistic formats for the given data type.
+}
+
+\itemize{
+\item \code{summary_labels} returns a named \code{vector} of default statistic labels for the given data type.
+}
+
+\itemize{
+\item \code{summary_custom} returns a \code{list} of 4 named elements: \code{stats}, \code{formats}, \code{labels}, and \code{indent_mods}.
+}
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
+
+Functions to retrieve default settings for summary statistics and customize these settings.
+}
+\section{Functions}{
+\itemize{
+\item \code{summary_formats()}: Function to retrieve default formats for summary statistics. Returns format patterns for
+descriptive statistics which are understood by \code{rtables}.
+
+\item \code{summary_labels()}: Function to retrieve default labels for summary statistics. Returns labels of descriptive
+statistics which are understood by \code{rtables}.
+
+\item \code{summary_custom()}: Function to configure settings for default or custom summary statistics for a given data
+type. In addition to selecting a custom subset of statistics, the user can also set custom formats, labels, and
+indent modifiers for any of these statistics.
+
+}}
+\examples{
+summary_formats()
+summary_formats(type = "counts", include_pval = TRUE)
+
+summary_labels()
+summary_labels(type = "counts", include_pval = TRUE)
+
+summary_custom()
+summary_custom(type = "counts", include_pval = TRUE)
+summary_custom(
+  include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"),
+  labels_custom = c(sd = "Std. Dev."), indent_mods_custom = 3L
+)
+
+}
diff --git a/man/ungroup_stats.Rd b/man/ungroup_stats.Rd
index c9dffda957..b427561695 100644
--- a/man/ungroup_stats.Rd
+++ b/man/ungroup_stats.Rd
@@ -4,10 +4,18 @@
 \alias{ungroup_stats}
 \title{Ungroup and Format Non-Numeric Statistics}
 \usage{
-ungroup_stats(x, .stats, .formats, .labels, .indent_mods, .in_ref_col = FALSE)
+ungroup_stats(
+  x,
+  .stats,
+  .formats,
+  .labels,
+  .indent_mods,
+  .in_ref_col = FALSE,
+  which_stats = c("count", "count_fraction")
+)
 }
 \arguments{
-\item{x}{(\code{numeric})\cr vector of numbers we want to analyze.}
+\item{x}{(\verb{named list} of \code{numeric})\cr list of numeric statistics containing the statistics to ungroup.}
 
 \item{.stats}{(\code{character})\cr statistics to select for the table.}
 
@@ -19,6 +27,8 @@ ungroup_stats(x, .stats, .formats, .labels, .indent_mods, .in_ref_col = FALSE)
 unmodified default behavior. Can be negative.}
 
 \item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.}
+
+\item{which_stats}{(\code{vector} of \code{character})\cr which statistics should be ungrouped.}
 }
 \value{
 A \code{list} with modified elements \code{x}, \code{.stats}, \code{.formats}, \code{.labels}, and \code{.indent_mods}.
diff --git a/tests/testthat/_snaps/analyze_variables.md b/tests/testthat/_snaps/analyze_variables.md
index 78d1afbf16..fa1cb362c3 100644
--- a/tests/testthat/_snaps/analyze_variables.md
+++ b/tests/testthat/_snaps/analyze_variables.md
@@ -16,6 +16,202 @@
       [1] 0
       
 
+# summary_formats works as expected
+
+    Code
+      res
+    Output
+                         n                  sum                 mean 
+                     "xx."               "xx.x"               "xx.x" 
+                        sd                   se              mean_sd 
+                    "xx.x"               "xx.x"        "xx.x (xx.x)" 
+                   mean_se              mean_ci             mean_sei 
+             "xx.x (xx.x)"     "(xx.xx, xx.xx)"     "(xx.xx, xx.xx)" 
+                  mean_sdi            mean_pval               median 
+          "(xx.xx, xx.xx)"              "xx.xx"               "xx.x" 
+                       mad            median_ci            quantiles 
+                    "xx.x"     "(xx.xx, xx.xx)"        "xx.x - xx.x" 
+                       iqr                range                  min 
+                    "xx.x"        "xx.x - xx.x"               "xx.x" 
+                       max         median_range                   cv 
+                    "xx.x" "xx.x (xx.x - xx.x)"               "xx.x" 
+                 geom_mean         geom_mean_ci              geom_cv 
+                    "xx.x"     "(xx.xx, xx.xx)"               "xx.x" 
+
+---
+
+    Code
+      res
+    Output
+      $n
+      [1] "xx."
+      
+      $count
+      [1] "xx."
+      
+      $count_fraction
+      function(x, ...) {
+        attr(x, "label") <- NULL
+      
+        if (any(is.na(x))) {
+          return("NA")
+        }
+      
+        checkmate::assert_vector(x)
+        checkmate::assert_integerish(x[1])
+        assert_proportion_value(x[2], include_boundaries = TRUE)
+      
+        result <- if (x[1] == 0) {
+          "0"
+        } else {
+          paste0(x[1], " (", round(x[2] * 100, 1), "%)")
+        }
+      
+        return(result)
+      }
+      <environment: namespace:tern>
+      
+      $n_blq
+      [1] "xx."
+      
+      $pval
+      [1] "x.xxxx | (<0.0001)"
+      
+
+# summary_labels works as expected
+
+    Code
+      res
+    Output
+                                  n                           sum 
+                                "n"                         "Sum" 
+                               mean                            sd 
+                             "Mean"                          "SD" 
+                                 se                       mean_sd 
+                               "SE"                   "Mean (SD)" 
+                            mean_se                       mean_ci 
+                        "Mean (SE)"                 "Mean 95% CI" 
+                           mean_sei                      mean_sdi 
+                    "Mean -/+ 1xSE"               "Mean -/+ 1xSD" 
+                          mean_pval                        median 
+      "Mean p-value (H0: mean = 0)"                      "Median" 
+                                mad                     median_ci 
+        "Median Absolute Deviation"               "Median 95% CI" 
+                          quantiles                           iqr 
+                  "25% and 75%-ile"                         "IQR" 
+                              range                           min 
+                        "Min - Max"                     "Minimum" 
+                                max                  median_range 
+                          "Maximum"          "Median (Min - Max)" 
+                                 cv                     geom_mean 
+                           "CV (%)"              "Geometric Mean" 
+                       geom_mean_ci                       geom_cv 
+            "Geometric Mean 95% CI"         "CV % Geometric Mean" 
+
+---
+
+    Code
+      res
+    Output
+                                 n                        count 
+                               "n"                      "count" 
+                    count_fraction                        n_blq 
+                  "count_fraction"                      "n_blq" 
+                              pval 
+      "p-value (chi-squared test)" 
+
+# summary_custom works as expected
+
+    Code
+      res
+    Output
+      $stats
+       [1] "n"            "sum"          "mean"         "sd"           "se"          
+       [6] "mean_sd"      "mean_se"      "mean_ci"      "mean_sei"     "mean_sdi"    
+      [11] "mean_pval"    "median"       "mad"          "median_ci"    "quantiles"   
+      [16] "iqr"          "range"        "min"          "max"          "median_range"
+      [21] "cv"           "geom_mean"    "geom_mean_ci" "geom_cv"     
+      
+      $formats
+                         n                  sum                 mean 
+                     "xx."               "xx.x"               "xx.x" 
+                        sd                   se              mean_sd 
+                    "xx.x"               "xx.x"        "xx.x (xx.x)" 
+                   mean_se              mean_ci             mean_sei 
+             "xx.x (xx.x)"     "(xx.xx, xx.xx)"     "(xx.xx, xx.xx)" 
+                  mean_sdi            mean_pval               median 
+          "(xx.xx, xx.xx)"              "xx.xx"               "xx.x" 
+                       mad            median_ci            quantiles 
+                    "xx.x"     "(xx.xx, xx.xx)"        "xx.x - xx.x" 
+                       iqr                range                  min 
+                    "xx.x"        "xx.x - xx.x"               "xx.x" 
+                       max         median_range                   cv 
+                    "xx.x" "xx.x (xx.x - xx.x)"               "xx.x" 
+                 geom_mean         geom_mean_ci              geom_cv 
+                    "xx.x"     "(xx.xx, xx.xx)"               "xx.x" 
+      
+      $labels
+                                  n                           sum 
+                                "n"                         "Sum" 
+                               mean                            sd 
+                             "Mean"                          "SD" 
+                                 se                       mean_sd 
+                               "SE"                   "Mean (SD)" 
+                            mean_se                       mean_ci 
+                        "Mean (SE)"                 "Mean 95% CI" 
+                           mean_sei                      mean_sdi 
+                    "Mean -/+ 1xSE"               "Mean -/+ 1xSD" 
+                          mean_pval                        median 
+      "Mean p-value (H0: mean = 0)"                      "Median" 
+                                mad                     median_ci 
+        "Median Absolute Deviation"               "Median 95% CI" 
+                          quantiles                           iqr 
+                  "25% and 75%-ile"                         "IQR" 
+                              range                           min 
+                        "Min - Max"                     "Minimum" 
+                                max                  median_range 
+                          "Maximum"          "Median (Min - Max)" 
+                                 cv                     geom_mean 
+                           "CV (%)"              "Geometric Mean" 
+                       geom_mean_ci                       geom_cv 
+            "Geometric Mean 95% CI"         "CV % Geometric Mean" 
+      
+      $indent_mods
+                 n          sum         mean           sd           se      mean_sd 
+                 0            0            0            0            0            0 
+           mean_se      mean_ci     mean_sei     mean_sdi    mean_pval       median 
+                 0            0            0            0            0            0 
+               mad    median_ci    quantiles          iqr        range          min 
+                 0            0            0            0            0            0 
+               max median_range           cv    geom_mean geom_mean_ci      geom_cv 
+                 0            0            0            0            0            0 
+      
+
+---
+
+    Code
+      res
+    Output
+      $stats
+      [1] "n"     "count"
+      
+      $formats
+      $formats$n
+      [1] "xx.xx"
+      
+      $formats$count
+      [1] "xx."
+      
+      
+      $labels
+          n count 
+        "n"   "#" 
+      
+      $indent_mods
+          n count 
+          2     2 
+      
+
 # s_summary return NA for x length 0L
 
     Code
@@ -920,7 +1116,7 @@
       [1] 0
       
 
-# a_summary works with healthy input.
+# a_summary_output and a_summary work with healthy input.
 
     Code
       res
diff --git a/tests/testthat/test-analyze_variables.R b/tests/testthat/test-analyze_variables.R
index b1a0590f5d..4e6da3a58b 100644
--- a/tests/testthat/test-analyze_variables.R
+++ b/tests/testthat/test-analyze_variables.R
@@ -13,6 +13,39 @@ testthat::test_that("control_analyze_vars fails wrong inputs", {
   testthat::expect_error(control_analyze_vars(conf_level = 95))
 })
 
+testthat::test_that("summary_formats works as expected", {
+  result <- summary_formats()
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  result <- summary_formats(type = "counts", include_pval = TRUE)
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})
+
+testthat::test_that("summary_labels works as expected", {
+  result <- summary_labels()
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  result <- summary_labels(type = "counts", include_pval = TRUE)
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})
+
+testthat::test_that("summary_custom works as expected", {
+  result <- summary_custom()
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  result <- summary_custom(
+    type = "counts", stats_custom = c("n", "count"),
+    formats_custom = c(n = "xx.xx"), labels_custom = c(count = "#"), indent_mods_custom = 2L
+  )
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})
+
 testthat::test_that("s_summary return NA for x length 0L", {
   x <- numeric()
 
@@ -160,27 +193,72 @@ testthat::test_that("s_summary works with logical vectors and by if requested do
   testthat::expect_snapshot(res)
 })
 
-testthat::test_that("a_summary works with healthy input.", {
+testthat::test_that("a_summary_output and a_summary work with healthy input.", {
   options("width" = 100)
-  # numeric input
+
+  # numeric input - a_summary_output
   set.seed(1)
-  result <- a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
+  x <- rnorm(10)
+  result <- a_summary_output(
+    x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
+    compare = FALSE, type = "numeric", .stats = summary_custom()$stats, .formats = summary_custom()$formats,
+    .labels = summary_custom()$labels, .indent_mods = summary_custom()$indents, na.rm = TRUE, na_level = NA_character_
+  )
+  res_out <- testthat::expect_silent(result)
+
+  # numeric input - a_summary
+  result <- a_summary(x = x, .N_col = 10, .N_row = 20, .var = "bla")
   res <- testthat::expect_silent(result)
+  testthat::expect_identical(res_out, res)
   testthat::expect_snapshot(res)
 
-  # factor input
-  result <- a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
+  # factor input - a_summary_output
+  x <- factor(c("a", "a", "b", "c", "a"))
+  result <- a_summary_output(
+    x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
+    compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats,
+    .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels,
+    .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_
+  )
+  res_out <- testthat::expect_silent(result)
+
+  # factor input - a_summary
+  result <- a_summary(x = x, .N_row = 10, .N_col = 10)
   res <- testthat::expect_silent(result)
+  testthat::expect_identical(res_out, res)
   testthat::expect_snapshot(res)
 
-  # character input
-  result <- a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
+  # character input - a_summary_output
+  x <- c("A", "B", "A", "C")
+  result <- a_summary_output(
+    x = x, .N_col = 10, .N_row = 10, .var = "x", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
+    compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats,
+    .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels,
+    .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_,
+    verbose = FALSE
+  )
+  res_out <- testthat::expect_silent(result)
+
+  # character input - a_summary
+  result <- a_summary(x = x, .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
   res <- testthat::expect_silent(result)
+  testthat::expect_identical(res_out, res)
   testthat::expect_snapshot(res)
 
-  # logical input
-  result <- a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
+  # logical input - a_summary_output
+  x <- c(TRUE, FALSE, FALSE, TRUE, TRUE)
+  result <- a_summary_output(
+    x = x, .N_col = 10, .N_row = 10, .var = NULL, .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
+    compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats,
+    .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels,
+    .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_
+  )
+  res_out <- testthat::expect_silent(result)
+
+  # logical input - a_summary
+  result <- a_summary(x = x, .N_row = 10, .N_col = 10)
   res <- testthat::expect_silent(result)
+  testthat::expect_identical(res_out, res)
   testthat::expect_snapshot(res)
 })
 
@@ -364,7 +442,7 @@ testthat::test_that("`analyze_vars` works with character input and gives the sam
 
   l <- basic_table() %>%
     analyze_vars(vars = "foo")
-  result <- build_table(l, dta)
+  testthat::expect_warning(result <- build_table(l, dta))
 
   dta_factor <- dta %>%
     dplyr::mutate(foo = factor(foo))

From a073c27920e506657efd3d07ad6fc6e980535384 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 4 Jul 2023 20:48:39 -0400
Subject: [PATCH 24/33] Fix checks

---
 R/analyze_functions.R      | 2 +-
 R/analyze_variables.R      | 6 +++---
 _pkgdown.yml               | 1 +
 man/analyze_functions.Rd   | 2 +-
 man/analyze_variables.Rd   | 2 +-
 man/create_afun_summary.Rd | 2 +-
 6 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/R/analyze_functions.R b/R/analyze_functions.R
index bc8c364207..20addfd507 100644
--- a/R/analyze_functions.R
+++ b/R/analyze_functions.R
@@ -32,7 +32,7 @@
 #'   leverage `analyze_colvars` to have the context split in rows and the analysis
 #'   methods in columns.
 #' * [summarize_change()]
-#' * [analyze_vars()]: formerly known as [summarize_vars()], it was renamed to reflect
+#' * [analyze_vars()]: formerly known as `summarize_vars()`, it was renamed to reflect
 #'   core function [rtables::analyze()].
 #' * [surv_time()]
 #' * [surv_timepoint()]
diff --git a/R/analyze_variables.R b/R/analyze_variables.R
index 8539f09bf9..f807499b23 100644
--- a/R/analyze_variables.R
+++ b/R/analyze_variables.R
@@ -193,7 +193,7 @@ summary_custom <- function(type = "numeric",
   .formats <- summary_formats(type = type, include_pval = include_pval)
   .stats <- if (is.null(stats_custom)) names(.formats) else intersect(stats_custom, names(.formats))
   .labels <- summary_labels(type = type, include_pval = include_pval)
-  .indent_mods <- setNames(rep(0L, length(.stats)), .stats)
+  .indent_mods <- stats::setNames(rep(0L, length(.stats)), .stats)
 
   if (!is.null(formats_custom)) .formats[names(formats_custom)] <- formats_custom
   if (!is.null(labels_custom)) .labels[names(labels_custom)] <- labels_custom
@@ -681,7 +681,7 @@ a_summary_output <- function(x,
   )
 }
 
-#' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `summarize_vars()` and
+#' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `analyze_vars()` and
 #'   `compare_vars()` and as `cfun` in `summarize_colvars()`.
 #'
 #' @return
@@ -821,7 +821,7 @@ a_summary.numeric <- function(x,
 #'
 #' @note This function has been deprecated in favor of direct implementation of `a_summary()`.
 #'
-#' @seealso [summarize_vars()]
+#' @seealso [analyze_vars()]
 #'
 #' @export
 create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 7ee0307ab9..228a5a3c77 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -70,6 +70,7 @@ reference:
       - starts_with("h_")
       - starts_with("or_")
       - starts_with("prop_")
+      - starts_with("summmary_")
       - -starts_with("h_col_")
       - -h_content_first_row
       - -h_data_plot
diff --git a/man/analyze_functions.Rd b/man/analyze_functions.Rd
index 086a022c4e..14017aabc1 100644
--- a/man/analyze_functions.Rd
+++ b/man/analyze_functions.Rd
@@ -34,7 +34,7 @@ variables that are split into different columns. In comparison, \link{analyze_co
 leverage \code{analyze_colvars} to have the context split in rows and the analysis
 methods in columns.
 \item \code{\link[=summarize_change]{summarize_change()}}
-\item \code{\link[=analyze_vars]{analyze_vars()}}: formerly known as \code{\link[=summarize_vars]{summarize_vars()}}, it was renamed to reflect
+\item \code{\link[=analyze_vars]{analyze_vars()}}: formerly known as \code{summarize_vars()}, it was renamed to reflect
 core function \code{\link[rtables:analyze]{rtables::analyze()}}.
 \item \code{\link[=surv_time]{surv_time()}}
 \item \code{\link[=surv_timepoint]{surv_timepoint()}}
diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd
index a09d5e2962..0a574d5387 100644
--- a/man/analyze_variables.Rd
+++ b/man/analyze_variables.Rd
@@ -297,7 +297,7 @@ conversion to factor (with a warning) and then forwards to the method for factor
 
 \item \code{a_summary_output()}: Formatted analysis helper function which is used within \code{a_summary}.
 
-\item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{summarize_vars()} and
+\item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{analyze_vars()} and
 \code{compare_vars()} and as \code{cfun} in \code{summarize_colvars()}.
 
 \item \code{a_summary(default)}: Formatted analysis function \code{default} method for non-numeric classes.
diff --git a/man/create_afun_summary.Rd b/man/create_afun_summary.Rd
index e97a1e439a..f5bc4e57d6 100644
--- a/man/create_afun_summary.Rd
+++ b/man/create_afun_summary.Rd
@@ -29,5 +29,5 @@ Constructor function which creates a combined formatted analysis function.
 This function has been deprecated in favor of direct implementation of \code{a_summary()}.
 }
 \seealso{
-\code{\link[=summarize_vars]{summarize_vars()}}
+\code{\link[=analyze_vars]{analyze_vars()}}
 }

From adaccca93cb11496692558faa55d02da718d004c Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 4 Jul 2023 20:53:41 -0400
Subject: [PATCH 25/33] Add examples

---
 R/analyze_variables.R    | 17 ++++++++++++++++-
 man/analyze_variables.Rd | 17 ++++++++++++++++-
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/R/analyze_variables.R b/R/analyze_variables.R
index f807499b23..8c1e4154c2 100644
--- a/R/analyze_variables.R
+++ b/R/analyze_variables.R
@@ -605,7 +605,22 @@ s_summary.logical <- function(x,
 #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is.
 #'
 #' @examples
-#' a_summary_output()
+#' set.seed(1)
+#' x <- rnorm(10)
+#' a_summary_output(
+#'   x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
+#'   compare = FALSE, type = "numeric", .stats = summary_custom()$stats, .formats = summary_custom()$formats,
+#'   .labels = summary_custom()$labels, .indent_mods = summary_custom()$indents,
+#'   na.rm = TRUE, na_level = NA_character_
+#' )
+#'
+#' x <- factor(c("a", "a", "b", "c", "a"))
+#' a_summary_output(
+#'   x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
+#'   compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats,
+#'   .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels,
+#'   .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_
+#' )
 #'
 #' @export
 a_summary_output <- function(x,
diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd
index 0a574d5387..26f21fe37e 100644
--- a/man/analyze_variables.Rd
+++ b/man/analyze_variables.Rd
@@ -418,7 +418,22 @@ x <- c(TRUE, FALSE, TRUE, TRUE)
 s_summary(x, denom = "N_row", .N_row = 10L)
 s_summary(x, denom = "N_col", .N_col = 20L)
 
-a_summary_output()
+set.seed(1)
+x <- rnorm(10)
+a_summary_output(
+  x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
+  compare = FALSE, type = "numeric", .stats = summary_custom()$stats, .formats = summary_custom()$formats,
+  .labels = summary_custom()$labels, .indent_mods = summary_custom()$indents,
+  na.rm = TRUE, na_level = NA_character_
+)
+
+x <- factor(c("a", "a", "b", "c", "a"))
+a_summary_output(
+  x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
+  compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats,
+  .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels,
+  .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_
+)
 
 a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
 a_summary(

From 2953a5d16d6ab5f5f487bc695ae3305eea050603 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 4 Jul 2023 21:03:19 -0400
Subject: [PATCH 26/33] Update WORDLIST

---
 _pkgdown.yml  | 2 +-
 inst/WORDLIST | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/_pkgdown.yml b/_pkgdown.yml
index 228a5a3c77..e9fc0e9df1 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -70,7 +70,7 @@ reference:
       - starts_with("h_")
       - starts_with("or_")
       - starts_with("prop_")
-      - starts_with("summmary_")
+      - summary_stats
       - -starts_with("h_col_")
       - -h_content_first_row
       - -h_data_plot
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 06c569fd9d..ad964f9947 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -32,4 +32,5 @@ subtable
 subtables
 unformatted
 ungroup
+ungrouped
 unstratified

From 10d47942ab3dcbe7859392d07fafb1adca064053 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 4 Jul 2023 21:16:16 -0400
Subject: [PATCH 27/33] roxygen examples too wide

---
 R/analyze_variables.R    | 19 +++++++++++--------
 man/analyze_variables.Rd | 19 +++++++++++--------
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/R/analyze_variables.R b/R/analyze_variables.R
index 8c1e4154c2..052a2534a6 100644
--- a/R/analyze_variables.R
+++ b/R/analyze_variables.R
@@ -608,18 +608,21 @@ s_summary.logical <- function(x,
 #' set.seed(1)
 #' x <- rnorm(10)
 #' a_summary_output(
-#'   x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
-#'   compare = FALSE, type = "numeric", .stats = summary_custom()$stats, .formats = summary_custom()$formats,
-#'   .labels = summary_custom()$labels, .indent_mods = summary_custom()$indents,
-#'   na.rm = TRUE, na_level = NA_character_
+#'   x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL,
+#'   .in_ref_col = FALSE, compare = FALSE, type = "numeric", .stats = summary_custom()$stats,
+#'   .formats = summary_custom()$formats, .labels = summary_custom()$labels,
+#'   .indent_mods = summary_custom()$indents, na.rm = TRUE, na_level = NA_character_
 #' )
 #'
 #' x <- factor(c("a", "a", "b", "c", "a"))
 #' a_summary_output(
-#'   x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
-#'   compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats,
-#'   .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels,
-#'   .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_
+#'   x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL,
+#'   .in_ref_col = FALSE, compare = FALSE, type = "counts",
+#'   .stats = summary_custom(type = "counts")$stats,
+#'   .formats = summary_custom(type = "counts")$formats,
+#'   .labels = summary_custom(type = "counts")$labels,
+#'   .indent_mods = summary_custom(type = "counts")$indents,
+#'   na.rm = TRUE, na_level = NA_character_
 #' )
 #'
 #' @export
diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd
index 26f21fe37e..24cbc1d250 100644
--- a/man/analyze_variables.Rd
+++ b/man/analyze_variables.Rd
@@ -421,18 +421,21 @@ s_summary(x, denom = "N_col", .N_col = 20L)
 set.seed(1)
 x <- rnorm(10)
 a_summary_output(
-  x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
-  compare = FALSE, type = "numeric", .stats = summary_custom()$stats, .formats = summary_custom()$formats,
-  .labels = summary_custom()$labels, .indent_mods = summary_custom()$indents,
-  na.rm = TRUE, na_level = NA_character_
+  x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL,
+  .in_ref_col = FALSE, compare = FALSE, type = "numeric", .stats = summary_custom()$stats,
+  .formats = summary_custom()$formats, .labels = summary_custom()$labels,
+  .indent_mods = summary_custom()$indents, na.rm = TRUE, na_level = NA_character_
 )
 
 x <- factor(c("a", "a", "b", "c", "a"))
 a_summary_output(
-  x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
-  compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats,
-  .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels,
-  .indent_mods = summary_custom(type = "counts")$indents, na.rm = TRUE, na_level = NA_character_
+  x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL,
+  .in_ref_col = FALSE, compare = FALSE, type = "counts",
+  .stats = summary_custom(type = "counts")$stats,
+  .formats = summary_custom(type = "counts")$formats,
+  .labels = summary_custom(type = "counts")$labels,
+  .indent_mods = summary_custom(type = "counts")$indents,
+  na.rm = TRUE, na_level = NA_character_
 )
 
 a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)

From c8b430dc9b01323c0b7eee28db429489316d0c27 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 11 Jul 2023 21:38:06 -0400
Subject: [PATCH 28/33] Separate files for summary funs, internal afun helper

---
 DESCRIPTION                             |   1 +
 NAMESPACE                               |   1 -
 R/analyze_variables.R                   | 283 +++---------------------
 R/summary_stats.R                       | 214 ++++++++++++++++++
 man/a_summary_internal.Rd               |  47 ++++
 man/analyze_variables.Rd                |  57 +----
 man/control_analyze_vars.Rd             |   2 +-
 man/summary_stats.Rd                    |   2 +-
 tests/testthat/test-analyze_variables.R |  18 +-
 9 files changed, 302 insertions(+), 323 deletions(-)
 create mode 100644 R/summary_stats.R
 create mode 100644 man/a_summary_internal.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 26e6386f03..a1111d59d8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -145,6 +145,7 @@ Collate:
     'summarize_glm_count.R'
     'summarize_num_patients.R'
     'summarize_patients_exposure_in_cols.R'
+    'summary_stats.R'
     'survival_biomarkers_subgroups.R'
     'survival_coxph_pairwise.R'
     'survival_duration_subgroups.R'
diff --git a/NAMESPACE b/NAMESPACE
index 2717dc01a0..5239b12fcb 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -35,7 +35,6 @@ export(a_odds_ratio)
 export(a_proportion)
 export(a_proportion_diff)
 export(a_summary)
-export(a_summary_output)
 export(add_rowcounts)
 export(aesi_label)
 export(analyze_num_patients)
diff --git a/R/analyze_variables.R b/R/analyze_variables.R
index 052a2534a6..5a2eefbd48 100644
--- a/R/analyze_variables.R
+++ b/R/analyze_variables.R
@@ -1,218 +1,3 @@
-#' Control Function for Descriptive Statistics
-#'
-#' @description `r lifecycle::badge("stable")`
-#'
-#' Sets a list of parameters for summaries of descriptive statistics. Typically used internally to specify
-#' details for [s_summary()]. This function family is mainly used by [analyze_vars()].
-#'
-#' @inheritParams argument_convention
-#' @param quantiles (`numeric`)\cr of length two to specify the quantiles to calculate.
-#' @param quantile_type (`numeric`)\cr between 1 and 9 selecting quantile algorithms to be used.
-#'   Default is set to 2 as this matches the default quantile algorithm in SAS `proc univariate` set by `QNTLDEF=5`.
-#'   This differs from R's default. See more about `type` in [stats::quantile()].
-#' @param test_mean (`numeric`)\cr to test against the mean under the null hypothesis when calculating p-value.
-#'
-#' @note Deprecation cycle started for `control_summarize_vars` as it is going to renamed into
-#'   `control_analyze_vars`. Intention is to reflect better the core underlying `rtables`
-#'   functions; in this case [analyze_vars()] wraps [rtables::analyze()].
-#'
-#' @return A list of components with the same names as the arguments.
-#'
-#' @export control_analyze_vars control_summarize_vars
-#' @aliases control_summarize_vars
-control_analyze_vars <- function(conf_level = 0.95,
-                                 quantiles = c(0.25, 0.75),
-                                 quantile_type = 2,
-                                 test_mean = 0) {
-  checkmate::assert_vector(quantiles, len = 2)
-  checkmate::assert_int(quantile_type, lower = 1, upper = 9)
-  checkmate::assert_numeric(test_mean)
-  lapply(quantiles, assert_proportion_value)
-  assert_proportion_value(conf_level)
-  list(conf_level = conf_level, quantiles = quantiles, quantile_type = quantile_type, test_mean = test_mean)
-}
-
-control_summarize_vars <- control_analyze_vars
-
-#' Summary Statistic Settings Functions
-#'
-#' @description `r lifecycle::badge("stable")`
-#'
-#' Functions to retrieve default settings for summary statistics and customize these settings.
-#'
-#' @param type (`character`)\cr choice of summary data type. Only `counts` and `numeric` types are currently supported.
-#' @param include_pval (`logical`)\cr whether p-value should be included as a default statistic.
-#'
-#' @name summary_stats
-NULL
-
-#' @describeIn summary_stats Function to retrieve default formats for summary statistics. Returns format patterns for
-#'   descriptive statistics which are understood by `rtables`.
-#'
-#' @return
-#' * `summary_formats` returns a named `vector` of default statistic formats for the given data type.
-#'
-#' @examples
-#' summary_formats()
-#' summary_formats(type = "count", include_pval = TRUE)
-#'
-#' @export
-summary_formats <- function(type = "numeric", include_pval = FALSE) {
-  fmts <- if (type == "counts") {
-    c(
-      n = "xx.",
-      count = "xx.",
-      count_fraction = format_count_fraction,
-      n_blq = "xx.",
-      pval = "x.xxxx | (<0.0001)"
-    )
-  } else {
-    c(
-      n = "xx.",
-      sum = "xx.x",
-      mean = "xx.x",
-      sd = "xx.x",
-      se = "xx.x",
-      mean_sd = "xx.x (xx.x)",
-      mean_se = "xx.x (xx.x)",
-      mean_ci = "(xx.xx, xx.xx)",
-      mean_sei = "(xx.xx, xx.xx)",
-      mean_sdi = "(xx.xx, xx.xx)",
-      mean_pval = "xx.xx",
-      median = "xx.x",
-      mad = "xx.x",
-      median_ci = "(xx.xx, xx.xx)",
-      quantiles = "xx.x - xx.x",
-      iqr = "xx.x",
-      range = "xx.x - xx.x",
-      min = "xx.x",
-      max = "xx.x",
-      median_range = "xx.x (xx.x - xx.x)",
-      cv = "xx.x",
-      geom_mean = "xx.x",
-      geom_mean_ci = "(xx.xx, xx.xx)",
-      geom_cv = "xx.x",
-      pval = "x.xxxx | (<0.0001)"
-    )
-  }
-  if (!include_pval) fmts <- head(fmts, -1)
-  fmts
-}
-
-#' @describeIn summary_stats Function to retrieve default labels for summary statistics. Returns labels of descriptive
-#'   statistics which are understood by `rtables`.
-#'
-#' @return
-#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type.
-#'
-#' @examples
-#' summary_labels()
-#' summary_labels(type = "count", include_pval = TRUE)
-#'
-#' @export
-summary_labels <- function(type = "numeric", include_pval = FALSE) {
-  lbls <- if (type == "counts") {
-    c(
-      n = "n",
-      count = "count",
-      count_fraction = "count_fraction",
-      n_blq = "n_blq",
-      pval = "p-value (chi-squared test)"
-    )
-  } else {
-    c(
-      n = "n",
-      sum = "Sum",
-      mean = "Mean",
-      sd = "SD",
-      se = "SE",
-      mean_sd = "Mean (SD)",
-      mean_se = "Mean (SE)",
-      mean_ci = "Mean 95% CI",
-      mean_sei = "Mean -/+ 1xSE",
-      mean_sdi = "Mean -/+ 1xSD",
-      mean_pval = "Mean p-value (H0: mean = 0)",
-      median = "Median",
-      mad = "Median Absolute Deviation",
-      median_ci = "Median 95% CI",
-      quantiles = "25% and 75%-ile",
-      iqr = "IQR",
-      range = "Min - Max",
-      min = "Minimum",
-      max = "Maximum",
-      median_range = "Median (Min - Max)",
-      cv = "CV (%)",
-      geom_mean = "Geometric Mean",
-      geom_mean_ci = "Geometric Mean 95% CI",
-      geom_cv = "CV % Geometric Mean",
-      pval = "p-value (t-test)"
-    )
-  }
-  if (!include_pval) lbls <- head(lbls, -1)
-  lbls
-}
-
-#' @describeIn summary_stats Function to configure settings for default or custom summary statistics for a given data
-#'   type. In addition to selecting a custom subset of statistics, the user can also set custom formats, labels, and
-#'   indent modifiers for any of these statistics.
-#'
-#' @param stats_custom (`named vector` of `character`)\cr vector of statistics to include if not the defaults. This
-#'   argument overrides `include_pval` and other custom value arguments such that only settings for these statistics
-#'   will be returned.
-#' @param formats_custom (`named vector` of `character`)\cr vector of custom statistics formats to use in place of the
-#'   defaults defined in [`summary_formats()`]. Names should be a subset of the statistics defined in `stats_custom` (or
-#'   default statistics if this is `NULL`).
-#' @param labels_custom (`named vector` of `character`)\cr vector of custom statistics labels to use in place of the
-#'   defaults defined in [`summary_labels()`]. Names should be a subset of the statistics defined in `stats_custom` (or
-#'   default statistics if this is `NULL`).
-#' @param indent_mods_custom (`integer` or `named vector` of `integer`)\cr vector of custom indentation modifiers for
-#'   statistics to use instead of the default of `0L` for all statistics. Names should be a subset of the statistics
-#'   defined in `stats_custom` (or default statistics if this is `NULL`). Alternatively, the same indentation modifier
-#'   can be applied to all statistics by setting `indent_mods_custom` to a single integer value.
-#'
-#' @return
-#' * `summary_custom` returns a `list` of 4 named elements: `stats`, `formats`, `labels`, and `indent_mods`.
-#'
-#' @examples
-#' summary_custom()
-#' summary_custom(type = "counts", include_pval = TRUE)
-#' summary_custom(
-#'   include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"),
-#'   labels_custom = c(sd = "Std. Dev."), indent_mods_custom = 3L
-#' )
-#'
-#' @export
-summary_custom <- function(type = "numeric",
-                           include_pval = FALSE,
-                           stats_custom = NULL,
-                           formats_custom = NULL,
-                           labels_custom = NULL,
-                           indent_mods_custom = NULL) {
-  if ("pval" %in% stats_custom) include_pval <- TRUE
-
-  .formats <- summary_formats(type = type, include_pval = include_pval)
-  .stats <- if (is.null(stats_custom)) names(.formats) else intersect(stats_custom, names(.formats))
-  .labels <- summary_labels(type = type, include_pval = include_pval)
-  .indent_mods <- stats::setNames(rep(0L, length(.stats)), .stats)
-
-  if (!is.null(formats_custom)) .formats[names(formats_custom)] <- formats_custom
-  if (!is.null(labels_custom)) .labels[names(labels_custom)] <- labels_custom
-  if (!is.null(indent_mods_custom)) {
-    if (is.null(names(indent_mods_custom)) && length(indent_mods_custom) == 1) {
-      .indent_mods[names(.indent_mods)] <- indent_mods_custom
-    } else {
-      .indent_mods[names(indent_mods_custom)] <- indent_mods_custom
-    }
-  }
-
-  list(
-    stats = .stats,
-    formats = .formats[.stats],
-    labels = .labels[.stats],
-    indent_mods = .indent_mods[.stats]
-  )
-}
-
 #' Analyze Variables
 #'
 #' @description `r lifecycle::badge("stable")`
@@ -590,7 +375,7 @@ s_summary.logical <- function(x,
   y
 }
 
-#' @describeIn analyze_variables Formatted analysis helper function which is used within `a_summary`.
+#' Helper Function to Create Output Rows for [a_summary()]
 #'
 #' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics
 #'   (`compare = TRUE` adds `pval` statistic comparing against reference group).
@@ -598,50 +383,29 @@ s_summary.logical <- function(x,
 #'   `"numeric"`, otherwise type should be `"counts"`.
 #'
 #' @return
-#' * `a_summary_output()` returns the corresponding list with formatted [rtables::CellValue()].
+#' * `a_summary_internal()` returns a corresponding list with formatted [rtables::CellValue()] used within `a_summary`.
 #'
 #' @note
 #' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`.
 #' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is.
 #'
-#' @examples
-#' set.seed(1)
-#' x <- rnorm(10)
-#' a_summary_output(
-#'   x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL,
-#'   .in_ref_col = FALSE, compare = FALSE, type = "numeric", .stats = summary_custom()$stats,
-#'   .formats = summary_custom()$formats, .labels = summary_custom()$labels,
-#'   .indent_mods = summary_custom()$indents, na.rm = TRUE, na_level = NA_character_
-#' )
-#'
-#' x <- factor(c("a", "a", "b", "c", "a"))
-#' a_summary_output(
-#'   x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL,
-#'   .in_ref_col = FALSE, compare = FALSE, type = "counts",
-#'   .stats = summary_custom(type = "counts")$stats,
-#'   .formats = summary_custom(type = "counts")$formats,
-#'   .labels = summary_custom(type = "counts")$labels,
-#'   .indent_mods = summary_custom(type = "counts")$indents,
-#'   na.rm = TRUE, na_level = NA_character_
-#' )
-#'
-#' @export
-a_summary_output <- function(x,
-                             .N_col, # nolint
-                             .N_row, # nolint
-                             .var,
-                             .df_row,
-                             .ref_group,
-                             .in_ref_col,
-                             compare,
-                             type,
-                             .stats,
-                             .formats,
-                             .labels,
-                             .indent_mods,
-                             na.rm, # nolint
-                             na_level,
-                             ...) {
+#' @keywords internal
+a_summary_internal <- function(x,
+                               .N_col, # nolint
+                               .N_row, # nolint
+                               .var,
+                               .df_row,
+                               .ref_group,
+                               .in_ref_col,
+                               compare,
+                               type,
+                               .stats,
+                               .formats,
+                               .labels,
+                               .indent_mods,
+                               na.rm, # nolint
+                               na_level,
+                               ...) {
   # Remove all-NA rows
   if (!is.null(.df_row) && ncol(.df_row) > 1) {
     in_tot_col <- nrow(.df_row) == length(x)
@@ -702,6 +466,11 @@ a_summary_output <- function(x,
 #' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `analyze_vars()` and
 #'   `compare_vars()` and as `cfun` in `summarize_colvars()`.
 #'
+#' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics
+#'   (`compare = TRUE` adds `pval` statistic comparing against reference group).
+#' @param type (`character`)\cr type of statistics to calculate given `x`. If `x` is numeric `type` should be
+#'   `"numeric"`, otherwise type should be `"counts"`.
+#'
 #' @return
 #' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
 #'
@@ -760,7 +529,7 @@ a_summary.default <- function(x,
                               na.rm = TRUE, # nolint
                               na_level = NA_character_,
                               ...) {
-  a_summary_output(
+  a_summary_internal(
     x = x,
     .N_col = .N_col,
     .N_row = .N_row,
@@ -804,7 +573,7 @@ a_summary.numeric <- function(x,
                               na.rm = TRUE, # nolint
                               na_level = NA_character_,
                               ...) {
-  a_summary_output(
+  a_summary_internal(
     x = x,
     .N_col = .N_col,
     .N_row = .N_row,
diff --git a/R/summary_stats.R b/R/summary_stats.R
new file mode 100644
index 0000000000..7fffa4f025
--- /dev/null
+++ b/R/summary_stats.R
@@ -0,0 +1,214 @@
+#' Summary Statistic Settings Functions
+#'
+#' @description `r lifecycle::badge("stable")`
+#'
+#' Functions to retrieve default settings for summary statistics and customize these settings.
+#'
+#' @param type (`character`)\cr choice of summary data type. Only `counts` and `numeric` types are currently supported.
+#' @param include_pval (`logical`)\cr whether p-value should be included as a default statistic.
+#'
+#' @name summary_stats
+NULL
+
+#' @describeIn summary_stats Function to retrieve default formats for summary statistics. Returns format patterns for
+#'   descriptive statistics which are understood by `rtables`.
+#'
+#' @return
+#' * `summary_formats` returns a named `vector` of default statistic formats for the given data type.
+#'
+#' @examples
+#' summary_formats()
+#' summary_formats(type = "counts", include_pval = TRUE)
+#'
+#' @export
+summary_formats <- function(type = "numeric", include_pval = FALSE) {
+  fmts <- if (type == "counts") {
+    c(
+      n = "xx.",
+      count = "xx.",
+      count_fraction = format_count_fraction,
+      n_blq = "xx.",
+      pval = "x.xxxx | (<0.0001)"
+    )
+  } else {
+    c(
+      n = "xx.",
+      sum = "xx.x",
+      mean = "xx.x",
+      sd = "xx.x",
+      se = "xx.x",
+      mean_sd = "xx.x (xx.x)",
+      mean_se = "xx.x (xx.x)",
+      mean_ci = "(xx.xx, xx.xx)",
+      mean_sei = "(xx.xx, xx.xx)",
+      mean_sdi = "(xx.xx, xx.xx)",
+      mean_pval = "xx.xx",
+      median = "xx.x",
+      mad = "xx.x",
+      median_ci = "(xx.xx, xx.xx)",
+      quantiles = "xx.x - xx.x",
+      iqr = "xx.x",
+      range = "xx.x - xx.x",
+      min = "xx.x",
+      max = "xx.x",
+      median_range = "xx.x (xx.x - xx.x)",
+      cv = "xx.x",
+      geom_mean = "xx.x",
+      geom_mean_ci = "(xx.xx, xx.xx)",
+      geom_cv = "xx.x",
+      pval = "x.xxxx | (<0.0001)"
+    )
+  }
+  if (!include_pval) fmts <- head(fmts, -1)
+  fmts
+}
+
+#' @describeIn summary_stats Function to retrieve default labels for summary statistics. Returns labels of descriptive
+#'   statistics which are understood by `rtables`.
+#'
+#' @return
+#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type.
+#'
+#' @examples
+#' summary_labels()
+#' summary_labels(type = "counts", include_pval = TRUE)
+#'
+#' @export
+summary_labels <- function(type = "numeric", include_pval = FALSE) {
+  lbls <- if (type == "counts") {
+    c(
+      n = "n",
+      count = "count",
+      count_fraction = "count_fraction",
+      n_blq = "n_blq",
+      pval = "p-value (chi-squared test)"
+    )
+  } else {
+    c(
+      n = "n",
+      sum = "Sum",
+      mean = "Mean",
+      sd = "SD",
+      se = "SE",
+      mean_sd = "Mean (SD)",
+      mean_se = "Mean (SE)",
+      mean_ci = "Mean 95% CI",
+      mean_sei = "Mean -/+ 1xSE",
+      mean_sdi = "Mean -/+ 1xSD",
+      mean_pval = "Mean p-value (H0: mean = 0)",
+      median = "Median",
+      mad = "Median Absolute Deviation",
+      median_ci = "Median 95% CI",
+      quantiles = "25% and 75%-ile",
+      iqr = "IQR",
+      range = "Min - Max",
+      min = "Minimum",
+      max = "Maximum",
+      median_range = "Median (Min - Max)",
+      cv = "CV (%)",
+      geom_mean = "Geometric Mean",
+      geom_mean_ci = "Geometric Mean 95% CI",
+      geom_cv = "CV % Geometric Mean",
+      pval = "p-value (t-test)"
+    )
+  }
+  if (!include_pval) lbls <- head(lbls, -1)
+  lbls
+}
+
+#' @describeIn summary_stats Function to configure settings for default or custom summary statistics for a given data
+#'   type. In addition to selecting a custom subset of statistics, the user can also set custom formats, labels, and
+#'   indent modifiers for any of these statistics.
+#'
+#' @param stats_custom (`character`)\cr vector of statistics to include if not the defaults. This
+#'   argument overrides `include_pval` and other custom value arguments such that only settings for these statistics
+#'   will be returned.
+#' @param formats_custom (`named vector` of `character`)\cr vector of custom statistics formats to use in place of the
+#'   defaults defined in [`summary_formats()`]. Names should be a subset of the statistics defined in `stats_custom` (or
+#'   default statistics if this is `NULL`).
+#' @param labels_custom (`named vector` of `character`)\cr vector of custom statistics labels to use in place of the
+#'   defaults defined in [`summary_labels()`]. Names should be a subset of the statistics defined in `stats_custom` (or
+#'   default statistics if this is `NULL`).
+#' @param indent_mods_custom (`integer` or `named vector` of `integer`)\cr vector of custom indentation modifiers for
+#'   statistics to use instead of the default of `0L` for all statistics. Names should be a subset of the statistics
+#'   defined in `stats_custom` (or default statistics if this is `NULL`). Alternatively, the same indentation modifier
+#'   can be applied to all statistics by setting `indent_mods_custom` to a single integer value.
+#'
+#' @return
+#' * `summary_custom` returns a `list` of 4 named elements: `stats`, `formats`, `labels`, and `indent_mods`.
+#'
+#' @examples
+#' summary_custom()
+#' summary_custom(type = "counts", include_pval = TRUE)
+#' summary_custom(
+#'   include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"),
+#'   labels_custom = c(sd = "Std. Dev."), indent_mods_custom = 3L
+#' )
+#'
+#' @export
+summary_custom <- function(type = "numeric",
+                           include_pval = FALSE,
+                           stats_custom = NULL,
+                           formats_custom = NULL,
+                           labels_custom = NULL,
+                           indent_mods_custom = NULL) {
+  if ("pval" %in% stats_custom) include_pval <- TRUE
+
+  .formats <- summary_formats(type = type, include_pval = include_pval)
+  .stats <- if (is.null(stats_custom)) names(.formats) else intersect(stats_custom, names(.formats))
+  .labels <- summary_labels(type = type, include_pval = include_pval)
+  .indent_mods <- stats::setNames(rep(0L, length(.stats)), .stats)
+
+  if (!is.null(formats_custom)) .formats[names(formats_custom)] <- formats_custom
+  if (!is.null(labels_custom)) .labels[names(labels_custom)] <- labels_custom
+  if (!is.null(indent_mods_custom)) {
+    if (is.null(names(indent_mods_custom)) && length(indent_mods_custom) == 1) {
+      .indent_mods[names(.indent_mods)] <- indent_mods_custom
+    } else {
+      .indent_mods[names(indent_mods_custom)] <- indent_mods_custom
+    }
+  }
+
+  list(
+    stats = .stats,
+    formats = .formats[.stats],
+    labels = .labels[.stats],
+    indent_mods = .indent_mods[.stats]
+  )
+}
+
+#' Control Function for Descriptive Statistics
+#'
+#' @description `r lifecycle::badge("stable")`
+#'
+#' Sets a list of parameters for summaries of descriptive statistics. Typically used internally to specify
+#' details for [s_summary()]. This function family is mainly used by [analyze_vars()].
+#'
+#' @inheritParams argument_convention
+#' @param quantiles (`numeric`)\cr of length two to specify the quantiles to calculate.
+#' @param quantile_type (`numeric`)\cr between 1 and 9 selecting quantile algorithms to be used.
+#'   Default is set to 2 as this matches the default quantile algorithm in SAS `proc univariate` set by `QNTLDEF=5`.
+#'   This differs from R's default. See more about `type` in [stats::quantile()].
+#' @param test_mean (`numeric`)\cr to test against the mean under the null hypothesis when calculating p-value.
+#'
+#' @note Deprecation cycle started for `control_summarize_vars` as it is going to be renamed to
+#'   `control_analyze_vars`. Intention is to reflect better the core underlying `rtables`
+#'   functions; in this case [analyze_vars()] wraps [rtables::analyze()].
+#'
+#' @return A list of components with the same names as the arguments.
+#'
+#' @export control_analyze_vars control_summarize_vars
+#' @aliases control_summarize_vars
+control_analyze_vars <- function(conf_level = 0.95,
+                                 quantiles = c(0.25, 0.75),
+                                 quantile_type = 2,
+                                 test_mean = 0) {
+  checkmate::assert_vector(quantiles, len = 2)
+  checkmate::assert_int(quantile_type, lower = 1, upper = 9)
+  checkmate::assert_numeric(test_mean)
+  lapply(quantiles, assert_proportion_value)
+  assert_proportion_value(conf_level)
+  list(conf_level = conf_level, quantiles = quantiles, quantile_type = quantile_type, test_mean = test_mean)
+}
+
+control_summarize_vars <- control_analyze_vars
diff --git a/man/a_summary_internal.Rd b/man/a_summary_internal.Rd
new file mode 100644
index 0000000000..63ea602afd
--- /dev/null
+++ b/man/a_summary_internal.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/analyze_variables.R
+\name{a_summary_internal}
+\alias{a_summary_internal}
+\title{Helper Function to Create Output Rows for \code{\link[=a_summary]{a_summary()}}}
+\usage{
+a_summary_internal(
+  x,
+  .N_col,
+  .N_row,
+  .var,
+  .df_row,
+  .ref_group,
+  .in_ref_col,
+  compare,
+  type,
+  .stats,
+  .formats,
+  .labels,
+  .indent_mods,
+  na.rm,
+  na_level,
+  ...
+)
+}
+\arguments{
+\item{compare}{(\code{logical})\cr Whether comparison statistics should be analyzed instead of summary statistics
+(\code{compare = TRUE} adds \code{pval} statistic comparing against reference group).}
+
+\item{type}{(\code{character})\cr type of statistics to calculate given \code{x}. If \code{x} is numeric \code{type} should be
+\code{"numeric"}, otherwise type should be \code{"counts"}.}
+}
+\value{
+\itemize{
+\item \code{a_summary_internal()} returns a corresponding list with formatted \code{\link[rtables:CellValue]{rtables::CellValue()}} used within \code{a_summary}.
+}
+}
+\description{
+Helper Function to Create Output Rows for \code{\link[=a_summary]{a_summary()}}
+}
+\note{
+\itemize{
+\item To use for comparison (with additional p-value statistic), parameter \code{compare} must be set to \code{TRUE}.
+\item Ensure that either all \code{NA} values are converted to an explicit \code{NA} level or all \code{NA} values are left as is.
+}
+}
+\keyword{internal}
diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd
index 24cbc1d250..38ae596a19 100644
--- a/man/analyze_variables.Rd
+++ b/man/analyze_variables.Rd
@@ -7,7 +7,6 @@
 \alias{s_summary.factor}
 \alias{s_summary.character}
 \alias{s_summary.logical}
-\alias{a_summary_output}
 \alias{a_summary}
 \alias{a_summary.default}
 \alias{a_summary.numeric}
@@ -56,25 +55,6 @@ s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...)
   ...
 )
 
-a_summary_output(
-  x,
-  .N_col,
-  .N_row,
-  .var,
-  .df_row,
-  .ref_group,
-  .in_ref_col,
-  compare,
-  type,
-  .stats,
-  .formats,
-  .labels,
-  .indent_mods,
-  na.rm,
-  na_level,
-  ...
-)
-
 a_summary(x, .N_col, .N_row, .var, .df_row, .ref_group, .in_ref_col, ...)
 
 \method{a_summary}{default}(
@@ -174,9 +154,6 @@ to print out information about factor casting.}
 \item{compare}{(\code{logical})\cr Whether comparison statistics should be analyzed instead of summary statistics
 (\code{compare = TRUE} adds \code{pval} statistic comparing against reference group).}
 
-\item{type}{(\code{character})\cr type of statistics to calculate given \code{x}. If \code{x} is numeric \code{type} should be
-\code{"numeric"}, otherwise type should be \code{"counts"}.}
-
 \item{.stats}{(\code{character})\cr statistics to select for the table.}
 
 \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
@@ -206,6 +183,9 @@ to avoid warnings from \code{rtables}.}
 
 \item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group
 defined by this split instruction, or \code{NA_character_} (the default) for no section divider.}
+
+\item{type}{(\code{character})\cr type of statistics to calculate given \code{x}. If \code{x} is numeric \code{type} should be
+\code{"numeric"}, otherwise type should be \code{"counts"}.}
 }
 \value{
 \itemize{
@@ -262,10 +242,6 @@ denominator is zero. Note that \code{NA}s in \code{x} are never counted or leadi
 }
 }
 
-\itemize{
-\item \code{a_summary_output()} returns the corresponding list with formatted \code{\link[rtables:CellValue]{rtables::CellValue()}}.
-}
-
 \itemize{
 \item \code{a_summary()} returns the corresponding list with formatted \code{\link[rtables:CellValue]{rtables::CellValue()}}.
 }
@@ -295,8 +271,6 @@ conversion to factor (with a warning) and then forwards to the method for factor
 
 \item \code{s_summary(logical)}: Method for \code{logical} class.
 
-\item \code{a_summary_output()}: Formatted analysis helper function which is used within \code{a_summary}.
-
 \item \code{a_summary()}: Formatted analysis function which is used as \code{afun} in \code{analyze_vars()} and
 \code{compare_vars()} and as \code{cfun} in \code{summarize_colvars()}.
 
@@ -337,11 +311,6 @@ created from character variables before passing the dataset to \code{\link[rtabl
 \item Ensure that either all \code{NA} values are converted to an explicit \code{NA} level or all \code{NA} values are left as is.
 }
 
-\itemize{
-\item To use for comparison (with additional p-value statistic), parameter \code{compare} must be set to \code{TRUE}.
-\item Ensure that either all \code{NA} values are converted to an explicit \code{NA} level or all \code{NA} values are left as is.
-}
-
 Deprecation cycle started for \code{summarize_vars} as it is going to renamed into
 \code{analyze_vars}. Intention is to reflect better the core underlying \code{rtables}
 functions; in this case \code{\link[rtables:analyze]{rtables::analyze()}}.
@@ -418,26 +387,6 @@ x <- c(TRUE, FALSE, TRUE, TRUE)
 s_summary(x, denom = "N_row", .N_row = 10L)
 s_summary(x, denom = "N_col", .N_col = 20L)
 
-set.seed(1)
-x <- rnorm(10)
-a_summary_output(
-  x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL,
-  .in_ref_col = FALSE, compare = FALSE, type = "numeric", .stats = summary_custom()$stats,
-  .formats = summary_custom()$formats, .labels = summary_custom()$labels,
-  .indent_mods = summary_custom()$indents, na.rm = TRUE, na_level = NA_character_
-)
-
-x <- factor(c("a", "a", "b", "c", "a"))
-a_summary_output(
-  x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL,
-  .in_ref_col = FALSE, compare = FALSE, type = "counts",
-  .stats = summary_custom(type = "counts")$stats,
-  .formats = summary_custom(type = "counts")$formats,
-  .labels = summary_custom(type = "counts")$labels,
-  .indent_mods = summary_custom(type = "counts")$indents,
-  na.rm = TRUE, na_level = NA_character_
-)
-
 a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
 a_summary(
   factor(c("a", "a", "b", "c", "a")),
diff --git a/man/control_analyze_vars.Rd b/man/control_analyze_vars.Rd
index 825166f5ec..d9a335674c 100644
--- a/man/control_analyze_vars.Rd
+++ b/man/control_analyze_vars.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/analyze_variables.R
+% Please edit documentation in R/summary_stats.R
 \name{control_analyze_vars}
 \alias{control_analyze_vars}
 \alias{control_summarize_vars}
diff --git a/man/summary_stats.Rd b/man/summary_stats.Rd
index 1821410347..441f7b2299 100644
--- a/man/summary_stats.Rd
+++ b/man/summary_stats.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/analyze_variables.R
+% Please edit documentation in R/summary_stats.R
 \name{summary_stats}
 \alias{summary_stats}
 \alias{summary_formats}
diff --git a/tests/testthat/test-analyze_variables.R b/tests/testthat/test-analyze_variables.R
index 4e6da3a58b..016d44c786 100644
--- a/tests/testthat/test-analyze_variables.R
+++ b/tests/testthat/test-analyze_variables.R
@@ -193,13 +193,13 @@ testthat::test_that("s_summary works with logical vectors and by if requested do
   testthat::expect_snapshot(res)
 })
 
-testthat::test_that("a_summary_output and a_summary work with healthy input.", {
+testthat::test_that("a_summary_internal and a_summary work with healthy input.", {
   options("width" = 100)
 
-  # numeric input - a_summary_output
+  # numeric input - a_summary_internal
   set.seed(1)
   x <- rnorm(10)
-  result <- a_summary_output(
+  result <- a_summary_internal(
     x = x, .N_col = 10, .N_row = 20, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
     compare = FALSE, type = "numeric", .stats = summary_custom()$stats, .formats = summary_custom()$formats,
     .labels = summary_custom()$labels, .indent_mods = summary_custom()$indents, na.rm = TRUE, na_level = NA_character_
@@ -212,9 +212,9 @@ testthat::test_that("a_summary_output and a_summary work with healthy input.", {
   testthat::expect_identical(res_out, res)
   testthat::expect_snapshot(res)
 
-  # factor input - a_summary_output
+  # factor input - a_summary_internal
   x <- factor(c("a", "a", "b", "c", "a"))
-  result <- a_summary_output(
+  result <- a_summary_internal(
     x = x, .N_col = 10, .N_row = 10, .var = "bla", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
     compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats,
     .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels,
@@ -228,9 +228,9 @@ testthat::test_that("a_summary_output and a_summary work with healthy input.", {
   testthat::expect_identical(res_out, res)
   testthat::expect_snapshot(res)
 
-  # character input - a_summary_output
+  # character input - a_summary_internal
   x <- c("A", "B", "A", "C")
-  result <- a_summary_output(
+  result <- a_summary_internal(
     x = x, .N_col = 10, .N_row = 10, .var = "x", .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
     compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats,
     .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels,
@@ -245,9 +245,9 @@ testthat::test_that("a_summary_output and a_summary work with healthy input.", {
   testthat::expect_identical(res_out, res)
   testthat::expect_snapshot(res)
 
-  # logical input - a_summary_output
+  # logical input - a_summary_internal
   x <- c(TRUE, FALSE, FALSE, TRUE, TRUE)
-  result <- a_summary_output(
+  result <- a_summary_internal(
     x = x, .N_col = 10, .N_row = 10, .var = NULL, .df_row = NULL, .ref_group = NULL, .in_ref_col = FALSE,
     compare = FALSE, type = "counts", .stats = summary_custom(type = "counts")$stats,
     .formats = summary_custom(type = "counts")$formats, .labels = summary_custom(type = "counts")$labels,

From 28b803dc6c2f8e4f788c18246d7fd9cd7b1d9028 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 14 Jul 2023 20:18:15 -0400
Subject: [PATCH 29/33] Clean up a_summary and ungroup_stats, export a_compare

---
 NAMESPACE                                  |  1 +
 R/analyze_variables.R                      | 21 ++++----
 R/compare_variables.R                      |  6 ++-
 R/utils_factor.R                           | 62 +++++++---------------
 man/compare_variables.Rd                   |  4 +-
 man/ungroup_stats.Rd                       | 22 ++------
 tests/testthat/_snaps/analyze_variables.md |  2 +-
 7 files changed, 42 insertions(+), 76 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 5239b12fcb..5e9df15f52 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -23,6 +23,7 @@ S3method(tidy,glm)
 S3method(tidy,step)
 S3method(tidy,summary.coxph)
 export(CombinationFunction)
+export(a_compare)
 export(a_count_occurrences)
 export(a_count_occurrences_by_grade)
 export(a_count_patients_sum_exposure)
diff --git a/R/analyze_variables.R b/R/analyze_variables.R
index 5a2eefbd48..12d3d7c67f 100644
--- a/R/analyze_variables.R
+++ b/R/analyze_variables.R
@@ -406,14 +406,9 @@ a_summary_internal <- function(x,
                                na.rm, # nolint
                                na_level,
                                ...) {
-  # Remove all-NA rows
-  if (!is.null(.df_row) && ncol(.df_row) > 1) {
-    in_tot_col <- nrow(.df_row) == length(x)
-    .df_row <- .df_row[rowSums(is.na(.df_row)) != ncol(.df_row), ]
-    if (in_tot_col && !identical(.df_row[[.var]], x)) x <- .df_row[[.var]]
-  }
+  # If one col has NA vals, must add NA row to other cols (using placeholder lvl `fill-na-level`)
+  if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "fill-na-level")
 
-  if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) levels(x) <- c(levels(x), "na-level")
   x_stats <- if (!compare) {
     s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
   } else {
@@ -422,6 +417,7 @@ a_summary_internal <- function(x,
     )
   }
 
+  # Fill in with formatting defaults if needed
   custom_summary <- summary_custom(
     type = type,
     include_pval = compare,
@@ -434,7 +430,9 @@ a_summary_internal <- function(x,
   .formats <- custom_summary$formats
   .labels <- custom_summary$labels
   .indent_mods <- custom_summary$indent_mods
+  x_stats <- x_stats[.stats]
 
+  # Check for custom labels from control_analyze_vars
   if (is.numeric(x)) {
     for (i in intersect(.stats, c("mean_ci", "mean_pval", "median_ci", "quantiles"))) {
       if (!i %in% names(.labels) || .labels[[i]] == summary_custom()$labels[[i]]) {
@@ -443,14 +441,15 @@ a_summary_internal <- function(x,
     }
   }
 
-  x_stats <- x_stats[.stats]
   if (is.factor(x) || is.character(x)) {
-    x_ungrp <- ungroup_stats(x_stats, .stats, .formats, .labels, .indent_mods, .in_ref_col)
+    # Ungroup statistics with values for each level of x
+    x_ungrp <- ungroup_stats(x_stats, .formats, .labels, .indent_mods, .in_ref_col)
     x_stats <- x_ungrp[["x"]]
-    .stats <- x_ungrp[[".stats"]]
     .formats <- x_ungrp[[".formats"]]
-    .labels <- x_ungrp[[".labels"]]
+    .labels <- gsub("fill-na-level", "NA", x_ungrp[[".labels"]])
     .indent_mods <- x_ungrp[[".indent_mods"]]
+
+    if (.in_ref_col && "pval" %in% names(x_stats)) x_stats[["pval"]] <- character()
   }
 
   in_rows(
diff --git a/R/compare_variables.R b/R/compare_variables.R
index cb97788397..52bba6bf3d 100644
--- a/R/compare_variables.R
+++ b/R/compare_variables.R
@@ -254,7 +254,11 @@ s_compare.logical <- function(x,
 #'
 #' @note `a_compare()` has been deprecated in favor of `a_summary()` with argument `compare` set to `TRUE`.
 #'
-#' @keywords internal
+#' @examples
+#' # `a_compare` deprecated - use `a_summary()` instead
+#' a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .stats = c("n", "pval"))
+#'
+#' @export
 a_compare <- function(x,
                       .N_col, # nolint
                       .N_row, # nolint
diff --git a/R/utils_factor.R b/R/utils_factor.R
index 85b5c7677f..9e2a8d79d5 100644
--- a/R/utils_factor.R
+++ b/R/utils_factor.R
@@ -277,68 +277,42 @@ fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") {
   do.call(forcats::fct_relevel, args = c(list(.f = x), as.list(new_lvls)))
 }
 
-#' Ungroup and Format Non-Numeric Statistics
+#' Ungroup Non-Numeric Statistics
 #'
-#' Ungroups grouped non-numeric statistics (`count` and `count_fraction`) within input vectors `.stats`, `.formats`,
-#' `.labels`, and `.indent_mods`. Formats explicit `NA` level in each input vector and changes p-value to
-#' `character()` if current column is a reference column. Used within [`a_summary()`].
+#' Ungroups grouped non-numeric statistics within input vectors `.formats`, `.labels`, and `.indent_mods`.
 #'
 #' @inheritParams argument_convention
 #' @param x  (`named list` of `numeric`)\cr list of numeric statistics containing the statistics to ungroup.
-#' @param which_stats (`vector` of `character`)\cr which statistics should be ungrouped.
 #'
-#' @return A `list` with modified elements `x`, `.stats`, `.formats`, `.labels`, and `.indent_mods`.
+#' @return A `list` with modified elements `x`, `.formats`, `.labels`, and `.indent_mods`.
 #'
 #' @seealso [a_summary()] which uses this function internally.
 #'
 #' @keywords internal
 ungroup_stats <- function(x,
-                          .stats,
                           .formats,
                           .labels,
                           .indent_mods,
-                          .in_ref_col = FALSE,
-                          which_stats = c("count", "count_fraction")) {
+                          .in_ref_col = FALSE) {
   checkmate::assert_list(x)
-
-  for (stat in which_stats) {
-    for (a in names(x[[stat]])) {
-      a <- if (a == "na-level") "NA" else a
-      a_lvl <- paste(stat, a, sep = ".")
-      a_name <- if (a != "NA" || "NA" %in% names(x[[stat]])) a else "na-level"
-      .stats <- c(.stats, a_lvl)
-      .formats <- append(.formats, .formats[stat] %>% `names<-`(a_lvl), after = if (stat %in% names(.formats)) {
-        which(names(.formats) == stat) - 1 + which(names(x[[stat]]) == a_name)
-      } else {
-        length(.formats)
-      })
-      .labels <- append(.labels, a %>% `names<-`(a_lvl), after = if (stat %in% names(.labels)) {
-        which(names(.labels) == stat) - 1 + which(names(x[[stat]]) == a_name)
-      } else {
-        length(.labels)
-      })
-      .indent_mods <- append(
-        .indent_mods, .indent_mods[stat] %>% `names<-`(a_lvl),
-        after = if (stat %in% names(.indent_mods)) {
-          which(names(.indent_mods) == stat) - 1 + which(names(x[[stat]]) == a_name)
-        } else {
-          length(.indent_mods)
-        }
-      )
-    }
-  }
-
-  if (.in_ref_col && "pval" %in% names(x)) x[["pval"]] <- "pvalue"
   x <- unlist(x, recursive = FALSE)
-  if (.in_ref_col && "pval" %in% names(x)) x[["pval"]] <- character()
-  names(x) <- gsub("na-level", "NA", names(x))
   .stats <- names(x)
 
+  # Ungroup stats
+  .formats <- lapply(.stats, function(x) {
+    .formats[[if (!grepl("\\.", x)) x else regmatches(x, regexpr("\\.", x), invert = TRUE)[[1]][1]]]
+  })
+  .indent_mods <- sapply(.stats, function(x) {
+    .indent_mods[[if (!grepl("\\.", x)) x else regmatches(x, regexpr("\\.", x), invert = TRUE)[[1]][1]]]
+  })
+  .labels <- sapply(.stats, function(x) {
+    if (!grepl("\\.", x)) .labels[[x]] else regmatches(x, regexpr("\\.", x), invert = TRUE)[[1]][2]
+  })
+
   list(
     x = x,
-    .stats = .stats,
-    .formats = .formats[.stats],
-    .labels = .labels[.stats],
-    .indent_mods = .indent_mods[.stats]
+    .formats = .formats,
+    .labels = .labels,
+    .indent_mods = .indent_mods
   )
 }
diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd
index b9c7117358..e2c6ed3457 100644
--- a/man/compare_variables.Rd
+++ b/man/compare_variables.Rd
@@ -234,6 +234,9 @@ y <- c(NA, NA, NA, NA, FALSE)
 s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
 s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
 
+# `a_compare` deprecated - use `a_summary()` instead
+a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .stats = c("n", "pval"))
+
 # `compare_vars()` in `rtables` pipelines
 
 ## Default output within a `rtables` pipeline.
@@ -259,4 +262,3 @@ Relevant constructor function \code{\link[=create_afun_compare]{create_afun_comp
 to compute a summary within \code{s_compare()}, and \code{\link[=a_compare]{a_compare()}} which is used (with \code{compare = TRUE}) as the analysis
 function for \code{compare_vars()}.
 }
-\keyword{internal}
diff --git a/man/ungroup_stats.Rd b/man/ungroup_stats.Rd
index b427561695..8e2d0ec447 100644
--- a/man/ungroup_stats.Rd
+++ b/man/ungroup_stats.Rd
@@ -2,23 +2,13 @@
 % Please edit documentation in R/utils_factor.R
 \name{ungroup_stats}
 \alias{ungroup_stats}
-\title{Ungroup and Format Non-Numeric Statistics}
+\title{Ungroup Non-Numeric Statistics}
 \usage{
-ungroup_stats(
-  x,
-  .stats,
-  .formats,
-  .labels,
-  .indent_mods,
-  .in_ref_col = FALSE,
-  which_stats = c("count", "count_fraction")
-)
+ungroup_stats(x, .formats, .labels, .indent_mods, .in_ref_col = FALSE)
 }
 \arguments{
 \item{x}{(\verb{named list} of \code{numeric})\cr list of numeric statistics containing the statistics to ungroup.}
 
-\item{.stats}{(\code{character})\cr statistics to select for the table.}
-
 \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics.}
 
 \item{.labels}{(named \code{character})\cr labels for the statistics (without indent).}
@@ -27,16 +17,12 @@ ungroup_stats(
 unmodified default behavior. Can be negative.}
 
 \item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.}
-
-\item{which_stats}{(\code{vector} of \code{character})\cr which statistics should be ungrouped.}
 }
 \value{
-A \code{list} with modified elements \code{x}, \code{.stats}, \code{.formats}, \code{.labels}, and \code{.indent_mods}.
+A \code{list} with modified elements \code{x}, \code{.formats}, \code{.labels}, and \code{.indent_mods}.
 }
 \description{
-Ungroups grouped non-numeric statistics (\code{count} and \code{count_fraction}) within input vectors \code{.stats}, \code{.formats},
-\code{.labels}, and \code{.indent_mods}. Formats explicit \code{NA} level in each input vector and changes p-value to
-\code{character()} if current column is a reference column. Used within \code{\link[=a_summary]{a_summary()}}.
+Ungroups grouped non-numeric statistics within input vectors \code{.formats}, \code{.labels}, and \code{.indent_mods}.
 }
 \seealso{
 \code{\link[=a_summary]{a_summary()}} which uses this function internally.
diff --git a/tests/testthat/_snaps/analyze_variables.md b/tests/testthat/_snaps/analyze_variables.md
index fa1cb362c3..705312763c 100644
--- a/tests/testthat/_snaps/analyze_variables.md
+++ b/tests/testthat/_snaps/analyze_variables.md
@@ -1116,7 +1116,7 @@
       [1] 0
       
 
-# a_summary_output and a_summary work with healthy input.
+# a_summary_internal and a_summary work with healthy input.
 
     Code
       res

From d0e341c97a8497818d4ecdb70cd5eacbf07458fc Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 14 Jul 2023 20:23:00 -0400
Subject: [PATCH 30/33] Update NEWS

---
 NEWS.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 0f6cbca9c2..b089e2d17b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,19 +2,19 @@
 
 ### Enhancements
 * Added method for `character` class to `h_coxreg_inter_effect` enabling `character` covariates in `summarize_coxreg`.
-* Refactored `a_summary` to no longer use helper function `create_afun_summary`. 
-* Refactored `summarize_vars` and `compare_vars` to use refactored `a_summary`.
-* Created new internal helper function `ungroup_stats` to ungroup and format statistics calculated for factor variables.
 * Started deprecation cycle for `summarize_vars` and `control_summarize_vars`. Renamed into `analyze_vars` and `control_analyze_vars` to reflect underlying `rtables` machinery while keeping backward compatibility with aliases.
 * Added `ylim` argument to `g_km` to allow the user to set custom limits for the y-axis.
 * Added assertion to `g_km` which checks whether there is one arm present in the data when `annot_coxph` is true.
 * Added `flag_labels` argument to `s_count_patients_with_flags` to enable more label handling options in `count_patients_by_flags`. 
+* Refactored `a_summary` to no longer use helper function `create_afun_summary`. 
+* Refactored `summarize_vars` and `compare_vars` to use refactored `a_summary`.
+* Created new internal helper functions `ungroup_stats` to ungroup statistics calculated for factor variables, and `a_summary_internal` to perform calculations for `a_summary`.
 
 ### Miscellaneous
 * Began deprecation of `time_unit_input` and `time_unit_output` arguments and replaced them with the `input_time_unit` and `num_pt_year`, respectively, in `control_incidence_rate`.
+* Removed deprecated `pairwise` function.
 * Deprecated `a_compare` and replaced it with `a_summary` with argument `compare = TRUE`.
 * Deprecated helper functions `create_afun_summary` and `create_afun_compare` which are no longer used by `a_summary` and `a_compare` respectively.
-* Removed deprecated `pairwise` function.
 
 ### Bug Fixes
 * Fixed long double assertion check in `sum(weights)` for `M1mac` installation.

From 62c7990c3e4995473a8a6e35e7a23d69a2e5cb29 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Sat, 15 Jul 2023 15:22:34 -0400
Subject: [PATCH 31/33] Fix tests

---
 R/analyze_variables.R | 4 +---
 R/utils_factor.R      | 9 ++++++---
 man/ungroup_stats.Rd  | 6 ++----
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/R/analyze_variables.R b/R/analyze_variables.R
index 12d3d7c67f..6d2dce3ba2 100644
--- a/R/analyze_variables.R
+++ b/R/analyze_variables.R
@@ -443,13 +443,11 @@ a_summary_internal <- function(x,
 
   if (is.factor(x) || is.character(x)) {
     # Ungroup statistics with values for each level of x
-    x_ungrp <- ungroup_stats(x_stats, .formats, .labels, .indent_mods, .in_ref_col)
+    x_ungrp <- ungroup_stats(x_stats, .formats, .labels, .indent_mods)
     x_stats <- x_ungrp[["x"]]
     .formats <- x_ungrp[[".formats"]]
     .labels <- gsub("fill-na-level", "NA", x_ungrp[[".labels"]])
     .indent_mods <- x_ungrp[[".indent_mods"]]
-
-    if (.in_ref_col && "pval" %in% names(x_stats)) x_stats[["pval"]] <- character()
   }
 
   in_rows(
diff --git a/R/utils_factor.R b/R/utils_factor.R
index 9e2a8d79d5..aab2e52184 100644
--- a/R/utils_factor.R
+++ b/R/utils_factor.R
@@ -286,16 +286,19 @@ fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") {
 #'
 #' @return A `list` with modified elements `x`, `.formats`, `.labels`, and `.indent_mods`.
 #'
-#' @seealso [a_summary()] which uses this function internally.
+#' @seealso [a_summary_internal()] which uses this function internally.
 #'
 #' @keywords internal
 ungroup_stats <- function(x,
                           .formats,
                           .labels,
-                          .indent_mods,
-                          .in_ref_col = FALSE) {
+                          .indent_mods) {
   checkmate::assert_list(x)
+  empty_pval <- "pval" %in% names(x) && length(x[["pval"]]) == 0
   x <- unlist(x, recursive = FALSE)
+
+  # If p-value is empty it is removed by unlist and needs to be re-added
+  if (empty_pval) x[["pval"]] <- character()
   .stats <- names(x)
 
   # Ungroup stats
diff --git a/man/ungroup_stats.Rd b/man/ungroup_stats.Rd
index 8e2d0ec447..fb68d1cce6 100644
--- a/man/ungroup_stats.Rd
+++ b/man/ungroup_stats.Rd
@@ -4,7 +4,7 @@
 \alias{ungroup_stats}
 \title{Ungroup Non-Numeric Statistics}
 \usage{
-ungroup_stats(x, .formats, .labels, .indent_mods, .in_ref_col = FALSE)
+ungroup_stats(x, .formats, .labels, .indent_mods)
 }
 \arguments{
 \item{x}{(\verb{named list} of \code{numeric})\cr list of numeric statistics containing the statistics to ungroup.}
@@ -15,8 +15,6 @@ ungroup_stats(x, .formats, .labels, .indent_mods, .in_ref_col = FALSE)
 
 \item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the
 unmodified default behavior. Can be negative.}
-
-\item{.in_ref_col}{(\code{logical})\cr \code{TRUE} when working with the reference level, \code{FALSE} otherwise.}
 }
 \value{
 A \code{list} with modified elements \code{x}, \code{.formats}, \code{.labels}, and \code{.indent_mods}.
@@ -25,6 +23,6 @@ A \code{list} with modified elements \code{x}, \code{.formats}, \code{.labels},
 Ungroups grouped non-numeric statistics within input vectors \code{.formats}, \code{.labels}, and \code{.indent_mods}.
 }
 \seealso{
-\code{\link[=a_summary]{a_summary()}} which uses this function internally.
+\code{\link[=a_summary_internal]{a_summary_internal()}} which uses this function internally.
 }
 \keyword{internal}

From 87b738ad2037c56b1ee4abab1fb5d6acccd3af7c Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 21 Jul 2023 15:43:12 -0400
Subject: [PATCH 32/33] Organize tests

---
 tests/testthat/_snaps/analyze_variables.md | 214 ---------------------
 tests/testthat/_snaps/summary_stats.md     | 214 +++++++++++++++++++++
 tests/testthat/test-analyze_variables.R    |  48 -----
 tests/testthat/test-summary_stats.R        |  47 +++++
 4 files changed, 261 insertions(+), 262 deletions(-)
 create mode 100644 tests/testthat/_snaps/summary_stats.md
 create mode 100644 tests/testthat/test-summary_stats.R

diff --git a/tests/testthat/_snaps/analyze_variables.md b/tests/testthat/_snaps/analyze_variables.md
index 705312763c..4fe6f5d8ed 100644
--- a/tests/testthat/_snaps/analyze_variables.md
+++ b/tests/testthat/_snaps/analyze_variables.md
@@ -1,217 +1,3 @@
-# control_analyze_vars works with customized parameters
-
-    Code
-      res
-    Output
-      $conf_level
-      [1] 0.9
-      
-      $quantiles
-      [1] 0.1 0.9
-      
-      $quantile_type
-      [1] 2
-      
-      $test_mean
-      [1] 0
-      
-
-# summary_formats works as expected
-
-    Code
-      res
-    Output
-                         n                  sum                 mean 
-                     "xx."               "xx.x"               "xx.x" 
-                        sd                   se              mean_sd 
-                    "xx.x"               "xx.x"        "xx.x (xx.x)" 
-                   mean_se              mean_ci             mean_sei 
-             "xx.x (xx.x)"     "(xx.xx, xx.xx)"     "(xx.xx, xx.xx)" 
-                  mean_sdi            mean_pval               median 
-          "(xx.xx, xx.xx)"              "xx.xx"               "xx.x" 
-                       mad            median_ci            quantiles 
-                    "xx.x"     "(xx.xx, xx.xx)"        "xx.x - xx.x" 
-                       iqr                range                  min 
-                    "xx.x"        "xx.x - xx.x"               "xx.x" 
-                       max         median_range                   cv 
-                    "xx.x" "xx.x (xx.x - xx.x)"               "xx.x" 
-                 geom_mean         geom_mean_ci              geom_cv 
-                    "xx.x"     "(xx.xx, xx.xx)"               "xx.x" 
-
----
-
-    Code
-      res
-    Output
-      $n
-      [1] "xx."
-      
-      $count
-      [1] "xx."
-      
-      $count_fraction
-      function(x, ...) {
-        attr(x, "label") <- NULL
-      
-        if (any(is.na(x))) {
-          return("NA")
-        }
-      
-        checkmate::assert_vector(x)
-        checkmate::assert_integerish(x[1])
-        assert_proportion_value(x[2], include_boundaries = TRUE)
-      
-        result <- if (x[1] == 0) {
-          "0"
-        } else {
-          paste0(x[1], " (", round(x[2] * 100, 1), "%)")
-        }
-      
-        return(result)
-      }
-      <environment: namespace:tern>
-      
-      $n_blq
-      [1] "xx."
-      
-      $pval
-      [1] "x.xxxx | (<0.0001)"
-      
-
-# summary_labels works as expected
-
-    Code
-      res
-    Output
-                                  n                           sum 
-                                "n"                         "Sum" 
-                               mean                            sd 
-                             "Mean"                          "SD" 
-                                 se                       mean_sd 
-                               "SE"                   "Mean (SD)" 
-                            mean_se                       mean_ci 
-                        "Mean (SE)"                 "Mean 95% CI" 
-                           mean_sei                      mean_sdi 
-                    "Mean -/+ 1xSE"               "Mean -/+ 1xSD" 
-                          mean_pval                        median 
-      "Mean p-value (H0: mean = 0)"                      "Median" 
-                                mad                     median_ci 
-        "Median Absolute Deviation"               "Median 95% CI" 
-                          quantiles                           iqr 
-                  "25% and 75%-ile"                         "IQR" 
-                              range                           min 
-                        "Min - Max"                     "Minimum" 
-                                max                  median_range 
-                          "Maximum"          "Median (Min - Max)" 
-                                 cv                     geom_mean 
-                           "CV (%)"              "Geometric Mean" 
-                       geom_mean_ci                       geom_cv 
-            "Geometric Mean 95% CI"         "CV % Geometric Mean" 
-
----
-
-    Code
-      res
-    Output
-                                 n                        count 
-                               "n"                      "count" 
-                    count_fraction                        n_blq 
-                  "count_fraction"                      "n_blq" 
-                              pval 
-      "p-value (chi-squared test)" 
-
-# summary_custom works as expected
-
-    Code
-      res
-    Output
-      $stats
-       [1] "n"            "sum"          "mean"         "sd"           "se"          
-       [6] "mean_sd"      "mean_se"      "mean_ci"      "mean_sei"     "mean_sdi"    
-      [11] "mean_pval"    "median"       "mad"          "median_ci"    "quantiles"   
-      [16] "iqr"          "range"        "min"          "max"          "median_range"
-      [21] "cv"           "geom_mean"    "geom_mean_ci" "geom_cv"     
-      
-      $formats
-                         n                  sum                 mean 
-                     "xx."               "xx.x"               "xx.x" 
-                        sd                   se              mean_sd 
-                    "xx.x"               "xx.x"        "xx.x (xx.x)" 
-                   mean_se              mean_ci             mean_sei 
-             "xx.x (xx.x)"     "(xx.xx, xx.xx)"     "(xx.xx, xx.xx)" 
-                  mean_sdi            mean_pval               median 
-          "(xx.xx, xx.xx)"              "xx.xx"               "xx.x" 
-                       mad            median_ci            quantiles 
-                    "xx.x"     "(xx.xx, xx.xx)"        "xx.x - xx.x" 
-                       iqr                range                  min 
-                    "xx.x"        "xx.x - xx.x"               "xx.x" 
-                       max         median_range                   cv 
-                    "xx.x" "xx.x (xx.x - xx.x)"               "xx.x" 
-                 geom_mean         geom_mean_ci              geom_cv 
-                    "xx.x"     "(xx.xx, xx.xx)"               "xx.x" 
-      
-      $labels
-                                  n                           sum 
-                                "n"                         "Sum" 
-                               mean                            sd 
-                             "Mean"                          "SD" 
-                                 se                       mean_sd 
-                               "SE"                   "Mean (SD)" 
-                            mean_se                       mean_ci 
-                        "Mean (SE)"                 "Mean 95% CI" 
-                           mean_sei                      mean_sdi 
-                    "Mean -/+ 1xSE"               "Mean -/+ 1xSD" 
-                          mean_pval                        median 
-      "Mean p-value (H0: mean = 0)"                      "Median" 
-                                mad                     median_ci 
-        "Median Absolute Deviation"               "Median 95% CI" 
-                          quantiles                           iqr 
-                  "25% and 75%-ile"                         "IQR" 
-                              range                           min 
-                        "Min - Max"                     "Minimum" 
-                                max                  median_range 
-                          "Maximum"          "Median (Min - Max)" 
-                                 cv                     geom_mean 
-                           "CV (%)"              "Geometric Mean" 
-                       geom_mean_ci                       geom_cv 
-            "Geometric Mean 95% CI"         "CV % Geometric Mean" 
-      
-      $indent_mods
-                 n          sum         mean           sd           se      mean_sd 
-                 0            0            0            0            0            0 
-           mean_se      mean_ci     mean_sei     mean_sdi    mean_pval       median 
-                 0            0            0            0            0            0 
-               mad    median_ci    quantiles          iqr        range          min 
-                 0            0            0            0            0            0 
-               max median_range           cv    geom_mean geom_mean_ci      geom_cv 
-                 0            0            0            0            0            0 
-      
-
----
-
-    Code
-      res
-    Output
-      $stats
-      [1] "n"     "count"
-      
-      $formats
-      $formats$n
-      [1] "xx.xx"
-      
-      $formats$count
-      [1] "xx."
-      
-      
-      $labels
-          n count 
-        "n"   "#" 
-      
-      $indent_mods
-          n count 
-          2     2 
-      
-
 # s_summary return NA for x length 0L
 
     Code
diff --git a/tests/testthat/_snaps/summary_stats.md b/tests/testthat/_snaps/summary_stats.md
new file mode 100644
index 0000000000..4dd4633f57
--- /dev/null
+++ b/tests/testthat/_snaps/summary_stats.md
@@ -0,0 +1,214 @@
+# summary_formats works as expected
+
+    Code
+      res
+    Output
+                         n                  sum                 mean 
+                     "xx."               "xx.x"               "xx.x" 
+                        sd                   se              mean_sd 
+                    "xx.x"               "xx.x"        "xx.x (xx.x)" 
+                   mean_se              mean_ci             mean_sei 
+             "xx.x (xx.x)"     "(xx.xx, xx.xx)"     "(xx.xx, xx.xx)" 
+                  mean_sdi            mean_pval               median 
+          "(xx.xx, xx.xx)"              "xx.xx"               "xx.x" 
+                       mad            median_ci            quantiles 
+                    "xx.x"     "(xx.xx, xx.xx)"        "xx.x - xx.x" 
+                       iqr                range                  min 
+                    "xx.x"        "xx.x - xx.x"               "xx.x" 
+                       max         median_range                   cv 
+                    "xx.x" "xx.x (xx.x - xx.x)"               "xx.x" 
+                 geom_mean         geom_mean_ci              geom_cv 
+                    "xx.x"     "(xx.xx, xx.xx)"               "xx.x" 
+
+---
+
+    Code
+      res
+    Output
+      $n
+      [1] "xx."
+      
+      $count
+      [1] "xx."
+      
+      $count_fraction
+      function(x, ...) {
+        attr(x, "label") <- NULL
+      
+        if (any(is.na(x))) {
+          return("NA")
+        }
+      
+        checkmate::assert_vector(x)
+        checkmate::assert_integerish(x[1])
+        assert_proportion_value(x[2], include_boundaries = TRUE)
+      
+        result <- if (x[1] == 0) {
+          "0"
+        } else {
+          paste0(x[1], " (", round(x[2] * 100, 1), "%)")
+        }
+      
+        return(result)
+      }
+      <environment: namespace:tern>
+      
+      $n_blq
+      [1] "xx."
+      
+      $pval
+      [1] "x.xxxx | (<0.0001)"
+      
+
+# summary_labels works as expected
+
+    Code
+      res
+    Output
+                                  n                           sum 
+                                "n"                         "Sum" 
+                               mean                            sd 
+                             "Mean"                          "SD" 
+                                 se                       mean_sd 
+                               "SE"                   "Mean (SD)" 
+                            mean_se                       mean_ci 
+                        "Mean (SE)"                 "Mean 95% CI" 
+                           mean_sei                      mean_sdi 
+                    "Mean -/+ 1xSE"               "Mean -/+ 1xSD" 
+                          mean_pval                        median 
+      "Mean p-value (H0: mean = 0)"                      "Median" 
+                                mad                     median_ci 
+        "Median Absolute Deviation"               "Median 95% CI" 
+                          quantiles                           iqr 
+                  "25% and 75%-ile"                         "IQR" 
+                              range                           min 
+                        "Min - Max"                     "Minimum" 
+                                max                  median_range 
+                          "Maximum"          "Median (Min - Max)" 
+                                 cv                     geom_mean 
+                           "CV (%)"              "Geometric Mean" 
+                       geom_mean_ci                       geom_cv 
+            "Geometric Mean 95% CI"         "CV % Geometric Mean" 
+
+---
+
+    Code
+      res
+    Output
+                                 n                        count 
+                               "n"                      "count" 
+                    count_fraction                        n_blq 
+                  "count_fraction"                      "n_blq" 
+                              pval 
+      "p-value (chi-squared test)" 
+
+# summary_custom works as expected
+
+    Code
+      res
+    Output
+      $stats
+       [1] "n"            "sum"          "mean"         "sd"           "se"          
+       [6] "mean_sd"      "mean_se"      "mean_ci"      "mean_sei"     "mean_sdi"    
+      [11] "mean_pval"    "median"       "mad"          "median_ci"    "quantiles"   
+      [16] "iqr"          "range"        "min"          "max"          "median_range"
+      [21] "cv"           "geom_mean"    "geom_mean_ci" "geom_cv"     
+      
+      $formats
+                         n                  sum                 mean 
+                     "xx."               "xx.x"               "xx.x" 
+                        sd                   se              mean_sd 
+                    "xx.x"               "xx.x"        "xx.x (xx.x)" 
+                   mean_se              mean_ci             mean_sei 
+             "xx.x (xx.x)"     "(xx.xx, xx.xx)"     "(xx.xx, xx.xx)" 
+                  mean_sdi            mean_pval               median 
+          "(xx.xx, xx.xx)"              "xx.xx"               "xx.x" 
+                       mad            median_ci            quantiles 
+                    "xx.x"     "(xx.xx, xx.xx)"        "xx.x - xx.x" 
+                       iqr                range                  min 
+                    "xx.x"        "xx.x - xx.x"               "xx.x" 
+                       max         median_range                   cv 
+                    "xx.x" "xx.x (xx.x - xx.x)"               "xx.x" 
+                 geom_mean         geom_mean_ci              geom_cv 
+                    "xx.x"     "(xx.xx, xx.xx)"               "xx.x" 
+      
+      $labels
+                                  n                           sum 
+                                "n"                         "Sum" 
+                               mean                            sd 
+                             "Mean"                          "SD" 
+                                 se                       mean_sd 
+                               "SE"                   "Mean (SD)" 
+                            mean_se                       mean_ci 
+                        "Mean (SE)"                 "Mean 95% CI" 
+                           mean_sei                      mean_sdi 
+                    "Mean -/+ 1xSE"               "Mean -/+ 1xSD" 
+                          mean_pval                        median 
+      "Mean p-value (H0: mean = 0)"                      "Median" 
+                                mad                     median_ci 
+        "Median Absolute Deviation"               "Median 95% CI" 
+                          quantiles                           iqr 
+                  "25% and 75%-ile"                         "IQR" 
+                              range                           min 
+                        "Min - Max"                     "Minimum" 
+                                max                  median_range 
+                          "Maximum"          "Median (Min - Max)" 
+                                 cv                     geom_mean 
+                           "CV (%)"              "Geometric Mean" 
+                       geom_mean_ci                       geom_cv 
+            "Geometric Mean 95% CI"         "CV % Geometric Mean" 
+      
+      $indent_mods
+                 n          sum         mean           sd           se      mean_sd 
+                 0            0            0            0            0            0 
+           mean_se      mean_ci     mean_sei     mean_sdi    mean_pval       median 
+                 0            0            0            0            0            0 
+               mad    median_ci    quantiles          iqr        range          min 
+                 0            0            0            0            0            0 
+               max median_range           cv    geom_mean geom_mean_ci      geom_cv 
+                 0            0            0            0            0            0 
+      
+
+---
+
+    Code
+      res
+    Output
+      $stats
+      [1] "n"     "count"
+      
+      $formats
+      $formats$n
+      [1] "xx.xx"
+      
+      $formats$count
+      [1] "xx."
+      
+      
+      $labels
+          n count 
+        "n"   "#" 
+      
+      $indent_mods
+          n count 
+          2     2 
+      
+
+# control_analyze_vars works with customized parameters
+
+    Code
+      res
+    Output
+      $conf_level
+      [1] 0.9
+      
+      $quantiles
+      [1] 0.1 0.9
+      
+      $quantile_type
+      [1] 2
+      
+      $test_mean
+      [1] 0
+      
+
diff --git a/tests/testthat/test-analyze_variables.R b/tests/testthat/test-analyze_variables.R
index 016d44c786..70d44f0914 100644
--- a/tests/testthat/test-analyze_variables.R
+++ b/tests/testthat/test-analyze_variables.R
@@ -1,51 +1,3 @@
-testthat::test_that("control_analyze_vars works with customized parameters", {
-  result <- control_analyze_vars(
-    conf_level = 0.9,
-    quantiles = c(0.1, 0.9)
-  )
-
-  res <- testthat::expect_silent(result)
-  testthat::expect_snapshot(res)
-})
-
-testthat::test_that("control_analyze_vars fails wrong inputs", {
-  testthat::expect_error(control_analyze_vars(quantiles = c(25, 75)))
-  testthat::expect_error(control_analyze_vars(conf_level = 95))
-})
-
-testthat::test_that("summary_formats works as expected", {
-  result <- summary_formats()
-  res <- testthat::expect_silent(result)
-  testthat::expect_snapshot(res)
-
-  result <- summary_formats(type = "counts", include_pval = TRUE)
-  res <- testthat::expect_silent(result)
-  testthat::expect_snapshot(res)
-})
-
-testthat::test_that("summary_labels works as expected", {
-  result <- summary_labels()
-  res <- testthat::expect_silent(result)
-  testthat::expect_snapshot(res)
-
-  result <- summary_labels(type = "counts", include_pval = TRUE)
-  res <- testthat::expect_silent(result)
-  testthat::expect_snapshot(res)
-})
-
-testthat::test_that("summary_custom works as expected", {
-  result <- summary_custom()
-  res <- testthat::expect_silent(result)
-  testthat::expect_snapshot(res)
-
-  result <- summary_custom(
-    type = "counts", stats_custom = c("n", "count"),
-    formats_custom = c(n = "xx.xx"), labels_custom = c(count = "#"), indent_mods_custom = 2L
-  )
-  res <- testthat::expect_silent(result)
-  testthat::expect_snapshot(res)
-})
-
 testthat::test_that("s_summary return NA for x length 0L", {
   x <- numeric()
 
diff --git a/tests/testthat/test-summary_stats.R b/tests/testthat/test-summary_stats.R
new file mode 100644
index 0000000000..281de0be1f
--- /dev/null
+++ b/tests/testthat/test-summary_stats.R
@@ -0,0 +1,47 @@
+testthat::test_that("summary_formats works as expected", { # snapshots summary_formats() for two argument sets
+  result <- summary_formats() # every argument left at its default
+  res <- testthat::expect_silent(result) # NOTE(review): gets the evaluated value, not the call - confirm intent
+  testthat::expect_snapshot(res)
+
+  result <- summary_formats(type = "counts", include_pval = TRUE) # counts type, include_pval switched on
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})
+
+testthat::test_that("summary_labels works as expected", { # snapshots summary_labels() for two argument sets
+  result <- summary_labels() # every argument left at its default
+  res <- testthat::expect_silent(result) # NOTE(review): gets the evaluated value, not the call - confirm intent
+  testthat::expect_snapshot(res)
+
+  result <- summary_labels(type = "counts", include_pval = TRUE) # counts type, include_pval switched on
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})
+
+testthat::test_that("summary_custom works as expected", { # snapshots summary_custom() with and without overrides
+  result <- summary_custom() # every argument left at its default
+  res <- testthat::expect_silent(result) # NOTE(review): gets the evaluated value, not the call - confirm intent
+  testthat::expect_snapshot(res)
+
+  result <- summary_custom( # second case: counts type with custom stats, format, label and indent
+    type = "counts", stats_custom = c("n", "count"),
+    formats_custom = c(n = "xx.xx"), labels_custom = c(count = "#"), indent_mods_custom = 2L
+  )
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})
+
+testthat::test_that("control_analyze_vars works with customized parameters", { # snapshots the returned control list
+  result <- control_analyze_vars( # only conf_level and quantiles are passed; other arguments keep defaults
+    conf_level = 0.9,
+    quantiles = c(0.1, 0.9)
+  )
+
+  res <- testthat::expect_silent(result) # NOTE(review): gets the evaluated value, not the call - confirm intent
+  testthat::expect_snapshot(res)
+})
+
+testthat::test_that("control_analyze_vars fails wrong inputs", { # each call below must raise an error
+  testthat::expect_error(control_analyze_vars(quantiles = c(25, 75))) # presumably rejected as outside 0-1 - confirm
+  testthat::expect_error(control_analyze_vars(conf_level = 95)) # presumably rejected as outside 0-1 - confirm
+})

From 3d990980f85b1d6f37bb1afad26544099bd9ab56 Mon Sep 17 00:00:00 2001
From: "27856297+dependabot-preview[bot]@users.noreply.github.com"
 <27856297+dependabot-preview[bot]@users.noreply.github.com>
Date: Fri, 4 Aug 2023 10:20:22 +0000
Subject: [PATCH 33/33] [skip actions] Roxygen Man Pages Auto Update

---
 man/compare_variables.Rd | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd
index de6271c6a2..076db930d3 100644
--- a/man/compare_variables.Rd
+++ b/man/compare_variables.Rd
@@ -78,10 +78,11 @@ by a statistics function.}
 \item{verbose}{(\code{logical})\cr Whether warnings and messages should be printed. Mainly used
 to print out information about factor casting. Defaults to \code{TRUE}.}
 
-\item{.N_col}{(\code{count})\cr row-wise N (row group count) for the group of observations being analyzed
-(i.e. with no column-based subsetting) that is passed by \code{rtables}.}
+\item{.N_col}{(\code{integer})\cr column-wise N (column count) for the full column being analyzed that is typically
+passed by \code{rtables}.}
 
-\item{.N_row}{(\code{count})\cr column-wise N (column count) for the full column that is passed by \code{rtables}.}
+\item{.N_row}{(\code{integer})\cr row-wise N (row group count) for the group of observations being analyzed
+(i.e. with no column-based subsetting) that is typically passed by \code{rtables}.}
 
 \item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.}