insightsengineering · ddsjoberg · Jul 27, 2023 · Jul 27, 2023
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,3 +1,4 @@
 ^cardinal\.Rproj$
 ^\.Rproj\.user$
 ^LICENSE\.md$
+^README\.Rmd$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -9,7 +9,10 @@ License: MIT + file LICENSE
 Imports: 
     cli (>= 3.6.1),
     dplyr (>= 1.1.2),
-    rlang (>= 1.1.1)
+    rlang (>= 1.1.1),
+    tidyr (>= 1.3.0)
+Suggests: 
+    broom
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,3 +1,30 @@
 # Generated by roxygen2: do not edit by hand
 
+export("%>%")
+export(all_of)
+export(any_of)
+export(ard_categorical)
+export(ard_continuous)
+export(ard_ttest)
+export(contains)
+export(ends_with)
+export(everything)
+export(last_col)
+export(matches)
+export(num_range)
+export(one_of)
+export(starts_with)
+export(vars)
+importFrom(dplyr,"%>%")
+importFrom(dplyr,all_of)
+importFrom(dplyr,any_of)
+importFrom(dplyr,contains)
+importFrom(dplyr,ends_with)
+importFrom(dplyr,everything)
+importFrom(dplyr,last_col)
+importFrom(dplyr,matches)
+importFrom(dplyr,num_range)
+importFrom(dplyr,one_of)
+importFrom(dplyr,starts_with)
+importFrom(dplyr,vars)
 importFrom(rlang,"%||%")
diff --git a/R/ard_comparison.R b/R/ard_comparison.R
@@ -0,0 +1,43 @@
+#' Comparison ARD Statistics
+#'
+#' @param data a data frame
+#' @param by character column name to compare by
+#' @param variable character column name to be compared
+#' @param ... arguments passed to method.
+#'
+#' @return data frame
+#' @name ard_comparison
+#'
+#' @examples
+#' ard_ttest(data = mtcars, by = "am", variable = "hp")
+NULL
+
+#' @rdname ard_comparison
+#' @param conf.level confidence level of the interval, passed on to
+#' [stats::t.test()]. Default is `0.95`.
+#' @export
+ard_ttest <- function(data, by, variable, conf.level = 0.95, ...) {
+  # check installed packages ---------------------------------------------------
+  rlang::check_installed("broom")
+
+  # sorted, non-missing values of the `by` column; computed once and reused to
+  # label the two group estimates below
+  by_levels <- unique(data[[by]]) |> stats::na.omit() |> sort()
+
+  # perform t-test and format results ------------------------------------------
+  stats::t.test(data[[variable]] ~ data[[by]], conf.level = conf.level, ...) |>
+    broom::tidy() |>
+    dplyr::mutate(
+      conf.level = conf.level,
+      # wrap every statistic in a list so that values of different types can
+      # share the single `statistic` column created by the pivot
+      dplyr::across(dplyr::everything(), .fns = list),
+      strata1 = by,
+      variable = variable,
+      context = "ttest"
+    ) |>
+    # one row per statistic
+    tidyr::pivot_longer(
+      cols = -c("strata1", "variable", "context"),
+      names_to = "stat_name",
+      values_to = "statistic"
+    ) |>
+    dplyr::mutate(
+      # only the two group estimates are tied to a level of `by`; rows for the
+      # other statistics match neither condition
+      strata1_level =
+        dplyr::case_when(
+          .data$stat_name %in% "estimate1" ~ list(dplyr::first(by_levels)),
+          .data$stat_name %in% "estimate2" ~ list(dplyr::last(by_levels))
+        )
+    )
+}
diff --git a/R/ard_simple.R b/R/ard_simple.R
@@ -0,0 +1,162 @@
+#' Simple ARD Statistics
+#'
+#' Compute Analysis Results Data (ARD) for simple summary statistics from
+#' continuous and categorical data.
+#'
+#' @param data a data frame
+#' @param by columns to compute statistics by. Defaults to the columns
+#' returned by `dplyr::group_vars(data)`.
+#' @param statistics a named list of functions that return a summary statistic,
+#' e.g. `list(mpg = list(mean = \(x) mean(x, na.rm = TRUE)))`
+#' @param include columns to include in summaries. Default is `everything()`.
+#'
+#' @return a data frame
+#' @name ard_simple
+#'
+#' @examples
+#' ard_continuous(mtcars, by = cyl, include = c(mpg, hp))
+#' ard_categorical(mtcars, by = cyl, include = c(am, gear))
+NULL
+
+#' @rdname ard_simple
+#' @export
+ard_continuous <- function(data,
+                           by = dplyr::group_vars(data),
+                           statistics = NULL,
+                           include = everything()) {
+  # process arguments -----------------------------------------------------------
+  # resolve the tidyselect inputs to column names; the default `by` reads the
+  # grouping of `data`, so this must run before the data are ungrouped
+  by <- dplyr::select(data, {{ by }}) |> colnames()
+  all_summary_variables <-
+    dplyr::select(data, {{ include }}) |>
+    colnames() |>
+    setdiff(by)
+  data <- dplyr::ungroup(data)
+
+  # one `strataN` / `strataN_levels` name per `by` column. sprintf() is used
+  # because, unlike paste0(), it returns a zero-length result when `by` is
+  # empty, keeping the names the same length as `by`.
+  strata_names <- sprintf("strata%d", seq_along(by))
+  strata_level_names <- sprintf("strata%d_levels", seq_along(by))
+
+  # check inputs (will make this more robust later) ----------------------------
+
+  # setting default statistics -------------------------------------------------
+  # variables without an entry in `statistics` fall back to the default set
+  statistics <-
+    all_summary_variables |>
+    lapply(function(x) statistics[[x]] %||% .default_continuous_statistics()) |>
+    stats::setNames(nm = all_summary_variables)
+
+  # one row per variable/statistic pair that will be computed
+  df_statistics <-
+    lapply(
+      X = all_summary_variables,
+      FUN = function(x) {
+        dplyr::tibble(
+          variable = x,
+          stat_name = names(statistics[[x]])
+        )
+      }
+    ) |>
+    dplyr::bind_rows()
+
+  # calculate statistics -------------------------------------------------------
+  data |>
+    tidyr::nest(
+      .by = dplyr::all_of(by),
+      .key = "...ard_nested_data..."
+    ) |>
+    # setting column names for stratum levels: `strataN` holds the name of the
+    # N-th `by` column and that column itself is renamed to `strataN_levels`.
+    # as.list(by) yields one element per column (list(by) is always length 1
+    # and cannot be named when there are several `by` columns).
+    dplyr::mutate(!!!stats::setNames(as.list(by), strata_names), .before = 0L) |>
+    dplyr::rename(!!!stats::setNames(as.list(by), strata_level_names)) |>
+    dplyr::mutate(
+      ..ard_all_stats.. =
+        lapply(
+          .data[["...ard_nested_data..."]],
+          FUN = function(nested_data) {
+            # apply each requested function to its variable within this stratum
+            df_statistics |>
+              dplyr::mutate(
+                statistic =
+                  .mapply(
+                    FUN = function(variable, stat_name) {
+                      do.call(
+                        statistics[[variable]][[stat_name]],
+                        args = list(nested_data[[variable]])
+                      )
+                    },
+                    dots =
+                      list(
+                        df_statistics$variable,
+                        df_statistics$stat_name
+                      ),
+                    MoreArgs = NULL
+                  )
+              )
+          }
+        )
+    ) |>
+    dplyr::select(-"...ard_nested_data...") |>
+    tidyr::unnest(cols = "..ard_all_stats..") |>
+    dplyr::mutate(context = "continuous")
+}
+
+#' @rdname ard_simple
+#' @export
+ard_categorical <- function(data, by = dplyr::group_vars(data), include = everything()) {
+  # process arguments -----------------------------------------------------------
+  # resolve the tidyselect inputs to column names; the default `by` reads the
+  # grouping of `data`, so this must run before the data are ungrouped
+  by <- dplyr::select(data, {{ by }}) |> colnames()
+  all_summary_variables <-
+    dplyr::select(data, {{ include }}) |>
+    colnames() |>
+    setdiff(by)
+  data <- dplyr::ungroup(data)
+
+  # check inputs (will make this more robust later) ----------------------------
+
+  # calculating summary stats --------------------------------------------------
+  # first, calculating variable-level stats (the count statistics only)
+  statistics <-
+    rep_len(
+      list(.default_continuous_statistics()[c("N", "N_miss", "N_tot")]),
+      length.out = length(all_summary_variables)
+    ) |>
+    stats::setNames(nm = all_summary_variables)
+
+  # all_of() is passed unevaluated so it is resolved inside the selection that
+  # ard_continuous() performs, rather than being called here
+  df_ard <-
+    ard_continuous(
+      data = data,
+      by = dplyr::all_of(by),
+      statistics = statistics,
+      include = dplyr::all_of(all_summary_variables)
+    )
+
+  # second, tabulate variable
+  # levels and counts are derived from the same vector so they always have the
+  # same length, including for factors with levels unused in a stratum
+  tabulate_levels <- function(values) {
+    lvls <- sort(unique(values))
+    dplyr::tibble(
+      variable_level = lvls,
+      n = tabulate(match(values, lvls), nbins = length(lvls)),
+      p = .data$n / sum(.data$n)
+    )
+  }
+
+  df_ard_tabulation <-
+    lapply(
+      X = all_summary_variables,
+      FUN = function(x) {
+        ard_continuous(
+          # rows with missing values in `by` or the variable are not tabulated
+          data = data |> dplyr::select(dplyr::all_of(c(by, x))) |> tidyr::drop_na(),
+          by = dplyr::all_of(by),
+          statistics =
+            stats::setNames(list(list(table = tabulate_levels)), nm = x)
+        ) |>
+          # the single "table" statistic is expanded into one row per level
+          dplyr::select(-"stat_name") |>
+          tidyr::unnest(cols = "statistic") |>
+          dplyr::mutate(
+            dplyr::across(c("variable_level", "n", "p"), .fns = as.list)
+          ) |>
+          tidyr::pivot_longer(
+            cols = c("n", "p"),
+            names_to = "stat_name",
+            values_to = "statistic"
+          )
+      }
+    ) |>
+    dplyr::bind_rows()
+
+  # bind data frames with stats, and return to user ----------------------------
+  dplyr::bind_rows(df_ard_tabulation, df_ard) |>
+    dplyr::mutate(context = "categorical")
+}
+
+
+
+
+
+# Default summary functions for continuous variables.
+#
+# Returns a named list of one-argument functions; each takes a vector `x` and
+# returns a single summary value. `N`, `N_miss` and `N_tot` count the
+# non-missing, missing and total number of elements; the remaining functions
+# drop missing values before summarising.
+.default_continuous_statistics <- function() {
+  list(
+    N = function(x) sum(!is.na(x)),
+    N_miss = function(x) sum(is.na(x)),
+    N_tot = function(x) length(x),
+    mean = function(x) mean(x, na.rm = TRUE),
+    # namespaced because `sd` is not imported into the package namespace
+    sd = function(x) stats::sd(x, na.rm = TRUE),
+    min = function(x) min(x, na.rm = TRUE),
+    max = function(x) max(x, na.rm = TRUE)
+  )
+}
diff --git a/R/reexports.R b/R/reexports.R
@@ -0,0 +1,49 @@
+# dplyr ------------------------------------------------------------------------
+#' @export
+#' @importFrom dplyr %>%
+dplyr::`%>%`
+
+#' @importFrom dplyr starts_with
+#' @export
+dplyr::starts_with
+
+#' @importFrom dplyr ends_with
+#' @export
+dplyr::ends_with
+
+#' @importFrom dplyr contains
+#' @export
+dplyr::contains
+
+#' @importFrom dplyr matches
+#' @export
+dplyr::matches
+
+#' @importFrom dplyr num_range
+#' @export
+dplyr::num_range
+
+#' @importFrom dplyr all_of
+#' @export
+dplyr::all_of
+
+#' @importFrom dplyr any_of
+#' @export
+dplyr::any_of
+
+#' @importFrom dplyr everything
+#' @export
+dplyr::everything
+
+#' @importFrom dplyr last_col
+#' @export
+dplyr::last_col
+
+#' @importFrom dplyr one_of
+#' @export
+dplyr::one_of
+
+#' @importFrom dplyr vars
+#' @export
+dplyr::vars
+
diff --git a/R/utils.R b/R/utils.R
diff --git a/README.Rmd b/README.Rmd
@@ -0,0 +1,51 @@
+---
+output: github_document
+---
+
+<!-- README.md is generated from README.Rmd. Please edit that file -->
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  fig.path = "man/figures/README-",
+  out.width = "100%"
+)
+```
+
+# cardinal
+
+<!-- badges: start -->
+<!-- badges: end -->
+
+The goal of cardinal is to compute Analysis Results Data (ARD): data frames of summary and comparison statistics.
+
+## Installation
+
+You can install the development version of cardinal from [GitHub](https://github.com/) with:
+
+``` r
+# install.packages("devtools")
+devtools::install_github("insightsengineering/cardinal")
+```
+
+## Example
+
+These examples compute ARDs for continuous summaries, categorical tabulations, and a t-test:
+
+```{r example}
+library(cardinal)
+
+ard_continuous(mtcars, by = cyl, include = c(mpg, hp)) |> 
+  # flatten list columns into plain vectors for a nicer print
+  dplyr::mutate(across(where(is.list), unlist))
+
+ard_categorical(mtcars, by = cyl, include = c(am, gear)) |> 
+  # replace NULL entries with NA, then flatten list columns for a nicer print
+  dplyr::mutate(across(where(is.list), ~lapply(., \(x) if (!is.null(x)) x else NA) |> unlist()))
+
+ard_ttest(data = mtcars, by = "am", variable = "hp") |> 
+  # replace NULL entries with NA, then flatten list columns for a nicer print
+  dplyr::mutate(across(where(is.list), ~lapply(., \(x) if (!is.null(x)) x else NA) |> unlist()))
+```
+