Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial commit #1

Merged
merged 1 commit into from
Jul 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
^cardinal\.Rproj$
^\.Rproj\.user$
^LICENSE\.md$
^README\.Rmd$
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ License: MIT + file LICENSE
Imports:
cli (>= 3.6.1),
dplyr (>= 1.1.2),
rlang (>= 1.1.1)
rlang (>= 1.1.1),
tidyr (>= 1.3.0)
Suggests:
broom
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
27 changes: 27 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,3 +1,30 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(all_of)
export(any_of)
export(ard_categorical)
export(ard_continuous)
export(ard_ttest)
export(contains)
export(ends_with)
export(everything)
export(last_col)
export(matches)
export(num_range)
export(one_of)
export(starts_with)
export(vars)
importFrom(dplyr,"%>%")
importFrom(dplyr,all_of)
importFrom(dplyr,any_of)
importFrom(dplyr,contains)
importFrom(dplyr,ends_with)
importFrom(dplyr,everything)
importFrom(dplyr,last_col)
importFrom(dplyr,matches)
importFrom(dplyr,num_range)
importFrom(dplyr,one_of)
importFrom(dplyr,starts_with)
importFrom(dplyr,vars)
importFrom(rlang,"%||%")
43 changes: 43 additions & 0 deletions R/ard_comparison.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' Comparison ARD Statistics
#'
#' @param data a data frame
#' @param by character column name to compare by
#' @param variable character column name to be compared
#' @param ... arguments passed to method.
#'
#' @return data frame
#' @name ard_comparison
#'
#' @examples
#' ard_ttest(data = mtcars, by = "am", variable = "hp")
NULL

#' @rdname ard_comparison
#' @param conf.level confidence level of the interval. Passed to
#'   [stats::t.test()] and also returned as one of the statistics.
#'   Default is `0.95`.
#' @export
ard_ttest <- function(data, by, variable, conf.level = 0.95, ...) {
  # check installed packages ---------------------------------------------------
  rlang::check_installed("broom")

  # check inputs ---------------------------------------------------------------
  is_column_name <- function(x) {
    is.character(x) && length(x) == 1L && x %in% names(data)
  }
  if (!is_column_name(by)) {
    stop("`by` must be the name of a single column in `data`.", call. = FALSE)
  }
  if (!is_column_name(variable)) {
    stop(
      "`variable` must be the name of a single column in `data`.",
      call. = FALSE
    )
  }

  # levels of `by` that take part in the test ----------------------------------
  # The formula interface of t.test() drops rows with a missing value in either
  # column before it forms the two groups, so the labels for `estimate1` and
  # `estimate2` must be derived from the complete rows only.
  used_rows <- stats::complete.cases(data[[variable]], data[[by]])
  by_levels <- data[[by]][used_rows] |> unique() |> sort()

  # perform t-test and format results ------------------------------------------
  stats::t.test(data[[variable]] ~ data[[by]], conf.level = conf.level, ...) |>
    broom::tidy() |>
    dplyr::mutate(
      conf.level = conf.level,
      # wrap every statistic in a list so columns of mixed type can be stacked
      # into a single `statistic` list-column below
      dplyr::across(dplyr::everything(), .fns = list),
      strata1 = by,
      variable = variable,
      context = "ttest"
    ) |>
    tidyr::pivot_longer(
      cols = -c("strata1", "variable", "context"),
      names_to = "stat_name",
      values_to = "statistic"
    ) |>
    dplyr::mutate(
      # only the two group means are tied to a level of `by`; every other
      # statistic keeps the default (empty) entry
      strata1_level =
        dplyr::case_when(
          .data$stat_name %in% "estimate1" ~ list(dplyr::first(by_levels)),
          .data$stat_name %in% "estimate2" ~ list(dplyr::last(by_levels))
        )
    )
}
162 changes: 162 additions & 0 deletions R/ard_simple.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#' Simple ARD Statistics
#'
#' Compute Analysis Results Data (ARD) for simple summary statistics from
#' continuous and categorical data.
#'
#' @param data a data frame
#' @param by columns to compute statistics by. Default are the columns
#' returned by `dplyr::group_vars(data)`.
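#' @param statistics a named list with one element per variable; each element is
#' itself a named list of functions that each return a summary statistic,
#' e.g. `list(mpg = list(mean = \(x) mean(x, na.rm = TRUE)))`. Variables without
#' an entry use the default statistics.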
#' @param include columns to include in summaries. Default is `everything()`.
#'
#' @return a data frame
#' @name ard_simple
#'
#' @examples
#' ard_continuous(mtcars, by = cyl, include = c(mpg, hp))
#' ard_categorical(mtcars, by = cyl, include = c(am, gear))
NULL

#' @rdname ard_simple
#' @export
ard_continuous <- function(data,
                           by = dplyr::group_vars(data),
                           statistics = NULL,
                           include = everything()) {
  # process arguments -----------------------------------------------------------
  # `by` must be resolved before ungrouping: its default reads the grouping
  # variables of `data`
  by <- dplyr::select(data, {{ by }}) |> colnames()
  all_summary_variables <-
    dplyr::select(data, {{ include }}) |>
    colnames() |>
    setdiff(by)
  data <- dplyr::ungroup(data)

  # check inputs (will make this more robust later) ----------------------------

  # setting default statistics -------------------------------------------------
  # variables without a user-supplied entry fall back to the default set
  statistics <-
    all_summary_variables |>
    lapply(function(x) statistics[[x]] %||% .default_continuous_statistics()) |>
    stats::setNames(nm = all_summary_variables)

  # one row per (variable, statistic) pair that has to be computed
  df_statistics <-
    lapply(
      X = all_summary_variables,
      FUN = function(x) {
        dplyr::tibble(
          variable = x,
          stat_name = names(statistics[[x]])
        )
      }
    ) |>
    dplyr::bind_rows()

  # column specifications for the strata ---------------------------------------
  # `as.list()` yields one entry per `by` column. Wrapping the whole vector in
  # `list()` would always give a single entry, which cannot be named when `by`
  # holds zero or several columns.
  strata_names <- paste0("strata", seq_along(by))
  strata_columns <- stats::setNames(as.list(by), strata_names)
  strata_level_columns <-
    stats::setNames(as.list(by), paste0(strata_names, "_levels"))

  # calculate statistics -------------------------------------------------------
  data |>
    tidyr::nest(
      .by = dplyr::all_of(by),
      .key = "...ard_nested_data..."
    ) |>
    # strataN holds the name of the N-th `by` column ...
    dplyr::mutate(!!!strata_columns, .before = 0L) |>
    # ... and strataN_levels holds its value for the stratum
    dplyr::rename(!!!strata_level_columns) |>
    dplyr::mutate(
      ..ard_all_stats.. =
        lapply(
          .data[["...ard_nested_data..."]],
          FUN = function(nested_data) {
            # apply every requested function to its variable within the stratum
            df_statistics |>
              dplyr::mutate(
                statistic =
                  .mapply(
                    FUN = function(variable, stat_name) {
                      do.call(
                        statistics[[variable]][[stat_name]],
                        args = list(nested_data[[variable]])
                      )
                    },
                    dots =
                      list(
                        df_statistics$variable,
                        df_statistics$stat_name
                      ),
                    MoreArgs = NULL
                  )
              )
          }
        )
    ) |>
    dplyr::select(-"...ard_nested_data...") |>
    tidyr::unnest(cols = "..ard_all_stats..") |>
    dplyr::mutate(context = "continuous")
}

#' @rdname ard_simple
#' @export
ard_categorical <- function(data,
                            by = dplyr::group_vars(data),
                            include = everything()) {
  # process arguments -----------------------------------------------------------
  # `by` must be resolved before ungrouping: its default reads the grouping
  # variables of `data`
  by <- dplyr::select(data, {{ by }}) |> colnames()
  all_summary_variables <-
    dplyr::select(data, {{ include }}) |>
    colnames() |>
    setdiff(by)
  data <- dplyr::ungroup(data)

  # check inputs (will make this more robust later) ----------------------------

  # calculating summary stats --------------------------------------------------
  # first, calculating variable-level stats (counts of non-missing, missing and
  # total observations)
  statistics <-
    rep_len(
      list(.default_continuous_statistics()[c("N", "N_miss", "N_tot")]),
      length.out = length(all_summary_variables)
    ) |>
    stats::setNames(nm = all_summary_variables)

  # the selections are passed as plain `all_of()` calls so they are evaluated
  # inside the selecting function rather than injected beforehand
  df_ard <-
    ard_continuous(
      data = data,
      by = dplyr::all_of(by),
      statistics = statistics,
      include = dplyr::all_of(all_summary_variables)
    )

  # second, tabulate variable
  # Returns one row per observed level with its count `n` and proportion `p`.
  # table() reports a zero count for factor levels that do not occur in the
  # stratum; those are dropped so the counts line up with the observed levels.
  tabulate_levels <- function(x) {
    observed_levels <- unique(x) |> sort()
    counts <- table(x)
    counts <- as.integer(counts[counts > 0L])
    dplyr::tibble(
      variable_level = observed_levels,
      n = counts,
      p = counts / sum(counts)
    )
  }

  df_ard_tabulation <-
    lapply(
      X = all_summary_variables,
      FUN = function(variable) {
        ard_continuous(
          # missing values are excluded from the tabulation
          data =
            data |>
              dplyr::select(dplyr::all_of(c(by, variable))) |>
              tidyr::drop_na(),
          by = dplyr::all_of(by),
          statistics =
            list(list(table = tabulate_levels)) |>
              stats::setNames(nm = variable)
        ) |>
          dplyr::select(-"stat_name") |>
          tidyr::unnest(cols = "statistic") |>
          # list-columns allow levels and statistics of different types to be
          # stacked across variables
          dplyr::mutate(
            dplyr::across(c("variable_level", "n", "p"), .fns = as.list)
          ) |>
          tidyr::pivot_longer(
            cols = c("n", "p"),
            names_to = "stat_name",
            values_to = "statistic"
          )
      }
    ) |>
    dplyr::bind_rows()

  # bind data frames with stats, and return to user ----------------------------
  dplyr::bind_rows(df_ard_tabulation, df_ard) |>
    dplyr::mutate(context = "categorical")
}





#' Default summary statistics
#'
#' Builds the set of summary functions applied to a variable when the caller
#' does not supply any. Each function takes a vector and returns one value;
#' missing values are ignored by `mean`, `sd`, `min` and `max`.
#'
#' @return a named list of functions: `N` (non-missing count), `N_miss`
#'   (missing count), `N_tot` (length), `mean`, `sd`, `min` and `max`.
#' @keywords internal
#' @noRd
.default_continuous_statistics <- function() {
  list(
    N = function(x) sum(!is.na(x)),
    N_miss = function(x) sum(is.na(x)),
    N_tot = function(x) length(x),
    mean = function(x) mean(x, na.rm = TRUE),
    # namespaced: `sd` lives in stats, which this package does not import
    sd = function(x) stats::sd(x, na.rm = TRUE),
    min = function(x) min(x, na.rm = TRUE),
    max = function(x) max(x, na.rm = TRUE)
  )
}
49 changes: 49 additions & 0 deletions R/reexports.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# dplyr ------------------------------------------------------------------------
# Each entry below imports one object from dplyr and exports it again from this
# package: the pipe, the column-selection helpers, and `vars()`.
#' @importFrom dplyr %>%
#' @export
dplyr::`%>%`

#' @importFrom dplyr starts_with
#' @export
dplyr::starts_with

#' @importFrom dplyr ends_with
#' @export
dplyr::ends_with

#' @importFrom dplyr contains
#' @export
dplyr::contains

#' @importFrom dplyr matches
#' @export
dplyr::matches

#' @importFrom dplyr num_range
#' @export
dplyr::num_range

#' @importFrom dplyr all_of
#' @export
dplyr::all_of

#' @importFrom dplyr any_of
#' @export
dplyr::any_of

#' @importFrom dplyr everything
#' @export
dplyr::everything

#' @importFrom dplyr last_col
#' @export
dplyr::last_col

#' @importFrom dplyr one_of
#' @export
dplyr::one_of

#' @importFrom dplyr vars
#' @export
dplyr::vars

Empty file added R/utils.R
Empty file.
51 changes: 51 additions & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---
output: github_document
---

<!-- README.md is generated from README.Rmd. Please edit that file -->

```{r, include = FALSE}
# Chunk defaults for the rendered README: show source and output in a single
# block, prefix output lines with "#>", write figures under man/figures/ with a
# "README-" file prefix, and display them at full width.
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "man/figures/README-",
out.width = "100%"
)
```

# cardinal

<!-- badges: start -->
<!-- badges: end -->

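The goal of cardinal is to compute Analysis Results Data (ARD): summary statistics for continuous and categorical data, and comparison statistics such as t-tests, returned as data frames.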

## Installation

You can install the development version of cardinal from [GitHub](https://github.com/) with:

``` r
# install.packages("devtools")
devtools::install_github("insightsengineering/cardinal")
```

## Example

This is a basic example which shows you how to solve a common problem:

```{r example}
library(cardinal)

# summary statistics for mpg and hp within each level of cyl
ard_continuous(mtcars, by = cyl, include = c(mpg, hp)) |>
# flatten the list columns into atomic vectors for a nicer print
dplyr::mutate(across(where(is.list), unlist))

# counts and proportions of am and gear within each level of cyl
ard_categorical(mtcars, by = cyl, include = c(am, gear)) |>
# flatten the list columns for a nicer print; NULL entries are replaced by NA
# first so that unlist() keeps one value per row
dplyr::mutate(across(where(is.list), ~lapply(., \(x) if (!is.null(x)) x else NA) |> unlist()))

# t-test of hp between the two levels of am
ard_ttest(data = mtcars, by = "am", variable = "hp") |>
# flatten the list columns for a nicer print; NULL entries are replaced by NA
# first so that unlist() keeps one value per row
dplyr::mutate(across(where(is.list), ~lapply(., \(x) if (!is.null(x)) x else NA) |> unlist()))
```

Loading