Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ard_categorical(denominator) updates for missing variables #169

Merged
merged 7 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 59 additions & 13 deletions R/ard_categorical.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ ard_categorical <- function(data,
strata = {{ strata }}
)
data <- dplyr::ungroup(data)
.check_whether_na_counts(data[variables])

process_formula_selectors(
data = data[variables],
Expand Down Expand Up @@ -285,6 +286,22 @@ ard_categorical <- function(data,
)
}

# Abort when a column to be tabulated contains nothing but missing values.
#
# Every column of `data` is inspected in turn, by name. A column made up
# solely of `NA` is tolerated only when it inherits from "logical" or
# "factor"; any other all-`NA` column raises an error via `cli::cli_abort()`.
#
# Arguments:
#   data: data frame whose columns are checked.
#   call: forwarded as the `call` argument of `cli::cli_abort()`; defaults to
#     `parent.frame()`.
#
# Returns the result of `walk()` over the column names when no column fails.
#
# NOTE(review): the error text reads "cannot by tabulated" (presumably "be"
# was intended). It is reproduced unchanged here because the recorded test
# snapshot contains the message verbatim; fix both together.
.check_whether_na_counts <- function(data, call = parent.frame()) {
  # the parameter must stay named `x`: the message template interpolates {x}
  abort_if_untabulatable <- function(x) {
    column <- data[[x]]

    # nothing to report when a value is observed or the class is accepted
    if (!all(is.na(column)) || inherits(column, c("logical", "factor"))) {
      return(invisible(NULL))
    }

    cli::cli_abort(
      c("Column {.val {x}} is all missing and cannot by tabulated.",
        i = "Only columns of class {.cls logical} and {.cls factor} can be tabulated when all values are missing."
      ),
      call = call
    )
  }

  walk(names(data), abort_if_untabulatable)
}

#' Results from `table()` as Data Frame
#'
#' Takes the results from [table()] and returns them as a data frame.
Expand All @@ -301,31 +318,43 @@ ard_categorical <- function(data,
#' a character vector indicating columns in data
#' @param strata (`character`)\cr
#' a character vector indicating columns in data
#' @param useNA (`string`)\cr
#' one of `"no"` and `"always"`. Will be passed to `table(useNA)`.
#'
#' @keywords internal
#' @return data frame
#'
#' @examples
#' cards:::.table_as_df(ADSL, variable = "ARM", by = "AGEGR1", strata = NULL)
.table_as_df <- function(data, variable = NULL, by = NULL, strata = NULL, count_column = "...ard_n...") {
.table_as_df <- function(data, variable = NULL, by = NULL, strata = NULL,
useNA = c("no", "always"), count_column = "...ard_n...") {
useNA <- match.arg(useNA)
# tabulate results and save in data frame
...ard_tab_vars... <- c(by, strata, variable)
df_table <-
data[...ard_tab_vars...] |>
dplyr::mutate(across(where(is.logical), ~ factor(., levels = c("FALSE", "TRUE")))) |>
with(inject(table(!!!syms(...ard_tab_vars...)))) |>
with(inject(table(!!!syms(...ard_tab_vars...), useNA = !!useNA))) |>
dplyr::as_tibble(n = count_column)

# construct a matching data frame with the variables in their original type/class
df_original_types <-
lapply(c(by, strata, variable), function(x) .unique_and_sorted(data[[x]])) |>
lapply(
c(by, strata, variable),
function(x) .unique_and_sorted(data[[x]], useNA = useNA)
) |>
stats::setNames(c(by, strata, variable)) %>%
{tidyr::expand_grid(!!!.)} |> # styler: off
dplyr::arrange(!!!syms(rev(...ard_tab_vars...)))

# if all columns match, then replace the coerced character cols with their original type/class
all_cols_equal <-
every(c(by, strata, variable), ~ all(df_table[[.x]] == df_original_types[[.x]]))
every(
c(by, strata, variable),
~ all(
df_table[[.x]] == df_original_types[[.x]] | (is.na(df_table[[.x]]) & is.na(df_original_types[[.x]]))
)
)
if (isTRUE(all_cols_equal)) {
df_table <-
dplyr::bind_cols(df_original_types, df_table[count_column], .name_repair = "minimal")
Expand Down Expand Up @@ -380,11 +409,18 @@ ard_categorical <- function(data,
variables,
function(variable) {
.table_as_df(
tidyr::drop_na(data, all_of(c(by, strata, variable))),
data,
variable = variable,
by = by,
strata = strata,
count_column = "...ard_N..."
)
count_column = "...ard_N...",
useNA = "always"
) |>
tidyr::drop_na(all_of(c(by, strata, variable))) |>
dplyr::summarise(
.by = all_of(c(by, strata)),
...ard_N... = sum(.data$...ard_N...)
)
}
) |>
stats::setNames(variables)
Expand All @@ -411,11 +447,13 @@ ard_categorical <- function(data,
variables,
list(
.table_as_df(
tidyr::drop_na(denominator, any_of(c(by, strata))),
denominator,
by = intersect(by, names(denominator)),
strata = intersect(strata, names(denominator)),
count_column = "...ard_N..."
)
count_column = "...ard_N...",
useNA = "always"
) |>
tidyr::drop_na(any_of(c(by, strata)))
)
)
}
Expand All @@ -440,10 +478,18 @@ ard_categorical <- function(data,
variables,
function(variable) {
.table_as_df(
tidyr::drop_na(data, all_of(c(by, strata, variable))),
data,
variable = variable,
count_column = "...ard_N..."
)
by = by,
strata = strata,
count_column = "...ard_N...",
useNA = "always"
) |>
tidyr::drop_na(all_of(c(by, strata, variable))) |>
dplyr::summarise(
.by = all_of(variable),
...ard_N... = sum(.data$...ard_N...)
)
}
) |>
stats::setNames(variables)
Expand Down
27 changes: 11 additions & 16 deletions R/ard_dichotomous.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,22 +126,17 @@ maximum_variable_values <- function(data) {
imap(
values,
function(value, column) {
if (length(value) != 1L || is_empty(value) || is.na(value) || is.nan(value) || is.infinite(value)) {
cli::cli_abort(c(
"Error in argument {.arg values} for variable {.val {column}}.",
"i" = "The length of the value must be one and not one of {.val {c(NA, NaN, Inf)}}."
), call = call)
}
if (inherits(data[[column]], "factor") && !value %in% levels(data[[column]])) {
cli::cli_abort(c(
"Error in argument {.arg values} for variable {.val {column}}.",
"i" = "A value of {.val {value}} was passed, but must be one of {.val {levels(data[[column]])}}."
), call = call)
} else if (!value %in% data[[column]]) {
cli::cli_abort(c(
"Error in argument {.arg values} for variable {.val {column}}.",
"i" = "A value of {.val {value}} was passed, but must be one of {.val {unique(data[[column]]) |> na.omit() |> sort()}}."
), call = call)
accepted_values <- .unique_and_sorted(data[[column]])
ddsjoberg marked this conversation as resolved.
Show resolved Hide resolved
if (length(value) != 1L || !value %in% accepted_values) {
message <- "Error in argument {.arg values} for variable {.val {column}}."
cli::cli_abort(
if (length(value) != 1L) {
c(message, "i" = "The value must be one of {.val {accepted_values}}.")
} else {
c(message, "i" = "A value of {.val {value}} was passed, but must be one of {.val {accepted_values}}.")
},
call = call
)
}
}
) |>
Expand Down
19 changes: 15 additions & 4 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,28 @@
#' cards:::.unique_and_sorted(c(FALSE, TRUE, TRUE, FALSE))
#'
#' cards:::.unique_and_sorted(c(5, 5:1))
.unique_and_sorted <- function(x) {
.unique_and_sorted <- function(x, useNA = c("no", "always")) {
# styler: off
useNA <- match.arg(useNA)
# if a factor return a factor that includes the same levels (including unobserved levels)
if (inherits(x, "factor")) {
return(factor(levels(x), levels = levels(x)))
return(
factor(
if (useNA == "no") levels(x)
else c(levels(x), NA_character_),
levels = levels(x)
)
)
}
if (inherits(x, "logical")) {
return(c(TRUE, FALSE))
if (useNA == "no") return(c(TRUE, FALSE))
else return(c(TRUE, FALSE, NA))
}

# otherwise, return a simple unique and sort of the vector
unique(x) |> sort()
if (useNA == "no") return(unique(x) |> sort())
else return(unique(x) |> sort() |> c(NA))
# styler: on
}


Expand Down
4 changes: 4 additions & 0 deletions man/dot-table_as_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/dot-unique_and_sorted.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions tests/testthat/_snaps/ard_categorical.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,12 @@
getElement(names(sort(table(x), decreasing = TRUE)), 1)
}, length = function(x) length(x))))

# ard_categorical() and all NA columns

Code
ard_categorical(dplyr::mutate(ADSL, AGEGR1 = NA_character_), variables = AGEGR1)
Condition
Error in `ard_categorical()`:
! Column "AGEGR1" is all missing and cannot by tabulated.
i Only columns of class <logical> and <factor> can be tabulated when all values are missing.

4 changes: 2 additions & 2 deletions tests/testthat/_snaps/ard_dichotomous.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
Condition
Error in `ard_dichotomous()`:
! Error in argument `values` for variable "cyl".
i The length of the value must be one and not one of NA, NaN, and Inf.
i The value must be one of 4, 6, and 8.

---

Expand All @@ -37,7 +37,7 @@
Condition
Error in `ard_dichotomous()`:
! Error in argument `values` for variable "Species".
i A value of "not_a_species" was passed, but must be one of "setosa", "versicolor", and "virginica".
i A value of "not_a_species" was passed, but must be one of setosa, versicolor, and virginica.

---

Expand Down
Loading
Loading