Skip to content

Commit

Permalink
Merge pull request #1889 from rstudio/extract-body-enhance
Browse files Browse the repository at this point in the history
Enhance the `extract_body()` function by adding inclusion options
  • Loading branch information
rich-iannone authored Sep 28, 2024
2 parents 78f8afb + ae3c249 commit f9626de
Show file tree
Hide file tree
Showing 6 changed files with 1,195 additions and 111 deletions.
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
# gt (development version)

## Breaking changes

* The `extract_body()` function now, by default, will not display columns that have been hidden (e.g., by `cols_hide()` or `cols_merge*()`); the previous behavior can be restored by using `incl_hidden_cols = TRUE`.

## New features

* Creating a caption with `tab_caption()` will now be preserved in LaTeX output with `as_latex()`. Cross-referencing a table using the internal cross-referencing system of **bookdown** is now enabled for PDF and HTML outputs (for HTML, set `options("htmltools.preserve.raw" = FALSE)`). Quarto users should use the `tbl-cap` and `label` cell options.

* PDF output now defaults to a full-width floating environment using `tabular*` (@AronGullickson, #1588). Float position can be controlled by the `latex.tbl.pos` argument in `tab_options()`. Quarto users can alternatively use the `tbl-pos` argument to control positioning. To use a `longtable` environment instead, use `tab_options(latex.use_longtable = TRUE)`.

* New arguments have been added to `extract_body()` to better control which columns will be present in the returned data (#1875). (#1889)

* The `locale` argument of `gt()` now defaults to `getOption("gt.locale")` if set (#1894).

## Interactive table support
Expand Down
245 changes: 234 additions & 11 deletions R/extract.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,25 @@
#' value for `build_stage` then the entire build for the table body (i.e., up
#' to and including the `"footnotes_attached"` stage) will be performed before
#' returning the data frame.
#'
#' @param incl_hidden_cols *Should hidden columns be included?*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' Certain columns may be hidden from final display via [cols_hide()]. By
#' default, those columns won't be part of the extracted data frame. However,
#' we can choose to include them by using `incl_hidden_cols = TRUE`.
#'
#' @param incl_stub_cols *Should stub columns be included?*
#'
#' `scalar<logical>` // *default:* `TRUE`
#'
#' Any stub columns in the **gt** object (which may consist of a grouping
#' column and a column for row labels) are included in the extracted data as
#' a frame of reference, and they are given the distinctive names
#' `"::group_id::"` and `"::rowname::"`. We can exclude them by setting
#' `incl_stub_cols = FALSE`.
#'
#' @inheritParams rlang::args_dots_empty
#'
#' @param output *Output format*
#'
Expand All @@ -95,6 +114,61 @@
#' `"html"` (the default), `"latex"`, `"rtf"`, or `"word"`.
#'
#' @return A data frame or tibble object containing the table body.
#'
#' @section Examples:
#'
#' Use a modified version of the [`sp500`] dataset to create a **gt** table with
#' row groups and row labels. Formatting will be applied to the date- and
#' currency-based columns.
#'
#' ```r
#' gt_tbl <-
#' sp500 |>
#' dplyr::filter(date >= "2015-01-05" & date <= "2015-01-16") |>
#' dplyr::arrange(date) |>
#' dplyr::mutate(week = paste0("W", strftime(date, format = "%V"))) |>
#' dplyr::select(-adj_close, -volume) |>
#' gt(
#' rowname_col = "date",
#' groupname_col = "week"
#' ) |>
#' fmt_date(columns = date, date_style = "day_month_year") |>
#' fmt_currency(columns = c(open, high, low, close)) |>
#' cols_hide(columns = c(high, low))
#'
#' gt_tbl
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_extract_body_1.png")`
#' }}
#'
#' Using `extract_body()` on the **gt** object (`gt_tbl`) will provide us with
#' a tibble that contains the fully built data cells for the `output` context
#' (in this case, `"html"`).
#'
#' ```{r}
#' extract_body(gt_tbl)
#' ```
#'
#' To provide us with a better frame of reference, the grouping and row label
#' values are provided as the first columns in the returned output. We could
#' suppress those in the output by setting `incl_stub_cols = FALSE`.
#'
#' ```{r}
#' extract_body(gt_tbl, incl_stub_cols = FALSE)
#' ```
#'
#' The `high` and `low` columns were hidden via [`cols_hide()`] and so they
#' won't be shown in the returned data unless we use `incl_hidden_cols = TRUE`.
#'
#' ```{r}
#' extract_body(
#' gt_tbl,
#' incl_stub_cols = FALSE,
#' incl_hidden_cols = TRUE
#' )
#' ```
#'
#' @family table export functions
#' @section Function ID:
Expand All @@ -107,49 +181,156 @@
extract_body <- function(
data,
build_stage = NULL,
incl_hidden_cols = FALSE,
incl_stub_cols = TRUE,
...,
output = c("html", "latex", "rtf", "word", "grid")
) {

# Perform input object validation
stop_if_not_gt_tbl(data = data)

# If `build_stage` is given a keyword value, check that value is valid
if (!is.null(build_stage)) {

rlang::arg_match0(
build_stage,
values = c(
"init", "fmt_applied", "sub_applied", "unfmt_included", "cols_merged",
"body_reassembled", "text_transformed", "footnotes_attached"
)
)
}

# Ensure that `output` is matched correctly to one option
output <- rlang::arg_match(output)
rlang::check_dots_empty()

data <- dt_body_build(data = data)
# Generate vector of columns to include in output
if (isTRUE(incl_hidden_cols)) {

boxhead_df <- dt_boxhead_get(data = data)

included_cols <-
boxhead_df$var[boxhead_df$type %in% c("default", "hidden")]

} else {
included_cols <- dt_boxhead_get_vars_default(data = data)
}

# If there are any stub columns, get the column names for that component
group_col <- dt_boxhead_get_vars_groups(data = data)
if (is.na(group_col)) {
group_col <- NULL
}

rowname_col <- dt_boxhead_get_var_stub(data = data)
if (is.na(rowname_col)) {
rowname_col <- NULL
}

stub_cols <- c(group_col, rowname_col)

if (isTRUE(incl_stub_cols)) {

# Add stub columns to `included_cols`, if any are present; and deduplicate
included_cols <- unique(c(stub_cols, included_cols))
}

data <- dt_body_build(data = data)

if (identical(build_stage, "init")) {
return(data[["_body"]])

out_df <-
assemble_body_extract(
data = data,
included_cols = included_cols,
incl_stub_cols = incl_stub_cols,
group_col = group_col,
rowname_col = rowname_col
)

return(out_df)
}

data <- render_formats(data = data, context = output)

if (identical(build_stage, "fmt_applied")) {
return(data[["_body"]])

out_df <-
assemble_body_extract(
data = data,
included_cols = included_cols,
incl_stub_cols = incl_stub_cols,
group_col = group_col,
rowname_col = rowname_col
)

return(out_df)
}

data <- render_substitutions(data = data, context = output)

if (identical(build_stage, "sub_applied")) {
return(data[["_body"]])

out_df <-
assemble_body_extract(
data = data,
included_cols = included_cols,
incl_stub_cols = incl_stub_cols,
group_col = group_col,
rowname_col = rowname_col
)

return(out_df)
}

data <- migrate_unformatted_to_output(data = data, context = output)

if (identical(build_stage, "unfmt_included")) {
return(data[["_body"]])

out_df <-
assemble_body_extract(
data = data,
included_cols = included_cols,
incl_stub_cols = incl_stub_cols,
group_col = group_col,
rowname_col = rowname_col
)

return(out_df)
}

data <- perform_col_merge(data = data, context = output)

if (identical(build_stage, "cols_merged")) {
return(data[["_body"]])

out_df <-
assemble_body_extract(
data = data,
included_cols = included_cols,
incl_stub_cols = incl_stub_cols,
group_col = group_col,
rowname_col = rowname_col
)

return(out_df)
}

data <- dt_body_reassemble(data = data)

if (identical(build_stage, "body_reassembled")) {
return(data[["_body"]])

out_df <-
assemble_body_extract(
data = data,
included_cols = included_cols,
incl_stub_cols = incl_stub_cols,
group_col = group_col,
rowname_col = rowname_col
)

return(out_df)
}

data <- reorder_stub_df(data = data)
Expand All @@ -159,7 +340,17 @@ extract_body <- function(
data <- perform_text_transforms(data = data)

if (identical(build_stage, "text_transformed")) {
return(data[["_body"]])

out_df <-
assemble_body_extract(
data = data,
included_cols = included_cols,
incl_stub_cols = incl_stub_cols,
group_col = group_col,
rowname_col = rowname_col
)

return(out_df)
}

data <- dt_boxhead_build(data = data, context = output)
Expand All @@ -173,11 +364,43 @@ extract_body <- function(
data <- resolve_footnotes_styles(data = data, tbl_type = "footnotes")
data <- apply_footnotes_to_output(data = data, context = output)

if (is.null(build_stage) || identical(build_stage, "footnotes_attached")) {
return(data[["_body"]])
if (is.null(build_stage) || identical(build_stage, "footnotes_attached")) {

out_df <-
assemble_body_extract(
data = data,
included_cols = included_cols,
incl_stub_cols = incl_stub_cols,
group_col = group_col,
rowname_col = rowname_col
)
}

data[["_body"]]
out_df
}

# Assemble the data frame returned by `extract_body()` at a given build stage.
#
# Arguments:
# - data: an object whose `"_body"` element holds the body table to subset
# - included_cols: character vector of column names to keep, in output order
# - incl_stub_cols: if `TRUE`, any stub columns present in the output are
#   renamed to the marker names `"::group_id::"` and `"::rowname::"`
# - group_col: name of the row-group column, or `NULL` if there is none
# - rowname_col: name of the row-label column, or `NULL` if there is none
#
# Returns the column subset of `data[["_body"]]`, always as a data frame.
assemble_body_extract <- function(
    data,
    included_cols,
    incl_stub_cols,
    group_col,
    rowname_col
) {

  # `drop = FALSE` guarantees a data frame even when a single column is
  # selected from a base data.frame (where `[` would otherwise collapse the
  # result to a bare vector); for tibbles this is already the default
  out_df <- data[["_body"]][, included_cols, drop = FALSE]

  if (isTRUE(incl_stub_cols)) {

    # Replace the stub column names with fixed marker names so that they
    # are easy to tell apart from the regular data columns
    if (!is.null(group_col)) {
      names(out_df)[names(out_df) == group_col] <- "::group_id::"
    }

    if (!is.null(rowname_col)) {
      names(out_df)[names(out_df) == rowname_col] <- "::rowname::"
    }
  }

  out_df
}

# extract_summary() ------------------------------------------------------------
Expand Down
Binary file added images/man_extract_body_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

3 comments on commit f9626de

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.