Skip to content

Commit

Permalink
Improve performance for summary rows (follow-up to rstudio#1732) with…
Browse files Browse the repository at this point in the history
… `.by` instead of `group_by()`, `vctrs::vec_slice()`
  • Loading branch information
olivroy committed Jul 5, 2024
1 parent f4a8dc7 commit ec7e83d
Showing 1 changed file with 24 additions and 14 deletions.
38 changes: 24 additions & 14 deletions R/dt_summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,9 @@ dt_summary_build <- function(data, context) {

# Filter to only the groups targeted in the group-wise case
select_data_tbl <-
dplyr::filter(
vctrs::vec_slice(
select_data_tbl,
.data[[group_id_col_private]] %in% groups
select_data_tbl[[group_id_col_private]] %in% groups
)
}

Expand Down Expand Up @@ -418,20 +418,23 @@ dt_summary_build <- function(data, context) {
unname(labels_processed[names(labels_processed) == summary_dfs_display[i, ][["::row_id::"]]])
}

summary_dfs_display <-
dplyr::mutate_at(
summary_dfs_display,
.vars = columns_excl,
.funs = function(x) {NA_character_}
)
# Set the excluded columns to NA
summary_dfs_display[, columns_excl] <- NA_character_

for (group in groups) {

# vctrs::vec_slice() is a faster replacement for dplyr::filter()
group_summary_data_df <-
dplyr::filter(summary_dfs_data, .data[[group_id_col_private]] == .env$group)
vctrs::vec_slice(
summary_dfs_data,
summary_dfs_data[[group_id_col_private]] == group
)

group_summary_display_df <-
dplyr::filter(summary_dfs_display, .data[[group_id_col_private]] == .env$group)
vctrs::vec_slice(
summary_dfs_display,
summary_dfs_display[[group_id_col_private]] == group
)

group_summary_display_df$`::side::` <- side

Expand Down Expand Up @@ -461,11 +464,18 @@ dt_summary_build <- function(data, context) {
arrangement <-
unique(summary_df_display_list[[i]][, rowname_col_private, drop = TRUE])

# remove ::group_id:: from data
summary_df_display_list[[i]][[group_id_col_private]] <- NULL
# apply last_non_na() to every column, per row name
summary_df_display_list[[i]] <-
summary_df_display_list[[i]] %>%
dplyr::select(-.env$group_id_col_private) %>%
dplyr::group_by(.data[[rowname_col_private]]) %>%
dplyr::summarize_all(last_non_na)
dplyr::summarise(
.data = summary_df_display_list[[i]],
dplyr::across(
dplyr::everything(),
.fns = last_non_na
),
.by = dplyr::all_of(rowname_col_private)
)

summary_df_display_list[[i]] <-
summary_df_display_list[[i]][
Expand Down

0 comments on commit ec7e83d

Please sign in to comment.