From 00ddc5c6dc3744cae25419433939db58b2004f2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 30 Jan 2025 14:37:13 +0100 Subject: [PATCH 1/7] feat: Add funnel arg to `rel_to_df.duckdb_relation()` --- R/funnel.R | 7 +++++++ R/relational-duckdb.R | 24 ++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/R/funnel.R b/R/funnel.R index 358d73426..03e81a011 100644 --- a/R/funnel.R +++ b/R/funnel.R @@ -19,6 +19,13 @@ new_duckdb_tibble <- function(x, class = NULL, funnel = "open", error_call = cal class ) + funnel_parsed <- funnel_parse(funnel, error_call) + funnel_attr <- c( + rows = if (is.finite(funnel_parsed$n_rows)) funnel_parsed$n_rows, + cells = if (is.finite(funnel_parsed$n_cells)) funnel_parsed$n_cells + ) + attr(x, "funnel") <- funnel_attr + x } diff --git a/R/relational-duckdb.R b/R/relational-duckdb.R index f75fb4dda..2838f9e49 100644 --- a/R/relational-duckdb.R +++ b/R/relational-duckdb.R @@ -196,8 +196,28 @@ vec_ptype_safe <- function(x) { } #' @export -rel_to_df.duckdb_relation <- function(rel, ..., allow_materialization = TRUE, n_rows = Inf, n_cells = Inf) { - duckdb$rel_to_altrep(rel, allow_materialization, n_rows, n_cells) +rel_to_df.duckdb_relation <- function( + rel, + ..., + funnel = NULL, + allow_materialization = TRUE, + n_rows = Inf, + n_cells = Inf +) { + if (is.null(funnel)) { + # Legacy + return(duckdb$rel_to_altrep(rel, allow_materialization, n_rows, n_cells)) + } + + funnel_parsed <- funnel_parse(funnel) + out <- duckdb$rel_to_altrep( + rel, + allow_materialization = funnel_parsed$allow_materialization, + n_rows = funnel_parsed$n_rows, + n_cells = funnel_parsed$n_cells + ) + + new_duckdb_tibble(out, funnel = funnel) } #' @export From 622e3b98f9daee4b30703d8ba79b2a20ebfca53b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 30 Jan 2025 14:37:43 +0100 Subject: [PATCH 2/7] collect() --- R/funnel.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/funnel.R b/R/funnel.R index 03e81a011..5eaf0d7c3 100644 --- a/R/funnel.R +++ b/R/funnel.R @@ -110,7 +110,7 @@ collect.funneled_duckplyr_df <- function(x, ...) { out <- x } else { rel <- duckdb_rel_from_df(x) - out <- rel_to_df(rel, allow_materialization = TRUE) + out <- rel_to_df(rel, funnel = "open") out <- dplyr_reconstruct(out, x) } From 18c60d96fff0b6e84a8c549ba3ee5f56171ae02a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 30 Jan 2025 14:37:59 +0100 Subject: [PATCH 3/7] Lock mutate() --- R/mutate.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/mutate.R b/R/mutate.R index 256d3966c..600d06e77 100644 --- a/R/mutate.R +++ b/R/mutate.R @@ -23,7 +23,7 @@ mutate.duckplyr_df <- function(.data, ..., .by = NULL, .keep = c("all", "used", names_used <- character() names_new <- character() - current_data <- rel_to_df(rel) + current_data <- rel_to_df(rel, funnel = "closed") # FIXME: use fewer projections for (i in seq_along(dots)) { @@ -71,7 +71,7 @@ mutate.duckplyr_df <- function(.data, ..., .by = NULL, .keep = c("all", "used", } rel <- rel_project(rel, unname(exprs)) - current_data <- rel_to_df(rel) + current_data <- rel_to_df(rel, funnel = "closed") } if (length(by_names) > 0) { From 64f42283f607358727bbd33dbfeda7ad65a9c19f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 30 Jan 2025 14:38:06 +0100 Subject: [PATCH 4/7] read_sql_duckdb() --- R/sql.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/R/sql.R b/R/sql.R index 649d5d3b9..b14c964ba 100644 --- a/R/sql.R +++ b/R/sql.R @@ -32,12 +32,5 @@ read_sql_duckdb <- function(sql, ..., funnel = c(cells = 1e6), con = NULL) { meta_rel_register(rel, expr(duckdb$rel_from_sql(con, !!sql))) - df <- rel_to_df(rel, allow_materialization = FALSE) - out <- new_duckdb_tibble(df, funnel = "closed") - - if (!identical(funnel, "closed")) { - out <- as_duckdb_tibble(out, funnel = funnel) - } - - out + rel_to_df(rel, funnel = funnel) } From 5853aba9310d5260c5e65aeb9a394cc87e270ada Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 30 Jan 2025 14:38:12 +0100 Subject: [PATCH 5/7] Restore after summarise() --- R/summarise.R | 4 ++-- tests/testthat/test-funnel.R | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/R/summarise.R b/R/summarise.R index 245c02202..30fddfdd0 100644 --- a/R/summarise.R +++ b/R/summarise.R @@ -51,9 +51,9 @@ summarise.duckplyr_df <- function(.data, ..., .by = NULL, .groups = NULL) { out_rel <- oo_restore(out_rel, "___row_number") } - out <- rel_to_df(out_rel) + out <- rel_to_df(out_rel, funnel = get_funnel_duckplyr_df(.data)) # https://github.com/tidyverse/dplyr/pull/6988 - class(out) <- intersect(c("duckplyr_df", "tbl_df", "tbl", "data.frame"), class(.data)) + class(out) <- intersect(c("funneled_duckplyr_df", "duckplyr_df", "tbl_df", "tbl", "data.frame"), class(.data)) return(out) } diff --git a/tests/testthat/test-funnel.R b/tests/testthat/test-funnel.R index 541d651b3..d66e4ad0f 100644 --- a/tests/testthat/test-funnel.R +++ b/tests/testthat/test-funnel.R @@ -55,3 +55,12 @@ test_that("funneling after operation with success", { expect_identical(get_funnel_duckplyr_df(out), c(rows = 5)) expect_error(nrow(out), NA) }) + +test_that("funneling after summarise() with success", { + df <- duckdb_tibble(x = 1:10, .funnel = c(rows = 5)) + out <- df %>% + summarise(n()) + + expect_identical(get_funnel_duckplyr_df(out), c(rows = 5)) + expect_error(nrow(out), NA) +}) From 83e4acae0ece1f9ba86d3636a502bda498cf66dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 30 Jan 2025 14:37:53 +0100 Subject: [PATCH 6/7] duckfun() --- R/io2.R | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/R/io2.R b/R/io2.R index 768bc5013..350e0b392 100644 --- a/R/io2.R +++ b/R/io2.R @@ -144,13 +144,5 @@ duckfun <- function(table_function, args, ..., funnel) { meta_rel_register_file(rel, table_function, path, options) - # Start with funnel, to avoid unwanted materialization - df <- rel_to_df(rel, allow_materialization = FALSE) - out <- new_duckdb_tibble(df, funnel = "closed") - - if (!identical(funnel, "closed")) { - out <- as_duckdb_tibble(out, funnel = funnel) - } - - out + rel_to_df(rel, funnel = funnel) } From d5979b3ba3bdbcae1ec8f1267fadca5cf1f2a2ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 30 Jan 2025 16:07:53 +0100 Subject: [PATCH 7/7] duckplyr_reconstruct() --- R/funnel.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/R/funnel.R b/R/funnel.R index 5eaf0d7c3..d1f8f0f65 100644 --- a/R/funnel.R +++ b/R/funnel.R @@ -92,13 +92,9 @@ get_funnel_duckplyr_df <- function(x) { } duckplyr_reconstruct <- function(rel, template) { - funnel <- get_funnel_duckplyr_df(template) - funnel_parsed <- funnel_parse(funnel) out <- rel_to_df( rel, - allow_materialization = funnel_parsed$allow_materialization, - n_rows = funnel_parsed$n_rows, - n_cells = funnel_parsed$n_cells + funnel = get_funnel_duckplyr_df(template) ) dplyr_reconstruct(out, template) }