Skip to content

Commit

Permalink
Merge pull request #46 from Tmonster/add_parquet_write_support
Browse files Browse the repository at this point in the history
  • Loading branch information
krlmlr authored Dec 2, 2023
2 parents a266e34 + 49688dc commit fe77151
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 1 deletion.
4 changes: 4 additions & 0 deletions R/cpp11.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,10 @@ rapi_execute <- function(stmt, arrow, integer64) {
.Call(`_duckdb_rapi_execute`, stmt, arrow, integer64)
}

# Binding for the native routine `_duckdb_rapi_rel_to_parquet`.
# Passes `rel` and `file_name` straight through to the native code and hands
# the routine's result back invisibly, so nothing is printed at top level.
rapi_rel_to_parquet <- function(rel, file_name) {
  native_result <- .Call(`_duckdb_rapi_rel_to_parquet`, rel, file_name)
  invisible(native_result)
}

# Binding for the native routine `_duckdb_rapi_adbc_init_func`.
# Takes no arguments and returns whatever the native routine yields.
rapi_adbc_init_func <- function() {
.Call(`_duckdb_rapi_adbc_init_func`)
}
Expand Down
2 changes: 2 additions & 0 deletions R/relational.R
Original file line number Diff line number Diff line change
Expand Up @@ -399,3 +399,5 @@ rel_from_table_function <- function(con, function_name, positional_parameters =
rapi_rel_from_table_function(con@conn_ref, function_name, positional_parameters, named_parameters)
}

# Write a relation to a Parquet file.
#
# rel:       relation object to write (e.g. the result of `rel_from_df()`).
# file_name: path of the Parquet file to write.
#
# Both arguments are forwarded unchanged to `rapi_rel_to_parquet()`, and its
# (invisible) result is returned as-is.
rel_to_parquet <- function(rel, file_name) {
  rapi_rel_to_parquet(rel, file_name)
}

9 changes: 9 additions & 0 deletions src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,14 @@ extern "C" SEXP _duckdb_rapi_execute(SEXP stmt, SEXP arrow, SEXP integer64) {
return cpp11::as_sexp(rapi_execute(cpp11::as_cpp<cpp11::decay_t<duckdb::stmt_eptr_t>>(stmt), cpp11::as_cpp<cpp11::decay_t<bool>>(arrow), cpp11::as_cpp<cpp11::decay_t<bool>>(integer64)));
END_CPP11
}
// statement.cpp
// Forward declaration of the implementation that this entry point wraps.
void rapi_rel_to_parquet(duckdb::rel_extptr_t rel, std::string file_name);
// C-linkage entry point invoked from R via .Call with two arguments
// (the relation handle and the target file name).
// Each SEXP is converted to its C++ type with cpp11::as_cpp before the call.
// The wrapped function returns void, so R_NilValue is returned to R.
// NOTE(review): BEGIN_CPP11/END_CPP11 presumably translate C++ exceptions into
// R errors -- confirm against the cpp11 headers.
extern "C" SEXP _duckdb_rapi_rel_to_parquet(SEXP rel, SEXP file_name) {
BEGIN_CPP11
rapi_rel_to_parquet(cpp11::as_cpp<cpp11::decay_t<duckdb::rel_extptr_t>>(rel), cpp11::as_cpp<cpp11::decay_t<std::string>>(file_name));
return R_NilValue;
END_CPP11
}
// utils.cpp
SEXP rapi_adbc_init_func();
extern "C" SEXP _duckdb_rapi_adbc_init_func() {
Expand Down Expand Up @@ -416,6 +424,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_duckdb_rapi_rel_sql", (DL_FUNC) &_duckdb_rapi_rel_sql, 2},
{"_duckdb_rapi_rel_to_altrep", (DL_FUNC) &_duckdb_rapi_rel_to_altrep, 1},
{"_duckdb_rapi_rel_to_df", (DL_FUNC) &_duckdb_rapi_rel_to_df, 1},
{"_duckdb_rapi_rel_to_parquet", (DL_FUNC) &_duckdb_rapi_rel_to_parquet, 2},
{"_duckdb_rapi_rel_to_sql", (DL_FUNC) &_duckdb_rapi_rel_to_sql, 1},
{"_duckdb_rapi_rel_tostring", (DL_FUNC) &_duckdb_rapi_rel_tostring, 1},
{"_duckdb_rapi_rel_union_all", (DL_FUNC) &_duckdb_rapi_rel_union_all, 2},
Expand Down
4 changes: 4 additions & 0 deletions src/statement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,3 +357,7 @@ bool FetchArrowChunk(ChunkScanState &scan_state, ClientProperties options, Appen
return duckdb_execute_R_impl(result, integer64);
}
}

// Writes the contents of the relation wrapped by `rel` to `file_name` by
// delegating to the relation's WriteParquet method. Returns nothing; any
// failure surfaces through whatever WriteParquet raises.
[[cpp11::register]] void rapi_rel_to_parquet(duckdb::rel_extptr_t rel, std::string file_name) {
	// Name the wrapped relation first so the write call reads on its own line.
	auto &wrapped_relation = rel->rel;
	wrapped_relation->WriteParquet(file_name);
}
37 changes: 37 additions & 0 deletions tests/testthat/test_parquet.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,40 @@ test_that("parquet reader works with the binary as string flag", {
expect_true(res[1] == "VARCHAR")
dbDisconnect(con, shutdown = TRUE)
})

test_that("rel_to_parquet() works as expected", {
  con <- dbConnect(duckdb())
  tf <- tempfile()

  # Round trip without missing values: write iris, read it back, compare.
  iris_rel <- rel_from_df(con, iris)
  rel_to_parquet(iris_rel, tf)

  res_rel <- rel_from_table_function(con, "read_parquet", list(tf))
  res_df <- rel_to_altrep(res_rel)
  # iris holds Species as a factor, so convert before comparing.
  res_df$Species <- as.factor(res_df$Species)
  expect_true(identical(res_df, iris))

  # Round trip with a missing value: NA must survive the write/read cycle.
  iris_na <- iris
  iris_na[[2]][42] <- NA

  iris_na_rel <- rel_from_df(con, iris_na)
  rel_to_parquet(iris_na_rel, tf)

  res_rel <- rel_from_table_function(con, "read_parquet", list(tf))
  res_df <- rel_to_altrep(res_rel)
  res_df$Species <- as.factor(res_df$Species)
  expect_true(identical(res_df, iris_na))

  # Release the database and remove the temporary file, so the test leaves
  # nothing behind (same shutdown convention as the other tests in this file).
  dbDisconnect(con, shutdown = TRUE)
  unlink(tf)
})

test_that("duckdb rel_to_parquet() throws error with no file name", {
  con <- dbConnect(duckdb())

  # An empty file name is not a valid target and must raise an error.
  iris_rel <- rel_from_df(con, iris)
  expect_error(rel_to_parquet(iris_rel, ""))

  # expect_error() catches the failure, so this cleanup is always reached.
  dbDisconnect(con, shutdown = TRUE)
})

2 changes: 1 addition & 1 deletion tests/testthat/test_read.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,4 @@ test_that("duckdb_read_csv() works as expected", {
expect_true(identical(res, iris))

dbDisconnect(con, shutdown = TRUE)
})
})

0 comments on commit fe77151

Please sign in to comment.