From 6a5c2d964f2f5a485d6ec0b1f8b650d559f655ad Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Sun, 21 May 2023 18:03:50 -0500 Subject: [PATCH 1/3] Ensure pointers to children are in external pointer This requires access to two otherwise static functions in nanoarrow Also minor edits to src/Makevars.in for enhanced legibility --- src/Makevars.in | 13 +++--- src/arrowio.cpp | 113 ++++++++++++++++++++++++++++++++++++++++++++++-- src/nanoarrow.c | 8 ++-- 3 files changed, 120 insertions(+), 14 deletions(-) diff --git a/src/Makevars.in b/src/Makevars.in index 560fc612ca..328a1e7015 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -1,19 +1,18 @@ ## We need C++17 to use TileDB's C++ API CXX_STD = CXX17 -## For macOS aka Darwin need to set minimum version 10.14 for macOS -PKG_CXX17FLAGS = @CXX17_MACOS@ - -## We need the TileDB Headers -PKG_CPPFLAGS = -I. -I../inst/include/ @TILEDB_INCLUDE@ @TILEDB_SILENT_BUILD@ +## We need the TileDB Headers; for macOS aka Darwin need to set minimum version 10.14 for macOS +PKG_CPPFLAGS = -I. -I../inst/include/ @CXX17_MACOS@ @TILEDB_INCLUDE@ @TILEDB_SILENT_BUILD@ ## We also need the TileDB library PKG_LIBS = @CXX17_MACOS@ @TILEDB_LIBS@ @TILEDB_RPATH@ all: $(SHLIB) # if we are - # - not on Window NT (a tip from data.table) # - on macOS aka Darwin which needs this # - the library is present (implying non-system library use) # then let us call install_name_tool - if [ "$(OS)" != "Windows_NT" ] && [ `uname -s` = 'Darwin' ] && [ -f ../inst/tiledb/lib/libtiledb.dylib ] && [ -f tiledb.so ]; then install_name_tool -change libz.1.dylib @rpath/libz.1.dylib ../inst/tiledb/lib/libtiledb.dylib; install_name_tool -add_rpath @loader_path/../tiledb/lib tiledb.so; fi + @if [ `uname -s` = 'Darwin' ] && [ -f ../inst/tiledb/lib/libtiledb.dylib ] && [ -f tiledb.so ]; then \ + install_name_tool -change libz.1.dylib @rpath/libz.1.dylib ../inst/tiledb/lib/libtiledb.dylib; \ + install_name_tool -add_rpath @loader_path/../tiledb/lib tiledb.so; \ + fi diff --git a/src/arrowio.cpp b/src/arrowio.cpp index 4cde7d1672..f786ebb7b4 100644 --- a/src/arrowio.cpp +++ b/src/arrowio.cpp @@ -274,6 +274,113 @@ Rcpp::XPtr array_owning_xptr(void) { return array_xptr; } +// Helper function to register a finalizer -- eg for debugging purposes +inline void registerXptrFinalizer(SEXP s, R_CFinalizer_t f, bool onexit = true) { + R_RegisterCFinalizerEx(s, f, onexit ? TRUE : FALSE); +} + +Rcpp::XPtr schema_setup_struct(Rcpp::XPtr schxp, int64_t n_children) { + ArrowSchema* schema = schxp.get(); + auto type = NANOARROW_TYPE_STRUCT; + + ArrowSchemaInit(schema); // modified from ArrowSchemaInitFromType() + int result = ArrowSchemaSetType(schema, type); + if (result != NANOARROW_OK) { + schema->release(schema); + Rcpp::stop("Error setting struct schema"); + } + + // now adapted from ArrowSchemaAllocateChildren + if (schema->children != NULL) Rcpp::stop("Error allocation as children not null"); + + if (n_children > 0) { + auto ptr = (struct ArrowSchema**) ArrowMalloc(n_children * sizeof(struct ArrowSchema*)); + Rcpp::XPtr schema_ptrxp = make_xptr(ptr, false); + schema->children = schema_ptrxp.get(); + if (schema->children == NULL) Rcpp::stop("Failed to allocate ArrowSchema*"); + + schema->n_children = n_children; + memset(schema->children, 0, n_children * sizeof(struct ArrowSchema*)); + + for (int64_t i = 0; i < n_children; i++) { + schema->children[i] = schema_owning_xptr(); + if (schema->children[i] == NULL) Rcpp::stop("Error allocation schema child %ld", i); + schema->children[i]->release = NULL; + } + } + return schxp; +} + +extern "C" { + void ArrowArrayRelease(struct ArrowArray *array); // made non-static in nanoarrow.c + ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, // ditto + enum ArrowType storage_type); +} + +Rcpp::XPtr array_setup_struct(Rcpp::XPtr arrxp, int64_t n_children) { + ArrowArray* array = arrxp.get(); + auto storage_type = NANOARROW_TYPE_STRUCT; + + array->length = 0; + array->null_count = 0; + array->offset = 0; + array->n_buffers = 0; + array->n_children = 0; + array->buffers = NULL; + array->children = NULL; + array->dictionary = NULL; + array->release = &ArrowArrayRelease; + array->private_data = NULL; + + auto private_data = (struct ArrowArrayPrivateData*) ArrowMalloc(sizeof(struct ArrowArrayPrivateData)); + if (private_data == NULL) { + array->release = NULL; + Rcpp::stop("Error allocating array private data"); + } + ArrowBitmapInit(&private_data->bitmap); + ArrowBufferInit(&private_data->buffers[0]); + ArrowBufferInit(&private_data->buffers[1]); + private_data->buffer_data[0] = NULL; + private_data->buffer_data[1] = NULL; + private_data->buffer_data[2] = NULL; + array->private_data = private_data; + array->buffers = (const void**)(&private_data->buffer_data); + int result = ArrowArraySetStorageType(array, storage_type); + if (result != NANOARROW_OK) { + array->release(array); + Rcpp::stop("Error setting array storage type"); + } + + ArrowLayoutInit(&private_data->layout, storage_type); + // We can only know this not to be true when initializing based on a schema so assume this to be true. + private_data->union_type_id_is_child_index = 1; + + + // remainder from ArrowArrayAllocateChildren() + if (array->children != NULL) Rcpp::stop("Error allocating array children as pointer not null"); + + if (n_children == 0) { + return arrxp; + } + + auto ptr = (struct ArrowArray**) ArrowMalloc(n_children * sizeof(struct ArrowArray*)); + Rcpp::XPtr array_ptrxp = make_xptr(ptr, false); + array->children = array_ptrxp.get(); + if (array->children == NULL) Rcpp::stop("Failed to allocated ArrayArray*"); + + memset(array->children, 0, n_children * sizeof(struct ArrowArray*)); + + for (int64_t i = 0; i < n_children; i++) { + array->children[i] = array_owning_xptr(); + if (array->children[i] == NULL) Rcpp::stop("Error allocation array child %ld", i); + array->children[i]->release = NULL; + } + array->n_children = n_children; + return arrxp; +} + + + // [[Rcpp::export]] Rcpp::List libtiledb_to_arrow(Rcpp::XPtr ab, Rcpp::XPtr qry) { @@ -283,10 +390,8 @@ Rcpp::List libtiledb_to_arrow(Rcpp::XPtr ab, auto ncol = names.size(); Rcpp::XPtr schemaxp = schema_owning_xptr(); Rcpp::XPtr arrayxp = array_owning_xptr(); - ArrowSchemaInitFromType((ArrowSchema*)R_ExternalPtrAddr(schemaxp), NANOARROW_TYPE_STRUCT); - ArrowSchemaAllocateChildren((ArrowSchema*)R_ExternalPtrAddr(schemaxp), ncol); - ArrowArrayInitFromType((ArrowArray*)R_ExternalPtrAddr(arrayxp), NANOARROW_TYPE_STRUCT); - ArrowArrayAllocateChildren((ArrowArray*)R_ExternalPtrAddr(arrayxp), ncol); + schemaxp = schema_setup_struct(schemaxp, ncol); + arrayxp = array_setup_struct(arrayxp, ncol); arrayxp->length = 0; diff --git a/src/nanoarrow.c b/src/nanoarrow.c index 0b8fc359e3..1d31884b19 100644 --- a/src/nanoarrow.c +++ b/src/nanoarrow.c @@ -1748,7 +1748,8 @@ ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, #include "nanoarrow.h" -static void ArrowArrayRelease(struct ArrowArray* array) { +// -- changed for tiledb-r static +void ArrowArrayRelease(struct ArrowArray* array) { // Release buffers held by this array struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; @@ -1791,8 +1792,9 @@ static void ArrowArrayRelease(struct ArrowArray* array) { array->release = NULL; } -static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, - enum ArrowType storage_type) { +// -- changed for tiledb-r static +ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, + enum ArrowType storage_type) { switch (storage_type) { case NANOARROW_TYPE_UNINITIALIZED: case NANOARROW_TYPE_NA: From 2adb6f186f0cadbbc32ebad1847bbdf399579b8a Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Sun, 21 May 2023 18:19:15 -0500 Subject: [PATCH 2/3] Update NEWS.md [ci skip] --- NEWS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS.md b/NEWS.md index 4ab8673f12..e06e00d039 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,6 +13,12 @@ * Consolidation and vacuum calls now reflect the state of the global context object (#547) +* Pointers to 'Arrow Table' objects representing the table columns are now in external pointers too (#550) + +## Build and Test Systems + +* 'sudo' mode is reenabled for package 'bspm' used in the continuous integration at GitHub Actions (#549) + # tiledb 0.19.1 From d06a6af4cda3cd644c381b99f7ae3c294b1d8afa Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Sun, 21 May 2023 19:25:08 -0500 Subject: [PATCH 3/3] Update initial arrow export function as well --- src/arrowio.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/arrowio.cpp b/src/arrowio.cpp index f786ebb7b4..48f62fe5a7 100644 --- a/src/arrowio.cpp +++ b/src/arrowio.cpp @@ -187,6 +187,10 @@ XPtr libtiledb_query_import_buffer(XPtr ctx, return(query); } +Rcpp::XPtr schema_owning_xptr(void); +Rcpp::XPtr array_owning_xptr(void); +Rcpp::XPtr schema_setup_struct(Rcpp::XPtr schxp, int64_t n_children); +Rcpp::XPtr array_setup_struct(Rcpp::XPtr arrxp, int64_t n_children); // [[Rcpp::export]] Rcpp::List libtiledb_query_export_arrow_table(XPtr ctx, @@ -196,12 +200,11 @@ Rcpp::List libtiledb_query_export_arrow_table(XPtr ctx, size_t ncol = names.size(); tiledb::arrow::ArrowAdapter adapter(ctx, query); - ArrowSchema* schemap = schema_owning_ptr(); - ArrowArray* arrayp = array_owning_ptr(); - ArrowSchemaInitFromType(schemap, NANOARROW_TYPE_STRUCT); - ArrowSchemaAllocateChildren(schemap, ncol); - ArrowArrayInitFromType(arrayp, NANOARROW_TYPE_STRUCT); - ArrowArrayAllocateChildren(arrayp, ncol); + Rcpp::XPtr schemap = schema_owning_xptr(); + Rcpp::XPtr arrayp = array_owning_xptr(); + schemap = schema_setup_struct(schemap, ncol); + arrayp = array_setup_struct(arrayp, ncol); + arrayp->length = 0; for (size_t i=0; i ctx, names[i], chldschemap->format, chldarrayp->length, chldarrayp->null_count, chldarrayp->n_buffers)); } - SEXP xparray = R_MakeExternalPtr((void*) arrayp, R_NilValue, R_NilValue); - SEXP xpschema = R_MakeExternalPtr((void*) schemap, R_NilValue, R_NilValue); - Rcpp::List as = Rcpp::List::create(Rcpp::Named("array_data") = xparray, - Rcpp::Named("schema") = xpschema); + Rcpp::List as = Rcpp::List::create(Rcpp::Named("array_data") = arrayp, + Rcpp::Named("schema") = schemap); return as; #else Rcpp::stop("This function requires TileDB (2.2.0 or greater).");