Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure pointers to children are in external pointer #550

Merged
merged 3 commits into from
May 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@

* Consolidation and vacuum calls now reflect the state of the global context object (#547)

* Pointers to 'Arrow Table' objects representing the table columns are now in external pointers too (#550)

## Build and Test Systems

* 'sudo' mode is reenabled for package 'bspm' used in the continuous integration at GitHub Actions (#549)


# tiledb 0.19.1

Expand Down
13 changes: 6 additions & 7 deletions src/Makevars.in
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
## We need C++17 to use TileDB's C++ API
CXX_STD = CXX17

## For macOS aka Darwin need to set minimum version 10.14 for macOS
PKG_CXX17FLAGS = @CXX17_MACOS@

## We need the TileDB Headers
PKG_CPPFLAGS = -I. -I../inst/include/ @TILEDB_INCLUDE@ @TILEDB_SILENT_BUILD@
## We need the TileDB headers; on macOS (aka Darwin) we also need to set the minimum version to 10.14
PKG_CPPFLAGS = -I. -I../inst/include/ @CXX17_MACOS@ @TILEDB_INCLUDE@ @TILEDB_SILENT_BUILD@

## We also need the TileDB library
PKG_LIBS = @CXX17_MACOS@ @TILEDB_LIBS@ @TILEDB_RPATH@

all: $(SHLIB)
# if we are
# - not on Windows NT (a tip from data.table)
# - on macOS aka Darwin which needs this
# - the library is present (implying non-system library use)
# then let us call install_name_tool
if [ "$(OS)" != "Windows_NT" ] && [ `uname -s` = 'Darwin' ] && [ -f ../inst/tiledb/lib/libtiledb.dylib ] && [ -f tiledb.so ]; then install_name_tool -change libz.1.dylib @rpath/libz.1.dylib ../inst/tiledb/lib/libtiledb.dylib; install_name_tool -add_rpath @loader_path/../tiledb/lib tiledb.so; fi
@if [ `uname -s` = 'Darwin' ] && [ -f ../inst/tiledb/lib/libtiledb.dylib ] && [ -f tiledb.so ]; then \
install_name_tool -change libz.1.dylib @rpath/libz.1.dylib ../inst/tiledb/lib/libtiledb.dylib; \
install_name_tool -add_rpath @loader_path/../tiledb/lib tiledb.so; \
fi
134 changes: 120 additions & 14 deletions src/arrowio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ XPtr<tiledb::Query> libtiledb_query_import_buffer(XPtr<tiledb::Context> ctx,
return(query);
}

// Forward declarations of helpers defined further down in this file so that
// the exported functions below can use them before their definitions:
//  - schema_owning_xptr() / array_owning_xptr() return an ArrowSchema /
//    ArrowArray wrapped in an Rcpp external pointer
//  - schema_setup_struct() / array_setup_struct() initialise such an object as
//    a struct type with 'n_children' children and return the same pointer
Rcpp::XPtr<ArrowSchema> schema_owning_xptr(void);
Rcpp::XPtr<ArrowArray> array_owning_xptr(void);
Rcpp::XPtr<ArrowSchema> schema_setup_struct(Rcpp::XPtr<ArrowSchema> schxp, int64_t n_children);
Rcpp::XPtr<ArrowArray> array_setup_struct(Rcpp::XPtr<ArrowArray> arrxp, int64_t n_children);

// [[Rcpp::export]]
Rcpp::List libtiledb_query_export_arrow_table(XPtr<tiledb::Context> ctx,
Expand All @@ -196,12 +200,11 @@ Rcpp::List libtiledb_query_export_arrow_table(XPtr<tiledb::Context> ctx,
size_t ncol = names.size();
tiledb::arrow::ArrowAdapter adapter(ctx, query);

ArrowSchema* schemap = schema_owning_ptr();
ArrowArray* arrayp = array_owning_ptr();
ArrowSchemaInitFromType(schemap, NANOARROW_TYPE_STRUCT);
ArrowSchemaAllocateChildren(schemap, ncol);
ArrowArrayInitFromType(arrayp, NANOARROW_TYPE_STRUCT);
ArrowArrayAllocateChildren(arrayp, ncol);
Rcpp::XPtr<ArrowSchema> schemap = schema_owning_xptr();
Rcpp::XPtr<ArrowArray> arrayp = array_owning_xptr();
schemap = schema_setup_struct(schemap, ncol);
arrayp = array_setup_struct(arrayp, ncol);

arrayp->length = 0;

for (size_t i=0; i<ncol; i++) {
Expand All @@ -224,11 +227,9 @@ Rcpp::List libtiledb_query_export_arrow_table(XPtr<tiledb::Context> ctx,
names[i], chldschemap->format, chldarrayp->length, chldarrayp->null_count, chldarrayp->n_buffers));

}
SEXP xparray = R_MakeExternalPtr((void*) arrayp, R_NilValue, R_NilValue);
SEXP xpschema = R_MakeExternalPtr((void*) schemap, R_NilValue, R_NilValue);

Rcpp::List as = Rcpp::List::create(Rcpp::Named("array_data") = xparray,
Rcpp::Named("schema") = xpschema);
Rcpp::List as = Rcpp::List::create(Rcpp::Named("array_data") = arrayp,
Rcpp::Named("schema") = schemap);
return as;
#else
Rcpp::stop("This function requires TileDB (2.2.0 or greater).");
Expand Down Expand Up @@ -274,6 +275,113 @@ Rcpp::XPtr<ArrowArray> array_owning_xptr(void) {
return array_xptr;
}

// Convenience wrapper around R_RegisterCFinalizerEx() -- handy e.g. when
// debugging object lifetimes.
//
//   s       R object the finalizer is attached to
//   f       C finalizer routine to register
//   onexit  forwarded as TRUE/FALSE in the third argument of
//           R_RegisterCFinalizerEx(); defaults to true
inline void registerXptrFinalizer(SEXP s, R_CFinalizer_t f, bool onexit = true) {
  if (onexit) {
    R_RegisterCFinalizerEx(s, f, TRUE);
  } else {
    R_RegisterCFinalizerEx(s, f, FALSE);
  }
}

// Initialise the ArrowSchema behind 'schxp' as a struct type and give it
// 'n_children' child schemas.
//
// This is an adaptation of nanoarrow's ArrowSchemaInitFromType() followed by
// ArrowSchemaAllocateChildren(); the difference is that the array of child
// pointers and each child are obtained via external pointers (make_xptr() /
// schema_owning_xptr()).
//
//   schxp       external pointer to the (uninitialised) ArrowSchema to set up
//   n_children  number of child schemas to allocate; nothing is allocated
//               unless this is positive
//
// Returns 'schxp' (the same external pointer) once set up. Calls Rcpp::stop()
// if the type cannot be set or an allocation fails; on those paths the schema
// is released first so that what it already owns is not leaked.
Rcpp::XPtr<ArrowSchema> schema_setup_struct(Rcpp::XPtr<ArrowSchema> schxp, int64_t n_children) {
  ArrowSchema* schema = schxp.get();
  auto type = NANOARROW_TYPE_STRUCT;

  ArrowSchemaInit(schema);                      // modified from ArrowSchemaInitFromType()
  int result = ArrowSchemaSetType(schema, type);
  if (result != NANOARROW_OK) {
    schema->release(schema);
    Rcpp::stop("Error setting struct schema");
  }

  // now adapted from ArrowSchemaAllocateChildren
  if (schema->children != NULL) Rcpp::stop("Error allocation as children not null");

  if (n_children > 0) {
    const size_t nbytes = static_cast<size_t>(n_children) * sizeof(struct ArrowSchema*);
    auto ptr = static_cast<struct ArrowSchema**>(ArrowMalloc(nbytes));
    // Check the raw allocation before wrapping it, and release the schema so
    // that this failure path behaves like the one above.
    if (ptr == NULL) {
      schema->release(schema);
      Rcpp::stop("Failed to allocate ArrowSchema*");
    }
    Rcpp::XPtr<ArrowSchema*> schema_ptrxp = make_xptr(ptr, false);
    schema->children = schema_ptrxp.get();

    schema->n_children = n_children;
    memset(schema->children, 0, nbytes);

    for (int64_t i = 0; i < n_children; i++) {
      schema->children[i] = schema_owning_xptr();
      if (schema->children[i] == NULL) {
        // Only children [0, i) exist; tell the release callback so before
        // handing the partially built schema back to it.
        schema->n_children = i;
        schema->release(schema);
        Rcpp::stop("Error allocation schema child %ld", i);
      }
      // mark the child as not (yet) holding anything that needs releasing
      schema->children[i]->release = NULL;
    }
  }
  return schxp;
}

// Declarations of two functions defined in the bundled nanoarrow.c. They were
// 'static' (file-local) there and have been made non-static for tiledb-r so
// that array_setup_struct() below can reference them; C linkage is required
// because nanoarrow.c is compiled as C.
extern "C" {
void ArrowArrayRelease(struct ArrowArray *array); // release callback installed on the array; made non-static in nanoarrow.c
ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, // also made non-static in nanoarrow.c
enum ArrowType storage_type);
}

// Initialise the ArrowArray behind 'arrxp' as a struct array and give it
// 'n_children' child arrays.
//
// This is an adaptation of nanoarrow's ArrowArrayInitFromType() followed by
// ArrowArrayAllocateChildren(); the difference is that the array of child
// pointers and each child are obtained via external pointers (make_xptr() /
// array_owning_xptr()).
//
//   arrxp       external pointer to the (uninitialised) ArrowArray to set up
//   n_children  number of child arrays to allocate; nothing is allocated
//               unless this is positive (same convention as
//               schema_setup_struct())
//
// Returns 'arrxp' (the same external pointer) once set up. Calls Rcpp::stop()
// if an allocation fails or the storage type cannot be set; once the private
// data has been attached the array is released before stopping so that it is
// not leaked.
Rcpp::XPtr<ArrowArray> array_setup_struct(Rcpp::XPtr<ArrowArray> arrxp, int64_t n_children) {
  ArrowArray* array = arrxp.get();
  auto storage_type = NANOARROW_TYPE_STRUCT;

  // start from an empty array with the nanoarrow release callback installed
  array->length = 0;
  array->null_count = 0;
  array->offset = 0;
  array->n_buffers = 0;
  array->n_children = 0;
  array->buffers = NULL;
  array->children = NULL;
  array->dictionary = NULL;
  array->release = &ArrowArrayRelease;
  array->private_data = NULL;

  auto private_data =
    static_cast<struct ArrowArrayPrivateData*>(ArrowMalloc(sizeof(struct ArrowArrayPrivateData)));
  if (private_data == NULL) {
    // nothing is owned yet, so simply mark the array as released
    array->release = NULL;
    Rcpp::stop("Error allocating array private data");
  }
  ArrowBitmapInit(&private_data->bitmap);
  ArrowBufferInit(&private_data->buffers[0]);
  ArrowBufferInit(&private_data->buffers[1]);
  private_data->buffer_data[0] = NULL;
  private_data->buffer_data[1] = NULL;
  private_data->buffer_data[2] = NULL;
  array->private_data = private_data;
  array->buffers = (const void**)(&private_data->buffer_data);
  int result = ArrowArraySetStorageType(array, storage_type);
  if (result != NANOARROW_OK) {
    array->release(array);
    Rcpp::stop("Error setting array storage type");
  }

  ArrowLayoutInit(&private_data->layout, storage_type);
  // We can only know this not to be true when initializing based on a schema so assume this to be true.
  private_data->union_type_id_is_child_index = 1;

  // remainder from ArrowArrayAllocateChildren()
  if (array->children != NULL) Rcpp::stop("Error allocating array children as pointer not null");

  // Treat zero and negative counts alike, as schema_setup_struct() does; a
  // negative count must never reach the size computation below.
  if (n_children <= 0) {
    return arrxp;
  }

  const size_t nbytes = static_cast<size_t>(n_children) * sizeof(struct ArrowArray*);
  auto ptr = static_cast<struct ArrowArray**>(ArrowMalloc(nbytes));
  // Check the raw allocation before wrapping it, and release what the array
  // already owns (its private data) before reporting the failure.
  if (ptr == NULL) {
    array->release(array);
    Rcpp::stop("Failed to allocate ArrowArray*");
  }
  Rcpp::XPtr<ArrowArray*> array_ptrxp = make_xptr(ptr, false);
  array->children = array_ptrxp.get();

  memset(array->children, 0, nbytes);

  for (int64_t i = 0; i < n_children; i++) {
    array->children[i] = array_owning_xptr();
    if (array->children[i] == NULL) {
      // Only children [0, i) exist; tell the release callback so before
      // handing the partially built array back to it.
      array->n_children = i;
      array->release(array);
      Rcpp::stop("Error allocation array child %ld", i);
    }
    // mark the child as not (yet) holding anything that needs releasing
    array->children[i]->release = NULL;
  }
  array->n_children = n_children;
  return arrxp;
}



// [[Rcpp::export]]
Rcpp::List libtiledb_to_arrow(Rcpp::XPtr<tiledb::ArrayBuffers> ab,
Rcpp::XPtr<tiledb::Query> qry) {
Expand All @@ -283,10 +391,8 @@ Rcpp::List libtiledb_to_arrow(Rcpp::XPtr<tiledb::ArrayBuffers> ab,
auto ncol = names.size();
Rcpp::XPtr<ArrowSchema> schemaxp = schema_owning_xptr();
Rcpp::XPtr<ArrowArray> arrayxp = array_owning_xptr();
ArrowSchemaInitFromType((ArrowSchema*)R_ExternalPtrAddr(schemaxp), NANOARROW_TYPE_STRUCT);
ArrowSchemaAllocateChildren((ArrowSchema*)R_ExternalPtrAddr(schemaxp), ncol);
ArrowArrayInitFromType((ArrowArray*)R_ExternalPtrAddr(arrayxp), NANOARROW_TYPE_STRUCT);
ArrowArrayAllocateChildren((ArrowArray*)R_ExternalPtrAddr(arrayxp), ncol);
schemaxp = schema_setup_struct(schemaxp, ncol);
arrayxp = array_setup_struct(arrayxp, ncol);

arrayxp->length = 0;

Expand Down
8 changes: 5 additions & 3 deletions src/nanoarrow.c
Original file line number Diff line number Diff line change
Expand Up @@ -1748,7 +1748,8 @@ ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer,

#include "nanoarrow.h"

static void ArrowArrayRelease(struct ArrowArray* array) {
// -- changed for tiledb-r: 'static' removed so the function can be called from outside this file
void ArrowArrayRelease(struct ArrowArray* array) {
// Release buffers held by this array
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
Expand Down Expand Up @@ -1791,8 +1792,9 @@ static void ArrowArrayRelease(struct ArrowArray* array) {
array->release = NULL;
}

static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
enum ArrowType storage_type) {
// -- changed for tiledb-r: 'static' removed so the function can be called from outside this file
ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
enum ArrowType storage_type) {
switch (storage_type) {
case NANOARROW_TYPE_UNINITIALIZED:
case NANOARROW_TYPE_NA:
Expand Down