diff --git a/DESCRIPTION b/DESCRIPTION
index 56ceaf8a6b..5a7b3fb7f8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: tiledb
 Type: Package
-Version: 0.21.1.12
+Version: 0.21.1.13
 Title: Universal Storage Engine for Sparse and Dense Multidimensional Arrays
 Authors@R: c(person("TileDB, Inc.", role = c("aut", "cph")),
              person("Dirk", "Eddelbuettel", email = "dirk@tiledb.com", role = "cre"))
diff --git a/NEWS.md b/NEWS.md
index 9157638750..d6aed10c3b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -24,6 +24,8 @@
 
 * R objects can be (de-)serialized to and from VFS paths (#608)
 
+* Enumeration support has been extended to some cases only supported by Arrow (#609)
+
 ## Bug Fixes
 
 * The DESCRIPTION file now correctly refers to macOS 10.14 (#596)
diff --git a/R/ArraySchemaEvolution.R b/R/ArraySchemaEvolution.R
index c0e9b7ee49..f7470d1dbe 100644
--- a/R/ArraySchemaEvolution.R
+++ b/R/ArraySchemaEvolution.R
@@ -123,7 +123,7 @@ tiledb_array_schema_evolution_drop_enumeration <- function(object, attrname) {
 
 #' Evolve an Array Schema by adding an empty Enumeration
 #'
-#' @param asc An ArraySchemaEvolution object
+#' @param ase An ArraySchemaEvolution object
 #' @param enum_name A character value with the Enumeration name
 #' @param type_str A character value with the TileDB type, defaults to \sQuote{ASCII}
 #' @param cell_val_num An integer with number values per cell, defaults to \code{NA_integer_} to
@@ -137,7 +137,7 @@ tiledb_array_schema_evolution_add_enumeration_empty <- function(ase, enum_name,
                                                                 ordered = FALSE,
                                                                 ctx = tiledb_get_context()) {
     stopifnot("Argument 'ase' must be an Array Schema Evolution object" =
-                  is(object, "tiledb_array_schema_evolution"),
+                  is(ase, "tiledb_array_schema_evolution"),
               "Argument 'enum_name' must be character" = is.character(enum_name),
               "Argument 'type_str' must be character" = is.character(type_str),
               "Argument 'cell_val_num' must be integer" = is.integer(cell_val_num),
diff --git a/R/Attribute.R b/R/Attribute.R
index 3839b8d5b5..cdb8906058 100644
--- a/R/Attribute.R
+++ b/R/Attribute.R
@@ -388,6 +388,30 @@ tiledb_attribute_set_enumeration_name <- function(attr, enum_name, ctx = tiledb_
 #' @export
 tiledb_attribute_is_ordered_enumeration_ptr <- function(attr, arrptr, ctx = tiledb_get_context()) {
     stopifnot("The 'attr' argument must be an attribute" = is(attr, "tiledb_attr"),
-              "The 'arr' argument must be an external pointer" = is(arrptr, "externalptr"))
+              "The 'arrptr' argument must be an external pointer" = is(arrptr, "externalptr"))
     libtiledb_attribute_is_ordered_enumeration(ctx@ptr, attr@ptr, arrptr)
 }
+
+# internal function to access the enumeration data type of 'attr'; takes a 'tiledb_array' object
+#' @noRd
+tiledb_attribute_get_enumeration_type <- function(attr, arr, ctx = tiledb_get_context()) {
+    stopifnot("The 'attr' argument must be an attribute" = is(attr, "tiledb_attr"),
+              "The 'arr' argument must be an array" = is(arr, "tiledb_array"))
+    libtiledb_attribute_get_enumeration_type(ctx@ptr, attr@ptr, arr@ptr)  # pass the array's external pointer
+}
+
+# internal function to access the enumeration data type of 'attr'; takes the array as an external pointer
+#' @noRd
+tiledb_attribute_get_enumeration_type_ptr <- function(attr, arrptr, ctx = tiledb_get_context()) {
+    stopifnot("The 'attr' argument must be an attribute" = is(attr, "tiledb_attr"),
+              "The 'arrptr' argument must be an external pointer" = is(arrptr, "externalptr"))
+    libtiledb_attribute_get_enumeration_type(ctx@ptr, attr@ptr, arrptr)
+}
+
+# internal function to get (non-string) enumeration vector; takes the array as an external pointer
+#' @noRd
+tiledb_attribute_get_enumeration_vector_ptr <- function(attr, arrptr, ctx = tiledb_get_context()) {
+    stopifnot("The 'attr' argument must be an attribute" = is(attr, "tiledb_attr"),
+              "The 'arrptr' argument must be an external pointer" = is(arrptr, "externalptr"))
+    libtiledb_attribute_get_enumeration_vector(ctx@ptr, attr@ptr, arrptr)
+}
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 6f8397a8bb..4f344b717d 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -302,6 +302,14 @@ libtiledb_attribute_has_enumeration <- function(ctx, attr) {
     .Call(`_tiledb_libtiledb_attribute_has_enumeration`, ctx, attr)
 }
 
+libtiledb_attribute_get_enumeration_type <- function(ctx, attr, arr) {
+    .Call(`_tiledb_libtiledb_attribute_get_enumeration_type`, ctx, attr, arr)
+}
+
+libtiledb_attribute_get_enumeration_vector <- function(ctx, attr, arr) {
+    .Call(`_tiledb_libtiledb_attribute_get_enumeration_vector`, ctx, attr, arr)
+}
+
 libtiledb_attribute_get_enumeration <- function(ctx, attr, arr) {
     .Call(`_tiledb_libtiledb_attribute_get_enumeration`, ctx, attr, arr)
 }
diff --git a/R/TileDBArray.R b/R/TileDBArray.R
index 96df9020f0..9fd55e1109 100644
--- a/R/TileDBArray.R
+++ b/R/TileDBArray.R
@@ -619,8 +619,18 @@ setMethod("[", "tiledb_array",
               ordered_dict <- dictionaries
               for (ii in seq_along(dictionaries)) {
                   if (isTRUE(alldictionary[ii])) {
-                      dictionaries[[ii]] <- tiledb_attribute_get_enumeration_ptr(attrs[[allnames[ii]]], arrptr)
-                      ordered_dict[[ii]] <- tiledb_attribute_is_ordered_enumeration_ptr(attrs[[allnames[ii]]], arrptr)
+                      attr <- attrs[[allnames[ii]]]
+                      tpstr <- tiledb_attribute_get_enumeration_type_ptr(attr, arrptr)  # type name of the enumeration, e.g. "ASCII" or "INT32"
+                      if (tpstr %in% c("ASCII", "UTF8")) {  # string-valued enumeration
+                          dictionaries[[ii]] <- tiledb_attribute_get_enumeration_ptr(attr, arrptr)
+                      } else if (tpstr %in% c("FLOAT32", "FLOAT64", "BOOL",
+                                              "UINT8", "UINT16", "UINT32", "UINT64",
+                                              "INT8", "INT16", "INT32", "INT64")) {  # non-string enumeration values
+                          dictionaries[[ii]] <- tiledb_attribute_get_enumeration_vector_ptr(attr, arrptr)
+                      } else {
+                          stop(sprintf("Unsupported enumeration vector payload of type '%s'", tpstr), call. = FALSE)  # stop() pastes its arguments verbatim, so format first
+                      }
+                      ordered_dict[[ii]] <- tiledb_attribute_is_ordered_enumeration_ptr(attr, arrptr)
                       attr(dictionaries[[ii]], "ordered") <- ordered_dict[[ii]]
                   }
               }
@@ -1015,8 +1025,12 @@ setMethod("[", "tiledb_array",
 
                       if (min(col, na.rm=TRUE) == 2 && max(col, na.rm=TRUE) == length(dct) + 1)
                           col <- col - 1L
-                      attr(col, "levels") <- dct
-                      attr(col, "class") <- if (ord) c("ordered", "factor") else "factor"
+                      if (inherits(dct, "character")) {  # character dictionary: present the column as a factor
+                          attr(col, "levels") <- dct
+                          attr(col, "class") <- if (ord) c("ordered", "factor") else "factor"
+                      } else {
+                          col <- dct[col]  # non-character dictionary: replace indices by the dictionary values
+                      }
                   }
                   col
               }
diff --git a/man/tiledb_array_schema_evolution_add_enumeration_empty.Rd b/man/tiledb_array_schema_evolution_add_enumeration_empty.Rd
index 6f94afc721..328e5a81e4 100644
--- a/man/tiledb_array_schema_evolution_add_enumeration_empty.Rd
+++ b/man/tiledb_array_schema_evolution_add_enumeration_empty.Rd
@@ -14,6 +14,8 @@ tiledb_array_schema_evolution_add_enumeration_empty(
 )
 }
 \arguments{
+\item{ase}{An ArraySchemaEvolution object}
+
 \item{enum_name}{A character value with the Enumeration name}
 
 \item{type_str}{A character value with the TileDB type, defaults to \sQuote{ASCII}}
@@ -25,8 +27,6 @@ flag the \code{NA} value use for character values}
 or \code{ordered} (when \code{TRUE})}
 
 \item{ctx}{Optional tiledb_ctx object}
-
-\item{asc}{An ArraySchemaEvolution object}
 }
 \description{
 Evolve an Array Schema by adding an empty Enumeration
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index efa2428ec7..2b2d9b67c6 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -847,6 +847,32 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// libtiledb_attribute_get_enumeration_type
+Rcpp::String libtiledb_attribute_get_enumeration_type(XPtr ctx, XPtr attr, XPtr arr);
+RcppExport SEXP _tiledb_libtiledb_attribute_get_enumeration_type(SEXP ctxSEXP, SEXP attrSEXP, SEXP arrSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< XPtr >::type ctx(ctxSEXP);
+    Rcpp::traits::input_parameter< XPtr >::type attr(attrSEXP);
+    Rcpp::traits::input_parameter< XPtr >::type arr(arrSEXP);
+    rcpp_result_gen = Rcpp::wrap(libtiledb_attribute_get_enumeration_type(ctx, attr, arr));
+    return rcpp_result_gen;
+END_RCPP
+}
+// libtiledb_attribute_get_enumeration_vector
+SEXP libtiledb_attribute_get_enumeration_vector(XPtr ctx, XPtr attr, XPtr arr);
+RcppExport SEXP _tiledb_libtiledb_attribute_get_enumeration_vector(SEXP ctxSEXP, SEXP attrSEXP, SEXP arrSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< XPtr >::type ctx(ctxSEXP);
+    Rcpp::traits::input_parameter< XPtr >::type attr(attrSEXP);
+    Rcpp::traits::input_parameter< XPtr >::type arr(arrSEXP);
+    rcpp_result_gen = Rcpp::wrap(libtiledb_attribute_get_enumeration_vector(ctx, attr, arr));
+    return rcpp_result_gen;
+END_RCPP
+}
 // libtiledb_attribute_get_enumeration
 std::vector libtiledb_attribute_get_enumeration(XPtr ctx, XPtr attr, XPtr arr);
 RcppExport SEXP _tiledb_libtiledb_attribute_get_enumeration(SEXP ctxSEXP, SEXP attrSEXP, SEXP arrSEXP) {
@@ -3594,6 +3620,8 @@ static const R_CallMethodDef CallEntries[] = {
     {"_tiledb_libtiledb_attribute_set_nullable", (DL_FUNC) &_tiledb_libtiledb_attribute_set_nullable, 2},
     {"_tiledb_libtiledb_attribute_get_nullable", (DL_FUNC) &_tiledb_libtiledb_attribute_get_nullable, 1},
     {"_tiledb_libtiledb_attribute_has_enumeration", (DL_FUNC) &_tiledb_libtiledb_attribute_has_enumeration, 2},
+    {"_tiledb_libtiledb_attribute_get_enumeration_type", (DL_FUNC) &_tiledb_libtiledb_attribute_get_enumeration_type, 3},
+    {"_tiledb_libtiledb_attribute_get_enumeration_vector", (DL_FUNC) &_tiledb_libtiledb_attribute_get_enumeration_vector, 3},
     {"_tiledb_libtiledb_attribute_get_enumeration", (DL_FUNC) &_tiledb_libtiledb_attribute_get_enumeration, 3},
     {"_tiledb_libtiledb_attribute_set_enumeration", (DL_FUNC) &_tiledb_libtiledb_attribute_set_enumeration, 3},
     {"_tiledb_libtiledb_attribute_is_ordered_enumeration", (DL_FUNC) &_tiledb_libtiledb_attribute_is_ordered_enumeration, 3},
diff --git a/src/libtiledb.cpp b/src/libtiledb.cpp
index 1959bf400a..2cb930f97f 100644
--- a/src/libtiledb.cpp
+++ b/src/libtiledb.cpp
@@ -1588,6 +1588,68 @@ bool libtiledb_attribute_has_enumeration(XPtr ctx,
     return res;
 }
 
+// [[Rcpp::export]]
+Rcpp::String libtiledb_attribute_get_enumeration_type(XPtr ctx,
+                                                      XPtr attr,
+                                                      XPtr arr) {  // returns the enumeration's datatype rendered as a string
+
+    check_xptr_tag(ctx);
+    check_xptr_tag(attr);
+    check_xptr_tag(arr);
+#if TILEDB_VERSION >= TileDB_Version(2,17,0)
+    auto enmrname = tiledb::AttributeExperimental::get_enumeration_name(*ctx.get(), *attr.get());
+    if (enmrname == std::nullopt) {  // attribute has no enumeration attached
+        Rcpp::stop("No enumeration name for attribute");
+    }
+    auto enmr = tiledb::ArrayExperimental::get_enumeration(*ctx.get(), *arr.get(), enmrname.value());
+    if (enmr.ptr() == nullptr) {
+        Rcpp::stop("No enumeration for given attribute.");
+    }
+    Rcpp::String res = Rcpp::as(Rcpp::wrap(_tiledb_datatype_to_string(enmr.type())));
+#else
+    Rcpp::String res = Rcpp::as(NA_STRING);  // built against TileDB < 2.17.0: NA is returned instead of a type
+#endif
+    return res;
+}
+
+// [[Rcpp::export]]
+SEXP libtiledb_attribute_get_enumeration_vector(XPtr ctx,
+                                                XPtr attr,
+                                                XPtr arr) {  // returns the values of a non-string enumeration as an R object
+    check_xptr_tag(ctx);
+    check_xptr_tag(attr);
+    check_xptr_tag(arr);
+    SEXP res = R_NilValue;  // returned unchanged (NULL) when built against TileDB < 2.17.0
+#if TILEDB_VERSION >= TileDB_Version(2,17,0)
+    auto enmrname = tiledb::AttributeExperimental::get_enumeration_name(*ctx.get(), *attr.get());
+    if (enmrname == std::nullopt) {  // attribute has no enumeration attached
+        Rcpp::stop("No enumeration name for attribute");
+    }
+    auto enmr = tiledb::ArrayExperimental::get_enumeration(*ctx.get(), *arr.get(), enmrname.value());
+    if (enmr.ptr() == nullptr) {
+        Rcpp::stop("No enumeration for given attribute.");
+    }
+    auto dtype = enmr.type();
+    if (dtype == TILEDB_FLOAT32 || dtype == TILEDB_FLOAT64) {
+        auto v = enmr.as_vector();  // NOTE(review): one as_vector() call serves both FLOAT32 and FLOAT64 -- confirm the element type is valid for both
+        res = Rcpp::wrap(v);
+    } else if (dtype == TILEDB_INT8 || dtype == TILEDB_INT16 || dtype == TILEDB_INT32 ||
+               dtype == TILEDB_UINT8 || dtype == TILEDB_UINT16 || dtype == TILEDB_UINT32) {
+        auto v = enmr.as_vector();  // NOTE(review): six integer widths share one call -- confirm element type and range (e.g. UINT32 above INT32_MAX)
+        res = Rcpp::wrap(v);
+    } else if (dtype == TILEDB_INT64 || dtype == TILEDB_UINT64) {
+        auto v = enmr.as_vector();  // NOTE(review): UINT64 shares the INT64 path -- confirm handling of values above INT64_MAX
+        res = Rcpp::toInteger64(v);
+    } else if (dtype == TILEDB_BOOL) {
+        auto v = enmr.as_vector();
+        res = Rcpp::wrap(v);
+    } else {
+        Rcpp::stop("Unsupported non-string type '%s'", _tiledb_datatype_to_string(dtype));
+    }
+#endif
+    return res;
+}
+
 // [[Rcpp::export]]
 std::vector libtiledb_attribute_get_enumeration(XPtr ctx,
                                                 XPtr attr,
@@ -1610,6 +1672,7 @@ std::vector libtiledb_attribute_get_enumeration(XPtr libtiledb_attribute_set_enumeration(XPtr ctx, XPtr attr,