diff --git a/.Rbuildignore b/.Rbuildignore index 18cd4ec347..cd0dc5d559 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,7 +2,7 @@ ^\.Rproj\.user$ ^\.travis\.yml$ ^_pgkdown.yml -^.*\.tar\.gz$ +^tiledb.*\.tar\.gz$ ^docs ^_pkgdown.yml$ ^inst/examples/quickstart_dense$ diff --git a/.github/r-ci.sh b/.github/r-ci.sh index e47c1eebf8..a3f6c309ce 100755 --- a/.github/r-ci.sh +++ b/.github/r-ci.sh @@ -192,9 +192,11 @@ BootstrapLinuxOptions() { # InstallPandoc 'linux/debian/x86_64' #fi if [[ "${USE_BSPM}" != "FALSE" ]]; then - sudo Rscript --vanilla -e 'install.packages("bspm", repos="https://cran.r-project.org")' + #sudo Rscript --vanilla -e 'install.packages("bspm", repos="https://cran.r-project.org")' + sudo Rscript --vanilla -e 'remotes::install_url("https://cloud.r-project.org/src/contrib/Archive/bspm/bspm_0.3.10.tar.gz")' echo "suppressMessages(bspm::enable())" | sudo tee --append /etc/R/Rprofile.site >/dev/null - echo "options(bspm.sudo=TRUE)" | sudo tee --append /etc/R/Rprofile.site >/dev/null + ##--not needed with 0.3.10 echo "options(bspm.version.check=FALSE)" | sudo tee --append /etc/R/Rprofile.site >/dev/null + ##--not needed here echo "options(bspm.sudo=TRUE)" | sudo tee --append /etc/R/Rprofile.site >/dev/null fi } @@ -319,16 +321,12 @@ RBinaryInstall() { InstallGithub() { #EnsureDevtools - #echo "Installing GitHub packages: $@" - # Install the package. 
- #Rscript -e 'library(devtools); library(methods); install_github(commandArgs(TRUE), build_vignettes = FALSE)' "$@" sudo Rscript -e 'remotes::install_github(commandArgs(TRUE))' "$@" } InstallDeps() { #EnsureDevtools - #Rscript -e 'library(devtools); library(methods); install_deps(dependencies = TRUE)' sudo Rscript -e 'remotes::install_deps(".")' } diff --git a/NEWS.md b/NEWS.md index 519b3c1694..ea83ba39cc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,10 @@ * This release of the R package builds against [TileDB 2.14.1](https://github.com/TileDB-Inc/TileDB/releases/tag/2.14.1), and has also been tested against earlier releases as well as the development version (#502). +## Breaking Changes + +* The validity map coding of nullable strings has been corrected: validity map values of one are now interpreted as valid/non-null for full compatibility with other TileDB projects. Previously written arrays with nullable strings can be read by setting the config option `r.legacy_validity_mode` to `true`; with the option set, writes also use the legacy coding so that older installations can read the resulting arrays. A conversion helper script is provided in `scripts/legacy_validity_convert.r`. (#517) + ## Improvements * Attributes can now be created, written and read from in (explicit) UTF8 types (and CHAR and ASCII already behaved correctly with respect to utf8 data) (#510) diff --git a/R/Metadata.R b/R/Metadata.R index 585cec0dfc..a51592b261 100644 --- a/R/Metadata.R +++ b/R/Metadata.R @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2017-2021 TileDB Inc. +# Copyright (c) 2017-2023 TileDB Inc. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -77,10 +77,10 @@ tiledb_put_metadata <- function(arr, key, val) { } -##' Return a TileDB Array Metadata object given by key +##' Return all TileDB Array Metadata objects as a named list ##' ##' @param arr A TileDB Array object, or a character URI describing one -##' @return A object stored in the Metadata under the given key +##' @return A named list with all Metadata objects indexed by the given key ##' @export tiledb_get_all_metadata <- function(arr) { stopifnot(`Argument 'arr' must be a (dense or sparse) TileDB array` = .isArray(arr), diff --git a/R/RcppExports.R b/R/RcppExports.R index a8c62e1c35..c89d9263cf 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -568,8 +568,16 @@ libtiledb_query_buffer_var_char_alloc_direct <- function(szoffsets, szdata, null .Call(`_tiledb_libtiledb_query_buffer_var_char_alloc_direct`, szoffsets, szdata, nullable, cols) } -libtiledb_query_buffer_var_char_create <- function(vec, nullable) { - .Call(`_tiledb_libtiledb_query_buffer_var_char_create`, vec, nullable) +libtiledb_query_buffer_var_char_get_legacy_validity_value <- function(ctx, validity_override = FALSE) { + .Call(`_tiledb_libtiledb_query_buffer_var_char_get_legacy_validity_value`, ctx, validity_override) +} + +libtiledb_query_buffer_var_char_legacy_validity_mode <- function(ctx, buf, validity_override = FALSE) { + .Call(`_tiledb_libtiledb_query_buffer_var_char_legacy_validity_mode`, ctx, buf, validity_override) +} + +libtiledb_query_buffer_var_char_create <- function(vec, nullable, legacy_validity = FALSE) { + .Call(`_tiledb_libtiledb_query_buffer_var_char_create`, vec, nullable, legacy_validity) } libtiledb_query_set_buffer_var_char <- function(query, attr, bufptr) { diff --git a/R/TileDBArray.R b/R/TileDBArray.R index 5ff3b6597c..6205e73189 100644 --- a/R/TileDBArray.R +++ b/R/TileDBArray.R @@ -1,6 +1,6 @@ 
# MIT License # -# Copyright (c) 2017-2022 TileDB Inc. +# Copyright (c) 2017-2023 TileDB Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -836,6 +836,7 @@ setMethod("[", "tiledb_array", if (type %in% c("CHAR", "ASCII", "UTF8")) { spdl::debug("[getBuffer] '{}' allocating 'char' {} rows given budget of {}", name, resrv, memory_budget) buf <- libtiledb_query_buffer_var_char_alloc_direct(resrv, memory_budget, nullable) + buf <- libtiledb_query_buffer_var_char_legacy_validity_mode(ctx@ptr, buf) qryptr <- libtiledb_query_set_buffer_var_char(qryptr, name, buf) buf } else { @@ -1242,6 +1243,7 @@ setMethod("[<-", "tiledb_array", else { if (sparse) "UNORDERED" else "COL_MAJOR" }) buflist <- vector(mode="list", length=nc) + legacy_validity <- libtiledb_query_buffer_var_char_get_legacy_validity_value(ctx@ptr) for (colnam in allnames) { ## when an index column is use this may be unordered to remap to position in 'nm' names @@ -1249,7 +1251,7 @@ setMethod("[<-", "tiledb_array", if (alltypes[k] %in% c("CHAR", "ASCII", "UTF8")) { # variable length txtvec <- as.character(value[[k]]) spdl::debug("[tiledb_array] '[<-' alloc char buffer {} '{}': {}", k, colnam, alltypes[k]) - buflist[[k]] <- libtiledb_query_buffer_var_char_create(txtvec, allnullable[k]) + buflist[[k]] <- libtiledb_query_buffer_var_char_create(txtvec, allnullable[k], legacy_validity) qryptr <- libtiledb_query_set_buffer_var_char(qryptr, colnam, buflist[[k]]) } else { col <- value[[k]] diff --git a/R/Utils.R b/R/Utils.R index a49a8a1bbd..262b29e54e 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2017-2022 TileDB Inc. +# Copyright (c) 2017-2023 TileDB Inc. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -223,3 +223,77 @@ r_to_tiledb_type <- function(x) { .assertArray <- function(arr) { stopifnot(is(arr, "tiledb_sparse") || is(arr, "tiledb_dense") || is(arr, "tiledb_array")) } + +## conversion helper from (and to) legacy validity map for nullable strings: reads 'inuri' with legacy mode set per 'fromlegacy', rewrites data and metadata under 'outdir' (or a tempdir if 'usetmp') with legacy mode set per 'tolegacy' +.legacy_validity <- function(inuri, + outdir = NULL, + fromlegacy = TRUE, + tolegacy = FALSE, + usetmp = FALSE, + verbose = FALSE, + debug = FALSE) { + + stopifnot("'inuri' must be an existing directory" = dir.exists(inuri)) + + if (verbose) + cat("Running with tiledb R package version", format(packageVersion("tiledb")), + "and TileDB Core version", format(tiledb_version(TRUE)), "\n") + + array <- basename(inuri) + if (debug) print(summary(tiledb_array(inuri, strings_as_factors=TRUE)[])) + + newdir <- "" + if (isTRUE(usetmp)) newdir <- tempfile() + if (!is.null(outdir)) newdir <- outdir + if (newdir == "") + stop("If '--usetmp' is not given then '--out OUT' must be given.", call. = FALSE) + + if (!dir.exists(newdir)) dir.create(newdir) + #res <- file.copy(inuri, newdir, recursive=TRUE) + newuri <- file.path(newdir, array) + + arr <- tiledb_array(inuri) + attrlst <- attrs(schema(arr)) + is_nullable_string <- function(x) datatype(x) %in% c("ASCII", "CHAR", "UTF8") && + tiledb_attribute_get_nullable(x) + stringcols <- Filter(is_nullable_string, attrlst) + if (length(stringcols) == 0) { + stop("No nullable string columns in array so nothing to do. Exiting.\n", call. 
= FALSE) + } + dimnames <- sapply(dimensions(domain(schema(arr))), name) + + oldcfg <- cfg <- tiledb_config() + cfg["r.legacy_validity_mode"] <- if (fromlegacy) "true" else "false" + ctx <- tiledb_ctx(cfg) + dat <- tiledb_array(inuri, return_as="data.frame", strings_as_factors=TRUE)[] + if (debug) print(summary(dat)) + + arr <- tiledb_array(inuri) + arr <- tiledb_array_open(arr, "READ") + nmd <- tiledb_num_metadata(arr) + if (nmd > 0) metadatalist <- tiledb_get_all_metadata(arr) + if (debug && nmd > 0) print(metadatalist) # 'metadatalist' is only assigned when the array has metadata + + cfg["r.legacy_validity_mode"] <- if (tolegacy) "true" else "false" + ctx <- tiledb_ctx(cfg) + fromDataFrame(dat, newuri, col_index=dimnames) + + if (nmd > 0) { + arr <- tiledb_array(newuri) + arr <- tiledb_array_open(arr, "WRITE") + for (nm in names(metadatalist)) { + invisible(tiledb_put_metadata(arr, nm, metadatalist[[nm]])) + if (debug) print(metadatalist[[nm]]) + } + invisible(tiledb_array_close(arr)) + } + + chk <- tiledb_array(newuri, strings_as_factors=TRUE)[] + if (debug) { + cat("Written back.\n") + print(summary(chk)) + } + if (verbose) cat("Done.\n") + ctx <- tiledb_ctx(oldcfg) # reset + invisible() +} diff --git a/inst/include/tiledb.h b/inst/include/tiledb.h index ba74e706ff..ca7d425c2b 100644 --- a/inst/include/tiledb.h +++ b/inst/include/tiledb.h @@ -29,6 +29,7 @@ struct var_length_char_buffer { int32_t rows, cols; // dimension from subarray bool nullable; // flag std::vector validity_map; // for nullable vectors + bool legacy_validity; // for legacy validity mode }; typedef struct var_length_char_buffer vlc_buf_t; diff --git a/inst/sampledata/legacy_validity.tar.gz b/inst/sampledata/legacy_validity.tar.gz new file mode 100644 index 0000000000..e8dba9801c Binary files /dev/null and b/inst/sampledata/legacy_validity.tar.gz differ diff --git a/inst/sampledata/legacy_write.tar.gz b/inst/sampledata/legacy_write.tar.gz new file mode 100644 index 0000000000..5b5726476e Binary 
files /dev/null and b/inst/sampledata/legacy_write.tar.gz differ diff 
--git a/inst/scripts/legacy_validity_convert.r b/inst/scripts/legacy_validity_convert.r new file mode 100755 index 0000000000..30c60db505 --- /dev/null +++ b/inst/scripts/legacy_validity_convert.r @@ -0,0 +1,29 @@ +#!/usr/bin/env Rscript + +## load docopt and tiledb +suppressMessages({ + library(docopt) # for command-line argument parsing and help generation + library(tiledb) +}) + +## configuration for docopt +doc <- "Usage: legacy_validity_convert.r [-h] [-v] [-d] [-t] [-o OUTDIR | -u] INDIR + +-o --out OUTDIR write converted array into OUTDIR, required unless a temporary directory is requested [default: ] +-u --usetmp do not require output directory and use a temporary directory [default: FALSE] +-t --tolegacy convert to (instead of from) legacy validity mode +-v --verbose show extra output while processing +-d --debug show extra debug information +-h --help show this help text +" + +opt <- docopt(doc) # docopt parsing +#if (opt$debug) print(opt) + +tiledb:::.legacy_validity(inuri=opt$INDIR, + outdir=opt$out, + fromlegacy=!opt$tolegacy, + tolegacy=opt$tolegacy, + usetmp=opt$usetmp, + verbose=opt$verbose, + debug=opt$debug) diff --git a/inst/tinytest/test_query.R b/inst/tinytest/test_query.R index 91900f263e..3cd9932c29 100644 --- a/inst/tinytest/test_query.R +++ b/inst/tinytest/test_query.R @@ -263,12 +263,13 @@ uri <- tempfile() pp <- palmerpenguins::penguins fromDataFrame(pp, uri, sparse = TRUE, col_index = c("species", "year")) -qc <- parse_query_condition(body_mass_g > 4000 && sex == "male") +qc <- parse_query_condition(body_mass_g > 4000 || island == "Biscoe" || sex == "male") arr <- tiledb_array(uri) qry <- tiledb_query(arr, "DELETE") qry <- tiledb_query_set_condition(qry, qc) tiledb_query_submit(qry) tiledb_query_finalize(qry) -oo <- tiledb_array(uri, return_as="data.frame")[] -expect_equal(nrow(oo), 177) # instead of 344 pre-deletion +oo <- tiledb_array(uri, return_as="data.frame", strings_as_factors=TRUE)[] + +expect_equal(nrow(oo), 
84) # instead of 344 pre-deletion diff --git 
a/inst/tinytest/test_tiledbarray.R b/inst/tinytest/test_tiledbarray.R index 2c94fbb4ba..114e618125 100644 --- a/inst/tinytest/test_tiledbarray.R +++ b/inst/tinytest/test_tiledbarray.R @@ -1472,3 +1472,71 @@ oo <- penguins expect_equal(sum(is.na(oo$sex)), sum(is.na(pp$sex))) expect_equal(sum(oo$sex == "male"), sum(pp$sex == "male")) expect_equal(sum(oo$sex == "female"), sum(pp$sex == "female")) + + +## [214] legacy validity mode +tdir <- tempfile() +tgzfile <- system.file("sampledata", "legacy_validity.tar.gz", package="tiledb") +untar(tarfile = tgzfile, exdir = tdir) +uri <- file.path(tdir, "legacy_validity") +cfg <- tiledb_config() +oldcfg <- cfg +cfg["r.legacy_validity_mode"] <- "true" +ctx <- tiledb_ctx(cfg) +arr <- tiledb_array(uri, strings_as_factors=FALSE, return_as="data.frame")[] +expect_equal(dim(arr)[1], 10) +expect_equal(dim(arr)[2], 3) +expect_equivalent(arr, data.frame(key=1:10, + val1=c(letters[1:4], NA, letters[6:7], NA, letters[9:10]), + val2=LETTERS[1:10])) +expect_equal(arr$val1, c(letters[1:4], NA, letters[6:7], NA, letters[9:10])) +ctx <- tiledb_ctx(oldcfg) # reset config + +## [218] test conversion with metadata +outdir <- tempfile() +dir.create(outdir) +tiledb:::.legacy_validity(uri, outdir, fromlegacy=TRUE) +outuri <- file.path(outdir, "legacy_validity") +chk <- tiledb_array(outuri, return_as="data.frame")[] +expect_equal(dim(arr)[1], 10) +expect_equal(dim(arr)[2], 3) +expect_equivalent(arr, data.frame(key=1:10, + val1=c(letters[1:4], NA, letters[6:7], NA, letters[9:10]), + val2=LETTERS[1:10])) +expect_equal(arr$val1, c(letters[1:4], NA, letters[6:7], NA, letters[9:10])) +arr <- tiledb_array(outuri) +arr <- tiledb_array_open(arr, "READ") +expect_equal(tiledb_num_metadata(arr), 2) # two sets of meta data +mdlst <- tiledb_get_all_metadata(arr) +expect_equal(mdlst[["data"]], c(123L, 456L, 789L)) +expect_equal(mdlst[["text"]], "the quick brown fox") + + +## [225] test conversion: larger penguins example +tdir <- tempfile() +tgzfile <- 
system.file("sampledata", "legacy_write.tar.gz", package="tiledb") +untar(tarfile = tgzfile, exdir = tdir) +inuri <- file.path(tdir, "legacy_write", "penguins") + +outdir <- tempfile() +dir.create(outdir) +cfg["r.legacy_validity_mode"] <- "false" # reset to no conversion to read 'before' +ctx <- tiledb_ctx(cfg) +before <- tiledb_array(inuri, strings_as_factors=TRUE)[] +expect_equal(sum(is.na(before$sex)), 333) + +tiledb:::.legacy_validity(inuri, outdir, fromlegacy=TRUE) +outuri <- file.path(outdir, "penguins") +after <- tiledb_array(outuri, strings_as_factors=TRUE)[] +expect_equal(sum(is.na(after$sex)), 11) +for (col in colnames(before)[-c(1,8)]) # exclude __tiledb_rows and sex + expect_equal(before[[col]], after[[col]]) + +newout <- tempfile() +dir.create(newout) +tiledb:::.legacy_validity(outuri, newout, tolegacy=TRUE) +rvturi <- file.path(newout, "penguins") +revert <- tiledb_array(rvturi, strings_as_factors=TRUE)[] +expect_equal(sum(is.na(revert$sex)), 333) +for (col in colnames(before)[-c(1,8)]) # exclude __tiledb_rows + expect_equal(before[[col]], revert[[col]]) diff --git a/man/tiledb_get_all_metadata.Rd b/man/tiledb_get_all_metadata.Rd index 54f9ce5404..2833cafa66 100644 --- a/man/tiledb_get_all_metadata.Rd +++ b/man/tiledb_get_all_metadata.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/Metadata.R \name{tiledb_get_all_metadata} \alias{tiledb_get_all_metadata} -\title{Return a TileDB Array Metadata object given by key} +\title{Return all TileDB Array Metadata objects as a named list} \usage{ tiledb_get_all_metadata(arr) } @@ -10,8 +10,8 @@ tiledb_get_all_metadata(arr) \item{arr}{A TileDB Array object, or a character URI describing one} } \value{ -A object stored in the Metadata under the given key +A named list with all Metadata objects indexed by the given key } \description{ -Return a TileDB Array Metadata object given by key +Return all TileDB Array Metadata objects as a named list } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 
a8ccf437d8..a3d8e34555 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1639,15 +1639,41 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// libtiledb_query_buffer_var_char_get_legacy_validity_value +bool libtiledb_query_buffer_var_char_get_legacy_validity_value(XPtr ctx, bool validity_override); +RcppExport SEXP _tiledb_libtiledb_query_buffer_var_char_get_legacy_validity_value(SEXP ctxSEXP, SEXP validity_overrideSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< XPtr >::type ctx(ctxSEXP); + Rcpp::traits::input_parameter< bool >::type validity_override(validity_overrideSEXP); + rcpp_result_gen = Rcpp::wrap(libtiledb_query_buffer_var_char_get_legacy_validity_value(ctx, validity_override)); + return rcpp_result_gen; +END_RCPP +} +// libtiledb_query_buffer_var_char_legacy_validity_mode +XPtr libtiledb_query_buffer_var_char_legacy_validity_mode(XPtr ctx, XPtr buf, bool validity_override); +RcppExport SEXP _tiledb_libtiledb_query_buffer_var_char_legacy_validity_mode(SEXP ctxSEXP, SEXP bufSEXP, SEXP validity_overrideSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< XPtr >::type ctx(ctxSEXP); + Rcpp::traits::input_parameter< XPtr >::type buf(bufSEXP); + Rcpp::traits::input_parameter< bool >::type validity_override(validity_overrideSEXP); + rcpp_result_gen = Rcpp::wrap(libtiledb_query_buffer_var_char_legacy_validity_mode(ctx, buf, validity_override)); + return rcpp_result_gen; +END_RCPP +} // libtiledb_query_buffer_var_char_create -XPtr libtiledb_query_buffer_var_char_create(CharacterVector vec, bool nullable); -RcppExport SEXP _tiledb_libtiledb_query_buffer_var_char_create(SEXP vecSEXP, SEXP nullableSEXP) { +XPtr libtiledb_query_buffer_var_char_create(CharacterVector vec, bool nullable, bool legacy_validity); +RcppExport SEXP _tiledb_libtiledb_query_buffer_var_char_create(SEXP vecSEXP, SEXP nullableSEXP, SEXP 
legacy_validitySEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< CharacterVector >::type vec(vecSEXP); Rcpp::traits::input_parameter< bool >::type nullable(nullableSEXP); - rcpp_result_gen = Rcpp::wrap(libtiledb_query_buffer_var_char_create(vec, nullable)); + Rcpp::traits::input_parameter< bool >::type legacy_validity(legacy_validitySEXP); + rcpp_result_gen = Rcpp::wrap(libtiledb_query_buffer_var_char_create(vec, nullable, legacy_validity)); return rcpp_result_gen; END_RCPP } @@ -3312,7 +3338,9 @@ static const R_CallMethodDef CallEntries[] = { {"_tiledb_libtiledb_query_set_subarray", (DL_FUNC) &_tiledb_libtiledb_query_set_subarray, 2}, {"_tiledb_libtiledb_query_set_buffer", (DL_FUNC) &_tiledb_libtiledb_query_set_buffer, 3}, {"_tiledb_libtiledb_query_buffer_var_char_alloc_direct", (DL_FUNC) &_tiledb_libtiledb_query_buffer_var_char_alloc_direct, 4}, - {"_tiledb_libtiledb_query_buffer_var_char_create", (DL_FUNC) &_tiledb_libtiledb_query_buffer_var_char_create, 2}, + {"_tiledb_libtiledb_query_buffer_var_char_get_legacy_validity_value", (DL_FUNC) &_tiledb_libtiledb_query_buffer_var_char_get_legacy_validity_value, 2}, + {"_tiledb_libtiledb_query_buffer_var_char_legacy_validity_mode", (DL_FUNC) &_tiledb_libtiledb_query_buffer_var_char_legacy_validity_mode, 3}, + {"_tiledb_libtiledb_query_buffer_var_char_create", (DL_FUNC) &_tiledb_libtiledb_query_buffer_var_char_create, 3}, {"_tiledb_libtiledb_query_set_buffer_var_char", (DL_FUNC) &_tiledb_libtiledb_query_set_buffer_var_char, 3}, {"_tiledb_libtiledb_query_get_buffer_var_char", (DL_FUNC) &_tiledb_libtiledb_query_get_buffer_var_char, 3}, {"_tiledb_libtiledb_query_get_buffer_var_char_simple", (DL_FUNC) &_tiledb_libtiledb_query_get_buffer_var_char_simple, 1}, diff --git a/src/libtiledb.cpp b/src/libtiledb.cpp index 8ced8a7785..a2ef824adf 100644 --- a/src/libtiledb.cpp +++ b/src/libtiledb.cpp @@ -2691,17 +2691,41 @@ XPtr 
libtiledb_query_buffer_var_char_alloc_direct(double szoffsets, d buf->cols = cols; buf->nullable = nullable; buf->validity_map.resize(static_cast(szdata)); + buf->legacy_validity = false; // for legacy validity mode + return buf; +} + +// [[Rcpp::export]] +bool libtiledb_query_buffer_var_char_get_legacy_validity_value(XPtr ctx, + bool validity_override = false) { + check_xptr_tag(ctx); + XPtr cfg = libtiledb_ctx_config(ctx); + Rcpp::CharacterVector vec = libtiledb_config_get(cfg, "r.legacy_validity_mode"); + bool legacy_validity = std::string("true") == std::string(vec[0]) || validity_override; + return legacy_validity; +} + +// [[Rcpp::export]] +XPtr libtiledb_query_buffer_var_char_legacy_validity_mode(XPtr ctx, + XPtr buf, + bool validity_override = false) { + buf->legacy_validity = libtiledb_query_buffer_var_char_get_legacy_validity_value(ctx, + validity_override); + spdl::debug(tfm::format("[libtiledb_query_buffer_var_char_legacy_validity_mode] " + "legacy_validity set to %s", buf->legacy_validity ? 
"true" : "false")); return buf; } // assigning (for a write) allocates // [[Rcpp::export]] -XPtr libtiledb_query_buffer_var_char_create(CharacterVector vec, bool nullable) { +XPtr libtiledb_query_buffer_var_char_create(CharacterVector vec, bool nullable, + bool legacy_validity = false) { size_t n = vec.size(); XPtr bufptr = make_xptr(new vlc_buf_t); bufptr->offsets.resize(n); bufptr->validity_map.resize(n); bufptr->nullable = nullable; + bufptr->legacy_validity = legacy_validity; bufptr->str = ""; uint64_t cumlen = 0; for (size_t i=0; i libtiledb_query_buffer_var_char_create(CharacterVector vec, bool bufptr->str += s; cumlen += s.length(); if (nullable) { - bufptr->validity_map[i] = vec[i] == NA_STRING; + if (legacy_validity) { + bufptr->validity_map[i] = vec[i] == R_NaString; + } else { + bufptr->validity_map[i] = vec[i] != R_NaString; + } } } bufptr->rows = bufptr->cols = 0; // signal unassigned for the write case @@ -2763,10 +2791,17 @@ CharacterMatrix libtiledb_query_get_buffer_var_char(XPtr bufptr, CharacterMatrix mat(bufptr->rows, bufptr->cols); for (size_t i = 0; i < n; i++) { if (bufptr->nullable) { - if (bufptr->validity_map[i] == 0) - mat[i] = std::string(&bufptr->str[bufptr->offsets[i]], str_sizes[i]); - else - mat[i] = R_NaString; + if (bufptr->legacy_validity) { + if (bufptr->validity_map[i] == 0) + mat[i] = std::string(&bufptr->str[bufptr->offsets[i]], str_sizes[i]); + else + mat[i] = R_NaString; + } else { + if (bufptr->validity_map[i] != 0) + mat[i] = std::string(&bufptr->str[bufptr->offsets[i]], str_sizes[i]); + else + mat[i] = R_NaString; + } } else { mat[i] = std::string(&bufptr->str[bufptr->offsets[i]], str_sizes[i]); } diff --git a/src/shmem.cpp b/src/shmem.cpp index 62011a8cf2..41f684b8c0 100644 --- a/src/shmem.cpp +++ b/src/shmem.cpp @@ -180,7 +180,7 @@ XPtr vlcbuf_from_shmem(std::string datapath, std::string dtype) { buf->rows = buf->offsets.size(); buf->cols = 2; // value not used buf->nullable = false; // default, overridden below if 
validity path used - + buf->legacy_validity = false; // may need to open door to config option here too if (debug) Rcpp::Rcout << datapath << " " << offsetspath << " data:" << buf->str.size() << " offsets:" << buf->offsets.size();