Skip to content

Commit

Permalink
sandbox fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 27, 2024
1 parent ced7ac4 commit a1e5b1c
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 39 deletions.
4 changes: 4 additions & 0 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,10 @@ def read(
timestamp=handle.timestamp and (0, handle.timestamp),
)

# XXX: document this, and make it conditional on core >= 2.27 with the new-shape feature flag enabled
if coords == ():
coords = tuple(slice(0, e - 1) for e in data_shape)

self._set_reader_coords(sr, coords)

arrow_tables = []
Expand Down
16 changes: 6 additions & 10 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1854,14 +1854,12 @@ def _write_matrix_to_denseNDArray(
# * Compute chunk sizes for both and take the minimum.
chunk_size_using_nnz = int(math.ceil(tiledb_create_options.goal_chunk_nnz / ncol))

# try:
## not scipy csr/csc
# itemsize = matrix.itemsize
# except AttributeError:
## scipy csr/csc
# except AttributeError:
# XXX TEMP
itemsize = 8
try:
# not scipy csr/csc
itemsize = matrix.itemsize
except AttributeError:
# scipy csr/csc
itemsize = matrix.data.itemsize

total_nbytes = matrix.size * itemsize
nbytes_num_chunks = math.ceil(
Expand Down Expand Up @@ -1911,8 +1909,6 @@ def _write_matrix_to_denseNDArray(
else:
tensor = pa.Tensor.from_numpy(chunk.toarray())
if matrix.ndim == 2:
### XXX BUG soma_ndarray.write((slice(i, i2), slice(None)), tensor)
### soma_ndarray.write((slice(i, i2), slice(0, ncol - 1)), tensor)
soma_ndarray.write((slice(i, i2), slice(0, ncol)), tensor)
else:
soma_ndarray.write((slice(i, i2),), tensor)
Expand Down
10 changes: 5 additions & 5 deletions apis/r/R/Init.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
# Check major and minor but not micro: sc-50464
rpkg_lib_version <- paste(rpkg_lib[["major"]], rpkg_lib[["minor"]], sep = ".")
soma_lib_version <- libtiledbsoma_version(compact = TRUE, major_minor_only = TRUE)
# if (rpkg_lib_version != soma_lib_version) {
# msg <- sprintf("TileDB Core version %s used by TileDB-R package, but TileDB-SOMA uses %s",
# sQuote(rpkg_lib_version), sQuote(soma_lib_version))
# packageStartupMessage(msg)
# }
if (rpkg_lib_version != soma_lib_version) {
msg <- sprintf("TileDB Core version %s used by TileDB-R package, but TileDB-SOMA uses %s",
sQuote(rpkg_lib_version), sQuote(soma_lib_version))
packageStartupMessage(msg)
}

# This is temporary for https://github.com/single-cell-data/TileDB-SOMA/issues/2407
# It will be removed once 2407 is complete.
Expand Down
4 changes: 2 additions & 2 deletions apis/r/R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ createSchemaFromArrow <- function(uri, nasp, nadimap, nadimsp, sparse, datatype,
invisible(.Call(`_tiledbsoma_createSchemaFromArrow`, uri, nasp, nadimap, nadimsp, sparse, datatype, pclst, ctxxp, tsvec))
}

writeArrayFromArrow <- function(uri, naap, nasp, ctxxp, arraytype = "", config = NULL, tsvec = NULL) {
invisible(.Call(`_tiledbsoma_writeArrayFromArrow`, uri, naap, nasp, ctxxp, arraytype, config, tsvec))
writeArrayFromArrow <- function(uri, naap, nasp, coords_list, ctxxp, arraytype = "", config = NULL, tsvec = NULL) {
invisible(.Call(`_tiledbsoma_writeArrayFromArrow`, uri, naap, nasp, coords_list, ctxxp, arraytype, config, tsvec))
}

#' @noRd
Expand Down
1 change: 1 addition & 0 deletions apis/r/R/SOMADenseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ SOMADenseNDArray <- R6::R6Class(
#arr[] <- values
writeArrayFromArrow(
uri = self$uri,
coords,
naap = naap,
nasp = nasp,
ctxxp = private$.soma_context,
Expand Down
9 changes: 5 additions & 4 deletions apis/r/src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,19 @@ BEGIN_RCPP
END_RCPP
}
// writeArrayFromArrow
void writeArrayFromArrow(const std::string& uri, naxpArray naap, naxpSchema nasp, Rcpp::XPtr<somactx_wrap_t> ctxxp, const std::string arraytype, Rcpp::Nullable<Rcpp::CharacterVector> config, Rcpp::Nullable<Rcpp::DatetimeVector> tsvec);
RcppExport SEXP _tiledbsoma_writeArrayFromArrow(SEXP uriSEXP, SEXP naapSEXP, SEXP naspSEXP, SEXP ctxxpSEXP, SEXP arraytypeSEXP, SEXP configSEXP, SEXP tsvecSEXP) {
void writeArrayFromArrow(const std::string& uri, naxpArray naap, naxpSchema nasp, Rcpp::List coords_list, Rcpp::XPtr<somactx_wrap_t> ctxxp, const std::string arraytype, Rcpp::Nullable<Rcpp::CharacterVector> config, Rcpp::Nullable<Rcpp::DatetimeVector> tsvec);
RcppExport SEXP _tiledbsoma_writeArrayFromArrow(SEXP uriSEXP, SEXP naapSEXP, SEXP naspSEXP, SEXP coords_listSEXP, SEXP ctxxpSEXP, SEXP arraytypeSEXP, SEXP configSEXP, SEXP tsvecSEXP) {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< naxpArray >::type naap(naapSEXP);
Rcpp::traits::input_parameter< naxpSchema >::type nasp(naspSEXP);
Rcpp::traits::input_parameter< Rcpp::List >::type coords_list(coords_listSEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
Rcpp::traits::input_parameter< const std::string >::type arraytype(arraytypeSEXP);
Rcpp::traits::input_parameter< Rcpp::Nullable<Rcpp::CharacterVector> >::type config(configSEXP);
Rcpp::traits::input_parameter< Rcpp::Nullable<Rcpp::DatetimeVector> >::type tsvec(tsvecSEXP);
writeArrayFromArrow(uri, naap, nasp, ctxxp, arraytype, config, tsvec);
writeArrayFromArrow(uri, naap, nasp, coords_list, ctxxp, arraytype, config, tsvec);
return R_NilValue;
END_RCPP
}
Expand Down Expand Up @@ -775,7 +776,7 @@ END_RCPP
static const R_CallMethodDef CallEntries[] = {
{"_tiledbsoma_createSOMAContext", (DL_FUNC) &_tiledbsoma_createSOMAContext, 1},
{"_tiledbsoma_createSchemaFromArrow", (DL_FUNC) &_tiledbsoma_createSchemaFromArrow, 9},
{"_tiledbsoma_writeArrayFromArrow", (DL_FUNC) &_tiledbsoma_writeArrayFromArrow, 7},
{"_tiledbsoma_writeArrayFromArrow", (DL_FUNC) &_tiledbsoma_writeArrayFromArrow, 8},
{"_tiledbsoma_c_group_create", (DL_FUNC) &_tiledbsoma_c_group_create, 4},
{"_tiledbsoma_c_group_open", (DL_FUNC) &_tiledbsoma_c_group_open, 4},
{"_tiledbsoma_c_group_member_count", (DL_FUNC) &_tiledbsoma_c_group_member_count, 1},
Expand Down
58 changes: 40 additions & 18 deletions apis/r/src/arrow.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include <Rcpp/Lighter> // for R interface to C++
#include <Rcpp/Lighter> // for R interface to C++

#include <nanoarrow/r.h> // for C/C++ interface to Arrow (via header exported from the R package)
#include <RcppInt64> // for fromInteger64
Expand Down Expand Up @@ -175,6 +175,7 @@ void writeArrayFromArrow(
const std::string& uri,
naxpArray naap,
naxpSchema nasp,
Rcpp::List coords_list,
Rcpp::XPtr<somactx_wrap_t> ctxxp,
const std::string arraytype = "",
Rcpp::Nullable<Rcpp::CharacterVector> config = R_NilValue,
Expand Down Expand Up @@ -202,23 +203,6 @@ void writeArrayFromArrow(
// shared pointer to TileDB Context from SOMAContext -- not needed here
// std::shared_ptr<tiledb::Context> ctx = sctx->tiledb_ctx();

// // if we have a config, use it
// std::shared_ptr<tdbs::SOMAContext> somactx;
// if (config.isNotNull()) {
// std::map<std::string, std::string> smap;
// auto config_vec = config.as();
// auto config_names =
// Rcpp::as<Rcpp::CharacterVector>(config_vec.names()); for (auto &name
// : config_names) {
// std::string param = Rcpp::as<std::string>(name);
// std::string value = Rcpp::as<std::string>(config_vec[param]);
// smap[param] = value;
// }
// somactx = std::make_shared<tdbs::SOMAContext>(smap);
// } else {
// somactx = std::make_shared<tdbs::SOMAContext>();
// }

// optional timestamp range
std::optional<tdbs::TimestampRange> tsrng = makeTimestampRange(tsvec);

Expand Down Expand Up @@ -258,6 +242,44 @@ void writeArrayFromArrow(
}

arrup.get()->set_array_data(std::move(schema), std::move(array));

// For dense arrays, we need to specify the subrange for the write.
// If we don't, the core domain is used.
//
// * With the new shape feature (core 2.27, tiledbsoma 1.15) the
// core domain (soma maxdomain) is huge while the core current domain
// (soma domain) is small.
//
// * It's important to be able to write subarrays. E.g. a dense 2D
// array is 1,000,000 x 60,000 but we want to write the first 3000
// rows.

// Dense writes only: restrict the write to the bounding range of the
// caller-supplied coordinates, one [lo, hi] range per dimension.
if (arraytype == "SOMADenseNDArray") {
    const auto dim_names = arrup->dimension_names();

    // Compare in the signed R length type so the check does not mix
    // signed and unsigned operands.
    const R_xlen_t ndim = static_cast<R_xlen_t>(dim_names.size());
    const R_xlen_t ncoord = coords_list.length();

    if (ndim != ncoord) {
        // This is an internal error, not a user error, since this should
        // have been a stopifnot at the R level, caught already before we
        // got here.
        Rcpp::stop(tfm::format(
            "dense array write: internal error: ndim %d != ncoord %d",
            ndim,
            ncoord));
    }

    for (R_xlen_t i = 0; i < ncoord; ++i) {
        const auto& dim_name = dim_names[static_cast<size_t>(i)];

        // NOTE(review): coordinates are read as 32-bit int here; values
        // outside the int range (or integer64 inputs) would not survive
        // this conversion -- confirm the R caller only passes integers.
        const std::vector<int> slot_values = Rcpp::as<std::vector<int>>(
            coords_list[i]);

        // An empty slot has no min/max: dereferencing the end iterator
        // returned by min/max_element would be undefined behaviour.
        if (slot_values.empty()) {
            Rcpp::stop(tfm::format(
                "dense array write: internal error: no coordinates for dim "
                "%s",
                dim_name));
        }

        // Single pass over the coordinates to find both bounds.
        const auto lo_hi_iters = std::minmax_element(
            slot_values.begin(), slot_values.end());
        const int lo = *lo_hi_iters.first;
        const int hi = *lo_hi_iters.second;

        spdl::debug(
            "dense array write: dim {} set range lo {} hi {}",
            dim_name,
            lo,
            hi);

        // Widen to int64_t and hand the single [lo, hi] range for this
        // dimension to the array writer.
        std::pair<int64_t, int64_t> lo_hi(int64_t{lo}, int64_t{hi});
        std::vector<std::pair<int64_t, int64_t>> range({lo_hi});
        arrup.get()->set_dim_ranges(dim_name, range);
    }
}

arrup.get()->write();
arrup.get()->close();
}

0 comments on commit a1e5b1c

Please sign in to comment.