Skip to content

Commit

Permalink
Allow returning query data as a data.frame
Browse files Browse the repository at this point in the history
This adds a new slot property that lets callers request that query
results be returned as a data.frame. When it is set, dimension columns
are constructed from the coordinates.
  • Loading branch information
Shelnutt2 committed Jan 30, 2019
1 parent cc9dff7 commit a723ffe
Show file tree
Hide file tree
Showing 8 changed files with 212 additions and 54 deletions.
66 changes: 54 additions & 12 deletions R/DenseArray.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# S4 class definition for `tiledb_dense` objects.
# NOTE(review): this is a diff view, so two `slots =` lines appear below:
# the first is the pre-change slot list, the second is the post-change one.
#' @exportClass tiledb_dense
setClass("tiledb_dense",
# Pre-change slots: ctx (tiledb_ctx), uri (character), ptr (externalptr).
slots = list(ctx = "tiledb_ctx", uri = "character", ptr = "externalptr"))
# Post-change slots: same as above plus the logical `as.data.frame` slot,
# which the read paths consult to decide whether to return a data.frame.
slots = list(ctx = "tiledb_ctx", uri = "character", as.data.frame = "logical", ptr = "externalptr"))

#' Constructs a tiledb_dense object backed by a persisted tiledb array uri
#'
Expand All @@ -9,7 +9,7 @@ setClass("tiledb_dense",
#' @param query_type optionally loads the array in "READ" or "WRITE" only modes.
#' @return tiledb_dense array object
#' @export
tiledb_dense <- function(ctx, uri, query_type = c("READ", "WRITE")) {
tiledb_dense <- function(ctx, uri, query_type = c("READ", "WRITE"), as.data.frame=TRUE) {
query_type = match.arg(query_type)
if (missing(ctx) || !is(ctx, "tiledb_ctx")) {
stop("argument ctx must be a tiledb_ctx")
Expand All @@ -24,7 +24,7 @@ tiledb_dense <- function(ctx, uri, query_type = c("READ", "WRITE")) {
stop("array URI must be a dense array")
}
array_xptr <- libtiledb_array_close(array_xptr)
new("tiledb_dense", ctx = ctx, uri = uri, ptr = array_xptr)
new("tiledb_dense", ctx = ctx, uri = uri, as.data.frame = as.data.frame, ptr = array_xptr)
}

setMethod("show", "tiledb_dense",
Expand Down Expand Up @@ -132,25 +132,42 @@ subarray_dim <- function(sub) {
return(sub_dim)
}

# Allocate the R buffers that a read query on a dense array will fill.
#
# NOTE(review): this is a diff view, so pre- and post-change lines appear
# together (two signatures, two `if` headers, two final list assignments).
# The first line of each pair is the old version, the second is the new one.
#
# Arguments (post-change signature):
#   array             object exposing the `as.data.frame` and `ptr` slots
#   sch               a tiledb_array_schema (checked with stopifnot)
#   dom               a tiledb_domain (checked with stopifnot)
#   sub               subarray passed to subarray_dim() and to
#                     libtiledb_array_max_buffer_elements()
#   filter_attributes optional collection of attribute names; when non-empty
#                     only attributes whose name is in it get a buffer
#
# Returns a named list with one buffer per selected attribute, plus a
# leading "coords" buffer when array@as.data.frame is TRUE.
attribute_buffers <- function(sch, dom, sub, filter_attributes=list()) {
attribute_buffers <- function(array, sch, dom, sub, filter_attributes=list()) {
stopifnot(is(sch, "tiledb_array_schema"))
stopifnot(is(dom, "tiledb_domain"))
# Extent of the subarray per dimension, total cell count, and whether the
# selection is a single cell (every extent equal to 1).
sub_dim <- subarray_dim(sub)
ncells <- prod(sub_dim)
is_scalar <- all(sub_dim == 1L)

# Result list, filled in below and returned.
attributes <- list()

# first alloc coordinate buffer if we are returning a data.frame
if(array@as.data.frame) {
# Size the coordinate buffer from the library's maximum-elements estimate.
ncells_coords <- libtiledb_array_max_buffer_elements(array@ptr, sub, libtiledb_coords())
# Integer storage when is.integral(dom) is TRUE, double storage otherwise.
if (is.integral(dom)) {
attributes[["coords"]] <- integer(length = ncells_coords)
} else {
attributes[["coords"]] <- numeric(length = ncells_coords)
}
}

# Schema attributes, optionally narrowed to the requested names.
attrs <- tiledb::attrs(sch)
if (length(filter_attributes) > 0) {
attrs <- Filter(function(a) is.element(name(a), filter_attributes), attrs)
}
for(attr in attrs) {
aname <- tiledb::name(attr)
# R storage mode corresponding to the attribute's tiledb datatype.
type <- tiledb_datatype_R_type(tiledb::datatype(attr))
# If we are going to get it as a dataframe we need to use max buffer elements to get proper buffer size
if(array@as.data.frame) {
# Overwrites ncells with the per-attribute maximum for this iteration.
ncells <- libtiledb_array_max_buffer_elements(array@ptr, sub, aname)
}
buff <- vector(mode = type, length = ncells)
if (!is_scalar) {
# If its not scalar and we are not getting it as a data.frame set the dimension attribute
if (!is_scalar && !array@as.data.frame) {
# Shape the flat buffer like the subarray.
attr(buff, "dim") <- sub_dim
}
attributes[[tiledb::name(attr)]] <- buff
attributes[[aname]] <- buff
}
return(attributes)
}
Expand All @@ -169,7 +186,7 @@ setMethod("[", "tiledb_dense",
out <- tryCatch(
{
subarray <- domain_subarray(dom, index = index)
buffers <- attribute_buffers(schema, dom, subarray)
buffers <- attribute_buffers(x, schema, dom, subarray)
qry <- libtiledb_query(ctx@ptr, x@ptr, "READ")
qry <- libtiledb_query_set_layout(qry, "COL_MAJOR")
if (is.integral(dom)) {
Expand All @@ -179,7 +196,12 @@ setMethod("[", "tiledb_dense",
}
attr_names <- names(buffers)
for (idx in seq_along(buffers)) {
qry <- libtiledb_query_set_buffer(qry, attr_names[[idx]], buffers[[idx]])
aname <- attr_names[[idx]]
if (aname == "coords") {
qry <- libtiledb_query_set_buffer(qry, libtiledb_coords(), buffers[[idx]])
} else {
qry <- libtiledb_query_set_buffer(qry, aname, buffers[[idx]])
}
}
qry <- libtiledb_query_submit(qry)
if (libtiledb_query_status(qry) != "COMPLETE") {
Expand All @@ -191,11 +213,31 @@ setMethod("[", "tiledb_dense",
buffers[[i]] <- drop(buffers[[i]])
}
}
# if there is only one buffer, don't return a list of attribute buffers
if (length(buffers) == 1L) {
return(buffers[[1L]])

# get the actual number of results, instead of realloc
# just modify the vector length so there is no additional copy
for (idx in seq_along(attr_names)) {
old_buffer <- buffers[[idx]]
aname <- attr_names[[idx]]
if (aname == "coords") {
ncells <- libtiledb_query_result_buffer_elements(qry, libtiledb_coords())
} else {
ncells <- libtiledb_query_result_buffer_elements(qry, aname)
}
if (ncells < length(old_buffer)) {
buffers[[idx]] <- old_buffer[1:ncells]
}
}

if (x@as.data.frame) {
return(as_data_frame(dom, buffers))
} else {
# if there is only one buffer, don't return a list of attribute buffers
if (length(buffers) == 1L) {
return(buffers[[1L]])
}
return(buffers)
}
return(buffers)
},
finally = {
libtiledb_array_close(x@ptr)
Expand Down
48 changes: 38 additions & 10 deletions R/SparseArray.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# S4 class definition for `tiledb_sparse` objects.
# NOTE(review): this is a diff view, so two `slots =` lines appear below:
# the first is the pre-change slot list, the second is the post-change one.
#' @exportClass "tiledb_sparse"
setClass("tiledb_sparse",
# Pre-change slots: ctx (tiledb_ctx), uri (character), ptr (externalptr).
slots = list(ctx = "tiledb_ctx", uri = "character", ptr = "externalptr"))
# Post-change slots: same as above plus the logical `as.data.frame` slot,
# which the read paths consult to decide whether to return a data.frame.
slots = list(ctx = "tiledb_ctx", uri = "character", as.data.frame = "logical", ptr = "externalptr"))

#' Constructs a tiledb_sparse object backed by a persisted tiledb array uri
#'
Expand All @@ -11,7 +11,7 @@ setClass("tiledb_sparse",
#' @param query_type optionally loads the array in "READ" or "WRITE" only modes.
#' @return tiledb_sparse array object
#' @export
tiledb_sparse <- function(ctx, uri, query_type = c("READ", "WRITE")) {
tiledb_sparse <- function(ctx, uri, query_type = c("READ", "WRITE"), as.data.frame=TRUE) {
query_type = match.arg(query_type)
if (missing(ctx) || !is(ctx, "tiledb_ctx")) {
stop("argument ctx must be a tiledb_ctx")
Expand All @@ -26,7 +26,7 @@ tiledb_sparse <- function(ctx, uri, query_type = c("READ", "WRITE")) {
stop("array URI must be a sparse array")
}
array_xptr <- libtiledb_array_close(array_xptr)
new("tiledb_sparse", ctx = ctx, uri = uri, ptr = array_xptr)
new("tiledb_sparse", ctx = ctx, uri = uri, as.data.frame = as.data.frame, ptr = array_xptr)
}


Expand Down Expand Up @@ -64,6 +64,26 @@ sparse_attribute_buffers <- function(array, sch, dom, sub, filter_attributes=lis
return(attributes)
}

#' Construct a data.frame from query results
#'
#' If `data` contains a `"coords"` buffer, it is treated as a flat vector in
#' which the coordinates of each cell are stored consecutively (one value per
#' dimension). It is split into one column per dimension, named after the
#' dimension, and the raw `"coords"` entry is dropped. Dimension columns are
#' appended after the existing entries of `data`.
#'
#' @param dom A `tiledb_domain` object; supplies the number of dimensions and
#'   their names.
#' @param data A named list of result buffers, optionally including `"coords"`.
#' @return A data.frame built from the buffers in `data`, with `"coords"`
#'   replaced by one column per dimension when it was present.
as_data_frame <- function(dom, data) {
  if (!is(dom, "tiledb_domain")) {
    stop("as_data_frame must be called with a tiledb_domain object")
  }
  coords <- data[["coords"]]
  # If coordinates are present convert to columns in the data.frame
  if (!is.null(coords)) {
    ndim <- tiledb_ndim(dom)
    dims <- dimensions(dom)
    # 1-based dimension index of every value in the interleaved buffer.
    # Using a mask instead of seq(i, length(coords), ndim) keeps this working
    # for an empty buffer, where seq() fails with "wrong sign in 'by'".
    dim_of_value <- (seq_along(coords) - 1L) %% ndim + 1L
    dim_cols <- lapply(seq_len(ndim), function(i) coords[dim_of_value == i])
    names(dim_cols) <- vapply(
      seq_len(ndim),
      function(i) name(dims[[i]]),
      character(1)
    )
    # Append (rather than overwrite by name) to keep the original column
    # order: existing buffers first, then one column per dimension.
    data <- c(data, dim_cols)
    data[["coords"]] <- NULL
  }
  as.data.frame(data)
}

setMethod("[", "tiledb_sparse",
function(x, i, j, ..., drop = FALSE) {
index <- nd_index_from_syscall(sys.call(), parent.frame())
Expand Down Expand Up @@ -93,7 +113,7 @@ setMethod("[", "tiledb_sparse",
if (aname == "coords") {
qry <- libtiledb_query_set_buffer(qry, libtiledb_coords(), buffers[[idx]])
} else {
qry <- libtiledb_query_set_buffer(qry, aname, buffers[[idx]])
qry <- libtiledb_query_set_buffer(qry, aname, buffers[[idx]])
}
}
qry <- libtiledb_query_submit(qry)
Expand All @@ -114,11 +134,15 @@ setMethod("[", "tiledb_sparse",
buffers[[idx]] <- old_buffer[1:ncells]
}
}
if (x@as.data.frame) {
return(as_data_frame(dom, buffers))
} else {
# if there is only one buffer, don't return a list of attribute buffers
if (length(buffers) == 1L) {
return(buffers[[1L]])
}
return(buffers)
return(buffers)
}
},
finally = {
libtiledb_array_close(x@ptr)
Expand Down Expand Up @@ -265,7 +289,7 @@ tiledb_subarray <- function(A, subarray_vector, attrs=c()) {
if (is.sparse(A)) {
buffers <- sparse_attribute_buffers(A, schema, dom, subarray_vector, attrs)
} else {
buffers <- attribute_buffers(schema, dom, subarray_vector, attrs)
buffers <- attribute_buffers(A, schema, dom, subarray_vector, attrs)
}
qry <- libtiledb_query(ctx@ptr, A@ptr, "READ")
qry <- libtiledb_query_set_layout(qry, "COL_MAJOR")
Expand Down Expand Up @@ -297,11 +321,15 @@ tiledb_subarray <- function(A, subarray_vector, attrs=c()) {
buffers[[idx]] <- old_buffer[1:ncells]
}
}
# if there is only one buffer, don't return a list of attribute buffers
if (length(buffers) == 1L) {
return(buffers[[1L]])
if (A@as.data.frame) {
return(as_data_frame(dom, buffers))
} else {
# if there is only one buffer, don't return a list of attribute buffers
if (length(buffers) == 1L) {
return(buffers[[1L]])
}
return(buffers)
}
return(buffers)
},
finally = {
libtiledb_array_close(A@ptr)
Expand Down
11 changes: 11 additions & 0 deletions man/as_data_frame.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/tiledb_dense.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/tiledb_sparse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/tiledb_subarray.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit a723ffe

Please sign in to comment.