Skip to content

Commit

Permalink
Support explicit timestamps in fromDataFrame convenience helper (#719)
Browse files Browse the repository at this point in the history
* Support explicit timestamps in fromDataFrame convenience helper

* Condition new tests on 2.15.0 or later
  • Loading branch information
eddelbuettel authored Jun 11, 2024
1 parent b4a868b commit 6e05e06
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 5 deletions.
16 changes: 13 additions & 3 deletions R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@
##' @param offsets_filters A character vector with filters for offsets, default is \code{ZSTD}.
##' @param validity_filters A character vector with filters for validity values, default is \code{RLE}.
##' @param debug Logical flag to select additional output.
##' @param timestamps Vector with up to two \code{POSIXct} variables denoting open intervals; default
##' is length zero where start and end are set (implicitly) to current time; in case of one value it
##' is used as the interval end, and in case of two values they are taken as start and end. This
##' applies to write and append modes only and not to schema creation.
##' @return Null, invisibly.
##' @examples
##' \dontshow{ctx <- tiledb_ctx(limitTileDBCores())}
Expand All @@ -83,10 +87,13 @@ fromDataFrame <- function(obj, uri, col_index=NULL, sparse=TRUE, allows_dups=spa
capacity = 10000L, tile_domain = NULL, tile_extent = NULL,
mode = c("ingest", "schema_only", "append"), filter_list = NULL,
coords_filters = "ZSTD", offsets_filters = "ZSTD",
validity_filters = "RLE", debug = FALSE) {
validity_filters = "RLE", debug = FALSE,
timestamps = as.POSIXct(double(), origin="1970-01-01")) {

stopifnot("Argument 'obj' should be a 'data.frame' (or a related object)" = inherits(obj, "data.frame"),
"Argument 'uri' should be a character variable" = is.character(uri))
"Argument 'uri' should be a character variable" = is.character(uri),
"Argument 'timestamps' must be a POSIXct vector" = inherits(timestamps, "POSIXct"),
"Argument 'timestamps' must be 0, 1 or 2 values" = length(timestamps) %in% c(0L, 1L, 2L))
if (!is.null(col_index) && is.character(col_index)) col_index <- match(col_index, colnames(obj))
dims <- dim(obj)
mode <- match.arg(mode)
Expand Down Expand Up @@ -276,7 +283,10 @@ fromDataFrame <- function(obj, uri, col_index=NULL, sparse=TRUE, allows_dups=spa
tiledb_array_create(uri, schema)

if (mode != "schema_only") {
df <- tiledb_array(uri, query_type = "WRITE")
df <- switch(length(timestamps) + 1, # switch takes ints starting at one
tiledb_array(uri, query_type = "WRITE"),
tiledb_array(uri, query_type = "WRITE", timestamp_end=timestamps[1]),
tiledb_array(uri, query_type = "WRITE", timestamp_start=timestamps[1], timestamp_end=timestamps[2]))
## when setting an index we likely want a 'sparse write to dense array'
if (!is.null(col_index) && !sparse)
query_layout(df) <- "UNORDERED"
Expand Down
25 changes: 25 additions & 0 deletions inst/tinytest/test_dataframe.R
Original file line number Diff line number Diff line change
Expand Up @@ -340,3 +340,28 @@ fromDataFrame(D, uri, col_index=1)
arr <- tiledb_array(uri, return_as="data.frame")
res <- arr[]
expect_equivalent(res, D)


## fromDataFrame with timestamps
##
## Exercises the three accepted lengths of the 'timestamps' argument (zero, one
## and two values): the same data.frame is written each time, and the array is
## then re-opened with different 'timestamp_start' / 'timestamp_end' windows to
## check which windows see the ten rows.
if (tiledb_version(TRUE) < "2.15.0") exit_file("Remaining tests require TileDB 2.15.0 or later")
D <- data.frame(key=(1:10)*10, value=letters[1:10])
uri <- tempfile()
## helper: POSIXct value the given number of seconds after the epoch
epoch_plus <- function(secs) as.POSIXct(secs, origin="1970-01-01")
## helper: number of rows returned when opening 'uri' with the given extra arguments
rows_seen <- function(...) nrow(tiledb_array(uri, return_as="data.frame", ...)[])

fromDataFrame(D, uri)                   # no timestamps
## Take the reference time only after the write has returned: capturing it
## beforehand lets a slow write (more than one second) land inside the
## 'now + 1' window below and makes the zero-row expectation fail spuriously.
now <- Sys.time()
expect_equal(rows_seen(), 10)
expect_equal(rows_seen(timestamp_end=epoch_plus(100)), 0)
expect_equal(rows_seen(timestamp_start=now + 1), 0)
unlink(uri, recursive=TRUE)

fromDataFrame(D, uri, timestamps=epoch_plus(100)) # one value: used as end timestamp
expect_equal(rows_seen(), 10)
expect_equal(rows_seen(timestamp_end=epoch_plus(50)), 0)
expect_equal(rows_seen(timestamp_start=epoch_plus(50)), 10)
expect_equal(rows_seen(timestamp_start=epoch_plus(150)), 0)
unlink(uri, recursive=TRUE)

fromDataFrame(D, uri, timestamps=c(epoch_plus(100), epoch_plus(100))) # two values: start and end
expect_equal(rows_seen(), 10)
expect_equal(rows_seen(timestamp_end=epoch_plus(50)), 0)
expect_equal(rows_seen(timestamp_start=epoch_plus(50)), 10)
expect_equal(rows_seen(timestamp_start=epoch_plus(150)), 0)
unlink(uri, recursive=TRUE)
2 changes: 1 addition & 1 deletion inst/tinytest/test_timetravel.R
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ invisible( tiledb_array_create(tmp, schema) )
I <- c(1, 2, 2)
J <- c(1, 4, 3)
data <- c(1L, 2L, 3L)
now1 <- as.POSIXct(60, tz="UTC") # the epoch plus one minute
now1 <- as.POSIXct(60, tz="UTC", origin="1970-01-01") # the epoch plus one minute
A <- tiledb_array(uri = tmp, timestamp_start=now1, timestamp_end=now1)
A[I, J] <- data

Expand Down
8 changes: 7 additions & 1 deletion man/fromDataFrame.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 6e05e06

Please sign in to comment.