Skip to content

Commit

Permalink
Add support for OR query condition operations (#417)
Browse files Browse the repository at this point in the history
* add OR queries (with test for TileDB core functionality)

* Add empty result set OR example

* documentation update for query condition
  • Loading branch information
eddelbuettel authored Jun 7, 2022
1 parent fdb0dd6 commit 5072545
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 16 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ LinkingTo: Rcpp
Suggests: tinytest, simplermarkdown, curl, bit64, Matrix, palmerpenguins, nycflights13, data.table, tibble
VignetteBuilder: simplermarkdown
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.2
RoxygenNote: 7.2.0
Encoding: UTF-8
27 changes: 14 additions & 13 deletions R/QueryCondition.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2021 TileDB Inc.
# Copyright (c) 2021-2022 TileDB Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -37,8 +37,8 @@ setClass("tiledb_query_condition",
#' @return A 'tiledb_query_condition' object
#' @export
tiledb_query_condition <- function(ctx = tiledb_get_context()) {
stopifnot(`The argument must be a ctx object` = is(ctx, "tiledb_ctx"),
`This function needs TileDB 2.3.0 or newer` = tiledb_version(TRUE) >= "2.3.0")
stopifnot("The argument must be a ctx object" = is(ctx, "tiledb_ctx"),
"This function needs TileDB 2.3.0 or newer" = tiledb_version(TRUE) >= "2.3.0")
ptr <- libtiledb_query_condition(ctx@ptr)
query_condition <- new("tiledb_query_condition", ptr = ptr, init = FALSE)
invisible(query_condition)
Expand All @@ -61,11 +61,11 @@ tiledb_query_condition <- function(ctx = tiledb_get_context()) {
#' @return The initialized 'tiledb_query_condition' object
#' @export
tiledb_query_condition_init <- function(attr, value, dtype, op, qc = tiledb_query_condition()) {
stopifnot(`Argument 'qc' with query condition object required` = is(qc, "tiledb_query_condition"),
`Argument 'attr' must be character` = is.character(attr),
`Argument 'value' must be of length one` = is.vector(value) && all.equal(length(value),1),
`Argument 'dtype' must be character` = is.character(dtype),
`Argument 'op' must be character` = is.character(op))
stopifnot("Argument 'qc' with query condition object required" = is(qc, "tiledb_query_condition"),
"Argument 'attr' must be character" = is.character(attr),
"Argument 'value' must be of length one" = is.vector(value) && all.equal(length(value),1),
"Argument 'dtype' must be character" = is.character(dtype),
"Argument 'op' must be character" = is.character(op))
op <- match.arg(op, c("LT", "LE", "GT", "GE", "EQ", "NE"))
## maybe check dtype too
libtiledb_query_condition_init(qc@ptr, attr, value, dtype, op)
Expand All @@ -75,17 +75,18 @@ tiledb_query_condition_init <- function(attr, value, dtype, op, qc = tiledb_quer

#' Combine two 'tiledb_query_condition' objects
#'
#' Combines two query condition object using a relatiional operator. Note that at present
#' only 'AND' is supported.
#' Combines two query condition objects using a relational operator. Support for operator
#' 'AND' is generally available; the 'OR' operator is available if TileDB 2.10 or newer is
#' used.
#' @param lhs A 'tiledb_query_condition' object on the left-hand side of the relation
#' @param rhs A 'tiledb_query_condition' object on the right-hand side of the relation
#' @param op A character value with the relation; this must be one of 'AND', 'OR' or 'NOT'.
#' @return The combined 'tiledb_query_condition' object
#' @export
tiledb_query_condition_combine <- function(lhs, rhs, op) {
stopifnot(`Argument 'lhs' must be a query condition object` = is(lhs, "tiledb_query_condition"),
`Argument 'rhs' must be a query condition object` = is(rhs, "tiledb_query_condition"),
`Argument 'op' must be a character` = is.character(op))
stopifnot("Argument 'lhs' must be a query condition object" = is(lhs, "tiledb_query_condition"),
"Argument 'rhs' must be a query condition object" = is(rhs, "tiledb_query_condition"),
"Argument 'op' must be a character" = is.character(op))
op <- match.arg(op, c("AND", "OR", "NOT"))
qc <- tiledb_query_condition()
qc@ptr <- libtiledb_query_condition_combine(lhs@ptr, rhs@ptr, op)
Expand Down
53 changes: 53 additions & 0 deletions inst/tinytest/test_querycondition.R
Original file line number Diff line number Diff line change
Expand Up @@ -231,3 +231,56 @@ expect_equal(dim(fullarr), c(50,3))
subarr <- tiledb_array(uri, as.data.frame=TRUE,
query_condition=parse_query_condition(region == "Northeast"))[]
expect_equal(dim(subarr), c(9,3))


## OR query conditions

## Guard: parsing an '||' expression raises an error when TileDB Core has no OR support
## yet; the handler turns that into NA and the remainder of this file is skipped. The
## parsed object goes through 'class()' first so that 'is.na()' is never applied to an
## S4 object (which would warn).
or_probe <- tryCatch(class(qc <- parse_query_condition(x3 == 1 || x4 == 2)),
                     error = function(e) NA)
if (is.na(or_probe)) {
    exit_file("Skipping for lack of 'OR' support in TileDB")
}

## Write the penguins data to a fresh sparse array
uri <- tempfile()
fromDataFrame(penguins, uri, sparse = TRUE)

## A single hand-built condition: year == 2009
qc <- tiledb_query_condition_init("year", 2009, "INT32", "EQ")
arrwithqc <- tiledb_array(uri, as.data.frame = TRUE, query_condition = qc)
res <- arrwithqc[]
expect_equal(NROW(res), 120L)

## 'year >= 2008 AND year <= 2008' is just a roundabout way of writing 'year == 2008'
lhs <- tiledb_query_condition_init("year", 2008, "INT32", "GE")
rhs <- tiledb_query_condition_init("year", 2008, "INT32", "LE")
qc <- tiledb_query_condition_combine(lhs, rhs, "AND")
arrwithqc <- tiledb_array(uri, as.data.frame = TRUE, query_condition = qc)
res <- arrwithqc[]
expect_equal(NROW(res), 114L)

## 'year >= 2008 OR year <= 2008' holds for every row, so the query is unconstrained
lhs <- tiledb_query_condition_init("year", 2008, "INT32", "GE")
rhs <- tiledb_query_condition_init("year", 2008, "INT32", "LE")
qc <- tiledb_query_condition_combine(lhs, rhs, "OR")
arrwithqc <- tiledb_array(uri, as.data.frame = TRUE, query_condition = qc)
res <- arrwithqc[]
expect_equal(NROW(res), 344L)

## Parsed expression with one OR.
## The query condition is written with '||', whereas the reference count on the R side
## uses '|' because it is computed element-wise along the whole column.
qc <- parse_query_condition(species == "Adelie" || species == "Chinstrap")
arr <- tiledb_array(uri, as.data.frame = TRUE, query_condition = qc)
expected <- sum(with(penguins, species == "Adelie" | species == "Chinstrap"))
expect_equal(NROW(arr[]), expected)

## Three terms chained with OR
qc <- parse_query_condition(species == "Adelie" || species == "Chinstrap" || year >= 2009)
arr <- tiledb_array(uri, as.data.frame = TRUE, query_condition = qc)
expected <- sum(with(penguins, species == "Adelie" | species == "Chinstrap" | year >= 2009))
expect_equal(NROW(arr[]), expected)

## Three terms mixing OR and AND
qc <- parse_query_condition(species == "Adelie" || species == "Chinstrap" && year >= 2009)
arr <- tiledb_array(uri, as.data.frame = TRUE, query_condition = qc)
expected <- sum(with(penguins, species == "Adelie" | species == "Chinstrap" & year >= 2009))
expect_equal(NROW(arr[]), expected)

## A condition that matches no rows is fine as well
qc <- parse_query_condition(year < 2008 || year > 2010)
arr <- tiledb_array(uri, as.data.frame = TRUE, query_condition = qc)
expected <- sum(with(penguins, year < 2008 | year > 2010))
expect_equal(NROW(arr[]), expected)
5 changes: 3 additions & 2 deletions man/tiledb_query_condition_combine.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5072545

Please sign in to comment.