Skip to content

Commit

Permalink
Merge pull request #54 from Public-Health-Scotland/feature/multiple_f…
Browse files Browse the repository at this point in the history
…iltering

Enable multiple filters to be supplied by using `get_resource_sql`
  • Loading branch information
Moohan authored Jan 21, 2025
2 parents e383229 + 80d4b70 commit bc9a12a
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 26 deletions.
35 changes: 33 additions & 2 deletions R/get_resource.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,43 @@ get_resource <- function(res_id,
# check res_id
check_res_id(res_id)

parsed_col_select <- parse_col_select(col_select)
parsed_row_filters <- parse_row_filters(row_filters)

if (is.logical(parsed_row_filters) && !parsed_row_filters) {
if (!is.null(row_filters)) {
col_select_sql <- dplyr::if_else(
is.null(col_select),
"*",
paste0("\"", paste(col_select, collapse = "\",\""), "\"")
)

row_filters_sql <- paste(
purrr::imap_chr(
row_filters,
function(value, col) paste0("\"", col, "\"=\'", value, "\'", collapse = " OR ")
),
collapse = ") AND ("
)

sql <- sprintf(
"SELECT %s FROM \"%s\" WHERE (%s) %s",
col_select_sql,
res_id,
row_filters_sql,
dplyr::if_else(is.null(rows), "", paste("LIMIT", rows))
)

return(get_resource_sql(sql))
}
}

# define query
query <- list(
id = res_id,
limit = rows,
q = parse_row_filters(row_filters),
fields = parse_col_select(col_select)
q = parsed_row_filters,
fields = parsed_col_select
)

# if dump should be used, use it
Expand Down
45 changes: 24 additions & 21 deletions R/parse_row_filters.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' @return a json as a character string
#' @keywords internal
#' @noRd
parse_row_filters <- function(row_filters) {
parse_row_filters <- function(row_filters, call = rlang::caller_env()) {
# exit function if no filters
if (is.null(row_filters)) {
return(NULL)
Expand All @@ -14,42 +14,45 @@ parse_row_filters <- function(row_filters) {
if (class(row_filters) != "list" && !is.character(row_filters) && !is.numeric(row_filters)) {
cli::cli_abort(
"{.arg row_filters} must be a named {.cls list} or a named
{.cls character} or {.cls numeric} vector, not a {.cls {class(row_filters)}}."
{.cls character} or {.cls numeric} vector, not a {.cls {class(row_filters)}}.",
call = call
)
}

# Ensure it's elements are named
if (is.null(names(row_filters)) || any(names(row_filters) == "")) {
cli::cli_abort("{.arg row_filters} should be a named {.cls list}.")
cli::cli_abort(
"{.arg row_filters} should be a named {.cls list}.",
call = call
)
}

# check if any filters in list have length > 1
too_many <- purrr::map_lgl(row_filters, ~ length(.x) > 1)

if (any(too_many)) {
cli::cli_abort(c(
"Invalid input for {.arg row_filters}",
i = "The {.val {names(row_filters)[which(too_many)]}} filter{?s} {?has/have} too many values.",
x = "The {.arg row_filters} list must only contain vectors of length 1."
))
}

# check if any items in the list/vector are duplicates
duplicates <- duplicated(names(row_filters))
if (any(duplicates)) {
cli::cli_abort(c(
"Invalid input for {.arg row_filters}",
x = "The {.val {names(row_filters)[which(duplicates)]}} filter{?s} {?is/are} duplicated.",
i = "Only one filter per field is currently supported by {.fun get_resource}."
))
cli::cli_abort(
c(
"Invalid input for {.arg row_filters}",
x = "The {.val {names(row_filters)[which(duplicates)]}} filter{?s} {?is/are} duplicated.",
i = "Only one filter per field is currently supported by {.fun get_resource}."
),
call = call
)
}

# check if any filters in list have length > 1
multiple <- purrr::map_lgl(row_filters, ~ length(.x) > 1)

if (any(multiple)) {
# Default to using SQL
return(FALSE)
}

filter_body <- paste0(
'"', names(row_filters), '":"', row_filters, '"',
collapse = ","
)

return(
paste0("{", filter_body, "}")
)
return(paste0("{", filter_body, "}"))
}
16 changes: 16 additions & 0 deletions tests/testthat/test-get_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,19 @@ test_that("get_dataset filters error properly", {
regexp = "API error"
)
})

test_that("get_dataset works with multiple filters", {
n_resources <- 3
columns <- c("Date", "PracticeCode", "HSCP", "AllAges")

data <- get_dataset("gp-practice-populations",
max_resources = n_resources,
row_filters = list(PracticeCode = c("10002", "10017")),
col_select = columns
)

expect_s3_class(data, "tbl_df")
expect_equal(nrow(data), n_resources * 6)
expect_named(data, columns)
expect_true(all(data[["PracticeCode"]] %in% c("10002", "10017")))
})
94 changes: 94 additions & 0 deletions tests/testthat/test-get_resource.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,97 @@ test_that("returns data with row specifications", {
expect_equal(nrow(get_resource(res_id = gp_list_apr_2021, rows = 999)), 926) %>%
expect_warning()
})

test_that("returns data for multiple filters", {
data_row_limit <- get_resource(
res_id = "e4985a62-9d59-4e71-8800-3f7ca29ffe0c",
col_select = c("GPPractice", "DMDCode"),
row_filters = list("GPPractice" = c("80005", "80202")),
rows = 100
)

expect_s3_class(data_row_limit, "tbl_df")
expect_equal(nrow(data_row_limit), 100)
expect_named(data_row_limit, c("GPPractice", "DMDCode"))

data_full <- get_resource(
res_id = "e4985a62-9d59-4e71-8800-3f7ca29ffe0c",
col_select = c("GPPractice", "DMDCode", "PrescribedType"),
row_filters = list(
"GPPractice" = c("80005", "80202"),
"PrescribedType" = "AMP"
)
)

expect_s3_class(data_full, "tbl_df")
expect_equal(nrow(data_full), 1108)
expect_named(data_full, c("GPPractice", "DMDCode", "PrescribedType"))
expect_length(unique(data_full$GPPractice), 2)
expect_length(unique(data_full$PrescribedType), 1)
})

test_that("returns data for multiple filters in mixed format", {
delays <- get_resource(
res_id = "fd354e4b-6211-48ba-8e4f-8356a5ed4215",
col_select = c("MonthOfDelay", "ReasonForDelay", "NumberOfDelayedBedDays"),
row_filters = list("HBT" = "S08000031", MonthOfDelay = c(201607:201707))
)

expect_s3_class(delays, "tbl_df")
expect_equal(nrow(delays), 195)
expect_named(delays, c("MonthOfDelay", "ReasonForDelay", "NumberOfDelayedBedDays"))
expect_length(unique(delays$MonthOfDelay), 13)
})

test_that("returns data for multiple filters for all columns", {
prescriptions <- get_resource(
res_id = "d1fbede3-98c4-436e-9e75-2ed807a36075",
row_filters = list(
"HBT" = "S08000015",
"DMDCode" = c("940711000001101", "1004511000001101", "1014311000001109")
)
)

expect_s3_class(prescriptions, "tbl_df")
expect_equal(nrow(prescriptions), 114)
expect_named(prescriptions, c(
"HBT",
"GPPractice",
"DMDCode",
"BNFItemCode",
"BNFItemDescription",
"PrescribedType",
"NumberOfPaidItems",
"PaidQuantity",
"GrossIngredientCost",
"PaidDateMonth"
))
expect_length(unique(prescriptions$GPPractice), 55)
expect_setequal(
prescriptions$DMDCode,
c("940711000001101", "1004511000001101", "1014311000001109")
)
expect_setequal(prescriptions$HBT, "S08000015")
})

test_that("errors on invalid filters", {
# non-existent column in row_filters
expect_error(
delays <- get_resource(
res_id = "fd354e4b-6211-48ba-8e4f-8356a5ed4215",
col_select = c("MonthOfDelay", "ReasonForDelay", "NumberOfDelayedBedDays"),
row_filters = c("HBT" = "S08000031", "Month" = 201607)
),
regexp = "row_filters: invalid value"
)

# non-existent column in col_select
expect_error(
delays <- get_resource(
res_id = "fd354e4b-6211-48ba-8e4f-8356a5ed4215",
col_select = c("Month", "ReasonForDelay", "NumberOfDelayedBedDays"),
row_filters = c("HBT" = "S08000031", "MonthOfDelay" = 201607)
),
regexp = "col_select: invalid value"
)
})
5 changes: 2 additions & 3 deletions tests/testthat/test-parse_row_filters.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@ test_that("throws error for bad types", {
)
})

test_that("throws error for length > 1", {
expect_error(
test_that("returns FALSE for length > 1", {
expect_false(
parse_row_filters(list(x = letters)),
regexp = " has too many values\\."
)
})

Expand Down

0 comments on commit bc9a12a

Please sign in to comment.