Merge pull request #54 from Public-Health-Scotland/feature/multiple_f…

…iltering Enable multiple filters to be supplied by using `get_resource_sql`
Public-Health-Scotland · Jan 21, 2025 · bc9a12a · bc9a12a
2 parents e383229 + 80d4b70
commit bc9a12a
Show file tree

Hide file tree

Showing 5 changed files with 169 additions and 26 deletions.
diff --git a/R/get_resource.R b/R/get_resource.R
@@ -40,12 +40,43 @@ get_resource <- function(res_id,
   # check res_id
   check_res_id(res_id)
 
+  parsed_col_select <- parse_col_select(col_select)
+  parsed_row_filters <- parse_row_filters(row_filters)
+
+  if (is.logical(parsed_row_filters) && !parsed_row_filters) {
+    if (!is.null(row_filters)) {
+      col_select_sql <- dplyr::if_else(
+        is.null(col_select),
+        "*",
+        paste0("\"", paste(col_select, collapse = "\",\""), "\"")
+      )
+
+      row_filters_sql <- paste(
+        purrr::imap_chr(
+          row_filters,
+          function(value, col) paste0("\"", col, "\"=\'", value, "\'", collapse = " OR ")
+        ),
+        collapse = ") AND ("
+      )
+
+      sql <- sprintf(
+        "SELECT %s FROM \"%s\" WHERE (%s) %s",
+        col_select_sql,
+        res_id,
+        row_filters_sql,
+        dplyr::if_else(is.null(rows), "", paste("LIMIT", rows))
+      )
+
+      return(get_resource_sql(sql))
+    }
+  }
+
   # define query
   query <- list(
     id = res_id,
     limit = rows,
-    q = parse_row_filters(row_filters),
-    fields = parse_col_select(col_select)
+    q = parsed_row_filters,
+    fields = parsed_col_select
   )
 
   # if dump should be used, use it

diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R
@@ -4,7 +4,7 @@
 #' @return a json as a character string
 #' @keywords internal
 #' @noRd
-parse_row_filters <- function(row_filters) {
+parse_row_filters <- function(row_filters, call = rlang::caller_env()) {
   # exit function if no filters
   if (is.null(row_filters)) {
     return(NULL)
@@ -14,42 +14,45 @@ parse_row_filters <- function(row_filters) {
   if (class(row_filters) != "list" && !is.character(row_filters) && !is.numeric(row_filters)) {
     cli::cli_abort(
       "{.arg row_filters} must be a named {.cls list} or a named
-      {.cls character} or {.cls numeric} vector, not a {.cls {class(row_filters)}}."
+      {.cls character} or {.cls numeric} vector, not a {.cls {class(row_filters)}}.",
+      call = call
     )
   }
 
   # Ensure it's elements are named
   if (is.null(names(row_filters)) || any(names(row_filters) == "")) {
-    cli::cli_abort("{.arg row_filters} should be a named {.cls list}.")
+    cli::cli_abort(
+      "{.arg row_filters} should be a named {.cls list}.",
+      call = call
+    )
   }
 
-  # check if any filters in list have length > 1
-  too_many <- purrr::map_lgl(row_filters, ~ length(.x) > 1)
-
-  if (any(too_many)) {
-    cli::cli_abort(c(
-      "Invalid input for {.arg row_filters}",
-      i = "The {.val {names(row_filters)[which(too_many)]}} filter{?s} {?has/have} too many values.",
-      x = "The {.arg row_filters} list must only contain vectors of length 1."
-    ))
-  }
 
   # check if any items in the list/vector are duplicates
   duplicates <- duplicated(names(row_filters))
   if (any(duplicates)) {
-    cli::cli_abort(c(
-      "Invalid input for {.arg row_filters}",
-      x = "The {.val {names(row_filters)[which(duplicates)]}} filter{?s} {?is/are} duplicated.",
-      i = "Only one filter per field is currently supported by {.fun get_resource}."
-    ))
+    cli::cli_abort(
+      c(
+        "Invalid input for {.arg row_filters}",
+        x = "The {.val {names(row_filters)[which(duplicates)]}} filter{?s} {?is/are} duplicated.",
+        i = "Only one filter per field is currently supported by {.fun get_resource}."
+      ),
+      call = call
+    )
+  }
+
+  # check if any filters in list have length > 1
+  multiple <- purrr::map_lgl(row_filters, ~ length(.x) > 1)
+
+  if (any(multiple)) {
+    # Default to using SQL
+    return(FALSE)
   }
 
   filter_body <- paste0(
     '"', names(row_filters), '":"', row_filters, '"',
     collapse = ","
   )
 
-  return(
-    paste0("{", filter_body, "}")
-  )
+  return(paste0("{", filter_body, "}"))
 }
diff --git a/tests/testthat/test-get_dataset.R b/tests/testthat/test-get_dataset.R
@@ -50,3 +50,19 @@ test_that("get_dataset filters error properly", {
     regexp = "API error"
   )
 })
+
+test_that("get_dataset works with multiple filters", {
+  n_resources <- 3
+  columns <- c("Date", "PracticeCode", "HSCP", "AllAges")
+
+  data <- get_dataset("gp-practice-populations",
+    max_resources = n_resources,
+    row_filters = list(PracticeCode = c("10002", "10017")),
+    col_select = columns
+  )
+
+  expect_s3_class(data, "tbl_df")
+  expect_equal(nrow(data), n_resources * 6)
+  expect_named(data, columns)
+  expect_true(all(data[["PracticeCode"]] %in% c("10002", "10017")))
+})
diff --git a/tests/testthat/test-get_resource.R b/tests/testthat/test-get_resource.R
@@ -30,3 +30,97 @@ test_that("returns data with row specifications", {
   expect_equal(nrow(get_resource(res_id = gp_list_apr_2021, rows = 999)), 926) %>%
     expect_warning()
 })
+
+test_that("returns data for multiple filters", {
+  data_row_limit <- get_resource(
+    res_id = "e4985a62-9d59-4e71-8800-3f7ca29ffe0c",
+    col_select = c("GPPractice", "DMDCode"),
+    row_filters = list("GPPractice" = c("80005", "80202")),
+    rows = 100
+  )
+
+  expect_s3_class(data_row_limit, "tbl_df")
+  expect_equal(nrow(data_row_limit), 100)
+  expect_named(data_row_limit, c("GPPractice", "DMDCode"))
+
+  data_full <- get_resource(
+    res_id = "e4985a62-9d59-4e71-8800-3f7ca29ffe0c",
+    col_select = c("GPPractice", "DMDCode", "PrescribedType"),
+    row_filters = list(
+      "GPPractice" = c("80005", "80202"),
+      "PrescribedType" = "AMP"
+    )
+  )
+
+  expect_s3_class(data_full, "tbl_df")
+  expect_equal(nrow(data_full), 1108)
+  expect_named(data_full, c("GPPractice", "DMDCode", "PrescribedType"))
+  expect_length(unique(data_full$GPPractice), 2)
+  expect_length(unique(data_full$PrescribedType), 1)
+})
+
+test_that("returns data for multiple filters in mixed format", {
+  delays <- get_resource(
+    res_id = "fd354e4b-6211-48ba-8e4f-8356a5ed4215",
+    col_select = c("MonthOfDelay", "ReasonForDelay", "NumberOfDelayedBedDays"),
+    row_filters = list("HBT" = "S08000031", MonthOfDelay = c(201607:201707))
+  )
+
+  expect_s3_class(delays, "tbl_df")
+  expect_equal(nrow(delays), 195)
+  expect_named(delays, c("MonthOfDelay", "ReasonForDelay", "NumberOfDelayedBedDays"))
+  expect_length(unique(delays$MonthOfDelay), 13)
+})
+
+test_that("returns data for multiple filters for all columns", {
+  prescriptions <- get_resource(
+    res_id = "d1fbede3-98c4-436e-9e75-2ed807a36075",
+    row_filters = list(
+      "HBT" = "S08000015",
+      "DMDCode" = c("940711000001101", "1004511000001101", "1014311000001109")
+    )
+  )
+
+  expect_s3_class(prescriptions, "tbl_df")
+  expect_equal(nrow(prescriptions), 114)
+  expect_named(prescriptions, c(
+    "HBT",
+    "GPPractice",
+    "DMDCode",
+    "BNFItemCode",
+    "BNFItemDescription",
+    "PrescribedType",
+    "NumberOfPaidItems",
+    "PaidQuantity",
+    "GrossIngredientCost",
+    "PaidDateMonth"
+  ))
+  expect_length(unique(prescriptions$GPPractice), 55)
+  expect_setequal(
+    prescriptions$DMDCode,
+    c("940711000001101", "1004511000001101", "1014311000001109")
+  )
+  expect_setequal(prescriptions$HBT, "S08000015")
+})
+
+test_that("errors on invalid filters", {
+  # non-existent column in row_filters
+  expect_error(
+    delays <- get_resource(
+      res_id = "fd354e4b-6211-48ba-8e4f-8356a5ed4215",
+      col_select = c("MonthOfDelay", "ReasonForDelay", "NumberOfDelayedBedDays"),
+      row_filters = c("HBT" = "S08000031", "Month" = 201607)
+    ),
+    regexp = "row_filters: invalid value"
+  )
+
+  # non-existent column in col_select
+  expect_error(
+    delays <- get_resource(
+      res_id = "fd354e4b-6211-48ba-8e4f-8356a5ed4215",
+      col_select = c("Month", "ReasonForDelay", "NumberOfDelayedBedDays"),
+      row_filters = c("HBT" = "S08000031", "MonthOfDelay" = 201607)
+    ),
+    regexp = "col_select: invalid value"
+  )
+})
diff --git a/tests/testthat/test-parse_row_filters.R b/tests/testthat/test-parse_row_filters.R
@@ -23,10 +23,9 @@ test_that("throws error for bad types", {
   )
 })
 
-test_that("throws error for length > 1", {
-  expect_error(
+test_that("returns FALSE for length > 1", {
+  expect_false(
     parse_row_filters(list(x = letters)),
-    regexp = " has too many values\\."
   )
 })