lvaudor · maelle · Oct 19, 2023 · Oct 12, 2023 · Oct 12, 2023 · Oct 12, 2023
diff --git a/R/spq_mutate.R b/R/spq_mutate.R
@@ -7,13 +7,18 @@
 #'
 #' ```r
 #' # common name of a plant species in different languages
-#' # the triplet pattern "wd:Q331676 wdt:P1843 ?statement" creates the variable statement
+#' # the triple pattern "wd:Q331676 wdt:P1843 ?statement"
+#' # creates the variable statement
 #' # hence our writing it in reverse within the spq_mutate() function
 #' spq_init() %>%
 #' spq_mutate(statement = wdt::P1843(wd::Q331676)) %>%
 #' spq_mutate(lang = lang(statement))
 #' ```
-spq_mutate = function(.query, ..., .label = NA, .within_box = c(NA, NA), .within_distance = c(NA, NA)) {
+spq_mutate = function(.query,
+                      ...,
+                      .label = NA,
+                      .within_box = c(NA, NA),
+                      .within_distance = c(NA, NA)) {
   variables = purrr::map(rlang::enquos(...), spq_treat_mutate_argument)
   variable_names = names(variables)
 
@@ -53,8 +58,6 @@ spq_mutate = function(.query, ..., .label = NA, .within_box = c(NA, NA), .within
   for (var in normal_variables) {
     name = sprintf("?%s", names(normal_variables)[normal_variables == var])
 
-    .query = spq_select(.query, spq(name))
-
     formula_df = get_varformula(var)
     .query = track_vars(
       .query = .query,
@@ -64,6 +67,8 @@ spq_mutate = function(.query, ..., .label = NA, .within_box = c(NA, NA), .within
       fun = sub("\\)$", "", sub("\\(.*", "", formula_df[["formula"]]))
     )
 
+    .query = track_structure(.query, name = name, selected = TRUE)
+
   }
 
   # 'Triple' variables
@@ -110,7 +115,7 @@ spq_treat_mutate_argument = function(arg, arg_name) {
     rlang::expr_text(arg) %>% str_remove("^~")
   }
 
-  if (!grepl("::", code)) {
+  if (!grepl("::", code, fixed = TRUE)) {
     spq_translate_dsl(code)
   } else {
     spq_parse_verb_object(code, reverse = TRUE)

diff --git a/R/spq_select.R b/R/spq_select.R
@@ -7,12 +7,23 @@
 #' @export
 #' @examples
 #'
-#' query = spq_init()
-#' spq_select(query, count = n (human), eyecolor_label, haircolor_label)
+#' spq_init() |>
+#'   spq_prefix(prefixes = c(dct = "http://purl.org/dc/terms/")) |>
+#'   spq_add(spq('?lexemeId dct:language wd:Q1860')) |>
+#'   spq_add(spq("?lexemeId wikibase:lemma ?lemma")) |>
+#'   spq_filter(str_detect(lemma, '^pota.*')) |>
+#'   spq_select(- lemma)
+#'
+#' spq_init() |>
+#'   spq_prefix(prefixes = c(dct = "http://purl.org/dc/terms/")) |>
+#'   spq_add(spq('?lexemeId dct:language wd:Q1860')) |>
+#'   spq_add(spq("?lexemeId wikibase:lemma ?lemma")) |>
+#'   spq_filter(str_detect(lemma, '^pota.*')) |>
+#'   spq_select(lemma)
 spq_select = function(.query = NULL, ..., .spq_duplicate = NULL) {
   if (!is.null(.spq_duplicate)) {
-    original_spq_duplicate <- .spq_duplicate
-    .spq_duplicate <- toupper(.spq_duplicate)
+    original_spq_duplicate = .spq_duplicate
+    .spq_duplicate = toupper(.spq_duplicate)
     if (!(.spq_duplicate %in% c("DISTINCT", "REDUCED"))) {
       cli::cli_abort(c(
         x = "Wrong value for {.arg .spq_duplicate} argument ({original_spq_duplicate}).",
@@ -21,7 +32,7 @@ spq_select = function(.query = NULL, ..., .spq_duplicate = NULL) {
       )
     }
   }
-  .query[["spq_duplicate"]] <- .spq_duplicate
+  .query[["spq_duplicate"]] = .spq_duplicate
 
   variables = purrr::map_chr(rlang::enquos(...), spq_treat_argument)
 
@@ -31,20 +42,53 @@ spq_select = function(.query = NULL, ..., .spq_duplicate = NULL) {
     add_as
   )
 
+  plus_variables = variables %>%
+    str_subset("^\\-\\?", negate = TRUE)
+
+  if (length(plus_variables) > 0) {
+
+    check_variables_present(.query, plus_variables)
+
+    if (is.data.frame(.query[["structure"]])) {
+      .query[["structure"]][["selected"]] = FALSE
+    }
+
+    .query = purrr::reduce(
+      plus_variables,
+      \(.query, var) track_structure(.query, name = var, selected = TRUE),
+      .init = .query
+    )
+  }
+
   minus_variables = variables %>%
     str_subset("^\\-\\?") %>%
     str_remove("\\-")
 
-  plus_variables = variables %>%
-    str_subset("^\\-\\?", negate = TRUE)
+  if (length(minus_variables) > 0) {
+    check_variables_present(.query, minus_variables)
 
-  for (var in plus_variables) {
-    .query <- track_structure(.query, name = var, selected = TRUE)
+    .query = purrr::reduce(
+      minus_variables,
+      \(.query, var) track_structure(.query, name = var, selected = FALSE),
+      .init = .query
+    )
   }
 
-  for (var in minus_variables) {
-    .query <- track_structure(.query, name = var, selected = FALSE)
+  return(.query)
+}
+
+check_variables_present <- function(query, variables) {
+  # Abort unless every name in `variables` appears in query[["vars"]][["name"]].
+  if (nzchar(Sys.getenv("GLITTER.TESTING.SELECT"))) {
+    return()  # check skipped when the GLITTER.TESTING.SELECT env var is non-empty
   }
 
-  return(.query)
+  absent_variables <- setdiff(variables, query[["vars"]][["name"]])
+  # Report all missing names in one error, with a hint on how to create them.
+  if (length(absent_variables) > 0) {
+    cli::cli_abort(c(
+      "Can't use {.fun spq_select} on absent variables: {toString(absent_variables)}.",
+      i = "Did you forget a call to {.fun spq_add}, {.fun spq_mutate} or {.fun spq_label}?"
+    ))
+  }
 }
diff --git a/R/spq_summarise.R b/R/spq_summarise.R
@@ -81,7 +81,7 @@ spq_summarise = function(.query, ...) {
       ancestor = ancestor,
       fun = fun
     )
-    .query = spq_select(.query, spq(name))
+    .query = track_structure(.query, name, selected = TRUE)
 
   }
 

diff --git a/R/spq_tally.R b/R/spq_tally.R
@@ -3,8 +3,8 @@
 #' These functions are inspired by `dplyr::count()` and `dplyr::tally()`.
 #' `spq_tally()` assumes you've already done the grouping.
 #' @inheritParams spq_arrange
-#' @param sort If `TRUE`, will show the largest groups at the top. (like the `sort` argument
-#' of `dplyr::tally()`)
+#' @param sort If `TRUE`, will show the largest groups at the top.
+#' (like the `sort` argument of `dplyr::tally()`)
 #' @param name Name for the count column (like the `name` argument
 #' of `dplyr::tally()`)
 #'
@@ -57,13 +57,13 @@ spq_tally = function(.query, sort = FALSE, name = "n") {
 
   .query = track_vars(
     .query,
-    name = sprintf("?%s", name),
+    name = question_mark(name),
     formula = full_formula,
     fun = "COUNT",
     ancestor = "*"
   )
 
-  .query = spq_select(.query, name)
+  .query = track_structure(.query, name = question_mark(name), selected = TRUE)
 
   if (sort) {
     .query <- spq_arrange(.query, spq(sprintf("DESC(?%s)", name)))

diff --git a/man/spq_select.Rd b/man/spq_select.Rd
diff --git a/tests/testthat/_snaps/spq_select.md b/tests/testthat/_snaps/spq_select.md
@@ -64,6 +64,19 @@
       }
 
 
+---
+
+    Code
+      spq_select(query, lang, count = n_distinct(article)) %>% spq_select(lang)
+    Output
+
+      SELECT ?lang
+      WHERE {
+
+
+      }
+
+
 # spq_select works with SPARQL
 
     Code
@@ -139,3 +152,25 @@
     x Wrong value for `.spq_duplicate` argument (reduce).
     i Use either `NULL`, "distinct" or "reduced".
 
+# spq_select tells a variable isn't there
+
+    Code
+      spq_init() %>% spq_add("?station wdt:P16 wd:Q1552") %>% spq_add(
+        "?station wdt:P31 wd:Q928830") %>% spq_add("?station wdt:P625 ?coords") %>%
+        spq_select(station_label, blop)
+    Condition
+      Error in `check_variables_present()`:
+      ! Can't use `spq_select()` on absent variables: ?station_label, ?blop.
+      i Did you forget a call to `spq_add()`, `spq_mutate()` or `spq_label()`?
+
+---
+
+    Code
+      spq_init() %>% spq_add("?station wdt:P16 wd:Q1552") %>% spq_add(
+        "?station wdt:P31 wd:Q928830") %>% spq_add("?station wdt:P625 ?coords") %>%
+        spq_label(station) %>% spq_select(station_label, blop)
+    Condition
+      Error in `check_variables_present()`:
+      ! Can't use `spq_select()` on absent variables: ?blop.
+      i Did you forget a call to `spq_add()`, `spq_mutate()` or `spq_label()`?
+
diff --git a/tests/testthat/test-spq_group_by.R b/tests/testthat/test-spq_group_by.R
@@ -1,4 +1,6 @@
 test_that("spq_group_by works with R syntax", {
+  withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")
+
   expect_snapshot(
     spq_init() %>%
       spq_select(population, countryLabel) %>%
@@ -7,6 +9,8 @@ test_that("spq_group_by works with R syntax", {
 })
 
 test_that("spq_group_by works with R syntax - string", {
+  withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")
+
   expect_snapshot(
     spq_init() %>%
       spq_select(population, countryLabel) %>%

diff --git a/tests/testthat/test-spq_select.R b/tests/testthat/test-spq_select.R
@@ -1,12 +1,17 @@
 test_that("spq_select works with R syntax", {
+  withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")
+
   query <- spq_init()
+
   expect_snapshot(
     spq_select(query, count = n (human), eyecolorLabel, haircolorLabel)
   )
+
   expect_snapshot(
     spq_select(query, count = n (human), eyecolorLabel, haircolorLabel) %>%
-    spq_select(- haircolorLabel)
+      spq_select(- haircolorLabel)
   )
+
   expect_snapshot(
     spq_select(query, birthyear = year(birthdate))
   )
@@ -18,23 +23,32 @@ test_that("spq_select works with R syntax", {
   expect_snapshot(
     spq_select(query, lang, count = n_distinct(article))
   )
+
+  expect_snapshot(
+    spq_select(query, lang, count = n_distinct(article)) %>%
+      spq_select(lang)
+  )
 })
 
 test_that("spq_select works with SPARQL", {
+  withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")
   query <- spq_init()
   expect_snapshot(
     spq_select(query, spq("?lang"), spq("(COUNT(DISTINCT ?article) AS ?count"))
   )
 })
 
 test_that("spq_select works with both", {
+  withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")
   query <- spq_init()
   expect_snapshot(
     spq_select(query, lang, spq("(COUNT(DISTINCT ?article) AS ?count"))
   )
 })
 
 test_that("spq_select errors well", {
+  withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")
+
   query <- spq_init()
   expect_snapshot_error(
     spq_select(query, birthyear = year(birthdate, abbreviate = TRUE, translate = FALSE))
@@ -46,6 +60,8 @@ test_that("spq_select errors well", {
 })
 
 test_that("spq_select can use DISTINCT and REDUCED", {
+  withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")
+
   query <- spq_init()
   expect_snapshot(
     spq_select(query, year, month, day, .spq_duplicate = "distinct")
@@ -58,3 +74,23 @@ test_that("spq_select can use DISTINCT and REDUCED", {
   )
 
 })
+
+test_that("spq_select tells a variable isn't there", {
+  expect_snapshot(error = TRUE, {
+    spq_init() %>%
+      spq_add("?station wdt:P16 wd:Q1552") %>%
+      spq_add("?station wdt:P31 wd:Q928830") %>%
+      spq_add("?station wdt:P625 ?coords") %>%
+      spq_select(station_label, blop)
+  })
+
+  expect_snapshot(error = TRUE, {
+    spq_init() %>%
+      spq_add("?station wdt:P16 wd:Q1552") %>%
+      spq_add("?station wdt:P31 wd:Q928830") %>%
+      spq_add("?station wdt:P625 ?coords") %>%
+      spq_label(station) %>%
+      spq_select(station_label, blop)
+  })
+
+})
diff --git a/vignettes/articles/explore.Rmd b/vignettes/articles/explore.Rmd
@@ -220,7 +220,7 @@ query_basis %>%
   spq_prefix(prefixes = c("schema" = "http://schema.org/")) %>%
   spq_add("?s a schema:Organization") %>%
   spq_add("?s ?property ?value") %>%
-  spq_select(-value, -s, class, .spq_duplicate = "distinct") %>%
+  spq_select(-value, -s, .spq_duplicate = "distinct") %>%
   spq_perform() %>%
   knitr::kable()
 ```
@@ -232,7 +232,7 @@ query_basis %>%
   spq_prefix(prefixes = c("schema" = "http://schema.org/")) %>%
   spq_add("?s a schema:PostalAddress") %>%
   spq_add("?s ?property ?value") %>%
-  spq_select(-value, -s, class, .spq_duplicate = "distinct") %>%
+  spq_select(-value, -s, .spq_duplicate = "distinct") %>%
   spq_perform() %>%
   knitr::kable()
 ```