Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: spq_select() behavior #203

Merged
merged 12 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions R/spq_mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,18 @@
#'
#' ```r
#' # common name of a plant species in different languages
#' # the triplet pattern "wd:Q331676 wdt:P1843 ?statement" creates the variable statement
#' # the triple pattern "wd:Q331676 wdt:P1843 ?statement"
#' # creates the variable statement
#' # hence our writing it in reverse within the spq_mutate() function
#' spq_init() %>%
#' spq_mutate(statement = wdt::P1843(wd::Q331676)) %>%
#' spq_mutate(lang = lang(statement))
#' ```
spq_mutate = function(.query, ..., .label = NA, .within_box = c(NA, NA), .within_distance = c(NA, NA)) {
spq_mutate = function(.query,
...,
.label = NA,
.within_box = c(NA, NA),
.within_distance = c(NA, NA)) {
variables = purrr::map(rlang::enquos(...), spq_treat_mutate_argument)
variable_names = names(variables)

Expand Down Expand Up @@ -53,8 +58,6 @@ spq_mutate = function(.query, ..., .label = NA, .within_box = c(NA, NA), .within
for (var in normal_variables) {
name = sprintf("?%s", names(normal_variables)[normal_variables == var])

.query = spq_select(.query, spq(name))

formula_df = get_varformula(var)
.query = track_vars(
.query = .query,
Expand All @@ -64,6 +67,8 @@ spq_mutate = function(.query, ..., .label = NA, .within_box = c(NA, NA), .within
fun = sub("\\)$", "", sub("\\(.*", "", formula_df[["formula"]]))
)

.query = track_structure(.query, name = name, selected = TRUE)

}

# 'Triple' variables
Expand Down Expand Up @@ -110,7 +115,7 @@ spq_treat_mutate_argument = function(arg, arg_name) {
rlang::expr_text(arg) %>% str_remove("^~")
}

if (!grepl("::", code)) {
if (!grepl("::", code, fixed = TRUE)) {
spq_translate_dsl(code)
} else {
spq_parse_verb_object(code, reverse = TRUE)
Expand Down
68 changes: 56 additions & 12 deletions R/spq_select.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,23 @@
#' @export
#' @examples
#'
#' query = spq_init()
#' spq_select(query, count = n (human), eyecolor_label, haircolor_label)
#' spq_init() |>
#' spq_prefix(prefixes = c(dct = "http://purl.org/dc/terms/")) |>
#' spq_add(spq('?lexemeId dct:language wd:Q1860')) |>
#' spq_add(spq("?lexemeId wikibase:lemma ?lemma")) |>
#' spq_filter(str_detect(lemma, '^pota.*')) |>
#' spq_select(- lemma)
#'
#' spq_init() |>
#' spq_prefix(prefixes = c(dct = "http://purl.org/dc/terms/")) |>
#' spq_add(spq('?lexemeId dct:language wd:Q1860')) |>
#' spq_add(spq("?lexemeId wikibase:lemma ?lemma")) |>
#' spq_filter(str_detect(lemma, '^pota.*')) |>
#' spq_select(lemma)
spq_select = function(.query = NULL, ..., .spq_duplicate = NULL) {
if (!is.null(.spq_duplicate)) {
original_spq_duplicate <- .spq_duplicate
.spq_duplicate <- toupper(.spq_duplicate)
original_spq_duplicate = .spq_duplicate
.spq_duplicate = toupper(.spq_duplicate)
if (!(.spq_duplicate %in% c("DISTINCT", "REDUCED"))) {
cli::cli_abort(c(
x = "Wrong value for {.arg .spq_duplicate} argument ({original_spq_duplicate}).",
Expand All @@ -21,7 +32,7 @@ spq_select = function(.query = NULL, ..., .spq_duplicate = NULL) {
)
}
}
.query[["spq_duplicate"]] <- .spq_duplicate
.query[["spq_duplicate"]] = .spq_duplicate

variables = purrr::map_chr(rlang::enquos(...), spq_treat_argument)

Expand All @@ -31,20 +42,53 @@ spq_select = function(.query = NULL, ..., .spq_duplicate = NULL) {
add_as
)

plus_variables = variables %>%
str_subset("^\\-\\?", negate = TRUE)

if (length(plus_variables) > 0) {

check_variables_present(.query, plus_variables)

if (is.data.frame(.query[["structure"]])) {
.query[["structure"]][["selected"]] = FALSE
}

.query = purrr::reduce(
plus_variables,
\(.query, var) track_structure(.query, name = var, selected = TRUE),
.init = .query
)
}

minus_variables = variables %>%
str_subset("^\\-\\?") %>%
str_remove("\\-")

plus_variables = variables %>%
str_subset("^\\-\\?", negate = TRUE)
if (length(minus_variables) > 0) {
check_variables_present(.query, minus_variables)

for (var in plus_variables) {
.query <- track_structure(.query, name = var, selected = TRUE)
.query = purrr::reduce(
minus_variables,
\(.query, var) track_structure(.query, name = var, selected = FALSE),
.init = .query
)
}

for (var in minus_variables) {
.query <- track_structure(.query, name = var, selected = FALSE)
return(.query)
}

check_variables_present <- function(query, variables) {

if (nzchar(Sys.getenv("GLITTER.TESTING.SELECT"))) {
return()
}

return(.query)
absent_variables <- setdiff(variables, query[["vars"]][["name"]])

if (length(absent_variables) > 0) {
cli::cli_abort(c(
"Can't use {.fun spq_select} on absent variables: {toString(absent_variables)}.",
i = "Did you forget a call to {.fun spq_add}, {.fun spq_mutate} or {.fun spq_label}?"
))
}
}
2 changes: 1 addition & 1 deletion R/spq_summarise.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ spq_summarise = function(.query, ...) {
ancestor = ancestor,
fun = fun
)
.query = spq_select(.query, spq(name))
.query = track_structure(.query, name, selected = TRUE)

}

Expand Down
8 changes: 4 additions & 4 deletions R/spq_tally.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
#' These functions are inspired by `dplyr::count()` and `dplyr::tally()`.
#' `spq_tally()` assumes you've already done the grouping.
#' @inheritParams spq_arrange
#' @param sort If `TRUE`, will show the largest groups at the top. (like the `sort` argument
#' of `dplyr::tally()`)
#' @param sort If `TRUE`, will show the largest groups at the top
#' (like the `sort` argument of `dplyr::tally()`).
#' @param name Name for the count column (like the `name` argument
#' of `dplyr::tally()`)
#'
Expand Down Expand Up @@ -57,13 +57,13 @@ spq_tally = function(.query, sort = FALSE, name = "n") {

.query = track_vars(
.query,
name = sprintf("?%s", name),
name = question_mark(name),
formula = full_formula,
fun = "COUNT",
ancestor = "*"
)

.query = spq_select(.query, name)
.query = track_structure(.query, name = question_mark(name), selected = TRUE)

if (sort) {
.query <- spq_arrange(.query, spq(sprintf("DESC(?%s)", name)))
Expand Down
15 changes: 13 additions & 2 deletions man/spq_select.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions tests/testthat/_snaps/spq_select.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,19 @@
}


---

Code
spq_select(query, lang, count = n_distinct(article)) %>% spq_select(lang)
Output

SELECT ?lang
WHERE {


}


# spq_select works with SPARQL

Code
Expand Down Expand Up @@ -139,3 +152,25 @@
x Wrong value for `.spq_duplicate` argument (reduce).
i Use either `NULL`, "distinct" or "reduced".

# spq_select tells a variable isn't there

Code
spq_init() %>% spq_add("?station wdt:P16 wd:Q1552") %>% spq_add(
"?station wdt:P31 wd:Q928830") %>% spq_add("?station wdt:P625 ?coords") %>%
spq_select(station_label, blop)
Condition
Error in `check_variables_present()`:
! Can't use `spq_select()` on absent variables: ?station_label, ?blop.
i Did you forget a call to `spq_add()`, `spq_mutate()` or `spq_label()`?

---

Code
spq_init() %>% spq_add("?station wdt:P16 wd:Q1552") %>% spq_add(
"?station wdt:P31 wd:Q928830") %>% spq_add("?station wdt:P625 ?coords") %>%
spq_label(station) %>% spq_select(station_label, blop)
Condition
Error in `check_variables_present()`:
! Can't use `spq_select()` on absent variables: ?blop.
i Did you forget a call to `spq_add()`, `spq_mutate()` or `spq_label()`?

4 changes: 4 additions & 0 deletions tests/testthat/test-spq_group_by.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
test_that("spq_group_by works with R syntax", {
withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")

expect_snapshot(
spq_init() %>%
spq_select(population, countryLabel) %>%
Expand All @@ -7,6 +9,8 @@ test_that("spq_group_by works with R syntax", {
})

test_that("spq_group_by works with R syntax - string", {
withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")

expect_snapshot(
spq_init() %>%
spq_select(population, countryLabel) %>%
Expand Down
38 changes: 37 additions & 1 deletion tests/testthat/test-spq_select.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
test_that("spq_select works with R syntax", {
withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")

query <- spq_init()

expect_snapshot(
spq_select(query, count = n (human), eyecolorLabel, haircolorLabel)
)

expect_snapshot(
spq_select(query, count = n (human), eyecolorLabel, haircolorLabel) %>%
spq_select(- haircolorLabel)
spq_select(- haircolorLabel)
)

expect_snapshot(
spq_select(query, birthyear = year(birthdate))
)
Expand All @@ -18,23 +23,32 @@ test_that("spq_select works with R syntax", {
expect_snapshot(
spq_select(query, lang, count = n_distinct(article))
)

expect_snapshot(
spq_select(query, lang, count = n_distinct(article)) %>%
spq_select(lang)
)
})

test_that("spq_select works with SPARQL", {
withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")
query <- spq_init()
expect_snapshot(
spq_select(query, spq("?lang"), spq("(COUNT(DISTINCT ?article) AS ?count"))
)
})

test_that("spq_select works with both", {
withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")
query <- spq_init()
expect_snapshot(
spq_select(query, lang, spq("(COUNT(DISTINCT ?article) AS ?count"))
)
})

test_that("spq_select errors well", {
withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")

query <- spq_init()
expect_snapshot_error(
spq_select(query, birthyear = year(birthdate, abbreviate = TRUE, translate = FALSE))
Expand All @@ -46,6 +60,8 @@ test_that("spq_select errors well", {
})

test_that("spq_select can use DISTINCT and REDUCED", {
withr::local_envvar("GLITTER.TESTING.SELECT" = "yep")

query <- spq_init()
expect_snapshot(
spq_select(query, year, month, day, .spq_duplicate = "distinct")
Expand All @@ -58,3 +74,23 @@ test_that("spq_select can use DISTINCT and REDUCED", {
)

})

test_that("spq_select tells a variable isn't there", {
expect_snapshot(error = TRUE, {
spq_init() %>%
spq_add("?station wdt:P16 wd:Q1552") %>%
spq_add("?station wdt:P31 wd:Q928830") %>%
spq_add("?station wdt:P625 ?coords") %>%
spq_select(station_label, blop)
})

expect_snapshot(error = TRUE, {
spq_init() %>%
spq_add("?station wdt:P16 wd:Q1552") %>%
spq_add("?station wdt:P31 wd:Q928830") %>%
spq_add("?station wdt:P625 ?coords") %>%
spq_label(station) %>%
spq_select(station_label, blop)
})

})
4 changes: 2 additions & 2 deletions vignettes/articles/explore.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ query_basis %>%
spq_prefix(prefixes = c("schema" = "http://schema.org/")) %>%
spq_add("?s a schema:Organization") %>%
spq_add("?s ?property ?value") %>%
spq_select(-value, -s, class, .spq_duplicate = "distinct") %>%
spq_select(-value, -s, .spq_duplicate = "distinct") %>%
spq_perform() %>%
knitr::kable()
```
Expand All @@ -232,7 +232,7 @@ query_basis %>%
spq_prefix(prefixes = c("schema" = "http://schema.org/")) %>%
spq_add("?s a schema:PostalAddress") %>%
spq_add("?s ?property ?value") %>%
spq_select(-value, -s, class, .spq_duplicate = "distinct") %>%
spq_select(-value, -s, .spq_duplicate = "distinct") %>%
spq_perform() %>%
knitr::kable()
```
Expand Down