-
Notifications
You must be signed in to change notification settings - Fork 9
/
api.R
306 lines (258 loc) · 8.85 KB
/
api.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
## ----------------------------------------------------------------------
# Package-private mutable state. The `prev_q` entry holds the most recent
# search as a list with elements `type` and `result`; it is written by
# `make_pkg_search()` and read by `more()` and `pkg_search_again()`.
s_data <- new.env(parent = emptyenv())
#' Search CRAN packages
#'
#' @description
#' `pkg_search()` starts a new search query, or shows the details of the
#' previous query, if called without arguments.
#'
#' `ps()` is an alias to `pkg_search()`.
#'
#' `more()` retrieves the next page of results for the previous query.
#'
#' @details
#' Note that the search needs a working Internet connection.
#'
#' @param query Search query string. If this argument is missing or
#' `NULL`, then the results of the last query are printed, in
#' _short_ and _long_ formats, in turns for successive
#' `pkg_search()` calls. If this argument is missing, then all
#' other arguments are ignored.
#' @param format Default formatting of the results. _short_ only
#' outputs the name and title of the packages, _long_ also
#' prints the author, last version, full description and URLs.
#' Note that this only affects the default printing, and you can
#' still inspect the full results, even if you specify _short_
#' here.
#' @param from Where to start listing the results, for pagination.
#' @param size The number of results to list.
#' @return A tibble with columns:
#' * `score`: Score of the hit. See Section _Scoring_ for some details.
#' * `package`: Package name.
#' * `version`: Latest package version.
#' * `title`: Package title.
#' * `description`: Short package description.
#' * `date`: Time stamp of the last release.
#' * `maintainer_name`: Name of the package maintainer.
#' * `maintainer_email`: Email address of the package maintainer.
#' * `revdeps`: Number of (strong and weak) reverse dependencies of the
#' package.
#' * `downloads_last_month`: Raw number of package downloads last month,
#' from the RStudio CRAN mirror.
#' * `license`: Package license.
#' * `url`: Package URL(s).
#' * `bugreports`: URL of issue tracker, or email address for bug reports.
#'
#' @export
#' @examples
#' \dontshow{ asNamespace("pkgsearch")$exif(pingr::is_online(), \{ }
#' # Example
#' ps("survival")
#'
#' # Pagination
#' ps("networks")
#' more()
#'
#' # Details
#' ps("visualization")
#' ps()
#'
#' # See the underlying tibble
#' ps("ropensci")
#' ps()[]
#' \dontshow{ \}) }
pkg_search <- function(query = NULL, format = c("short", "long"),
                       from = 1, size = 10) {
  if (is.null(query)) {
    # No query supplied: show the previous result again instead of
    # contacting the server. All other arguments are ignored here.
    pkg_search_again()
  } else {
    format <- match.arg(format)

    # Host and port of the search server can be overridden through
    # environment variables.
    host <- Sys.getenv("R_PKG_SEARCH_SERVER", "search.r-pkg.org")
    port_number <- as.integer(Sys.getenv("R_PKG_SEARCH_PORT", "80"))

    make_pkg_search(
      query = query,
      format = format,
      from = from,
      size = size,
      server = host,
      port = port_number
    )
  }
}
# `ps` is a short alias: it is bound to the same function object as
# `pkg_search` and is documented on the same help page.
#' @rdname pkg_search
#' @export
ps <- pkg_search
make_pkg_search <- function(query, format, from, size, server, port) {
  # Run a "simple" search: build the JSON request body, send it to the
  # server, and turn the response into a result object.
  request_body <- make_query(query = query)
  response_text <- do_query(
    request_body,
    server = server,
    port = port,
    from = from,
    size = size
  )
  search_result <- format_result(
    response_text,
    query = query,
    format = format,
    from = from,
    size = size,
    server = server,
    port = port
  )

  # Remember this search so that `more()` and a query-less `pkg_search()`
  # can pick it up later.
  s_data$prev_q <- list(type = "simple", result = search_result)
  search_result
}
#' @rdname pkg_search
#' @export
more <- function(format = NULL, size = NULL) {
  # Fetch the next page of the previous search. `format` and `size`
  # default to the values stored with the previous result.
  previous <- s_data$prev_q
  if (is.null(previous)) {
    throw(new_error("No query, start with 'pkg_search()'"))
  }

  search_type <- previous$type
  if (search_type == "simple") {
    info <- meta(previous$result)
    make_pkg_search(
      query = info$query,
      format = format %||% info$format,
      # The next page starts right after the last hit of the previous one.
      from = info$from + info$size,
      size = size %||% info$size,
      server = info$server,
      port = info$port
    )
  } else if (search_type == "advanced") {
    info <- meta(previous$result)
    advanced_search(
      json = info$qstr,
      format = format %||% info$format,
      from = info$from + info$size,
      size = size %||% info$size
    )
  } else {
    throw(new_error("Unknown search type, internal pkgsearch error :("))
  }
}
#' @importFrom jsonlite toJSON
make_query <- function(query) {
  # Build the JSON request body for a simple search of `query`.
  # `query` is validated with `check_string()`; the result is the
  # pretty-printed JSON produced by `toJSON()`.
  check_string(query)

  boosted_fields <- c("Package^20", "Title^10", "Description^2",
                      "Author^5", "Maintainer^6", "_all")

  # Score function based on the `revdeps` field, with a `sqrt` modifier.
  revdeps_factor <- list(
    field_value_factor = list(
      field = "revdeps",
      modifier = "sqrt",
      factor = 1
    )
  )

  # Required clause: a word by word match, scores add up for fields.
  match_words <- list(multi_match = list(
    query = query,
    type = "most_fields"
  ))

  # Optional clause: matches the complete phrase, so it takes priority.
  match_phrase <- list(multi_match = list(
    query = query,
    fields = c("Title^10", "Description^2", "_all"),
    type = "phrase",
    analyzer = "english_and_synonyms",
    boost = 10
  ))

  # Optional clause: all words match (but not as a phrase).
  match_all_words <- list(multi_match = list(
    query = query,
    fields = boosted_fields,
    operator = "and",
    analyzer = "english_and_synonyms",
    boost = 5
  ))

  request <- list(
    query = list(
      function_score = list(
        functions = list(revdeps_factor),
        query = list(
          bool = list(
            must = list(match_words),
            should = list(match_phrase, match_all_words)
          )
        )
      )
    )
  )

  toJSON(request, auto_unbox = TRUE, pretty = TRUE)
}
#' @importFrom httr POST add_headers stop_for_status content
#' @importFrom jsonlite fromJSON
do_query <- function(query, server, port, from, size) {
  # Send the JSON `query` to the search server and return the response
  # body as a single string.
  #
  # `from` and `size` are validated with `check_count()`. `from` is
  # 1-based for callers, but it is sent to the server as `from - 1`.
  # A failing HTTP status is rethrown as the error built by
  # `new_query_error()`, which carries the response object.
  check_count(from)
  check_count(size)

  url <- "http://" %+% server %+% ":" %+% as.character(port) %+%
    "/package/_search?from=" %+% as.character(from - 1) %+%
    "&size=" %+% as.character(size)

  result <- POST(
    url,
    body = query,
    add_headers("Content-Type" = "application/json")
  )

  rethrow(
    stop_for_status(result),
    new_query_error(result, "search server failure")
  )

  # The body is JSON, so decode it as UTF-8 explicitly. Without an
  # explicit encoding httr has to guess, and it emits a
  # "No encoding supplied" message when the response declares no charset.
  content(result, as = "text", encoding = "UTF-8")
}
new_query_error <- function(response, ...) {
  # Create an error condition of class "pkgsearch_query_error". The `...`
  # arguments are forwarded to `new_error()`, and the HTTP `response` is
  # stored on the condition so the print method can inspect it.
  query_error <- new_error(...)
  class(query_error) <- c("pkgsearch_query_error", class(query_error))
  query_error$response <- response
  query_error
}
#' @export
print.pkgsearch_query_error <- function(x, ...) {
  # Print a query error: the error itself, then the reason reported by the
  # search server (if the response body contains one), then the parent
  # error(s). Returns `x` invisibly.

  # The call to the httr method is quite tedious and not very useful,
  # so we remove it
  x$parent$call <- NULL

  # default print method for the error itself
  err$print_this(x, ...)

  # error message from Elastic, if any
  tryCatch({
    rsp <- x$response
    # Decode explicitly as UTF-8, so that httr does not emit its
    # "No encoding supplied" message in the middle of the printed error.
    cnt <- fromJSON(
      content(rsp, as = "text", encoding = "UTF-8"),
      simplifyVector = FALSE
    )
    if ("error" %in% names(cnt) &&
        "root_cause" %in% names(cnt$error) &&
        "reason" %in% names(cnt$error$root_cause[[1]])) {
      cat("", cnt$error$root_cause[[1]]$reason, "", sep = "\n")
    }
    # Best effort only: a body that cannot be read or parsed is ignored.
  }, error = function(e) NULL)

  # parent error(s)
  err$print_parents(x, ...)

  invisible(x)
}
#' @importFrom parsedate parse_iso_8601
format_result <- function(result, query, format, from, size, server,
                          port, ...) {
  # Turn the JSON text returned by the server into a data frame with one
  # row per hit, classed as "pkg_search_result". The search parameters,
  # some response statistics, and any `...` arguments are attached as the
  # "metadata" attribute.
  parsed <- fromJSON(result, simplifyVector = FALSE)

  search_info <- list(
    query = query,
    format = format,
    from = from,
    size = size,
    server = server,
    port = port,
    total = parsed$hits$total,
    max_score = parsed$hits$max_score,
    took = parsed$took,
    timed_out = parsed$timed_out,
    ...
  )

  hits <- parsed$hits$hits
  records <- map(hits, "[[", "_source")
  maintainers <- map_chr(records, "[[", "Maintainer")

  # Accessors for fields that may be absent from a record.
  downloads_of <- function(rec) rec$downloads %||% 1L
  url_of <- function(rec) rec$URL %||% NA_character_
  bugreports_of <- function(rec) rec$BugReports %||% NA_character_

  tbl <- data.frame(
    stringsAsFactors = FALSE,
    score = map_dbl(hits, "[[", "_score"),
    package = map_chr(hits, "[[", "_id"),
    version = package_version(map_chr(records, "[[", "Version")),
    title = map_chr(records, "[[", "Title"),
    description = map_chr(records, "[[", "Description"),
    date = parse_iso_8601(map_chr(records, "[[", "date")),
    # The maintainer string is split at the "<": the name is everything
    # before it, the email is what is between "<" and ">".
    maintainer_name = gsub("\\s+<.*$", "", maintainers),
    maintainer_email = gsub("^.*<([^>]+)>.*$", "\\1", maintainers, perl = TRUE),
    revdeps = map_int(records, "[[", "revdeps"),
    downloads_last_month = map_int(records, downloads_of),
    license = map_chr(records, "[[", "License"),
    url = map_chr(records, url_of),
    bugreports = map_chr(records, bugreports_of),
    # Keep the complete raw records in a list column.
    package_data = I(records)
  )

  attr(tbl, "metadata") <- search_info

  # Try to load tibble; the outcome is deliberately ignored. Presumably
  # this makes the tibble methods available for the "tbl_df" class set
  # below -- TODO confirm.
  requireNamespace("tibble", quietly = TRUE)
  class(tbl) <- unique(c("pkg_search_result", "tbl_df", "tbl", class(tbl)))

  tbl
}
#' @export
`[.pkg_search_result` <- function(x, i, j, drop = FALSE) {
  # Subsetting drops the "pkg_search_result" class, so the value returned
  # by the next `[` method is a plain table without the search result class.
  remaining_classes <- setdiff(class(x), "pkg_search_result")
  class(x) <- remaining_classes
  NextMethod("[")
}
pkg_search_again <- function() {
  # Return the stored result of the previous search, flipping its display
  # format between "short" and "long" on each call.
  if (is.null(s_data$prev_q)) {
    throw(new_error("No query given, and no previous query"))
  }

  current_format <- meta(s_data$prev_q$result)$format
  toggled_format <- if (current_format == "short") "long" else "short"

  # The new format is written back into the stored result, so that the
  # next call toggles it again.
  meta(s_data$prev_q$result)$format <- toggled_format
  s_data$prev_q$result
}