change all content calls to text and utf8 encoding, #98
started working on fail better, #101
sckott committed Feb 2, 2016
1 parent 7ff152a commit 1b5e2de
Showing 13 changed files with 85 additions and 65 deletions.
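The unifying change: every httr::content() call that previously let httr guess a parser or an encoding now goes through a small ct_utf8() helper (added in R/zzz.R below) that always returns the response body as UTF-8 text, leaving parsing to the caller. Note that several -/+ pairs in the hunks below look identical; those lines differ only in trailing whitespace. A minimal sketch of the migration pattern, using a hypothetical httr response res:

    # the helper added in R/zzz.R
    ct_utf8 <- function(x) httr::content(x, as = "text", encoding = "UTF-8")

    # before: httr picks the parser and may guess the encoding
    # out <- content(res)

    # after: explicit UTF-8 text, with parsing owned by the caller
    out <- jsonlite::fromJSON(ct_utf8(res))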
3 changes: 1 addition & 2 deletions R/cr_citation_count.r
@@ -29,8 +29,7 @@ cr_citation_count <- function(doi, url = "http://www.crossref.org/openurl/",
   # args$format=as.character("unixref")
   cite_count <- GET(url, query = args, ...)
   stop_for_status(cite_count)
-  cite_count_data <- content(cite_count, as = "text")
-  ans <- xmlParse(cite_count_data)
+  ans <- xmlParse(ct_utf8(cite_count))
   if(get_attr(ans, "status") == "unresolved") NA else as.numeric(get_attr(ans, "fl_count"))
 }

12 changes: 9 additions & 3 deletions R/cr_cn.r
@@ -153,15 +153,21 @@
     "onix-xml" = "text/xml")
   parser <- select[[format]]
   if (raw) {
-    content(response, "text")
+    ct_utf8(response)
   } else {
-    out <- content(response, "parsed", parser, "UTF-8")
+    out <- ct_utf8(response)
     if (format == "text") {
       out <- gsub("\n", "", out)
     }
     if (format == "bibentry") {
       out <- parse_bibtex(out)
     }
+    if (parser == "application/json") {
+      out <- jsonlite::fromJSON(out)
+    }
+    if (parser == "text/xml") {
+      out <- xml2::read_xml(out)
+    }
     out
   }
 }
@@ -191,7 +197,7 @@ parse_bibtex <- function(x){

 warn_status <- function(x) {
   if (x$status_code > 202) {
-    mssg <- content(x)
+    mssg <- ct_utf8(x)
     if (!is.character(mssg)) {
       mssg <- if (x$status_code == 406) {
         "(406) - probably bad format type"
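The second hunk above replaces content(response, "parsed", parser, "UTF-8"), where httr chose the parser, with UTF-8 text plus an explicit parse keyed on the negotiated MIME type. A standalone sketch of the same idea (illustrative endpoint; the DOI is borrowed from the package examples elsewhere in this diff):

    library(httr)

    res <- GET("https://api.crossref.org/works/10.1890/10-0340.1")
    stop_for_status(res)
    txt <- content(res, as = "text", encoding = "UTF-8")  # what ct_utf8() does
    dat <- jsonlite::fromJSON(txt)  # the caller now owns the parsing step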
32 changes: 16 additions & 16 deletions R/cr_ft_text.R
@@ -50,7 +50,7 @@
 #' ### pdf
 #' cr_ft_text(links, "pdf", read=FALSE)
 #' cr_ft_text(links, "pdf")
-#'
+#'
 #' ### another pensoft e.g.
 #' links <- cr_ft_links("10.3897/phytokeys.42.7604", "all")
 #' pdf_read <- cr_ft_text(url = links, type = "pdf", read=FALSE, verbose = FALSE)
@@ -101,10 +101,10 @@
 #' ## plain text
 #' link <- cr_ft_links(out$data$DOI[1], "plain")
 #' # res <- cr_ft_text(url = link, "plain")
-#'
+#'
 #' ## Wiley
 #' Sys.setenv(CROSSREF_TDM = "your-key")
-#'
+#'
 #' ### all wiley
 #' out <- cr_members(311, filter=c(has_full_text = TRUE, type = 'journal-article'), works = TRUE)
 #' dois <- out$data$DOI[1:10]
@@ -116,11 +116,11 @@
 #' # res[[i]] <- cr_ft_text(tmp, type = "pdf", cache=FALSE)
 #' # }
 #' # res
-#'
-#' #### older dates
-#' out <- cr_members(311, filter=c(has_full_text = TRUE,
+#'
+#' #### older dates
+#' out <- cr_members(311, filter=c(has_full_text = TRUE,
 #'   type = 'journal-article', until_created_date = "2013-12-31"), works = TRUE)
-#'
+#'
 #' dois <- out$data$DOI[1:10]
 #' # res <- list()
 #' # for (i in seq_along(dois)) {
@@ -130,7 +130,7 @@
 #' # res[[i]] <- cr_ft_text(tmp, type = "pdf", cache=FALSE)
 #' # }
 #' # res
-#'
+#'
 #' ### wiley subset with CC By 4.0 license
 #' lic <- "http://creativecommons.org/licenses/by/4.0/"
 #' out <- cr_members(311, filter=c(has_full_text = TRUE, license.url = lic), works = TRUE)
@@ -146,7 +146,7 @@

 cr_ft_text <- function(url, type='xml', path = "~/.crossref", overwrite = TRUE,
   read=TRUE, verbose=TRUE, cache=TRUE, ...) {
-
+
   auth <- cr_auth(url, type)
   switch( pick_type(type, url),
     xml = getTEXT(get_url(url, 'xml'), type, auth, ...),
@@ -213,7 +213,7 @@ cr_auth <- function(url, type) {
       # add_headers(`CR-Clickthrough-Client-Token` = key, Accept = type)
     },
     `311` = {
-      add_headers(`CR-Clickthrough-Client-Token` = Sys.getenv("CROSSREF_TDM"),
+      add_headers(`CR-Clickthrough-Client-Token` = Sys.getenv("CROSSREF_TDM"),
         Accept = type)
     }
   )
@@ -227,8 +227,8 @@
 getTEXT <- function(x, type, auth, ...){
   res <- GET(x, auth, ...)
   switch(type,
-    xml = XML::xmlParse(httr::content(res, as = "text")),
-    plain = httr::content(res, as = "text"))
+    xml = XML::xmlParse(ct_utf8(res)),
+    plain = ct_utf8(res))
 }

 getPDF <- function(url, path, auth, overwrite, type, read, verbose, cache=FALSE, ...) {
@@ -254,10 +254,10 @@ getPDF <- function(url, path, auth, overwrite, type, read, verbose, cache=FALSE,
     }
   } else {
     if (verbose) message("Downloading pdf...")
-    res <- GET(url,
-      accept("application/pdf"),
-      write_disk(path = filepath, overwrite = overwrite),
-      auth,
+    res <- GET(url,
+      accept("application/pdf"),
+      write_disk(path = filepath, overwrite = overwrite),
+      auth,
       config(followlocation = TRUE), ...)
     filepath <- res$request$output$path
   }
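getPDF() streams the response body straight to disk rather than buffering it in memory. A minimal standalone sketch of that httr pattern (URL and path are placeholders):

    library(httr)

    url      <- "https://example.com/article.pdf"      # hypothetical PDF link
    filepath <- file.path(tempdir(), "article.pdf")

    res <- GET(url,
      accept("application/pdf"),                       # ask the server for a PDF
      write_disk(path = filepath, overwrite = TRUE),   # stream the body to disk
      config(followlocation = TRUE))                   # follow publisher redirects
    stop_for_status(res)
    res$request$output$path                            # where the file landed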
27 changes: 14 additions & 13 deletions R/cr_search.r
@@ -17,7 +17,7 @@
 #' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
 #' @examples \dontrun{
 #' cr_search(query = c("renear", "palmer"))
-#'
+#'
 #' # limit to 4 results
 #' cr_search(query = c("renear", "palmer"), rows = 4)
 #'
@@ -42,19 +42,19 @@
 #'
 #' # find all the records of articles from a journal ISBN
 #' cr_search(query = "1461-0248", type="Journal Article")
-#'
+#'
 #' # curl stuff
 #' library('httr')
 #' cr_search(doi = "10.1890/10-0340.1", config=verbose())
 #' cr_search(query = c("renear", "palmer"), rows = 40, config=progress())
 #' }

-`cr_search` <- function(query=NULL, doi=NULL, page=NULL, rows=NULL, sort=NULL,
-  year=NULL, type=NULL, ...)
-{
-  url <- "http://search.labs.crossref.org/dois"
-  if (!is.null(doi)) {
-    doi <- as.character(doi)
+`cr_search` <- function(query=NULL, doi=NULL, page=NULL, rows=NULL, sort=NULL,
+  year=NULL, type=NULL, ...) {
+  #url <- "http://search.labs.crossref.org/dois"
+  url <- "http://search.crossref.org/dois"
+  if (!is.null(doi)) {
+    doi <- as.character(doi)
   }
   if (is.null(doi)) {
     cr_search_GET(url, query, page, rows, sort, year, type, ...)
@@ -64,15 +64,16 @@
 }

 cr_search_GET <- function(url, x, page, rows, sort, year, type, ...){
-  args <- cr_compact(list(q=x, page=page, rows=rows, sort=sort, year=year, type=type))
+  args <- cr_compact(list(q = x, page = page, rows = rows,
+    sort = sort, year = year, type = type))
   tt <- GET(url, query = args, ...)
   stop_for_status(tt)
-  res <- content(tt, as = "text")
+  res <- ct_utf8(tt)
   tmp <- jsonlite::fromJSON(res)
-  if(NROW(tmp) == 0) NULL else col_classes(tmp, c("character","numeric","integer","character","character","character","numeric"))
+  if (NROW(tmp) == 0) NULL else col_classes(tmp, c("character","numeric","integer","character","character","character","numeric"))
 }

 asnum <- function(x){
-  tmp <- tryCatch(as.numeric(x), warning=function(w) w)
-  if(is(tmp, "simpleWarning")) x else tmp
+  tmp <- tryCatch(as.numeric(x), warning = function(w) w)
+  if (is(tmp, "simpleWarning")) x else tmp
 }
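Besides the encoding change, this file swaps the search.labs.crossref.org host for search.crossref.org (the old URL is kept as a comment). An illustrative check of the new endpoint, with the same query arguments cr_search_GET() assembles:

    library(httr)

    res <- GET("http://search.crossref.org/dois",
      query = list(q = "renear", rows = 4))
    stop_for_status(res)
    str(jsonlite::fromJSON(content(res, as = "text", encoding = "UTF-8")))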
10 changes: 5 additions & 5 deletions R/cr_search2.r
@@ -1,13 +1,13 @@
 # Search the CrossRef Metatdata API.
-#
+#
 # @param query Query terms.
 # @param doi Search by a single DOI or many DOIs.
 # @param page Page to return from results.
 # @param rows Number of records to return.
 # @param sort Sort either by "score" or "year".
 # @param year Year to search.
 # @param type Record type, e.g., "Journal Article" or "Journal Issue"
-#
+#
 # @details See \url{http://search.labs.crossref.org/} for more info on this
 # Crossref API service.
 # @examples \dontrun{
@@ -18,7 +18,7 @@ cr_search2 <- function(query, doi = NULL, page = NULL, rows = NULL,
   sort = NULL, year = NULL, type = NULL)
 {
   url = "http://search.labs.crossref.org/dois"
-
+
   replacenull <- function(x){
     x[sapply(x, is.null)] <- NA
     x
@@ -28,7 +28,7 @@ cr_search2 <- function(query, doi = NULL, page = NULL, rows = NULL,
   args <- cr_compact(list(q=query, page=page, rows=rows, sort=sort, year=year, type=type))
   tt <- GET(url, query=args)
   stop_for_status(tt)
-  res <- content(tt, as = "text")
+  res <- ct_utf8(tt)
   out <- fromJSON(res)
   out2 <- llply(out, replacenull)
   output <- ldply(out2, function(x) as.data.frame(x, stringsAsFactors = FALSE))
@@ -37,7 +37,7 @@ cr_search2 <- function(query, doi = NULL, page = NULL, rows = NULL,
 {
   doicall <- function(x) {
     args <- cr_compact(list(q=x, page=page, rows=rows, sort=sort, year=year, type=type))
-    out <- content(GET(url, query=args))
+    out <- ct_utf8(GET(url, query=args))
     out2 <- llply(out, replacenull)
     output <- ldply(out2, function(x) as.data.frame(x, stringsAsFactors = FALSE))
     if(nrow(output)==0){"no results"} else{output}
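One caution about the last hunk: elsewhere in this commit the dropped content() call is replaced by ct_utf8() plus an explicit parse, but here out is handed to llply(out, replacenull) while still raw JSON text, so the old list-walking code no longer sees a list. A sketch of what the commit's own pattern (as in get_styles.R and zzz.R below) would look like at that spot:

    # assumption: out should again be the parsed nested list content() used to return
    out <- jsonlite::fromJSON(ct_utf8(GET(url, query = args)), FALSE)
    out2 <- llply(out, replacenull)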
2 changes: 1 addition & 1 deletion R/cr_search_free.r
@@ -38,6 +38,6 @@
 {
   tt <- POST(url, config = c(content_type_json(), accept_json()), body=jsonlite::toJSON(query))
   stop_for_status(tt)
-  res <- content(tt, as = "text")
+  res <- ct_utf8(tt)
   fromJSON(res)$results
 }
10 changes: 5 additions & 5 deletions R/crosscite.R
@@ -2,14 +2,14 @@
 #'
 #' @export
 #' @param dois Search by a single DOI or many DOIs.
-#' @param style a CSL style (for text format only). See \code{\link{get_styles}}
-#' for options. Default: apa. If there's a style that CrossRef doesn't support you'll get a
+#' @param style a CSL style (for text format only). See \code{\link{get_styles}}
+#' for options. Default: apa. If there's a style that CrossRef doesn't support you'll get a
 #' \code{(500) Internal Server Error}
 #' @param locale Language locale. See \code{?Sys.getlocale}
 #' @template moreargs
 #' @details See \url{http://www.crosscite.org/cn/} for more info on the
 #' Crossref Content Negotiation API service.
-#'
+#'
 #' This function is now deprecated. It will be removed in the next version
 #' of this package. Use \code{\link{cr_cn}} instead.
 #'
@@ -20,7 +20,7 @@
 #' }

 `crosscite` <- function(dois, style = 'apa', locale = "en-US", .progress = "none", ...) {
-  .Deprecated(new = "cr_cn", package = "rcrossref",
+  .Deprecated(new = "cr_cn", package = "rcrossref",
     msg = "crosscite is deprecated - will be removed in next version, use cr_cn")
   if(length(dois) > 1) {
     llply(dois, function(z, ...) {
@@ -40,7 +40,7 @@ ccite <- function(doi, style, locale, ...) {
   args <- cr_compact(list(doi = doi, style = style, locale = locale))
   res <- GET(ccurl(), query = args, ...)
   stop_for_status(res)
-  gsub("\n", "", content(res, "text"))
+  gsub("\n", "", ct_utf8(res))
 }

 ccurl <- function() "http://crosscite.org/citeproc/format"
8 changes: 4 additions & 4 deletions R/get_styles.R
@@ -5,13 +5,13 @@
 #' get_styles()[1:5]
 #' }

-get_styles <- function(...){
+get_styles <- function(...){
   comm <- GET("https://api.github.com/repos/citation-style-language/styles/commits?per_page=1", ...)
-  commres <- content(comm)
+  commres <- jsonlite::fromJSON(ct_utf8(comm), FALSE)
   sha <- commres[[1]]$sha
   sty <- GET(sprintf("https://api.github.com/repos/citation-style-language/styles/git/trees/%s", sha))
-  res <- content(sty)
+  res <- jsonlite::fromJSON(ct_utf8(sty), FALSE)
   files <- sapply(res$tree, "[[", "path")
   csls <- grep("\\.csl", files, value = TRUE)
-  vapply(csls, function(x) strsplit(x, "\\.csl")[[1]][[1]], "", USE.NAMES = FALSE)
+  vapply(csls, function(x) strsplit(x, "\\.csl")[[1]][[1]], "", USE.NAMES = FALSE)
 }
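The FALSE in jsonlite::fromJSON(ct_utf8(comm), FALSE) is simplifyVector: turning simplification off reproduces the nested-list shape the old content(comm) call returned, which is what commres[[1]]$sha and sapply(res$tree, "[[", "path") expect. A small illustrative contrast:

    txt <- '[{"sha": "abc123"}]'
    str(jsonlite::fromJSON(txt))             # simplified: a 1-row data.frame
    jsonlite::fromJSON(txt, FALSE)[[1]]$sha  # nested lists preserved: "abc123"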
4 changes: 2 additions & 2 deletions R/id_converter.R
@@ -22,8 +22,8 @@
   args <- list(tool = "my_tool", email = "my_email@example.com",
     ids = x, idtype = type, format = "json")
   res <- GET(idcon_base(), query = args, ...)
-  if (res$status_code > 201) stop(content(res)$message, call. = FALSE)
-  jsonlite::fromJSON(content(res, as = "text"))
+  if (res$status_code > 201) stop(jsonlite::fromJSON(ct_utf8(res))$message, call. = FALSE)
+  jsonlite::fromJSON(ct_utf8(res))
 }

 idcon_base <- function() "http://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0"
20 changes: 17 additions & 3 deletions R/zzz.R
@@ -1,5 +1,7 @@
 cr_compact <- function(x) Filter(Negate(is.null), x)

+ct_utf8 <- function(x) httr::content(x, as = "text", encoding = "UTF-8")
+
 asl <- function(z) {
   # z <- tolower(z)
   if (is.logical(z) || tolower(z) == "true" || tolower(z) == "false") {
@@ -26,13 +28,13 @@ cr_GET <- function(endpoint, args, todf = TRUE, on_error = warning, parse = TRUE
     list(message = NULL)
   } else {
     stopifnot(res$headers$`content-type` == "application/json;charset=UTF-8")
-    res <- content(res, as = "text", encoding = "UTF-8")
+    res <- ct_utf8(res)
     if (parse) jsonlite::fromJSON(res, todf) else res
   }
 }

 get_err <- function(x) {
-  tmp <- content(x)
+  tmp <- jsonlite::fromJSON(ct_utf8(x), FALSE)
   if (is(tmp, "list")) {
     tmp$message[[1]]$message
   } else {
@@ -66,16 +68,28 @@ check_limit <- function(x) {
   }
 }

+check_number <- function(x) {
+  call <- deparse(substitute(x))
+  if (!is.null(x)) {
+    tt <- tryCatch(as.numeric(x), warning = function(w) w)
+    if (is(tt, "warning") || !class(x) %in% c('integer', 'numeric')) {
+      stop(call, " value illegal, must be an integer", call. = FALSE)
+    }
+  }
+}
+
 ifnullna <- function(x) {
   if (is.null(x)) NA else x
 }

 prep_args <- function(query, filter, offset, limit, sample, sort, order, facet, cursor) {
   check_limit(limit)
+  check_number(offset)
+  check_number(sample)
   filter <- filter_handler(filter)
   facet <- if (facet) "t" else NULL
   cr_compact(list(query = query, filter = filter, offset = offset, rows = limit,
     sample = sample, sort = sort, order = order, facet = facet,
     cursor = cursor))

 }
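check_number() is the first piece of the "fail better" work (#101) noted in the commit description: prep_args() now rejects non-numeric offset and sample values before any request goes out, and deparse(substitute(x)) puts the offending argument's name into the error message. Expected behavior, sketched by calling the internal helper directly:

    check_number(100)    # numeric/integer: passes silently
    check_number(NULL)   # NULL: argument not supplied, check skipped
    check_number("foo")  # Error: "foo" value illegal, must be an integer

Called through prep_args() (as check_number(offset)), the error names the argument instead: offset value illegal, must be an integer.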
6 changes: 3 additions & 3 deletions man/cr_ft_text.Rd

(generated file; diff not rendered)

6 changes: 3 additions & 3 deletions man/crosscite.Rd

(generated file; diff not rendered)

(1 more changed file not loaded in this view)
