change all content calls to text and utf8 encoding, #98
started working on fail better, #101
sckott committed Feb 2, 2016
1 parent 7ff152a commit 1b5e2de
Showing 13 changed files with 85 additions and 65 deletions.
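The unifying change: every httr::content() call that previously let httr guess a parser or an encoding now goes through a small ct_utf8() helper (added in R/zzz.R below) that always returns the response body as UTF-8 text, leaving parsing to the caller. Note that several -/+ pairs in the hunks below look identical; those lines differ only in trailing whitespace. A minimal sketch of the migration pattern, using a hypothetical httr response res:

    # the helper added in R/zzz.R
    ct_utf8 <- function(x) httr::content(x, as = "text", encoding = "UTF-8")

    # before: httr picks the parser and may guess the encoding
    # out <- content(res)

    # after: explicit UTF-8 text, with parsing owned by the caller
    out <- jsonlite::fromJSON(ct_utf8(res))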
3 changes: 1 addition & 2 deletions R/cr_citation_count.r
@@ -29,8 +29,7 @@ cr_citation_count <- function(doi, url = "http://www.crossref.org/openurl/",
   # args$format=as.character("unixref")
   cite_count <- GET(url, query = args, ...)
   stop_for_status(cite_count)
-  cite_count_data <- content(cite_count, as = "text")
-  ans <- xmlParse(cite_count_data)
+  ans <- xmlParse(ct_utf8(cite_count))
   if(get_attr(ans, "status") == "unresolved") NA else as.numeric(get_attr(ans, "fl_count"))
 }

12 changes: 9 additions & 3 deletions R/cr_cn.r
@@ -153,15 +153,21 @@
     "onix-xml" = "text/xml")
   parser <- select[[format]]
   if (raw) {
-    content(response, "text")
+    ct_utf8(response)
   } else {
-    out <- content(response, "parsed", parser, "UTF-8")
+    out <- ct_utf8(response)
     if (format == "text") {
       out <- gsub("\n", "", out)
     }
     if (format == "bibentry") {
       out <- parse_bibtex(out)
     }
+    if (parser == "application/json") {
+      out <- jsonlite::fromJSON(out)
+    }
+    if (parser == "text/xml") {
+      out <- xml2::read_xml(out)
+    }
     out
   }
 }
@@ -191,7 +197,7 @@ parse_bibtex <- function(x){

 warn_status <- function(x) {
   if (x$status_code > 202) {
-    mssg <- content(x)
+    mssg <- ct_utf8(x)
     if (!is.character(mssg)) {
       mssg <- if (x$status_code == 406) {
         "(406) - probably bad format type"
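The second hunk above replaces content(response, "parsed", parser, "UTF-8"), where httr chose the parser, with UTF-8 text plus an explicit parse keyed on the negotiated MIME type. A standalone sketch of the same idea (illustrative endpoint; the DOI is borrowed from the package examples elsewhere in this diff):

    library(httr)

    res <- GET("https://api.crossref.org/works/10.1890/10-0340.1")
    stop_for_status(res)
    txt <- content(res, as = "text", encoding = "UTF-8")  # what ct_utf8() does
    dat <- jsonlite::fromJSON(txt)  # the caller now owns the parsing step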
32 changes: 16 additions & 16 deletions R/cr_ft_text.R
@@ -50,7 +50,7 @@
 #' ### pdf
 #' cr_ft_text(links, "pdf", read=FALSE)
 #' cr_ft_text(links, "pdf")
-#'
+#'
 #' ### another pensoft e.g.
 #' links <- cr_ft_links("10.3897/phytokeys.42.7604", "all")
 #' pdf_read <- cr_ft_text(url = links, type = "pdf", read=FALSE, verbose = FALSE)
@@ -101,10 +101,10 @@
 #' ## plain text
 #' link <- cr_ft_links(out$data$DOI[1], "plain")
 #' # res <- cr_ft_text(url = link, "plain")
-#'
+#'
 #' ## Wiley
 #' Sys.setenv(CROSSREF_TDM = "your-key")
-#'
+#'
 #' ### all wiley
 #' out <- cr_members(311, filter=c(has_full_text = TRUE, type = 'journal-article'), works = TRUE)
 #' dois <- out$data$DOI[1:10]
@@ -116,11 +116,11 @@
 #' # res[[i]] <- cr_ft_text(tmp, type = "pdf", cache=FALSE)
 #' # }
 #' # res
-#'
-#' #### older dates
-#' out <- cr_members(311, filter=c(has_full_text = TRUE,
+#'
+#' #### older dates
+#' out <- cr_members(311, filter=c(has_full_text = TRUE,
 #'   type = 'journal-article', until_created_date = "2013-12-31"), works = TRUE)
-#'
+#'
 #' dois <- out$data$DOI[1:10]
 #' # res <- list()
 #' # for (i in seq_along(dois)) {
@@ -130,7 +130,7 @@
 #' # res[[i]] <- cr_ft_text(tmp, type = "pdf", cache=FALSE)
 #' # }
 #' # res
-#'
+#'
 #' ### wiley subset with CC By 4.0 license
 #' lic <- "http://creativecommons.org/licenses/by/4.0/"
 #' out <- cr_members(311, filter=c(has_full_text = TRUE, license.url = lic), works = TRUE)
@@ -146,7 +146,7 @@

 cr_ft_text <- function(url, type='xml', path = "~/.crossref", overwrite = TRUE,
   read=TRUE, verbose=TRUE, cache=TRUE, ...) {
-
+
   auth <- cr_auth(url, type)
   switch( pick_type(type, url),
     xml = getTEXT(get_url(url, 'xml'), type, auth, ...),
@@ -213,7 +213,7 @@ cr_auth <- function(url, type) {
       # add_headers(`CR-Clickthrough-Client-Token` = key, Accept = type)
     },
     `311` = {
-      add_headers(`CR-Clickthrough-Client-Token` = Sys.getenv("CROSSREF_TDM"),
+      add_headers(`CR-Clickthrough-Client-Token` = Sys.getenv("CROSSREF_TDM"),
         Accept = type)
     }
   )
@@ -227,8 +227,8 @@
 getTEXT <- function(x, type, auth, ...){
   res <- GET(x, auth, ...)
   switch(type,
-    xml = XML::xmlParse(httr::content(res, as = "text")),
-    plain = httr::content(res, as = "text"))
+    xml = XML::xmlParse(ct_utf8(res)),
+    plain = ct_utf8(res))
 }

 getPDF <- function(url, path, auth, overwrite, type, read, verbose, cache=FALSE, ...) {
@@ -254,10 +254,10 @@ getPDF <- function(url, path, auth, overwrite, type, read, verbose, cache=FALSE,
     }
   } else {
     if (verbose) message("Downloading pdf...")
-    res <- GET(url,
-      accept("application/pdf"),
-      write_disk(path = filepath, overwrite = overwrite),
-      auth,
+    res <- GET(url,
+      accept("application/pdf"),
+      write_disk(path = filepath, overwrite = overwrite),
+      auth,
       config(followlocation = TRUE), ...)
     filepath <- res$request$output$path
   }
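getPDF() streams the response body straight to disk rather than buffering it in memory. A minimal standalone sketch of that httr pattern (URL and path are placeholders):

    library(httr)

    url      <- "https://example.com/article.pdf"      # hypothetical PDF link
    filepath <- file.path(tempdir(), "article.pdf")

    res <- GET(url,
      accept("application/pdf"),                       # ask the server for a PDF
      write_disk(path = filepath, overwrite = TRUE),   # stream the body to disk
      config(followlocation = TRUE))                   # follow publisher redirects
    stop_for_status(res)
    res$request$output$path                            # where the file landed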
27 changes: 14 additions & 13 deletions R/cr_search.r
@@ -17,7 +17,7 @@
 #' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
 #' @examples \dontrun{
 #' cr_search(query = c("renear", "palmer"))
-#'
+#'
 #' # limit to 4 results
 #' cr_search(query = c("renear", "palmer"), rows = 4)
 #'
@@ -42,19 +42,19 @@
 #'
 #' # find all the records of articles from a journal ISBN
 #' cr_search(query = "1461-0248", type="Journal Article")
-#'
+#'
 #' # curl stuff
 #' library('httr')
 #' cr_search(doi = "10.1890/10-0340.1", config=verbose())
 #' cr_search(query = c("renear", "palmer"), rows = 40, config=progress())
 #' }

-`cr_search` <- function(query=NULL, doi=NULL, page=NULL, rows=NULL, sort=NULL,
-  year=NULL, type=NULL, ...)
-{
-  url <- "http://search.labs.crossref.org/dois"
-  if (!is.null(doi)) {
-    doi <- as.character(doi)
+`cr_search` <- function(query=NULL, doi=NULL, page=NULL, rows=NULL, sort=NULL,
+  year=NULL, type=NULL, ...) {
+  #url <- "http://search.labs.crossref.org/dois"
+  url <- "http://search.crossref.org/dois"
+  if (!is.null(doi)) {
+    doi <- as.character(doi)
   }
   if (is.null(doi)) {
     cr_search_GET(url, query, page, rows, sort, year, type, ...)
@@ -64,15 +64,16 @@
 }

 cr_search_GET <- function(url, x, page, rows, sort, year, type, ...){
-  args <- cr_compact(list(q=x, page=page, rows=rows, sort=sort, year=year, type=type))
+  args <- cr_compact(list(q = x, page = page, rows = rows,
+    sort = sort, year = year, type = type))
   tt <- GET(url, query = args, ...)
   stop_for_status(tt)
-  res <- content(tt, as = "text")
+  res <- ct_utf8(tt)
   tmp <- jsonlite::fromJSON(res)
-  if(NROW(tmp) == 0) NULL else col_classes(tmp, c("character","numeric","integer","character","character","character","numeric"))
+  if (NROW(tmp) == 0) NULL else col_classes(tmp, c("character","numeric","integer","character","character","character","numeric"))
 }

 asnum <- function(x){
-  tmp <- tryCatch(as.numeric(x), warning=function(w) w)
-  if(is(tmp, "simpleWarning")) x else tmp
+  tmp <- tryCatch(as.numeric(x), warning = function(w) w)
+  if (is(tmp, "simpleWarning")) x else tmp
 }
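Besides the encoding change, this file swaps the search.labs.crossref.org host for search.crossref.org (the old URL is kept as a comment). An illustrative check of the new endpoint, with the same query arguments cr_search_GET() assembles:

    library(httr)

    res <- GET("http://search.crossref.org/dois",
      query = list(q = "renear", rows = 4))
    stop_for_status(res)
    str(jsonlite::fromJSON(content(res, as = "text", encoding = "UTF-8")))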
10 changes: 5 additions & 5 deletions R/cr_search2.r
@@ -1,13 +1,13 @@
 # Search the CrossRef Metatdata API.
-#
+#
 # @param query Query terms.
 # @param doi Search by a single DOI or many DOIs.
 # @param page Page to return from results.
 # @param rows Number of records to return.
 # @param sort Sort either by "score" or "year".
 # @param year Year to search.
 # @param type Record type, e.g., "Journal Article" or "Journal Issue"
-#
+#
 # @details See \url{http://search.labs.crossref.org/} for more info on this
 # Crossref API service.
 # @examples \dontrun{
@@ -18,7 +18,7 @@ cr_search2 <- function(query, doi = NULL, page = NULL, rows = NULL,
   sort = NULL, year = NULL, type = NULL)
 {
   url = "http://search.labs.crossref.org/dois"
-
+
   replacenull <- function(x){
     x[sapply(x, is.null)] <- NA
     x
@@ -28,7 +28,7 @@ cr_search2 <- function(query, doi = NULL, page = NULL, rows = NULL,
   args <- cr_compact(list(q=query, page=page, rows=rows, sort=sort, year=year, type=type))
   tt <- GET(url, query=args)
   stop_for_status(tt)
-  res <- content(tt, as = "text")
+  res <- ct_utf8(tt)
   out <- fromJSON(res)
   out2 <- llply(out, replacenull)
   output <- ldply(out2, function(x) as.data.frame(x, stringsAsFactors = FALSE))
@@ -37,7 +37,7 @@ cr_search2 <- function(query, doi = NULL, page = NULL, rows = NULL,
 {
   doicall <- function(x) {
     args <- cr_compact(list(q=x, page=page, rows=rows, sort=sort, year=year, type=type))
-    out <- content(GET(url, query=args))
+    out <- ct_utf8(GET(url, query=args))
     out2 <- llply(out, replacenull)
     output <- ldply(out2, function(x) as.data.frame(x, stringsAsFactors = FALSE))
     if(nrow(output)==0){"no results"} else{output}
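One caution about the last hunk: elsewhere in this commit the dropped content() call is replaced by ct_utf8() plus an explicit parse, but here out is handed to llply(out, replacenull) while still raw JSON text, so the old list-walking code no longer sees a list. A sketch of what the commit's own pattern (as in get_styles.R and zzz.R below) would look like at that spot:

    # assumption: out should again be the parsed nested list content() used to return
    out <- jsonlite::fromJSON(ct_utf8(GET(url, query = args)), FALSE)
    out2 <- llply(out, replacenull)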
2 changes: 1 addition & 1 deletion R/cr_search_free.r
@@ -38,6 +38,6 @@
 {
   tt <- POST(url, config = c(content_type_json(), accept_json()), body=jsonlite::toJSON(query))
   stop_for_status(tt)
-  res <- content(tt, as = "text")
+  res <- ct_utf8(tt)
   fromJSON(res)$results
 }
10 changes: 5 additions & 5 deletions R/crosscite.R
@@ -2,14 +2,14 @@
 #'
 #' @export
 #' @param dois Search by a single DOI or many DOIs.
-#' @param style a CSL style (for text format only). See \code{\link{get_styles}}
-#' for options. Default: apa. If there's a style that CrossRef doesn't support you'll get a
+#' @param style a CSL style (for text format only). See \code{\link{get_styles}}
+#' for options. Default: apa. If there's a style that CrossRef doesn't support you'll get a
 #' \code{(500) Internal Server Error}
 #' @param locale Language locale. See \code{?Sys.getlocale}
 #' @template moreargs
 #' @details See \url{http://www.crosscite.org/cn/} for more info on the
 #' Crossref Content Negotiation API service.
-#'
+#'
 #' This function is now deprecated. It will be removed in the next version
 #' of this package. Use \code{\link{cr_cn}} instead.
 #'
@@ -20,7 +20,7 @@
 #' }

 `crosscite` <- function(dois, style = 'apa', locale = "en-US", .progress = "none", ...) {
-  .Deprecated(new = "cr_cn", package = "rcrossref",
+  .Deprecated(new = "cr_cn", package = "rcrossref",
     msg = "crosscite is deprecated - will be removed in next version, use cr_cn")
   if(length(dois) > 1) {
     llply(dois, function(z, ...) {
@@ -40,7 +40,7 @@ ccite <- function(doi, style, locale, ...) {
   args <- cr_compact(list(doi = doi, style = style, locale = locale))
   res <- GET(ccurl(), query = args, ...)
   stop_for_status(res)
-  gsub("\n", "", content(res, "text"))
+  gsub("\n", "", ct_utf8(res))
 }

 ccurl <- function() "http://crosscite.org/citeproc/format"
8 changes: 4 additions & 4 deletions R/get_styles.R
@@ -5,13 +5,13 @@
 #' get_styles()[1:5]
 #' }

-get_styles <- function(...){
+get_styles <- function(...){
   comm <- GET("https://api.github.com/repos/citation-style-language/styles/commits?per_page=1", ...)
-  commres <- content(comm)
+  commres <- jsonlite::fromJSON(ct_utf8(comm), FALSE)
   sha <- commres[[1]]$sha
   sty <- GET(sprintf("https://api.github.com/repos/citation-style-language/styles/git/trees/%s", sha))
-  res <- content(sty)
+  res <- jsonlite::fromJSON(ct_utf8(sty), FALSE)
   files <- sapply(res$tree, "[[", "path")
   csls <- grep("\\.csl", files, value = TRUE)
-  vapply(csls, function(x) strsplit(x, "\\.csl")[[1]][[1]], "", USE.NAMES = FALSE)
+  vapply(csls, function(x) strsplit(x, "\\.csl")[[1]][[1]], "", USE.NAMES = FALSE)
 }
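The FALSE in jsonlite::fromJSON(ct_utf8(comm), FALSE) is simplifyVector: turning simplification off reproduces the nested-list shape the old content(comm) call returned, which is what commres[[1]]$sha and sapply(res$tree, "[[", "path") expect. A small illustrative contrast:

    txt <- '[{"sha": "abc123"}]'
    str(jsonlite::fromJSON(txt))             # simplified: a 1-row data.frame
    jsonlite::fromJSON(txt, FALSE)[[1]]$sha  # nested lists preserved: "abc123"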
4 changes: 2 additions & 2 deletions R/id_converter.R
@@ -22,8 +22,8 @@
   args <- list(tool = "my_tool", email = "my_email@example.com",
     ids = x, idtype = type, format = "json")
   res <- GET(idcon_base(), query = args, ...)
-  if (res$status_code > 201) stop(content(res)$message, call. = FALSE)
-  jsonlite::fromJSON(content(res, as = "text"))
+  if (res$status_code > 201) stop(jsonlite::fromJSON(ct_utf8(res))$message, call. = FALSE)
+  jsonlite::fromJSON(ct_utf8(res))
 }

 idcon_base <- function() "http://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0"
20 changes: 17 additions & 3 deletions R/zzz.R
@@ -1,5 +1,7 @@
 cr_compact <- function(x) Filter(Negate(is.null), x)

+ct_utf8 <- function(x) httr::content(x, as = "text", encoding = "UTF-8")
+
 asl <- function(z) {
   # z <- tolower(z)
   if (is.logical(z) || tolower(z) == "true" || tolower(z) == "false") {
@@ -26,13 +28,13 @@ cr_GET <- function(endpoint, args, todf = TRUE, on_error = warning, parse = TRUE
     list(message = NULL)
   } else {
     stopifnot(res$headers$`content-type` == "application/json;charset=UTF-8")
-    res <- content(res, as = "text", encoding = "UTF-8")
+    res <- ct_utf8(res)
     if (parse) jsonlite::fromJSON(res, todf) else res
   }
 }

 get_err <- function(x) {
-  tmp <- content(x)
+  tmp <- jsonlite::fromJSON(ct_utf8(x), FALSE)
   if (is(tmp, "list")) {
     tmp$message[[1]]$message
   } else {
@@ -66,16 +68,28 @@ check_limit <- function(x) {
   }
 }

+check_number <- function(x) {
+  call <- deparse(substitute(x))
+  if (!is.null(x)) {
+    tt <- tryCatch(as.numeric(x), warning = function(w) w)
+    if (is(tt, "warning") || !class(x) %in% c('integer', 'numeric')) {
+      stop(call, " value illegal, must be an integer", call. = FALSE)
+    }
+  }
+}
+
 ifnullna <- function(x) {
   if (is.null(x)) NA else x
 }

 prep_args <- function(query, filter, offset, limit, sample, sort, order, facet, cursor) {
   check_limit(limit)
+  check_number(offset)
+  check_number(sample)
   filter <- filter_handler(filter)
   facet <- if (facet) "t" else NULL
   cr_compact(list(query = query, filter = filter, offset = offset, rows = limit,
     sample = sample, sort = sort, order = order, facet = facet,
     cursor = cursor))

 }
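check_number() is the first piece of the "fail better" work (#101) noted in the commit description: prep_args() now rejects non-numeric offset and sample values before any request goes out, and deparse(substitute(x)) puts the offending argument's name into the error message. Expected behavior, sketched by calling the internal helper directly:

    check_number(100)    # numeric/integer: passes silently
    check_number(NULL)   # NULL: argument not supplied, check skipped
    check_number("foo")  # Error: "foo" value illegal, must be an integer

Called through prep_args() (as check_number(offset)), the error names the argument instead: offset value illegal, must be an integer.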
6 changes: 3 additions & 3 deletions man/cr_ft_text.Rd

(generated file; diff not rendered)

6 changes: 3 additions & 3 deletions man/crosscite.Rd

(generated file; diff not rendered)

(1 more changed file not loaded in this view)
