Merge pull request #16 from mpjashby/dev
Merge dev for version 0.3.4
mpjashby authored Nov 7, 2023
2 parents 798d1f4 + 021053f commit 663a45f
Showing 43 changed files with 1,050 additions and 437 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
@@ -12,3 +12,4 @@
^pkgdown$
^CRAN-SUBMISSION$
^\.github$
^README.Rmd.orig$
1 change: 1 addition & 0 deletions .github/.gitignore
@@ -0,0 +1 @@
*.html
29 changes: 29 additions & 0 deletions .github/workflows/R-CMD-check.yaml
@@ -0,0 +1,29 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]

name: R-CMD-check

jobs:
R-CMD-check:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes
steps:
- uses: actions/checkout@v3

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check

- uses: r-lib/actions/check-r-package@v2
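
For context, this workflow appears to match the single-platform "check-release" example from r-lib/actions. A workflow like this is usually added with usethis (an assumption about how it was generated here, not something recorded in the commit):

# Copies the r-lib/actions "check-release" example workflow into
# .github/workflows/ of the current package
usethis::use_github_action("check-release")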
9 changes: 4 additions & 5 deletions DESCRIPTION
@@ -1,9 +1,9 @@
Package: crimedata
Title: Access Crime Data from the Open Crime Database
Version: 0.3.1
Version: 0.3.4
Authors@R: person("Matthew", "Ashby", email = "matthew.ashby@ucl.ac.uk",
role = c("aut", "cre", "cph"),
comment = c(ORCID = "<https://orcid.org/0000-0003-4201-9239>"))
comment = c(ORCID = "0000-0003-4201-9239"))
Description: Gives convenient access to publicly available police-recorded open
crime data from large cities in the United States that are included in the
Crime Open Database <https://osf.io/zyaqn/>.
@@ -12,16 +12,15 @@ License: MIT + file LICENSE
Language: en-US
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
Suggests:
testthat,
covr,
knitr,
rmarkdown
Imports:
digest,
dplyr,
httr,
osfr,
purrr,
rlang,
sf,
18 changes: 18 additions & 0 deletions NEWS.md
@@ -1,3 +1,21 @@
# crimedata 0.3.4

* Changed how ORCID is included in package DESCRIPTION file.


# crimedata 0.3.3

* Re-written tests and docs to ensure no errors/warnings are produced during
automatic testing if API is not available (#13).
* Suppressed progress bars automatically when running non-interactively (#14).
* Removed unnecessary dependency `httr`.


# crimedata 0.3.2

* Update how package-level documentation is signposted for CRAN (#11).


# crimedata 0.3.1

* Fixed an error with downloading some data for 2020 that was caused by a
3 changes: 1 addition & 2 deletions R/crimedata.R
@@ -26,6 +26,5 @@
#' at any time. It is understood that the data provided at this site is being
#' used at one's own risk.
#'
#' @docType package
#' @name crimedata
NULL
"_PACKAGE"
94 changes: 20 additions & 74 deletions R/file_urls.R
@@ -10,7 +10,7 @@
#'
#' @noRd
#'
get_file_urls <- function(cache = TRUE, quiet = FALSE) {
get_file_urls <- function(cache = TRUE, quiet = !interactive()) {

# Check inputs
if (!rlang::is_logical(cache, n = 1))
@@ -73,78 +73,29 @@ get_file_urls <- function(cache = TRUE, quiet = FALSE) {
#'
fetch_file_urls <- function() {

# Retrieve data types separtely because there seems to be some undocumented
# limit on the number of files returned by each API call, even with pagination
urls <- c(
"https://api.osf.io/v2/nodes/zyaqn/files/osfstorage/5bbde32b7cb18100193c778a/?filter[name]=core",
"https://api.osf.io/v2/nodes/zyaqn/files/osfstorage/5bbde32b7cb18100193c778a/?filter[name]=extended",
"https://api.osf.io/v2/nodes/zyaqn/files/osfstorage/5bbde32b7cb18100193c778a/?filter[name]=sample"
# Download file details
files <- osfr::osf_ls_files(
osfr::osf_retrieve_node("https://osf.io/zyaqn"),
path = "Data for R package",
n_max = Inf
)

json_values <- purrr::map(urls, function(x) {

page_url <- x

# Create an empty list to store result
values <- list()

while (!is.null(page_url)) {

# Get JSON data
json <- httr::content(
httr::stop_for_status(httr::GET(page_url)),
as = "parsed",
type = "application/json"
)

# Update the URL to the next page (or NULL if this is the last page)
page_url <- json$links[["next"]]

# Add results to existing object
values <- c(values, json$data)

}

# Return list of JSON objects
values

})

values <- purrr::map_dfr(json_values, function(x) {

purrr::map_dfr(x, function(y) {

# Parse the file name into type and year
file_name <- as.character(stringr::str_match(
y$attributes$name,
"^crime_open_database_(core|extended|sample)_(.+)_(\\d+).Rds$"
))

# Extract city_name
city_name <- stringr::str_to_title(
stringr::str_replace_all(file_name[3], "_", " ")
)
if (city_name == "All") {
city_name <- "All cities"
}

# Return a list of data for this file
list(
data_type = file_name[2],
city = city_name,
year = file_name[4],
file_url = y$links$download
)

})

})
# Extract file-name components
components <- stringr::str_match(
files$name,
"^crime_open_database_(core|extended|sample)_(.+)_(\\d+).Rds$"
)

# convert year from character to integer
values$year <- as.integer(values$year)
# Add components
files$data_type <- components[, 2]
files$city <- stringr::str_to_title(
stringr::str_replace_all(components[, 3], "_", " ")
)
files$city <- ifelse(files$city == "All", "All cities", files$city)
files$year <- as.integer(components[, 4])

# return tibble of links
values[order(values$data_type, values$city, values$year), ]
files[order(files$data_type, files$city, files$year), ]

}

@@ -159,14 +110,9 @@ fetch_file_urls <- function() {
#'
#' @return A tibble
#'
#' @examples
#' \donttest{
#' list_crime_data()
#' }
#'
#' @export
#'
list_crime_data <- function(quiet = FALSE) {
list_crime_data <- function(quiet = !interactive()) {

# Get DF of URLs
urls <- get_file_urls(quiet = quiet)
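
For reference, a minimal standalone sketch of the new osfr-based listing introduced above; it mirrors fetch_file_urls() rather than calling the package's internal helper, and assumes the OSF project https://osf.io/zyaqn and its "Data for R package" folder are reachable:

library(osfr)
library(stringr)

# List the data files stored in the "Data for R package" folder of the
# Crime Open Database OSF project (as fetch_file_urls() now does)
node <- osf_retrieve_node("https://osf.io/zyaqn")
files <- osf_ls_files(node, path = "Data for R package", n_max = Inf)

# Parse data type, city and year out of each file name
components <- str_match(
  files$name,
  "^crime_open_database_(core|extended|sample)_(.+)_(\\d+).Rds$"
)
files$data_type <- components[, 2]
files$city <- str_to_title(str_replace_all(components[, 3], "_", " "))
files$year <- as.integer(components[, 4])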
86 changes: 27 additions & 59 deletions R/get_crime_data.R
@@ -22,6 +22,9 @@
#' Setting output = "sf" returns the data in simple features format by calling
#' \code{\link[sf:st_as_sf]{sf::st_as_sf(..., crs = 4326, remove = FALSE)}}
#'
#' For more details see the help vignette:
#' \code{vignette("introduction", package = "crimedata")}
#'
#' @param years A single integer or vector of integers specifying the years for
#' which data should be retrieved. If NULL (the default), data for the most
#' recent year will be returned.
@@ -38,24 +41,16 @@
#' specifying "sf"?
#'
#' @return A tibble containing data from the Open Crime Database.
#'
#' @export
#'
#' @examples
#' \donttest{
#' # Retrieve a 1% sample of data for specific years and cities
#' get_crime_data(
#' years = 2016:2017,
#' cities = c("Tucson", "Virginia Beach"),
#' quiet = TRUE
#' )
#' }
#'
get_crime_data <- function(
years = NULL,
cities = NULL,
type = "sample",
cache = TRUE,
quiet = FALSE,
quiet = !interactive(),
output = "tbl"
) {

@@ -74,9 +69,7 @@ get_crime_data <- function(
urls$city <- tolower(urls$city)

# If years are not specified, use the most recent available year
if (is.null(years)) {
years <- max(urls$year)
}
if (is.null(years)) years <- max(urls$year)

# If cities are not specified, use all available cities
if (is.null(cities)) {
@@ -109,7 +102,7 @@ get_crime_data <- function(
}

# check if all specified cities are available
if (cities[1] != "all" & !all(cities %in% unique(urls$city))) {
if (cities[1] != "all cities" & !all(cities %in% unique(urls$city))) {
rlang::abort(
c(
"Data is not available for one or more of the specified cities.",
@@ -194,54 +187,29 @@

} else {

# Fetch data
# purrr::transpose() converts each row of the urls tibble into a list, which
# can then by processed by purrr::map()
crime_data <- purrr::map_dfr(
purrr::transpose(
urls,
.names = paste0(urls$data_type, urls$city, urls$year)
),
function(x) {

# Report progress
if (quiet == FALSE) {
rlang::inform(stringr::str_glue(
"Downloading {x[['data_type']]} data for ",
"{stringr::str_to_title(x[['city']])} in {x[['year']]}"
))
}

# Set name for temporary file
temp_file <- tempfile(pattern = "code_data_", fileext = ".Rds")

# Download remote file
if (quiet == TRUE) {
writeBin(
httr::content(httr::GET(x[["file_url"]]), as = "raw"),
temp_file
)
} else {
writeBin(
httr::content(
httr::GET(x[["file_url"]], httr::progress(type = "down")),
as = "raw"
),
temp_file
)
}

# read file
this_crime_data <- readRDS(temp_file)
# Create temporary directory
temp_dir <- stringr::str_glue("{tempdir()}/crime_data/")
if (!dir.exists(temp_dir)) dir.create(temp_dir)

# remove temporary file
file.remove(temp_file)

# return data from file
this_crime_data
# Download files
osfr::osf_download(
urls,
path = temp_dir,
conflicts = "overwrite",
progress = !quiet
)

}
# Load data
crime_data <- purrr::map_dfr(
dir(
path = temp_dir,
pattern = "^crime_open_database(.+?).Rds$",
full.names = TRUE
),
readRDS
)

# Sort data
crime_data <- crime_data[order(crime_data$uid), ]

# Store data in cache
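
Since this commit drops the \donttest{} example from the roxygen block above, a usage sketch of the revised function for reference. It reuses the call from the removed example plus the "sf" output described in the docs, and downloads data, so it needs the OSF API to be available:

library(crimedata)

# Retrieve a 1% sample of data for specific years and cities
crimes <- get_crime_data(
  years = 2016:2017,
  cities = c("Tucson", "Virginia Beach"),
  quiet = TRUE
)

# The same data as an sf object, converted with
# sf::st_as_sf(..., crs = 4326, remove = FALSE)
crimes_sf <- get_crime_data(
  years = 2016:2017,
  cities = c("Tucson", "Virginia Beach"),
  output = "sf"
)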
6 changes: 6 additions & 0 deletions R/release_questions.R
@@ -0,0 +1,6 @@
#' Custom questions for checking before CRAN release
#' @noRd

release_questions <- function() {
c("Have you re-built README.Rmd and vignettes manually?")
}
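
This new helper is picked up automatically: devtools::release() looks for an unexported release_questions() function in the package and asks its questions, alongside devtools' own standard checks, before a CRAN submission:

# Run the pre-submission checklist; the question above is asked in
# addition to devtools' built-in release questions
devtools::release()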