Merge pull request #16 from mpjashby/dev
Merge dev for version 0.3.4
mpjashby authored Nov 7, 2023
2 parents 798d1f4 + 021053f commit 663a45f
Showing 43 changed files with 1,050 additions and 437 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
@@ -12,3 +12,4 @@
^pkgdown$
^CRAN-SUBMISSION$
^\.github$
^README.Rmd.orig$
1 change: 1 addition & 0 deletions .github/.gitignore
@@ -0,0 +1 @@
*.html
29 changes: 29 additions & 0 deletions .github/workflows/R-CMD-check.yaml
@@ -0,0 +1,29 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]

name: R-CMD-check

jobs:
R-CMD-check:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes
steps:
- uses: actions/checkout@v3

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check

- uses: r-lib/actions/check-r-package@v2
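
For context, this workflow appears to match the single-platform "check-release" example from r-lib/actions. A workflow like this is usually added with usethis (an assumption about how it was generated here, not something recorded in the commit):

# Copies the r-lib/actions "check-release" example workflow into
# .github/workflows/ of the current package
usethis::use_github_action("check-release")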
9 changes: 4 additions & 5 deletions DESCRIPTION
@@ -1,9 +1,9 @@
Package: crimedata
Title: Access Crime Data from the Open Crime Database
Version: 0.3.1
Version: 0.3.4
Authors@R: person("Matthew", "Ashby", email = "matthew.ashby@ucl.ac.uk",
role = c("aut", "cre", "cph"),
comment = c(ORCID = "<https://orcid.org/0000-0003-4201-9239>"))
comment = c(ORCID = "0000-0003-4201-9239"))
Description: Gives convenient access to publicly available police-recorded open
crime data from large cities in the United States that are included in the
Crime Open Database <https://osf.io/zyaqn/>.
@@ -12,16 +12,15 @@ License: MIT + file LICENSE
Language: en-US
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
Suggests:
testthat,
covr,
knitr,
rmarkdown
Imports:
digest,
dplyr,
httr,
osfr,
purrr,
rlang,
sf,
18 changes: 18 additions & 0 deletions NEWS.md
@@ -1,3 +1,21 @@
# crimedata 0.3.4

* Changed how ORCID is included in package DESCRIPTION file.


# crimedata 0.3.3

* Re-written tests and docs to ensure no errors/warnings are produced during
automatic testing if API is not available (#13).
* Suppressed progress bars automatically when running non-interactively (#14).
* Removed unnecessary dependency `httr`.


# crimedata 0.3.2

* Update how package-level documentation is signposted for CRAN (#11).


# crimedata 0.3.1

* Fixed an error with downloading some data for 2020 that was caused by a
3 changes: 1 addition & 2 deletions R/crimedata.R
@@ -26,6 +26,5 @@
#' at any time. It is understood that the data provided at this site is being
#' used at one's own risk.
#'
#' @docType package
#' @name crimedata
NULL
"_PACKAGE"
94 changes: 20 additions & 74 deletions R/file_urls.R
@@ -10,7 +10,7 @@
#'
#' @noRd
#'
get_file_urls <- function(cache = TRUE, quiet = FALSE) {
get_file_urls <- function(cache = TRUE, quiet = !interactive()) {

# Check inputs
if (!rlang::is_logical(cache, n = 1))
@@ -73,78 +73,29 @@ get_file_urls <- function(cache = TRUE, quiet = FALSE) {
#'
fetch_file_urls <- function() {

# Retrieve data types separtely because there seems to be some undocumented
# limit on the number of files returned by each API call, even with pagination
urls <- c(
"https://api.osf.io/v2/nodes/zyaqn/files/osfstorage/5bbde32b7cb18100193c778a/?filter[name]=core",
"https://api.osf.io/v2/nodes/zyaqn/files/osfstorage/5bbde32b7cb18100193c778a/?filter[name]=extended",
"https://api.osf.io/v2/nodes/zyaqn/files/osfstorage/5bbde32b7cb18100193c778a/?filter[name]=sample"
# Download file details
files <- osfr::osf_ls_files(
osfr::osf_retrieve_node("https://osf.io/zyaqn"),
path = "Data for R package",
n_max = Inf
)

json_values <- purrr::map(urls, function(x) {

page_url <- x

# Create an empty list to store result
values <- list()

while (!is.null(page_url)) {

# Get JSON data
json <- httr::content(
httr::stop_for_status(httr::GET(page_url)),
as = "parsed",
type = "application/json"
)

# Update the URL to the next page (or NULL if this is the last page)
page_url <- json$links[["next"]]

# Add results to existing object
values <- c(values, json$data)

}

# Return list of JSON objects
values

})

values <- purrr::map_dfr(json_values, function(x) {

purrr::map_dfr(x, function(y) {

# Parse the file name into type and year
file_name <- as.character(stringr::str_match(
y$attributes$name,
"^crime_open_database_(core|extended|sample)_(.+)_(\\d+).Rds$"
))

# Extract city_name
city_name <- stringr::str_to_title(
stringr::str_replace_all(file_name[3], "_", " ")
)
if (city_name == "All") {
city_name <- "All cities"
}

# Return a list of data for this file
list(
data_type = file_name[2],
city = city_name,
year = file_name[4],
file_url = y$links$download
)

})

})
# Extract file-name components
components <- stringr::str_match(
files$name,
"^crime_open_database_(core|extended|sample)_(.+)_(\\d+).Rds$"
)

# convert year from character to integer
values$year <- as.integer(values$year)
# Add components
files$data_type <- components[, 2]
files$city <- stringr::str_to_title(
stringr::str_replace_all(components[, 3], "_", " ")
)
files$city <- ifelse(files$city == "All", "All cities", files$city)
files$year <- as.integer(components[, 4])

# return tibble of links
values[order(values$data_type, values$city, values$year), ]
files[order(files$data_type, files$city, files$year), ]

}

@@ -159,14 +110,9 @@ fetch_file_urls <- function() {
#'
#' @return A tibble
#'
#' @examples
#' \donttest{
#' list_crime_data()
#' }
#'
#' @export
#'
list_crime_data <- function(quiet = FALSE) {
list_crime_data <- function(quiet = !interactive()) {

# Get DF of URLs
urls <- get_file_urls(quiet = quiet)
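
For reference, a minimal standalone sketch of the new osfr-based listing introduced above; it mirrors fetch_file_urls() rather than calling the package's internal helper, and assumes the OSF project https://osf.io/zyaqn and its "Data for R package" folder are reachable:

library(osfr)
library(stringr)

# List the data files stored in the "Data for R package" folder of the
# Crime Open Database OSF project (as fetch_file_urls() now does)
node <- osf_retrieve_node("https://osf.io/zyaqn")
files <- osf_ls_files(node, path = "Data for R package", n_max = Inf)

# Parse data type, city and year out of each file name
components <- str_match(
  files$name,
  "^crime_open_database_(core|extended|sample)_(.+)_(\\d+).Rds$"
)
files$data_type <- components[, 2]
files$city <- str_to_title(str_replace_all(components[, 3], "_", " "))
files$year <- as.integer(components[, 4])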
86 changes: 27 additions & 59 deletions R/get_crime_data.R
@@ -22,6 +22,9 @@
#' Setting output = "sf" returns the data in simple features format by calling
#' \code{\link[sf:st_as_sf]{sf::st_as_sf(..., crs = 4326, remove = FALSE)}}
#'
#' For more details see the help vignette:
#' \code{vignette("introduction", package = "crimedata")}
#'
#' @param years A single integer or vector of integers specifying the years for
#' which data should be retrieved. If NULL (the default), data for the most
#' recent year will be returned.
@@ -38,24 +41,16 @@
#' specifying "sf"?
#'
#' @return A tibble containing data from the Open Crime Database.
#'
#' @export
#'
#' @examples
#' \donttest{
#' # Retrieve a 1% sample of data for specific years and cities
#' get_crime_data(
#' years = 2016:2017,
#' cities = c("Tucson", "Virginia Beach"),
#' quiet = TRUE
#' )
#' }
#'
get_crime_data <- function(
years = NULL,
cities = NULL,
type = "sample",
cache = TRUE,
quiet = FALSE,
quiet = !interactive(),
output = "tbl"
) {

@@ -74,9 +69,7 @@ get_crime_data <- function(
urls$city <- tolower(urls$city)

# If years are not specified, use the most recent available year
if (is.null(years)) {
years <- max(urls$year)
}
if (is.null(years)) years <- max(urls$year)

# If cities are not specified, use all available cities
if (is.null(cities)) {
@@ -109,7 +102,7 @@ get_crime_data <- function(
}

# check if all specified cities are available
if (cities[1] != "all" & !all(cities %in% unique(urls$city))) {
if (cities[1] != "all cities" & !all(cities %in% unique(urls$city))) {
rlang::abort(
c(
"Data is not available for one or more of the specified cities.",
@@ -194,54 +187,29 @@

} else {

# Fetch data
# purrr::transpose() converts each row of the urls tibble into a list, which
# can then by processed by purrr::map()
crime_data <- purrr::map_dfr(
purrr::transpose(
urls,
.names = paste0(urls$data_type, urls$city, urls$year)
),
function(x) {

# Report progress
if (quiet == FALSE) {
rlang::inform(stringr::str_glue(
"Downloading {x[['data_type']]} data for ",
"{stringr::str_to_title(x[['city']])} in {x[['year']]}"
))
}

# Set name for temporary file
temp_file <- tempfile(pattern = "code_data_", fileext = ".Rds")

# Download remote file
if (quiet == TRUE) {
writeBin(
httr::content(httr::GET(x[["file_url"]]), as = "raw"),
temp_file
)
} else {
writeBin(
httr::content(
httr::GET(x[["file_url"]], httr::progress(type = "down")),
as = "raw"
),
temp_file
)
}

# read file
this_crime_data <- readRDS(temp_file)
# Create temporary directory
temp_dir <- stringr::str_glue("{tempdir()}/crime_data/")
if (!dir.exists(temp_dir)) dir.create(temp_dir)

# remove temporary file
file.remove(temp_file)

# return data from file
this_crime_data
# Download files
osfr::osf_download(
urls,
path = temp_dir,
conflicts = "overwrite",
progress = !quiet
)

}
# Load data
crime_data <- purrr::map_dfr(
dir(
path = temp_dir,
pattern = "^crime_open_database(.+?).Rds$",
full.names = TRUE
),
readRDS
)

# Sort data
crime_data <- crime_data[order(crime_data$uid), ]

# Store data in cache
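
Since this commit drops the \donttest{} example from the roxygen block above, a usage sketch of the revised function for reference. It reuses the call from the removed example plus the "sf" output described in the docs, and downloads data, so it needs the OSF API to be available:

library(crimedata)

# Retrieve a 1% sample of data for specific years and cities
crimes <- get_crime_data(
  years = 2016:2017,
  cities = c("Tucson", "Virginia Beach"),
  quiet = TRUE
)

# The same data as an sf object, converted with
# sf::st_as_sf(..., crs = 4326, remove = FALSE)
crimes_sf <- get_crime_data(
  years = 2016:2017,
  cities = c("Tucson", "Virginia Beach"),
  output = "sf"
)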
6 changes: 6 additions & 0 deletions R/release_questions.R
@@ -0,0 +1,6 @@
#' Custom questions for checking before CRAN release
#' @noRd

release_questions <- function() {
c("Have you re-built README.Rmd and vignettes manually?")
}
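
This new helper is picked up automatically: devtools::release() looks for an unexported release_questions() function in the package and asks its questions, alongside devtools' own standard checks, before a CRAN submission:

# Run the pre-submission checklist; the question above is asked in
# addition to devtools' built-in release questions
devtools::release()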