Skip to content

Commit

Permalink
Merge pull request #44 from mpjashby/dev
Browse files Browse the repository at this point in the history
Release v0.9.0
  • Loading branch information
mpjashby authored Feb 10, 2025
2 parents 51dc28a + b34282e commit ec37981
Show file tree
Hide file tree
Showing 84 changed files with 11,742 additions and 1,414 deletions.
3 changes: 0 additions & 3 deletions CRAN-SUBMISSION

This file was deleted.

6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: sfhotspot
Title: Hot-Spot Analysis with Simple Features
Version: 0.8.0
Version: 0.9.0
Authors@R:
person("Matt", "Ashby", , "matthew.ashby@ucl.ac.uk", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-4201-9239"))
Expand All @@ -20,7 +20,7 @@ URL: http://pkgs.lesscrime.info/sfhotspot/
BugReports: https://github.com/mpjashby/sfhotspot/issues
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Imports:
ggplot2,
rlang,
Expand All @@ -29,7 +29,7 @@ Imports:
spdep,
tibble
Depends:
R (>= 2.10)
R (>= 3.5)
Suggests:
testthat (>= 3.0.0),
lubridate,
Expand Down
19 changes: 19 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
# sfhotspot 0.9.0

* New dataset `memphis_precincts` showing Memphis Police Department precincts,
which is required to test the new functionality of `hotspot_grid()`.
* `count_points_in_polygons()` now passes through columns in the original
dataset, which makes `hotspot_count()` more useful (#41).
* `hotspot_grid()` if provided with polygons now bases the grid on the
boundary of the polygons rather than the convex hull of the boundary (#42).
* `hotspot_gistar()` now extracts nearest neighbour distance from provided grid
and does not wrongly rely on (and report) an automatically generated cell
size (#38).
* Warnings about grids containing very large numbers of cells are now printed
before the cells are created, helping explain why code may be running slower
than expected (#33).
* Improved error message produced when point data and provided grid do not
overlap (#39).
* Suppressed progress bar previously included in README (#36).


# sfhotspot 0.8.0

* All functions can now handle SF objects in which the geometry column has a
Expand Down
50 changes: 39 additions & 11 deletions R/count_points_in_polygons.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,38 @@ count_points_in_polygons <- function(points, polygons, weights = NULL) {
}
}

# Warn if polygons object contains column names used internally
if ("n" %in% names(polygons)) {
rlang::warn(c(
"Existing column 'n' will be overwritten.",
"i" = "Consider renaming the existing column first."
))
}
if (".polygon_id" %in% names(polygons)) {
rlang::warn(c(
"Existing column '.polygon_id' will be removed.",
"i" = "Consider renaming the existing column first."
))
}
if ("x" %in% names(polygons)) {
rlang::warn(c(
"Existing column 'x' will be removed.",
"i" = "Consider renaming the existing column first."
))
}
if (!rlang::is_null(weights) & "sum" %in% names(polygons)) {
rlang::warn(c(
"Existing column 'sum' will be overwritten.",
"i" = "Consider renaming the existing column first."
))
} else if ("sum" %in% names(polygons)) {
rlang::warn(c(
"Existing column 'sum' will be removed.",
"i" = "Consider renaming the existing column first."
))
}
polygons$n <- polygons$x <- polygons$sum <- polygons$`.polygon_id` <- NULL

# Replace name of geometry column in SF objects if necessary
polygons <- set_geometry_name(polygons)

Expand Down Expand Up @@ -78,17 +110,13 @@ count_points_in_polygons <- function(points, polygons, weights = NULL) {
}

# Remove working columns and convert to SF object
if (!rlang::is_null(weights)) {
counts <- sf::st_as_sf(
tibble::as_tibble(counts[, c("n", "sum", "geometry")]),
sf_column_name = "geometry"
)
} else {
counts <- sf::st_as_sf(
tibble::as_tibble(counts[, c("n", "geometry")]),
sf_column_name = "geometry"
)
}
# This also ensures that `geometry` is the last column, as is the convention
# for SF objects
result_names <- setdiff(names(counts), c("geometry", ".polygon_id", "x"))
counts <- sf::st_as_sf(
tibble::as_tibble(counts[, c(result_names, "geometry")]),
sf_column_name = "geometry"
)

counts

Expand Down
21 changes: 16 additions & 5 deletions R/create_grid.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,28 @@ create_grid <- function(
cell_size <- set_cell_size(data, round = TRUE, quiet = quiet)

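# Create buffered hull around data, based on the boundary of the polygons if
# the data are polygons or on the convex hull of the data otherwise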
hull <- sf::st_buffer(
sf::st_convex_hull(sf::st_union(data)),
dist = cell_size / 2
)
geometry_types <- as.character(sf::st_geometry_type(data))
if (all(geometry_types %in% c("POLYGON", "MULTIPOLYGON"))) {
hull <- sf::st_buffer(
sf::st_cast(sf::st_boundary(sf::st_union(data)), "MULTIPOLYGON"),
dist = cell_size / 2
)
} else {
hull <- sf::st_buffer(
sf::st_convex_hull(sf::st_union(data)),
dist = cell_size / 2
)
}

# Warn if there will be so many cells that the function will be very slow
hull_bbox <- sf::st_bbox(hull)
cells_n_x <- (hull_bbox$xmax - hull_bbox$xmin) / cell_size
cells_n_y <- (hull_bbox$ymax - hull_bbox$ymin) / cell_size
if (cells_n_x * cells_n_y > 100000 & quiet == FALSE) {
rlang::warn(
# Although this is a warning, warnings are only printed when a function
# finishes, which is no use. Messages are printed immediately, so this has
# to be a message. See https://github.com/mpjashby/sfhotspot/issues/33
rlang::inform(
c(
"The grid will contain a large number of cells",
"!" = "This may cause other functions to run slowly or not work",
Expand Down
28 changes: 20 additions & 8 deletions R/gistar.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#' counts in neighbouring cells when calculating the values of
#' \ifelse{html}{\out{<i>G</i><sub><i>i</i></sub><sup>*</sup>}}{\eqn{G^*_i}}
#' (if \code{include_self = TRUE}, the default) or
#' \ifelse{html}{\out{<i>G</i><sub><i>i</i></sub><sup>*</sup>}}{\eqn{G_i}}
#' \ifelse{html}{\out{<i>G</i><sub><i>i</i></sub>}}{\eqn{G_i}}
#' (if \code{include_self = FALSE}) values? You are unlikely to want to change
#' the default value.
#' @param p_adjust_method The method to be used to adjust \emph{p}-values for
Expand All @@ -33,12 +33,12 @@
#' \code{TRUE}.
#' @return An \code{\link[sf]{sf}} tibble of regular grid cells with
#' corresponding point counts,
#' \ifelse{html}{\out{<i>G</i><sub><i>i</i></sub><sup>*</sup>}}{\eqn{G_i}} or
#' \ifelse{html}{\out{<i>G</i><sub><i>i</i></sub>}}{\eqn{G_i}} or
#' \ifelse{html}{\out{<i>G</i><sub><i>i</i></sub><sup>*</sup>}}{\eqn{G^*_i}}
#' values for each cell. Values greater than zero indicate more points than
#' would be expected for randomly distributed points and values less than zero
#' indicate fewer points. Critical values of
#' \ifelse{html}{\out{<i>G</i><sub><i>i</i></sub><sup>*</sup>}}{\eqn{G_i}} and
#' \ifelse{html}{\out{<i>G</i><sub><i>i</i></sub>}}{\eqn{G_i}} and
#' \ifelse{html}{\out{<i>G</i><sub><i>i</i></sub><sup>*</sup>}}{\eqn{G^*_i}}
#' are given in the manual page for \code{\link[spdep]{localG}}.
#' @noRd
Expand Down Expand Up @@ -92,15 +92,27 @@ gistar <- function(
# Replace name of geometry column in SF objects if necessary
counts <- set_geometry_name(counts)

# Set cell size if not specified
if (rlang::is_null(nb_dist) & rlang::is_null(cell_size))
cell_size <- set_cell_size(counts, round = TRUE, quiet = quiet)
# Get centroids
centroids <- suppressWarnings(sf::st_centroid(counts))

# Set neighbour distance if not specified
if (rlang::is_null(nb_dist)) nb_dist <- cell_size * sqrt(2)
if (rlang::is_null(nb_dist)) {

# Derive cell size from grid cells if required
if (rlang::is_null(cell_size)) {
cell_size <- as.numeric(mean(sf::st_distance(
centroids,
centroids[sf::st_nearest_feature(centroids), ],
by_element = TRUE
)))
}

# Derive neighbour distance from cell size
nb_dist <- cell_size * sqrt(2)

}

# Find neighbours
centroids <- suppressWarnings(sf::st_centroid(counts))
nb <- spdep::dnearneigh(sf::st_coordinates(centroids), 0, nb_dist)

# Determine if each cell should be treated as a neighbour of itself
Expand Down
3 changes: 2 additions & 1 deletion R/hotspot_gistar.R
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,9 @@ hotspot_gistar <- function(

# Set cell size if not specified (do this here because it is needed by both
# `create_grid()` and `gistar()`)
if (rlang::is_null(cell_size))
if (rlang::is_null(cell_size) & rlang::is_null(grid)) {
cell_size <- set_cell_size(data, round = TRUE, quiet = quiet)
}

# Create grid
if (rlang::is_null(grid)) {
Expand Down
16 changes: 16 additions & 0 deletions R/memphis_precincts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#' Memphis Police Department Precincts
#'
#' A dataset containing the boundaries of Memphis Police Department precincts.
#'
#' @format A simple-features tibble with 9 rows and two variables:
#' \describe{
#' \item{precinct}{the precinct name}
#' \item{geometry}{the boundary of each precinct, stored in simple-features
#' polygon format}
#' }
#'
#' Licence: Public domain <https://data.memphistn.gov/d/tdws-78iq>
#'
#' @source City of Memphis <https://data.memphistn.gov/d/rqqz-pj4u>
#'
"memphis_precincts"
27 changes: 25 additions & 2 deletions R/validate_inputs.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,12 @@ validate_inputs <- function(
if (!rlang::is_null(grid)) {
if (!inherits(grid, "sf"))
rlang::abort("`grid` must be either an SF object or `NULL`.", call = call)
if (any(!sf::st_is(grid, "POLYGON")))
if (any(!sf::st_is(grid, c("POLYGON", "MULTIPOLYGON"))))
rlang::abort(
"`grid` must be `NULL` or an SF object containing polygons.",
paste0(
"`grid` must be `NULL` or an SF object containing polygons or ",
"multipolygons."
),
call = call
)
if (any(sf::st_is_empty(grid))) {
Expand Down Expand Up @@ -117,6 +120,26 @@ validate_inputs <- function(
)
}

# Check that data and grid overlap
if (!rlang::is_null(grid)) {
check_overlap <- sf::st_intersects(
sf::st_union(data),
sf::st_union(grid),
sparse = FALSE
)
if (rlang::is_false(check_overlap[1, 1])) {
rlang::abort(
c(
"`data` and `grid` must overlap",
"i" = paste0(
"Check data (e.g. by mapping) to ensure inputs overlap in space."
)
),
call = call
)
}
}

# Validate `quiet`
if (!rlang::is_logical(quiet, n = 1))
rlang::abort("`quiet` must be one of `TRUE` or `FALSE`.", call = call)
Expand Down
12 changes: 8 additions & 4 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ distributed randomly. In this example, the points represent the locations of
personal robberies in Memphis, which is a dataset included with the package.

```{r example}
#| fig.alt: >
#| A map showing hotspots of robbery in Memphis, TN created using the
#| `hotspot_gistar()` function in the sfhotspot package
# Load packages
library(sf)
library(sfhotspot)
Expand All @@ -123,19 +127,19 @@ library(tidyverse)
# Transform data to UTM zone 15N so that we can think in metres, not decimal
# degrees
memphis_robberies_utm <- st_transform(memphis_robberies, 32615)
memphis_robberies_utm <- st_transform(memphis_robberies, "EPSG:32615")
# Identify hotspots, set all the parameters automatically by not specifying cell
# size, bandwidth, etc.
memphis_robberies_hotspots <- hotspot_gistar(memphis_robberies_utm)
memphis_robberies_htspt <- hotspot_gistar(memphis_robberies_utm, quiet = TRUE)
# Visualise the hotspots by showing only those cells that have significantly
# more points than expected by chance. For those cells, show the estimated
# density of robberies.
memphis_robberies_hotspots %>%
filter(gistar > 0, pvalue < 0.05) %>%
memphis_robberies_htspt |>
filter(gistar > 0, pvalue < 0.05) |>
ggplot(aes(colour = kde, fill = kde)) +
geom_sf() +
scale_colour_distiller(aesthetics = c("colour", "fill"), direction = 1) +
Expand Down
42 changes: 16 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ sfhotspot has the following functions. All can be used by just supplying
an SF object containing points, or can be configured using the optional
arguments to each function.

| name | use |
|:---------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `hotspot_count()` | Count the number of points in each cell of a regular grid. Cell size can be set by the user or chosen automatically. |
| `hotspot_change()` | Measure the change in the count of points in each cell between two periods of time. |
| `hotspot_kde()` | Estimate kernel density for each cell in a regular grid. Cell size and bandwidth can be set by the user or chosen automatically. |
| `hotspot_dual_kde()` | Compare the kernel density of two layers of points, e.g. to estimate the local risk of an event occurring relative to local population. |
| `hotspot_gistar()` | Calculate the Getis–Ord $G_i^*$ statistic for each cell in a regular grid, while optionally estimating kernel density. Cell size, bandwidth and neighbour distance can be set by the user or chosen automatically. |
| name | use |
|:---|:---|
| `hotspot_count()` | Count the number of points in each cell of a regular grid. Cell size can be set by the user or chosen automatically. |
| `hotspot_change()` | Measure the change in the count of points in each cell between two periods of time. |
| `hotspot_kde()` | Estimate kernel density for each cell in a regular grid. Cell size and bandwidth can be set by the user or chosen automatically. |
| `hotspot_dual_kde()` | Compare the kernel density of two layers of points, e.g. to estimate the local risk of an event occurring relative to local population. |
| `hotspot_gistar()` | Calculate the Getis–Ord $G_i^*$ statistic for each cell in a regular grid, while optionally estimating kernel density. Cell size, bandwidth and neighbour distance can be set by the user or chosen automatically. |
| `hotspot_classify()` | Classify grid cells according to whether they have had significant clusters of points at different time periods. All parameters can be chosen automatically or be set by the user using the `hotspot_classify_params()` helper function. |

The results produced by `hotspot_count()`, `hotspot_change()`,
Expand Down Expand Up @@ -72,10 +72,10 @@ library(sf)
library(sfhotspot)
library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.2 ✔ readr 2.1.4
#> ✔ forcats 1.0.0 ✔ stringr 1.5.0
#> ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
#> ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
#> ✔ dplyr 1.1.4 ✔ readr 2.1.5
#> ✔ forcats 1.0.0 ✔ stringr 1.5.1
#> ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
#> ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
#> ✔ purrr 1.0.2
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
Expand All @@ -85,34 +85,24 @@ library(tidyverse)

# Transform data to UTM zone 15N so that we can think in metres, not decimal
# degrees
memphis_robberies_utm <- st_transform(memphis_robberies, 32615)
memphis_robberies_utm <- st_transform(memphis_robberies, "EPSG:32615")


# Identify hotspots, set all the parameters automatically by not specifying cell
# size, bandwidth, etc.
memphis_robberies_hotspots <- hotspot_gistar(memphis_robberies_utm)
#> Cell size set to 500 metres automatically
#> Bandwidth set to 5,592 metres automatically based on rule of thumb
#> The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
#> which was just loaded, will retire in October 2023. Please refer to R-spatial
#> evolution reports for details, especially
#> https://r-spatial.org/r/2023/05/15/evolution4.html. It may be desirable to make
#> the sf package available; package maintainers should consider adding sf to
#> Suggests:. The sp package is now running under evolution status 2 (status 2
#> uses the sf package in place of rgdal)
#> Done: [--------------------------------------------------------------------] .Done: [======================================------------------------------] .Done: [=======================================-----------------------------] .Done: [========================================----------------------------] .Done: [=========================================---------------------------] .Done: [==========================================--------------------------] .Done: [===========================================-------------------------] .Done: [============================================------------------------] .Done: [=============================================-----------------------] .Done: [==============================================----------------------] .Done: [===============================================---------------------] .Done: [================================================--------------------] .Done: [=================================================-------------------] .Done: [==================================================------------------] .Done: [===================================================-----------------] .Done: [====================================================----------------] .Done: [=====================================================---------------] .Done: [======================================================--------------] .Done: [=======================================================-------------] .Done: [========================================================------------] .Done: [=========================================================-----------] .Done: [==========================================================----------] .Done: [===========================================================---------] .Done: [============================================================--------] .Done: [=============================================================-------] .Done: 
[==============================================================------] .Done: [===============================================================-----] .Done: [================================================================----] .Done: [=================================================================---] .Done: [==================================================================--] .Done: [===================================================================-] .Done: [====================================================================] .
memphis_robberies_htspt <- hotspot_gistar(memphis_robberies_utm, quiet = TRUE)


# Visualise the hotspots by showing only those cells that have significantly
# more points than expected by chance. For those cells, show the estimated
# density of robberies.
memphis_robberies_hotspots %>%
filter(gistar > 0, pvalue < 0.05) %>%
memphis_robberies_htspt |>
filter(gistar > 0, pvalue < 0.05) |>
ggplot(aes(colour = kde, fill = kde)) +
geom_sf() +
scale_colour_distiller(aesthetics = c("colour", "fill"), direction = 1) +
labs(title = "Density of robberies in Memphis, 2019") +
theme_void()
```

<img src="man/figures/README-example-1.png" width="100%" />
<img src="man/figures/README-example-1.png" alt="A map showing hotspots of robbery in Memphis, TN created using the `hotspot_gistar()` function in the sfhotspot package" width="100%" />
7 changes: 7 additions & 0 deletions data-raw/memphis_precincts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Prepare the `memphis_precincts` dataset

# Address of the GeoJSON file from which the precinct data are read
precincts_url <- "https://data.memphistn.gov/resource/rqqz-pj4u.geojson"

# Read the source data, then collapse it to a single row per precinct name
# (summarising with no arguments keeps only the grouping column and geometry)
precincts_raw <- sf::read_sf(precincts_url)
memphis_precincts <- dplyr::summarise(
dplyr::group_by(precincts_raw, precinct)
)

# Store the result as a dataset within the package, replacing any existing copy
usethis::use_data(memphis_precincts, overwrite = TRUE)
Binary file added data/memphis_precincts.rda
Binary file not shown.
Loading

0 comments on commit ec37981

Please sign in to comment.