Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure non-intersecting sf-geometries in get_eurostat_geospatial #202

Merged
merged 5 commits into from
May 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ importFrom(lubridate,year)
importFrom(methods,as)
importFrom(readr,col_character)
importFrom(readr,read_tsv)
importFrom(sf,st_buffer)
importFrom(sf,st_read)
importFrom(sp,merge)
importFrom(sp,spplot)
Expand Down
1 change: 1 addition & 0 deletions R/firstlib.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#' @importFrom readr read_tsv
#' @importFrom readr col_character
#' @importFrom sf st_read
#' @importFrom sf st_buffer
#' @importFrom sp merge
#' @importFrom sp spplot
#' @importFrom stringi stri_match_first_regex
Expand Down
126 changes: 82 additions & 44 deletions R/get_eurostat_geospatial.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,23 @@
#' \code{\link{tempdir}}. Directory can also be set with
#' \code{option} eurostat_cache_dir.
#' @param crs projection of the map: 4-digit \href{https://spatialreference.org/ref/epsg/}{EPSG code}. One of:
#' @param make_valid logical; ensure that valid (multi-)polygon features are returned
#' if \code{output_class="sf"}, see Details. Current default \code{FALSE}, will be changed
#' in the future.
#'
#' \itemize{
#' \item "4326" - WGS84
#' \item "3035" - ETRS89 / ETRS-LAEA
#' \item "3857" - Pseudo-Mercator
#' }
#' @export
#' @details The data source URL is \url{http://ec.europa.eu/eurostat/web/gisco/geodata/reference-data/administrative-units-statistical-units}.
#' The source provides feature collections as line strings (GeoJSON format),
#' not as (multi-)polygons which, in some cases, yields invalid
#' self-intersecting (multi-)polygon geometries (for some years/resolutions).
#' This can cause problems, e.g., when using these geometries as input argument
#' to \code{sf::st_interpolate_aw()}). \code{make_valid = TRUE} makes sure that
#' only valid (multi-)polygons are returned, example included below.
#' @author Markus Kainu <markuskainu@gmail.com>
antagomir marked this conversation as resolved.
Show resolved Hide resolved
#' @return a sf, data_frame or SpatialPolygonDataFrame.
#' @examples
Expand All @@ -41,11 +51,36 @@
#' spdf <- get_eurostat_geospatial(output_class = "spdf", resolution = "10", nuts_level = "3")
#' }
#'
get_eurostat_geospatial <- function(output_class="sf",
resolution="60",
#' \dontrun{
#' # -------------------------------------------------------------------
#' # Minimal example to demonstrate reason/effect of 'make_valid = TRUE'
#' # Spatial data set; rectangle spanning the entire globe with a constant value of 1L.
#' # Requires the R package sf.
#' library("sf")
#' poly <- st_polygon(list(matrix(c(-180, -90, -180, 90, 180, 90, 180, -90, -180, -90), ncol = 2, byrow = TRUE)))
#' data <- st_sf(data.frame(geom = st_sfc(poly), data = 1L), crs = st_crs(4326))
#'
#' # Causing an error: Self-intersection of some points of the geometry
#' NUTS2_A <- get_eurostat_geospatial("sf", 60, nuts_level = 2, year = 2013,
#' crs = 4326, make_valid = FALSE)
#' res <- tryCatch(st_interpolate_aw(data, NUTS2_A, extensive = FALSE),
#' error = function(e) e)
#' print(res)
#'
#' # Resolving the problem using make_valid = TRUE. 'extensive = FALSE' returns
#' # average over each area, thus resulting in a constant value of 1 for each
#' # geometry in NUTS2_B.
#' NUTS2_B <- get_eurostat_geospatial("sf", 60, nuts_level = 2, year = 2013,
#' crs = 4326, make_valid = TRUE)
#' res <- st_interpolate_aw(data, NUTS2_B, extensive = FALSE)
#' print(head(res))
#' }
#'
get_eurostat_geospatial <- function(output_class = "sf",
resolution = "60",
nuts_level = "all", year = "2016",
cache = TRUE, update_cache = FALSE,
cache_dir = NULL, crs = "4326"){
cache_dir = NULL, crs = "4326", make_valid = FALSE){
# Check if you have access to ec.europe.eu.
if (!check_access_to_data()){
message("You have no access to ec.europe.eu.
Expand All @@ -58,35 +93,40 @@ Please check your connection and/or review your proxy settings")
envir = environment(),
package = "eurostat")

# Check resolution is of correct format
resolution <- as.character(resolution)
resolution <- gsub("^0+", "", resolution)
if (!as.numeric(resolution) %in% c(1, 3, 10, 20, 60)) {
stop("Resolution should be one of 01, 1, 03, 3, 10, 20, 60")
}
resolution <- gsub("^1$", "01", resolution)
resolution <- gsub("^3$", "03", resolution)

# Check output_class is of correct format
if (!output_class %in% c("sf", "df", "spdf")) {
stop("output_class should be one of 'sf', 'df' or 'spdf'")
}

stopifnot(length(output_class) == 1L)
output_class <- match.arg(as.character(output_class), c("sf", "df", "spdf"))

# Check resolution is of correct format
stopifnot(length(resolution) == 1L)
resolution <- as.integer(regmatches(resolution, regexpr("^[0-9]+", resolution)))
resolution <- sprintf("%02d", match.arg(as.character(resolution), c(1, 3, 10, 20, 60)))

# Sanity check for nuts_level
stopifnot(length(nuts_level) == 1L)
nuts_level <- regmatches(nuts_level, regexpr("^(all|[0-9]+)", nuts_level))
nuts_level <- match.arg(nuts_level, c("all", 0:3))

# Check year is of correct format
year <- as.character(year)
if (!as.numeric(year) %in% c(2003, 2006, 2010, 2013, 2016, 2021)) {
stop("Year should be one of 2003, 2006, 2010, 2013, 2016 or 2021")
}
year <- match.arg(as.character(year), c(2003, 2006, 2010, 2013, 2016, 2021))

# Sanity check for cache and update_cache
stopifnot(is.logical(cache) && length(cache) == 1 && cache %in% c(TRUE, FALSE))
stopifnot(is.logical(update_cache) && length(update_cache) == 1 && update_cache %in% c(TRUE, FALSE))

# Sanity check for cache_dir
stopifnot(is.null(cache_dir) || (is.character(cache_dir) && length(cache_dir) == 1L))

# Check crs is of correct format
crs <- match.arg(as.character(crs), c(4326, 3035, 3857))

# Invalid combination of year and resolution: stop and show hint/error message.
if (as.numeric(year) == 2003 & as.numeric(resolution) == 60) {
stop("NUTS 2003 is not provided at 1:60 million resolution. Try 1:1 million, 1:3 million, 1:10 million or 1:20 million")
}

# Check crs is of correct format
crs <- as.character(crs)
if (!as.numeric(crs) %in% c(4326, 3035, 3857)) {
stop("crs should be one of 4326, 3035 or 3857")
}

# Sanity check for input `make_valid`
stopifnot(is.logical(make_valid) && length(make_valid) == 1L && make_valid %in% c(TRUE, FALSE))

# message("
# COPYRIGHT NOTICE
Expand Down Expand Up @@ -117,25 +157,20 @@ Please check your connection and/or review your proxy settings")

if (resolution == "60" && year == 2016 && crs == "4326"){

if (nuts_level %in% c("all")){
# nuts_levels are mutually exclusive; either "all"
# or one of "0", "1", "2", "3".
if (nuts_level == c("all")){
shp <- eurostat_geodata_60_2016
} else {
shp <- filter(eurostat_geodata_60_2016, LEVL_CODE == as.integer(nuts_level))
}
if (nuts_level == "0") shp <- filter(eurostat_geodata_60_2016,
LEVL_CODE == 0)
if (nuts_level == "1") shp <- filter(eurostat_geodata_60_2016,
LEVL_CODE == 1)
if (nuts_level == "2") shp <- filter(eurostat_geodata_60_2016,
LEVL_CODE == 2)
if (nuts_level == "3") shp <- filter(eurostat_geodata_60_2016,
LEVL_CODE == 3)

if (output_class == "df"){
nuts_sp <- as(shp, "Spatial")
nuts_sp$id <- row.names(nuts_sp)
nuts_ff <- broom::tidy(nuts_sp)
shp <- left_join(nuts_ff,nuts_sp@data)
}
if (output_class == "spdf"){
} else if (output_class == "spdf"){
shp <- as(shp, "Spatial")
}

Expand Down Expand Up @@ -168,7 +203,7 @@ Please check your connection and/or review your proxy settings")
}

# if cache = FALSE or update or new: dowload else read from cache
if (!cache || update_cache || !file.exists(cache_file)){
if (!cache | update_cache | !file.exists(cache_file)){

if (nuts_level %in% c("0","all")){
url <- paste0("http://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/geojson/NUTS_RG_",resolution,"M_",year,"_",crs,"_LEVL_0.geojson")
Expand Down Expand Up @@ -209,15 +244,12 @@ Please check your connection and/or review your proxy settings")
}
}

if (nuts_level %in% c("all")){
if (nuts_level == "all") {
shp <- rbind(nuts0, nuts1, nuts2, nuts3)
} else {
shp <- eval(parse(text = paste0("nuts", nuts_level)))
}

if (nuts_level == "0") shp <- nuts0
if (nuts_level == "1") shp <- nuts1
if (nuts_level == "2") shp <- nuts2
if (nuts_level == "3") shp <- nuts3

if (output_class == "df"){
nuts_sp <- as(shp, "Spatial")
nuts_sp$id <- row.names(nuts_sp)
Expand Down Expand Up @@ -285,6 +317,12 @@ Please check your connection and/or review your proxy settings")
#
# # --------------------------
# ")
if (output_class == "sf" & make_valid) {
shp <- st_buffer(shp, 0)
} else {
warning(paste("Default of 'make_valid' for 'output_class=\"sf\"'",
"will be changed in the future (see function details)."))
}

# Adding a `geo` column for easier joins with dplyr
shp$geo <- shp$NUTS_ID
Expand Down
41 changes: 39 additions & 2 deletions man/get_eurostat_geospatial.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading