Skip to content

Commit

Permalink
Merge pull request #202 from retostauffer/master
Browse files Browse the repository at this point in the history
Ensure non-intersecting sf-geometries in get_eurostat_geospatial
  • Loading branch information
antagomir authored May 9, 2021
2 parents 8247f72 + 461a12f commit e044e10
Show file tree
Hide file tree
Showing 5 changed files with 266 additions and 46 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ importFrom(lubridate,year)
importFrom(methods,as)
importFrom(readr,col_character)
importFrom(readr,read_tsv)
importFrom(sf,st_buffer)
importFrom(sf,st_read)
importFrom(sp,merge)
importFrom(sp,spplot)
Expand Down
1 change: 1 addition & 0 deletions R/firstlib.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#' @importFrom readr read_tsv
#' @importFrom readr col_character
#' @importFrom sf st_read
#' @importFrom sf st_buffer
#' @importFrom sp merge
#' @importFrom sp spplot
#' @importFrom stringi stri_match_first_regex
Expand Down
126 changes: 82 additions & 44 deletions R/get_eurostat_geospatial.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,23 @@
#' \code{\link{tempdir}}. Directory can also be set with
#' \code{option} eurostat_cache_dir.
#' @param crs projection of the map: 4-digit \href{https://spatialreference.org/ref/epsg/}{EPSG code}. One of:
#' @param make_valid logical; ensure that valid (multi-)polygon features are returned
#' if \code{output_class="sf"}, see Details. Current default \code{FALSE}, will be changed
#' in the future.
#'
#' \itemize{
#' \item "4326" - WGS84
#' \item "3035" - ETRS89 / ETRS-LAEA
#' \item "3857" - Pseudo-Mercator
#' }
#' @export
#' @details The data source URL is \url{http://ec.europa.eu/eurostat/web/gisco/geodata/reference-data/administrative-units-statistical-units}.
#' The source provides feature collections as line strings (GeoJSON format),
#' not as (multi-)polygons which, in some cases, yields invalid
#' self-intersecting (multi-)polygon geometries (for some years/resolutions).
#' This can cause problems, e.g., when using these geometries as input argument
#' to \code{sf::st_interpolate_aw()}). \code{make_valid = TRUE} makes sure that
#' only valid (multi-)polygons are returned, example included below.
#' @author Markus Kainu <markuskainu@gmail.com>
#' @return a sf, data_frame or SpatialPolygonDataFrame.
#' @examples
Expand All @@ -41,11 +51,36 @@
#' spdf <- get_eurostat_geospatial(output_class = "spdf", resolution = "10", nuts_level = "3")
#' }
#'
get_eurostat_geospatial <- function(output_class="sf",
resolution="60",
#' \dontrun{
#' # -------------------------------------------------------------------
#' # Minimal example to demonstrate reason/effect of 'make_valid = TRUE'
#' # Spatial data set; rectangle spanning the entire globe with a constant value of 1L.
#' # Requires the R package sf.
#' library("sf")
#' poly <- st_polygon(list(matrix(c(-180, -90, -180, 90, 180, 90, 180, -90, -180, -90), ncol = 2, byrow = TRUE)))
#' data <- st_sf(data.frame(geom = st_sfc(poly), data = 1L), crs = st_crs(4326))
#'
#' # Causing an error: Self-intersection of some points of the geometry
#' NUTS2_A <- get_eurostat_geospatial("sf", 60, nuts_level = 2, year = 2013,
#' crs = 4326, make_valid = FALSE)
#' res <- tryCatch(st_interpolate_aw(data, NUTS2_A, extensive = FALSE),
#' error = function(e) e)
#' print(res)
#'
#' # Resolving the problem using make_valid = TRUE. 'extensive = FALSE' returns
#' # average over each area, thus resulting in a constant value of 1 for each
#' # geometry in NUTS2_B.
#' NUTS2_B <- get_eurostat_geospatial("sf", 60, nuts_level = 2, year = 2013,
#' crs = 4326, make_valid = TRUE)
#' res <- st_interpolate_aw(data, NUTS2_B, extensive = FALSE)
#' print(head(res))
#' }
#'
get_eurostat_geospatial <- function(output_class = "sf",
resolution = "60",
nuts_level = "all", year = "2016",
cache = TRUE, update_cache = FALSE,
cache_dir = NULL, crs = "4326"){
cache_dir = NULL, crs = "4326", make_valid = FALSE){
# Check if you have access to ec.europe.eu.
if (!check_access_to_data()){
message("You have no access to ec.europe.eu.
Expand All @@ -58,35 +93,40 @@ Please check your connection and/or review your proxy settings")
envir = environment(),
package = "eurostat")

# Check resolution is of correct format
resolution <- as.character(resolution)
resolution <- gsub("^0+", "", resolution)
if (!as.numeric(resolution) %in% c(1, 3, 10, 20, 60)) {
stop("Resolution should be one of 01, 1, 03, 3, 10, 20, 60")
}
resolution <- gsub("^1$", "01", resolution)
resolution <- gsub("^3$", "03", resolution)

# Check output_class is of correct format
if (!output_class %in% c("sf", "df", "spdf")) {
stop("output_class should be one of 'sf', 'df' or 'spdf'")
}

stopifnot(length(output_class) == 1L)
output_class <- match.arg(as.character(output_class), c("sf", "df", "spdf"))

# Check resolution is of correct format
stopifnot(length(resolution) == 1L)
resolution <- as.integer(regmatches(resolution, regexpr("^[0-9]+", resolution)))
resolution <- sprintf("%02d", match.arg(as.character(resolution), c(1, 3, 10, 20, 60)))

# Sanity check for nuts_level
stopifnot(length(nuts_level) == 1L)
nuts_level <- regmatches(nuts_level, regexpr("^(all|[0-9]+)", nuts_level))
nuts_level <- match.arg(nuts_level, c("all", 0:3))

# Check year is of correct format
year <- as.character(year)
if (!as.numeric(year) %in% c(2003, 2006, 2010, 2013, 2016, 2021)) {
stop("Year should be one of 2003, 2006, 2010, 2013, 2016 or 2021")
}
year <- match.arg(as.character(year), c(2003, 2006, 2010, 2013, 2016, 2021))

# Sanity check for cache and update_cache
stopifnot(is.logical(cache) && length(cache) == 1 && cache %in% c(TRUE, FALSE))
stopifnot(is.logical(update_cache) && length(update_cache) == 1 && update_cache %in% c(TRUE, FALSE))

# Sanity check for cache_dir
stopifnot(is.null(cache_dir) || (is.character(cache_dir) && length(cache_dir) == 1L))

# Check crs is of correct format
crs <- match.arg(as.character(crs), c(4326, 3035, 3857))

# Invalid combination of year and resolution: stop and show hint/error message.
if (as.numeric(year) == 2003 & as.numeric(resolution) == 60) {
stop("NUTS 2003 is not provided at 1:60 million resolution. Try 1:1 million, 1:3 million, 1:10 million or 1:20 million")
}

# Check crs is of correct format
crs <- as.character(crs)
if (!as.numeric(crs) %in% c(4326, 3035, 3857)) {
stop("crs should be one of 4326, 3035 or 3857")
}

# Sanity check for input `make_valid`
stopifnot(is.logical(make_valid) && length(make_valid) == 1L && make_valid %in% c(TRUE, FALSE))

# message("
# COPYRIGHT NOTICE
Expand Down Expand Up @@ -117,25 +157,20 @@ Please check your connection and/or review your proxy settings")

if (resolution == "60" && year == "2016" && crs == "4326"){

if (nuts_level %in% c("all")){
# nuts_levels are mutually exclusive; either "all"
# or one of "0", "1", "2", "3".
if (nuts_level == c("all")){
shp <- eurostat_geodata_60_2016
} else {
shp <- filter(eurostat_geodata_60_2016, LEVL_CODE == as.integer(nuts_level))
}
if (nuts_level == "0") shp <- filter(eurostat_geodata_60_2016,
LEVL_CODE == 0)
if (nuts_level == "1") shp <- filter(eurostat_geodata_60_2016,
LEVL_CODE == 1)
if (nuts_level == "2") shp <- filter(eurostat_geodata_60_2016,
LEVL_CODE == 2)
if (nuts_level == "3") shp <- filter(eurostat_geodata_60_2016,
LEVL_CODE == 3)

if (output_class == "df"){
nuts_sp <- as(shp, "Spatial")
nuts_sp$id <- row.names(nuts_sp)
nuts_ff <- broom::tidy(nuts_sp)
shp <- left_join(nuts_ff,nuts_sp@data)
}
if (output_class == "spdf"){
} else if (output_class == "spdf"){
shp <- as(shp, "Spatial")
}

Expand Down Expand Up @@ -168,7 +203,7 @@ Please check your connection and/or review your proxy settings")
}

# if cache = FALSE or update or new: dowload else read from cache
if (!cache || update_cache || !file.exists(cache_file)){
if (!cache | update_cache | !file.exists(cache_file)){

if (nuts_level %in% c("0","all")){
url <- paste0("http://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/geojson/NUTS_RG_",resolution,"M_",year,"_",crs,"_LEVL_0.geojson")
Expand Down Expand Up @@ -209,15 +244,12 @@ Please check your connection and/or review your proxy settings")
}
}

if (nuts_level %in% c("all")){
if (nuts_level == "all") {
shp <- rbind(nuts0, nuts1, nuts2, nuts3)
} else {
shp <- eval(parse(text = paste0("nuts", nuts_level)))
}

if (nuts_level == "0") shp <- nuts0
if (nuts_level == "1") shp <- nuts1
if (nuts_level == "2") shp <- nuts2
if (nuts_level == "3") shp <- nuts3

if (output_class == "df"){
nuts_sp <- as(shp, "Spatial")
nuts_sp$id <- row.names(nuts_sp)
Expand Down Expand Up @@ -285,6 +317,12 @@ Please check your connection and/or review your proxy settings")
#
# # --------------------------
# ")
if (output_class == "sf" & make_valid) {
shp <- st_buffer(shp, 0)
} else {
warning(paste("Default of 'make_valid' for 'output_class=\"sf\"'",
"will be changed in the future (see function details)."))
}

# Adding a `geo` column for easier joins with dplyr
shp$geo <- shp$NUTS_ID
Expand Down
41 changes: 39 additions & 2 deletions man/get_eurostat_geospatial.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit e044e10

Please sign in to comment.