From 1d970f24ecae1630657ca7698a3e38d014a47e3e Mon Sep 17 00:00:00 2001 From: Andrea Gilardi Date: Wed, 6 Nov 2024 11:54:29 +0100 Subject: [PATCH] Add version argument It can be used to download old OSM extracts from Geofabrik provider. Ref #295 --- R/get.R | 8 ++++++++ R/match.R | 17 ++++++++++++----- R/utils.R | 21 +++++++++++++++++++++ man/oe_get.Rd | 8 ++++++++ man/oe_match.Rd | 17 ++++++++++++++++- 5 files changed, 65 insertions(+), 6 deletions(-) diff --git a/R/get.R b/R/get.R index 3031bce8..daa9ebfa 100644 --- a/R/get.R +++ b/R/get.R @@ -44,6 +44,12 @@ #' say that smaller administrative units correspond to bigger levels. If #' `NULL`, the default, the `oe_*` functions will select the highest available #' level. See Details and Examples in [oe_match()]. +#' @param version The version of the OSM extract to download. The default is +#' "latest". Other possible values are typically specified using the format +#' YYMMDD (e.g. "200101"). The complete list of all available historic files +#' for a given extract can be browsed from the Geofabrik website (e.g. +#' <https://download.geofabrik.de/europe/italy.html> and then click on 'raw +#' directory index'). #' @param download_directory Directory to store the file containing OSM data?. #' @param force_download Should the `.osm.pbf` file be updated even if it has #' already been downloaded? `FALSE` by default. This parameter is used to @@ -216,6 +222,7 @@ oe_get = function( match_by = "name", max_string_dist = 1, level = NULL, + version = "latest", download_directory = oe_download_directory(), force_download = FALSE, max_file_size = 5e+8, @@ -246,6 +253,7 @@ oe_get = function( match_by = match_by, max_string_dist = max_string_dist, level = level, + version = version, quiet = quiet ) diff --git a/R/match.R b/R/match.R index 88babf2e..835850bd 100644 --- a/R/match.R +++ b/R/match.R @@ -152,11 +152,13 @@ oe_match.sfc = function( place, provider = "geofabrik", level = NULL, + version = "latest", quiet = FALSE, ... ) { # Load the data associated with the chosen provider. 
provider_data = load_provider_data(provider) + version <- check_version(version, provider) # Check if place has no CRS (i.e. NA_crs_, see ?st_crs) and, in that case, set # 4326 + raise a warning message. @@ -216,7 +218,6 @@ oe_match.sfc = function( # If, again, there are multiple matches with the same "level", we will select # only the area closest to the input place. if (nrow(matched_zones) > 1L) { - nearest_id_centroid = sf::st_nearest_feature( place, sf::st_centroid(sf::st_geometry(matched_zones)) @@ -231,13 +232,15 @@ oe_match.sfc = function( .subclass = "oe_match_sfcInputMatchedWith" ) + url <- matched_zones[["pbf"]] + url <- adjust_version_in_url(version, url) + # Return a list with the URL and the file_size of the matched place result = list( - url = matched_zones[["pbf"]], + url = url, file_size = matched_zones[["pbf_file_size"]] ) result - } #' @inheritParams oe_get @@ -277,6 +280,7 @@ oe_match.character = function( quiet = FALSE, match_by = "name", max_string_dist = 1, + version = "latest", ... ) { # For the moment we support only length-one character vectors @@ -290,6 +294,7 @@ oe_match.character = function( ) ) } + version <- check_version(version, provider) # See https://github.com/ropensci/osmextract/pull/125 if (place == "ITS Leeds") { @@ -339,7 +344,6 @@ oe_match.character = function( # If the approximate string distance between the best match is greater than # the max_string_dist threshold, then: if (isTRUE(high_distance)) { - # 1. 
Raise a message oe_message( "No exact match found for place = ", place, @@ -434,8 +438,11 @@ oe_match.character = function( .subclass = "oe_match_characterinputmatchedWith" ) + url <- best_matched_place[["pbf"]] + url <- adjust_version_in_url(version, url) + result = list( - url = best_matched_place[["pbf"]], + url = url, file_size = best_matched_place[["pbf_file_size"]] ) result diff --git a/R/utils.R b/R/utils.R index 96f02579..63894435 100644 --- a/R/utils.R +++ b/R/utils.R @@ -30,6 +30,27 @@ check_layer_provider = function(layer, provider) { invisible(0) } +check_version <- function(version, provider) { + # Currently, the only provider that includes historic data for the OSM + # extracts is geofabrik. + if (version != "latest" && provider != "geofabrik") { + warning( + "version != 'latest' is only supported for 'geofabrik' provider. ", + "Overriding it to 'latest'.", + call. = FALSE + ) + return("latest") + } + version +} +adjust_version_in_url <- function(version, url) { + if (version == "latest") { + return(url) + } + gsub("latest(?=\\.osm\\.pbf$)", version, url, perl = TRUE) +} + + # Starting from sf 1.0.2, sf::st_read raises a warning message when both layer # and query arguments are set, while it raises a warning in sf < 1.0.2 when # there are multiple layers and the layer argument is not set. See also diff --git a/man/oe_get.Rd b/man/oe_get.Rd index f9ca3dc4..7c051f71 100644 --- a/man/oe_get.Rd +++ b/man/oe_get.Rd @@ -12,6 +12,7 @@ oe_get( match_by = "name", max_string_dist = 1, level = NULL, + version = "latest", download_directory = oe_download_directory(), force_download = FALSE, max_file_size = 5e+08, @@ -76,6 +77,13 @@ say that smaller administrative units correspond to bigger levels. If \code{NULL}, the default, the \verb{oe_*} functions will select the highest available level. See Details and Examples in \code{\link[=oe_match]{oe_match()}}.} +\item{version}{The version of the OSM extract to download. The default is +"latest". 
Other possible values are typically specified using the format +YYMMDD (e.g. "200101"). The complete list of all available historic files +for a given extract can be browsed from the Geofabrik website (e.g. +\url{https://download.geofabrik.de/europe/italy.html} and then click on 'raw +directory index').} + \item{download_directory}{Directory to store the file containing OSM data?.} \item{force_download}{Should the \code{.osm.pbf} file be updated even if it has diff --git a/man/oe_match.Rd b/man/oe_match.Rd index 427892c4..e486873f 100644 --- a/man/oe_match.Rd +++ b/man/oe_match.Rd @@ -18,7 +18,14 @@ oe_match(place, ...) \method{oe_match}{sf}(place, ...) -\method{oe_match}{sfc}(place, provider = "geofabrik", level = NULL, quiet = FALSE, ...) +\method{oe_match}{sfc}( + place, + provider = "geofabrik", + level = NULL, + version = "latest", + quiet = FALSE, + ... +) \method{oe_match}{numeric}(place, provider = "geofabrik", quiet = FALSE, ...) @@ -28,6 +35,7 @@ oe_match(place, ...) quiet = FALSE, match_by = "name", max_string_dist = 1, + version = "latest", ... ) } @@ -55,6 +63,13 @@ say that smaller administrative units correspond to bigger levels. If \code{NULL}, the default, the \verb{oe_*} functions will select the highest available level. See Details and Examples in \code{\link[=oe_match]{oe_match()}}.} +\item{version}{The version of the OSM extract to download. The default is +"latest". Other possible values are typically specified using the format +YYMMDD (e.g. "200101"). The complete list of all available historic files +for a given extract can be browsed from the Geofabrik website (e.g. +\url{https://download.geofabrik.de/europe/italy.html} and then click on 'raw +directory index').} + \item{quiet}{Boolean. If \code{FALSE}, the function prints informative messages. Starting from \code{sf} version \href{https://r-spatial.github.io/sf/news/index.html#version-0-9-6-2020-09-13}{0.9.6},