From bb7563ba47ca6b51d27c9617f3cf7a41497879b8 Mon Sep 17 00:00:00 2001 From: Daniel Antal Date: Thu, 24 Jun 2021 14:09:28 +0200 Subject: [PATCH 1/4] connect to iotables and regions --- R/eurostat-package.R | 7 ++ R/get_eurostat_json.R | 48 +++++++++++-- R/regional_coding_functions.R | 130 ++++++++++++++++++++++++++++++++++ man/eurostat-package.Rd | 4 ++ 4 files changed, 185 insertions(+), 4 deletions(-) create mode 100644 R/regional_coding_functions.R diff --git a/R/eurostat-package.R b/R/eurostat-package.R index 6de3ed41..19d7f579 100755 --- a/R/eurostat-package.R +++ b/R/eurostat-package.R @@ -21,5 +21,12 @@ #' See citation("eurostat") #' \url{https://ropengov.github.io/eurostat/} #' @examples library(eurostat) +#' @section regions functions: +#' The most important functions are imported from the regions package. +#' \url{https://ropengov.github.io/regions/} +#' When working with the sub-national data, you may need the full functionality of the regions package. +#' \code\{\link{validate_nuts_regions}}\cr +#' \code\{\link{validate_geo_code}}\cr +#' \code\{\link{recode_nuts}}\cr #' @keywords package NULL diff --git a/R/get_eurostat_json.R b/R/get_eurostat_json.R index 3defe80e..9770611b 100755 --- a/R/get_eurostat_json.R +++ b/R/get_eurostat_json.R @@ -59,6 +59,11 @@ get_eurostat_json <- function(id, filters = NULL, stringsAsFactors = FALSE, ...){ + ## Special products that must be built to matrix + ## User gets message to use iotables::iotables_download() and halt this operation. + user_want_stop <- special_id_values(id) + if ( user_want_stop ) return(NULL) + # Check if you have access to ec.europe.eu. if (!check_access_to_data()){ message("You have no access to ec.europe.eu. @@ -97,6 +102,9 @@ get_eurostat_json <- function(id, filters = NULL, } else if (status == 500){ stop("Failure to get data. Probably filters did not return any data or data exceeded query size limitation. 
Status code: ", status, msg) + } else if (status == 416) { + stop ("Too many categories have been requested. Maximum is 50. Status code: ", + status, msg) } else { stop("Failure to get data. Status code: ", status, msg) } @@ -119,7 +127,7 @@ get_eurostat_json <- function(id, filters = NULL, stop("Invalid type ", type) } }) - + variables <- expand.grid(dims_list, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = stringsAsFactors) @@ -130,6 +138,8 @@ get_eurostat_json <- function(id, filters = NULL, inds <- 1 + as.numeric(names(jdat$value)) # 0-indexed if (!length(vals) == length(inds)) {stop("Complex indexing not implemented.")} dat$values[inds] <- vals + + tibble::as_tibble(dat) } @@ -164,8 +174,38 @@ eurostat_json_url <- function(id, filters, lang){ url <- httr::build_url(url_list) url } +# Internal function: if id is one of the symmetric input-output table products (which need a strict matrix format), message the user about iotables::iotables_download() and ask whether to stop; returns TRUE only when the answer is "y"/"Y", otherwise FALSE. +special_id_values <- function(id) { + siot_id_codes <- c("naio_10_cp1700", "naio_10_pyp1700", + "naio_10_cp1750", "naio_10_pyp1750", + "naio_10_cp15", "naio_10_cp16", + "naio_10_cp1610","naio_10_pyp1610", + "naio_10_cp1620", "naio_10_pyp1620", + "naio_10_cp1630", "naio_10_pyp1630") + if (id %in% siot_id_codes ) { + message("The requested product id is a special input-output matrix.", + "\nTo keep the matrix structure for further use, download it with iotables::iotables_download().\nThe iotables package is an extension for such cases to the eurostat package.") + answer <- readline (prompt = "Do you want to stop downloading now? [y/n] ") + if ( tolower(answer)=="y") TRUE else FALSE + } else { + # By default evaluates to FALSE and no interruption happens + FALSE + } +} +# Internal function: takes a vector of geo codes, keeps the unique codes longer than two characters that do not start with "EU" or "EA", and if any remain emits a message listing the sorted unique validate_geo_code() results for them (nuts_year = 2021). +is_regional_nuts_present <- function(geo) { - - - + potentional_regional_codes <- unique (geo)[nchar(unique (geo))>2] + + potentional_regional_codes <- potentional_regional_codes[! 
substr(potentional_regional_codes, 1,2) %in% c("EU", "EA")] + + if( length(potentional_regional_codes)>0) { + + types_found <- paste ( sort (unique(validate_geo_code( potentional_regional_codes, nuts_year = 2021))), collapse = ', ') + message("The following sub-national geographical codes present in the dataset:\n", types_found, + "\nRegional and metropolitian area boundaries, codes and names are changing frequently.", + "\nSee ?validate_geo_code, ?validate_nuts_regions and ?recode_nuts or the", + "\n'Mapping Regional Data, Mapping Metadata Problems' vignette for a tutorial.") + } +} \ No newline at end of file diff --git a/R/regional_coding_functions.R b/R/regional_coding_functions.R new file mode 100644 index 00000000..aee816d7 --- /dev/null +++ b/R/regional_coding_functions.R @@ -0,0 +1,130 @@ +#' Recode Region Codes From Source To Target NUTS Typology +#' +#' Validate your geo codes, pair them with the appropriate standard +#' typology, look up potential causes of invalidity in the EU correspondence +#' tables, and look up the appropriate geographical codes in the other +#' (target) typology. For example, validate geo codes in the \code{'NUTS2016'} +#' typology and translate them to the now obsolete the \code{'NUTS2010'} typology +#' to join current data with historical data sets. +#' +#' Imported from the \url{https://ropengov.github.io/regions/}{regions} package. +#' @param dat A data frame with a 3-5 character \code{geo_var} variable +#' to be validated. +#' @param geo_var Defaults to \code{"geo"}. The variable that contains +#' the 3-5 character geo codes to be validated. +#' @param nuts_year The year of the NUTS typology to use. +#' You can select any valid +#' NUTS definition, i.e. \code{1999}, \code{2003}, \code{2006}, +#' \code{2010}, \code{2013}, the currently used \code{2016} and the +#' already announced and defined \code{2021}. Defaults to the current +#' typology in force, which is \code{2016}. 
+#' @importFrom regions recode_nuts( +#' @return The original data frame with a \code{'geo_var'} column is extended +#' with a \code{'typology'} column that states in which typology is the \code{'geo_var'} +#' a valid code. For invalid codes, looks up potential reasons of invalidity +#' and adds them to the \code{'typology_change'} column, and at last it +#' adds a column of character vector containing the desired codes in the +#' target typology, for example, in the NUTS2013 typology. +#' @examples{ +#' foo <- data.frame ( +#' geo = c("FR", "DEE32", "UKI3" , +#' "HU12", "DED", +#' "FRK"), +#' values = runif(6, 0, 100 ), +#' stringsAsFactors = FALSE ) +#' +#' recode_nuts(foo, nuts_year = 2013) +#' } +#' @export + +#' Validate Conformity With NUTS Geo Codes +#' +#' Validate that \code{geo_var} is conforming with the \code{NUTS1}, +#' \code{NUTS2}, or \code{NUTS3} typologies. +#' While country codes are technically not part of the NUTS typologies, +#' Eurostat de facto uses a \code{NUTS0} typology to identify countries. +#' This de facto typology has three exception which are handled by the +#' \link[regions]{validate_nuts_countries} function. +#' +#' NUTS typologies have different versions, therefore the conformity +#' is validated with one specific versions, which can be any of these: +#' \code{1999}, \code{2003}, \code{2006}, \code{2010}, +#' \code{2013}, the currently used \code{2016} and the already +#' announced and defined \code{2021}. +#' +#' The NUTS typology was codified with the \code{NUTS2003}, and the +#' pre-1999 NUTS typologies may confuse programmatic data processing, +#' given that some NUTS1 regions were identified with country codes +#' in smaller countries that had no \code{NUTS1} divisions. +#' +#' Currently the \code{2016} is used by Eurostat, but many datasets +#' still contain \code{2013} and sometimes earlier metadata. +#' +#' @param dat A data frame with a 3-5 character \code{geo_var} +#' variable to be validated. 
+#' @param geo_var Defaults to \code{"geo"}. The variable that contains +#' the 3-5 character geo codes to be validated. +#' @param nuts_year The year of the NUTS typology to use. +#' Defaults to \code{2016}. You can select any valid +#' NUTS definition, i.e. \code{1999}, \code{2003}, \code{2006}, +#' \code{2010}, \code{2013}, the currently used \code{2016} and the +#' already announced and defined \code{2021}. +#' @importFrom regions validate_nuts_regions +#' @return Returns the original \code{dat} data frame with a column +#' that specifies the comformity with the NUTS definition of the year +#' \code{nuts_year}. +#' @examples +#' \donttest{ +#' my_reg_data <- data.frame ( +#' geo = c("BE1", "HU102", "FR1", +#' "DED", "FR7", "TR", "DED2", +#' "EL", "XK", "GB"), +#' values = runif(10)) +#' +#' validate_nuts_regions (my_reg_data) +#' +#' validate_nuts_regions (my_reg_data, nuts_year = 2013) +#' +#' validate_nuts_regions (my_reg_data, nuts_year = 2003) +#' } +#' @export + + +#' Validate Conformity with NUTS Geo Codes (vector) +#' +#' Validate that \code{geo} is conforming with the \code{NUTS1}, +#' \code{NUTS2}, or \code{NUTS3} typologies. +#' While country codes are technically not part of the NUTS typologies, +#' Eurostat de facto uses a \code{NUTS0} typology to identify countries. +#' This de facto typology has three exception which are handled by the +#' \link[regions]{validate_nuts_countries} function. +#' +#' NUTS typologies have different versions, therefore the conformity +#' is validated with one specific versions, which can be any of these: +#' \code{1999}, \code{2003}, \code{2006}, \code{2010}, +#' \code{2013}, the currently used \code{2016} and the already +#' announced and defined \code{2021}. 
+#' +#' The NUTS typology was codified with the \code{NUTS2003}, and the +#' pre-1999 NUTS typologies may confuse programmatic data processing, +#' given that some NUTS1 regions were identified with country codes +#' in smaller countries that had no \code{NUTS1} divisions. +#' +#' Currently the \code{2016} is used by Eurostat, but many datasets +#' still contain \code{2013} and sometimes earlier metadata. +#' @param geo A vector of geographical code to validate. +#' @param nuts_year A valid NUTS edition year. +#' @importFrom regions validate_geo_code +#' @return A character list with the valid typology, or 'invalid' in the cases +#' when the geo coding is not valid. +#' @examples +#' \donttest{ +#' my_reg_data <- data.frame ( +#' geo = c("BE1", "HU102", "FR1", +#' "DED", "FR7", "TR", "DED2", +#' "EL", "XK", "GB"), +#' values = runif(10)) +#' +#' validate_geo_code(my_reg_data$geo) +#' } +#' @export diff --git a/man/eurostat-package.Rd b/man/eurostat-package.Rd index 237b98e6..273dd164 100755 --- a/man/eurostat-package.Rd +++ b/man/eurostat-package.Rd @@ -20,6 +20,10 @@ LazyLoad: \tab yes\cr R Tools for Eurostat Open Data } +\section{}{ +NA +} + \examples{ library(eurostat) } From f2a63cd9cd1c4286e5c448ebfd4befd2f52e5290 Mon Sep 17 00:00:00 2001 From: Daniel Antal Date: Thu, 24 Jun 2021 14:31:38 +0200 Subject: [PATCH 2/4] add imported regions functions --- NAMESPACE | 1 - R/deprecated_regions_functions.R | 5 +---- R/eurostat-package.R | 8 ++------ R/regional_coding_functions.R | 14 +++++++++----- man/eurostat-package.Rd | 6 ++++-- man/recode_to_nuts_2013.Rd | 1 - man/recode_to_nuts_2016.Rd | 1 - 7 files changed, 16 insertions(+), 20 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index ea149ea1..9dd736b6 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,7 +48,6 @@ importFrom(methods,as) importFrom(readr,col_character) importFrom(readr,cols) importFrom(readr,read_tsv) -importFrom(region,validate_nuts_regions) importFrom(regions,recode_nuts) 
importFrom(regions,validate_nuts_regions) importFrom(rlang,.data) diff --git a/R/deprecated_regions_functions.R b/R/deprecated_regions_functions.R index 9dc119b1..ef127d4e 100644 --- a/R/deprecated_regions_functions.R +++ b/R/deprecated_regions_functions.R @@ -90,7 +90,6 @@ add_nuts_level <- function (dat, geo_labels = "geo") { #' regions::validate_nuts_regions(dat) #' } #' -#' @importFrom region validate_nuts_regions #' @export harmonize_geo_code <- function (dat) { @@ -135,7 +134,6 @@ harmonize_geo_code <- function (dat) { #' )) #' #' recode_to_nuts_2016(test_regional_codes) -#' #' @importFrom regions recode_nuts #' @export @@ -182,7 +180,6 @@ recode_to_nuts_2016 <- function (dat) { #' )) #' #' recode_to_nuts_2013(test_regional_codes) -#' #' @importFrom regions recode_nuts #' @export @@ -190,5 +187,5 @@ recode_to_nuts_2013 <- function (dat) { warning("The 'recode_to_nuts_2013' function is deprecated. Use instead regions::recode_nuts(dat, nuts_year = 2013)") - regions::recode_nuts(dat, nuts_year = 2013) + recode_nuts(dat, nuts_year = 2013) } diff --git a/R/eurostat-package.R b/R/eurostat-package.R index 19d7f579..30986f76 100755 --- a/R/eurostat-package.R +++ b/R/eurostat-package.R @@ -22,11 +22,7 @@ #' \url{https://ropengov.github.io/eurostat/} #' @examples library(eurostat) #' @section regions functions: -#' The most important functions are imported from the regions package. -#' \url{https://ropengov.github.io/regions/} -#' When working with the sub-national data, you may need the full functionality of the regions package. -#' \code\{\link{validate_nuts_regions}}\cr -#' \code\{\link{validate_geo_code}}\cr -#' \code\{\link{recode_nuts}}\cr +#' For working with sub-national statistics the basic functions of the regions package +#' are imported \url{https://ropengov.github.io/regions/}. 
#' @keywords package NULL diff --git a/R/regional_coding_functions.R b/R/regional_coding_functions.R index aee816d7..e9f14367 100644 --- a/R/regional_coding_functions.R +++ b/R/regional_coding_functions.R @@ -1,4 +1,4 @@ -#' Recode Region Codes From Source To Target NUTS Typology +#' @title Recode Region Codes From Source To Target NUTS Typology #' #' Validate your geo codes, pair them with the appropriate standard #' typology, look up potential causes of invalidity in the EU correspondence @@ -6,8 +6,9 @@ #' (target) typology. For example, validate geo codes in the \code{'NUTS2016'} #' typology and translate them to the now obsolete the \code{'NUTS2010'} typology #' to join current data with historical data sets. -#' +#' #' Imported from the \url{https://ropengov.github.io/regions/}{regions} package. +#' #' @param dat A data frame with a 3-5 character \code{geo_var} variable #' to be validated. #' @param geo_var Defaults to \code{"geo"}. The variable that contains @@ -18,13 +19,14 @@ #' \code{2010}, \code{2013}, the currently used \code{2016} and the #' already announced and defined \code{2021}. Defaults to the current #' typology in force, which is \code{2016}. -#' @importFrom regions recode_nuts( +#' @importFrom regions recode_nuts #' @return The original data frame with a \code{'geo_var'} column is extended #' with a \code{'typology'} column that states in which typology is the \code{'geo_var'} #' a valid code. For invalid codes, looks up potential reasons of invalidity #' and adds them to the \code{'typology_change'} column, and at last it #' adds a column of character vector containing the desired codes in the #' target typology, for example, in the NUTS2013 typology. 
+#' @family regions functions #' @examples{ #' foo <- data.frame ( #' geo = c("FR", "DEE32", "UKI3" , @@ -37,7 +39,7 @@ #' } #' @export -#' Validate Conformity With NUTS Geo Codes +#' @title Validate Conformity With NUTS Geo Codes #' #' Validate that \code{geo_var} is conforming with the \code{NUTS1}, #' \code{NUTS2}, or \code{NUTS3} typologies. @@ -73,6 +75,7 @@ #' @return Returns the original \code{dat} data frame with a column #' that specifies the comformity with the NUTS definition of the year #' \code{nuts_year}. +#' @family regions functions #' @examples #' \donttest{ #' my_reg_data <- data.frame ( @@ -90,7 +93,7 @@ #' @export -#' Validate Conformity with NUTS Geo Codes (vector) +#' @title Validate Conformity with NUTS Geo Codes (vector) #' #' Validate that \code{geo} is conforming with the \code{NUTS1}, #' \code{NUTS2}, or \code{NUTS3} typologies. @@ -115,6 +118,7 @@ #' @param geo A vector of geographical code to validate. #' @param nuts_year A valid NUTS edition year. #' @importFrom regions validate_geo_code +#' @family regions functions #' @return A character list with the valid typology, or 'invalid' in the cases #' when the geo coding is not valid. #' @examples diff --git a/man/eurostat-package.Rd b/man/eurostat-package.Rd index 273dd164..962dadd7 100755 --- a/man/eurostat-package.Rd +++ b/man/eurostat-package.Rd @@ -20,8 +20,10 @@ LazyLoad: \tab yes\cr R Tools for Eurostat Open Data } -\section{}{ -NA +\section{regions functions}{ + +For working with sub-national statistics the basic functions of the regions package +are imported \url{https://ropengov.github.io/regions/}. 
} \examples{ diff --git a/man/recode_to_nuts_2013.Rd b/man/recode_to_nuts_2013.Rd index 60d08f21..513bcf70 100755 --- a/man/recode_to_nuts_2013.Rd +++ b/man/recode_to_nuts_2013.Rd @@ -42,7 +42,6 @@ test_regional_codes <- data.frame ( )) recode_to_nuts_2013(test_regional_codes) - } \author{ Daniel Antal diff --git a/man/recode_to_nuts_2016.Rd b/man/recode_to_nuts_2016.Rd index f4432883..58215337 100755 --- a/man/recode_to_nuts_2016.Rd +++ b/man/recode_to_nuts_2016.Rd @@ -42,7 +42,6 @@ test_regional_codes <- data.frame ( )) recode_to_nuts_2016(test_regional_codes) - } \author{ Daniel Antal From d044c58e1d37c3e6f7c4aebe4a7aa14a32f9c52f Mon Sep 17 00:00:00 2001 From: Daniel Antal Date: Thu, 24 Jun 2021 14:33:14 +0200 Subject: [PATCH 3/4] @export --- R/regional_coding_functions.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/regional_coding_functions.R b/R/regional_coding_functions.R index e9f14367..08dd04ef 100644 --- a/R/regional_coding_functions.R +++ b/R/regional_coding_functions.R @@ -27,6 +27,7 @@ #' adds a column of character vector containing the desired codes in the #' target typology, for example, in the NUTS2013 typology. #' @family regions functions +#' @export #' @examples{ #' foo <- data.frame ( #' geo = c("FR", "DEE32", "UKI3" , @@ -37,7 +38,7 @@ #' #' recode_nuts(foo, nuts_year = 2013) #' } -#' @export + #' @title Validate Conformity With NUTS Geo Codes #' @@ -76,6 +77,7 @@ #' that specifies the comformity with the NUTS definition of the year #' \code{nuts_year}. #' @family regions functions +#' @export #' @examples #' \donttest{ #' my_reg_data <- data.frame ( @@ -90,7 +92,7 @@ #' #' validate_nuts_regions (my_reg_data, nuts_year = 2003) #' } -#' @export + #' @title Validate Conformity with NUTS Geo Codes (vector) @@ -121,6 +123,7 @@ #' @family regions functions #' @return A character list with the valid typology, or 'invalid' in the cases #' when the geo coding is not valid. 
+#' @export #' @examples #' \donttest{ #' my_reg_data <- data.frame ( @@ -131,4 +134,4 @@ #' #' validate_geo_code(my_reg_data$geo) #' } -#' @export + From 6fba94676bb7f9335448cc4ca72072fa2d970d3f Mon Sep 17 00:00:00 2001 From: Daniel Antal Date: Thu, 24 Jun 2021 14:34:40 +0200 Subject: [PATCH 4/4] details --- R/regional_coding_functions.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/regional_coding_functions.R b/R/regional_coding_functions.R index 08dd04ef..a7a6c1d4 100644 --- a/R/regional_coding_functions.R +++ b/R/regional_coding_functions.R @@ -41,10 +41,10 @@ #' @title Validate Conformity With NUTS Geo Codes -#' -#' Validate that \code{geo_var} is conforming with the \code{NUTS1}, +#' @description Validate that \code{geo_var} is conforming with the \code{NUTS1}, #' \code{NUTS2}, or \code{NUTS3} typologies. -#' While country codes are technically not part of the NUTS typologies, +#' +#' @details While country codes are technically not part of the NUTS typologies, #' Eurostat de facto uses a \code{NUTS0} typology to identify countries. #' This de facto typology has three exception which are handled by the #' \link[regions]{validate_nuts_countries} function.