Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A new helper function and lots of tidying #166

Merged
merged 9 commits into from
Feb 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,22 @@ Depends:
Imports:
broom,
classInt,
countrycode,
countrycode,
curl,
dplyr,
httr,
jsonlite,
lubridate,
RColorBrewer,
readr,
RefManageR,
RefManageR,
sf,
sp,
stringi,
stringr,
tibble,
tidyr
tidyr,
tidyselect
Suggests:
Cairo,
ggplot2,
Expand Down
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(check_access_to_data)
export(check_nuts2013)
export(clean_eurostat_cache)
export(convert_to_nuts2016)
export(cut_to_classes)
export(dic_order)
export(eurotime2date)
Expand All @@ -29,8 +30,13 @@ importFrom(classInt,classIntervals)
importFrom(countrycode,countrycode)
importFrom(curl,curl_download)
importFrom(dplyr,"%>%")
importFrom(dplyr,add_count)
importFrom(dplyr,anti_join)
importFrom(dplyr,arrange)
importFrom(dplyr,case_when)
importFrom(dplyr,filter)
importFrom(dplyr,full_join)
importFrom(dplyr,inner_join)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_if)
Expand Down Expand Up @@ -61,6 +67,7 @@ importFrom(tibble,data_frame)
importFrom(tibble,is_tibble)
importFrom(tidyr,gather_)
importFrom(tidyr,separate)
importFrom(tidyselect,all_of)
importFrom(utils,data)
importFrom(utils,download.file)
importFrom(utils,toBibtex)
81 changes: 49 additions & 32 deletions R/check_nuts2013.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
#' regions. Observations with codes ending in \code{'ZZ'} or \code{'XX'} are
#' removed from the returned data table, because these are non-territorial
#' observations or they are outside of the EU.
#' @importFrom dplyr left_join mutate filter rename mutate_if
#' @importFrom dplyr mutate filter rename mutate_if case_when
#' @importFrom dplyr left_join full_join anti_join
#' @examples
#' \dontrun{
#' dat <- eurostat::tgs00026
Expand All @@ -29,25 +30,55 @@ check_nuts2013 <- function (dat) {
changed_regions <- regional_changes_2016 %>%
filter ( change != 'unchanged')

tmp <- dat %>%
mutate_if ( is.factor, as.character ) %>%
left_join ( regional_changes_2016 %>%
select ( code16, change ) %>%
dplyr::rename ( geo = code16 ),
by = 'geo')
## Changed regions to be looked up by their NUTS2016 codes -----------
regional_changes_by_2016 <- regional_changes_2016 %>%
mutate ( geo = code16 ) %>%
filter ( !is.na(code13))

nrow(regional_changes_by_2016)

## adding those that have no equivalent in the previous group
## some regions have to be identified by their old and new codes -----
regional_changes_by_2013 <- regional_changes_2016 %>%
mutate ( geo = code13 ) %>%
filter ( !is.na(code13)) %>%
anti_join ( regional_changes_by_2016,
by = c("code13", "code16", "name", "nuts_level", "change", "geo"))

## Region can be found by new or old NUTS code -----------------------

all_regional_changes <- regional_changes_by_2016 %>%
full_join ( regional_changes_by_2013,
by = c("code13", "code16", "name", "nuts_level",
"change", "geo") )

there_are_changes <- FALSE

if ( any (is.na(tmp$change)) ) {
tmp <- dat %>%
mutate_if ( is.factor, as.character ) %>%
left_join ( regional_changes_2016 %>%
select ( code13, change ) %>%
dplyr::rename ( geo = code13 ),
by = 'geo')

there_are_changes <- TRUE
tmp <- dat %>%
mutate_if ( is.factor, as.character ) %>%
left_join ( all_regional_changes, by = 'geo' ) %>%
mutate ( nuts_level = ifelse (is.na(nuts_level),
9, nuts_level)) %>%
mutate ( nuts_level = case_when (
nuts_level < 9 ~ nuts_level,
nuts_level == 9 & nchar(geo) == 2 ~ 0,
nuts_level == 9 & nchar(geo) == 3 ~ 1,
nuts_level == 9 & nchar(geo) == 4 ~ 2,
nuts_level == 9 & nchar(geo) == 5 ~ 3,
TRUE ~ NA_real_ ))

if ( all ( tmp$change %in% unique(regional_changes_2016$code16) )) {
message ( "All observations are coded with NUTS2016 codes" )
there_are_changes <- FALSE
}

non_eu <- select ( tmp, geo, code13, code16, change ) %>%
mutate ( change = ifelse (rowSums(is.na(.))==3,
"not in the EU", change )) %>%
filter ( change == 'not in the EU' )

tmp <- tmp %>% mutate ( change = ifelse ( geo %in% non_eu$geo,
'not in the EU', change ))


eu_country_vector <- eurostat::eu_countries$code
tmp_country_vector <- unique ( substr(tmp$geo, 1, 2) )
Expand All @@ -59,20 +90,6 @@ check_nuts2013 <- function (dat) {
"In this data frame: ", not_EU_country_vector )
}

if ( any( stringr::str_sub(tmp$geo, -2,-1) %in% c('ZZ', 'XX')) ) {

warning ( "Regional codes ending with ZZ or XX are extra-territorial",
"\n to the EU and they are removed from the data frame.")

}

tmp %>%
mutate ( change = ifelse ( geo %in% not_EU_country_vector ,
'not_EU', change )) %>%
filter ( stringr::str_sub(geo, -3,-1) != "ZZZ",
stringr::str_sub(geo, -2,-1) != "ZZ",
stringr::str_sub(geo, -3,-1) != "XXX",
stringr::str_sub(geo, -2,-1) != "XX" ) %>%
mutate_if ( is.factor, as.character )
tmp

}
77 changes: 77 additions & 0 deletions R/convert_to_nuts2016.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#' @title Recode geo labels and rename regions from NUTS2013 to NUTS2016
#' @description Eurostat mixes NUTS2013 and NUTS2016 geographic label codes
#' in the \code{'geo'} column, which creates comparability issues over time.
#' This function recodes the observations where only the coding changed, and
#' marks discontinued regions and other regions which may or may not be
#' comparable to the current \code{'NUTS2016'} boundaries.
#' @param dat A Eurostat data frame downloaded with
#' \code{\link{get_eurostat}}.
#' @author Daniel Antal
#' @return An augmented and potentially relabelled data frame which
#' contains all formerly \code{'NUTS2013'} definition geo labels in the
#' \code{'NUTS2016'} vocabulary when only the code changed, but the
#' boundary did not. It also contains some information on other geo labels
#' that cannot be brought to the current \code{'NUTS2016'} definition.
#' Furthermore, when the official name of the region changed, the new name
#' is used (provided the region boundary did not otherwise change).
#' If \code{dat} does not yet contain the \code{change}, \code{code16} and
#' \code{code13} columns, the helper function
#' \code{\link{harmonize_geo_code}} is called to add them.
#' A warning is raised if any observation is left without a geo label.
#' @seealso \code{\link{check_nuts2013}}, \code{\link{harmonize_geo_code}}
#' @importFrom dplyr mutate filter rename arrange case_when select
#' @importFrom dplyr left_join inner_join anti_join
#' @importFrom tidyselect all_of
#' @examples
#' \dontrun{
#' eurostat::tgs00026 %>%
#'   check_nuts2013() %>%
#'   harmonize_geo_code() %>%
#'   convert_to_nuts2016()
#'
#' # If the helper columns are missing, harmonize_geo_code() is
#' # called internally.
#' eurostat::tgs00026 %>%
#'   convert_to_nuts2016()
#' }
#' @export

convert_to_nuts2016 <- function (dat) {

  ## Add the helper columns only when they are not all present yet ------
  required_cols <- c("change", "code16", "code13")

  if ( all(required_cols %in% names(dat)) ) {
    tmp <- dat
  } else {
    tmp <- harmonize_geo_code(dat)
  }

  ## Fill in the geo label ------------------------------------------------
  ## NOTE(review): the first condition keeps every non-missing geo, so only
  ## rows with a missing geo are filled from code16 -- confirm that this is
  ## the intended recoding rule.
  tmp <- tmp %>%
    mutate ( geo = case_when (
      !is.na(geo) ~ geo,
      change == "not in the EU" ~ geo,
      TRUE ~ code16
    ))

  ## Warn about rows that still have no geo label -------------------------
  ## The geo of these rows is NA by definition, so they are identified by
  ## their NUTS2013 code instead.
  missing_geo <- is.na(tmp$geo)

  if ( any(missing_geo) ) {
    warning ( "The following regions have no geo labels: ",
              paste ( unique(tmp$code13[missing_geo]), collapse = ", ") )
  }

  ## Regions that can be named by their NUTS2016 definition ---------------
  names_by_nuts2016 <- regional_changes_2016 %>%
    filter ( !is.na(code16) )

  regions_by_nuts2016_names <- tmp %>%
    select ( -name ) %>%
    inner_join ( names_by_nuts2016,
                 by = c("code13", "code16", "nuts_level", "change") )

  ## Everything else keeps the name it already had ------------------------
  ## `by` takes a plain character vector; all_of() is only meant for
  ## tidyselect selection contexts.
  regions_with_other_names <- tmp %>%
    anti_join ( regions_by_nuts2016_names,
                by = names(tmp) )

  ## NOTE(review): the last join has no explicit `by`, so it joins on all
  ## columns shared with nuts_correspondence -- confirm the intended keys.
  rbind ( regions_by_nuts2016_names,
          regions_with_other_names ) %>%
    arrange ( time, geo, code16 ) %>%
    left_join ( nuts_correspondence )

}
Loading