From 0e710219b746b8904ac8c94a2e5a0f05e0bc26b6 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Thu, 23 May 2024 20:32:33 +0800 Subject: [PATCH] fixed #43 --- .Rhistory | 846 ++++++++++++++--------------- .Rproj.user/shared/notebooks/paths | 2 +- DESCRIPTION | 1 - NEWS.md | 4 + {R => dev}/test.R | 0 vignettes/regioncode-vignette.html | 21 +- 6 files changed, 439 insertions(+), 435 deletions(-) rename {R => dev}/test.R (100%) diff --git a/.Rhistory b/.Rhistory index 7b4b671..ab45ddc 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,220 +1,434 @@ -convert_to = "name", -incomplete_name = "to", -province = TRUE +prov_data <- unique(region_data[, c("prov_code", paste0("199", 8:9, "_nickname"), "area")]) +# Because province nicknames changed in 1999 +year_num <- ifelse(as.numeric(year_from) < 1999, 1998, 1999) +year_from <- ifelse (is.numeric(data_input[1]), "prov_code", "prov_name") +year_to <- switch( +convert_to, +"name" = "prov_name", +"code" = "prov_code", +"area" = "area", +"nameToabbre" = { +year_from <- "prov_name" +paste0(year_num, "_nickname") +}, +"codeToabbre" = { +year_from <- "prov_code" +paste0(year_num, "_nickname") +}, +"abbreToname" = { +year_from <- paste0(year_num, "_nickname") +"prov_name" +}, +"abbreTocode" = { +year_from <- paste0(year_num, "_nickname") +"prov_code" +}, +"abbreToarea" = { +year_from <- paste0(year_num, "_nickname") +"area" +} ) -library(here) -region_table$prov_name[region_table$prov_sname == "宁夏"] -load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda") -region_table$prov_name[region_table$prov_sname == "宁夏"] -region_table$prov_name[region_table$prov_sname == "宁夏"] <- "宁夏回族自治区" -save(region_table, corruption, file = here("R", "sysdata.rda")) -library(regioncode) -regioncode::regioncode( -"宁夏回族自治区", -year_from = 2015, +ls_index <- c(year_from, year_to) +} +?switch +province = FALSE +year_from = 1999 +year_to = 2015 +convert_to = "code" +incomplete_name = FALSE +zhixiashi = FALSE +to_dialect = "none" +to_pinyin = FALSE +province = FALSE +# 2 Section of prefectural-level converting +if (to_dialect != "none") { +# 2-1 If convert language zone +year_from <- if (is.numeric(data_input[1])) +paste0(year_from, "_code") +else +paste0(year_from, "_name") +year_to <- if (to_dialect == "dia_group") +"pref_language_all" +else if (to_dialect == "dia_sub_group") +"dia_sub_language_all" +ls_index <- c(year_from, year_to) +} else { +# 2-2 If not convert language zone +year_from <- if (is.numeric(data_input[1])) +paste0(year_from, "_code") +else +paste0(year_from, "_name") +region_data <- region_data[!duplicated(region_data$`2019_code`), ] +year_to <- switch( +convert_to, +"code" = paste0(year_to, "_code"), +"area" = "area", +"name" = paste0(year_to, "_name"), +"rank" = paste0(year_to, "_rank") +) +ls_index <- c(year_from, year_to) +# Using the Municipal codes for within region codes +if (zhixiashi) { +region_zhixiashi <- subset(region_data, zhixiashi) +# Gathering all the needed fields +region_sname <- region_zhixiashi[grep("_sname$", names(region_zhixiashi))] +region_name <- region_zhixiashi[grep("_name$", names(region_zhixiashi))] +region_code <- region_zhixiashi[grep("_code$", names(region_zhixiashi))] +region_remain <- region_zhixiashi[!grepl( +"(_code$|_sname$|_name$|language$|_all$|_nickname$|dia_sub_group$|freq$)", +names(region_zhixiashi) +)] +# Replacing prefectural names and codes with provincial ones +region_name2 <- as.data.frame(matrix( +rep(region_zhixiashi$prov_name, ncol(region_name)), +ncol = ncol(region_name) +)) +names(region_name2) <- names(region_name) +region_sname2 <- as.data.frame(matrix( +rep(region_zhixiashi$prov_sname, ncol(region_sname)), +ncol = ncol(region_sname) +)) +names(region_sname2) <- names(region_sname) +region_code2 <- as.data.frame(matrix( +rep(region_zhixiashi$prov_code, ncol(region_code)), +ncol = ncol(region_code) +)) +names(region_code2) <- names(region_code) +region_zhixiashi <- cbind(region_name2, region_code2, region_sname2, region_remain) +region_zhixiashi <- unique(region_zhixiashi[, order(names(region_zhixiashi))]) +region_province <- unique(region_data[!grepl("language$|_all$|_nickname$|dia_sub_group$|freq$", +names(region_data))]) +region_province <- region_province[, order(names(region_province))] +region_data <- rbind(region_zhixiashi, region_province) +} +} +View(index_municipal) +data_input <- as.data.frame(data_input) +names(data_input) <- ls_index[1] +data_output <- region_data[ls_index] +data_output <- unique(data_output) +?merge +?py +data_input <- as.data.frame(data_input) +names(data_input) <- ls_index[1] +data_output <- unique(region_data[ls_index]) +if (incomplete_name) { +data_input[[ls_index[1]]] <- substr(data_input[[ls_index[1]]], 1, 2) +data_output[[ls_index[1]]] <- substr(data_output[[ls_index[1]]], 1, 2) +} +# Indexing to find the first match quickly and directly +index <- match(data_input[[ls_index[1]]], data_output[[ls_index[1]]]) +data_output <- data_output[index, year_to] +class(data_output) +data_input <- as.data.frame(data_input) +names(data_input) <- ls_index[1] +data_output <- unique(region_data[ls_index]) +if (incomplete_name) { +data_input[[ls_index[1]]] <- substr(data_input[[ls_index[1]]], 1, 2) +data_output[[ls_index[1]]] <- substr(data_output[[ls_index[1]]], 1, 2) +} +# Indexing to find the first match quickly and directly +index <- match(data_input[[ls_index[1]]], data_output[[ls_index[1]]]) +data_output <- data_output[index, year_to, drop = TRUE] +regioncode <- function(data_input, +year_from = 1999, year_to = 2015, -convert_to = "name", -incomplete_name = "to", -province = TRUE +convert_to = "code", +incomplete_name = FALSE, +zhixiashi = FALSE, +to_dialect = "none", +to_pinyin = FALSE, +province = FALSE) { +validate_input(data_input, +year_from, +province, +to_dialect, +convert_to, +zhixiashi, +to_pinyin) +if (province) { +zhixiashi <- FALSE +# 1 Section of province-level converting +if (to_dialect != "none") { +# 1-1 If convert language zone +year_from <- if (is.numeric(data_input[1])) +"prov_code" +else +"prov_name" +ls_index <- if (to_dialect == "dia_super") { +year_to <- "prov_language" +c(year_from, year_to) +} +} else { +# 1-2 If not convert language zone +prov_data <- unique(region_data[, c("prov_code", paste0("199", 8:9, "_nickname"), "area")]) +# Because province nicknames changed in 1999 +year_num <- ifelse(as.numeric(year_from) < 1999, 1998, 1999) +year_from <- if (is.numeric(data_input[1])) +"prov_code" +else +"prov_name" +year_to <- switch( +convert_to, +"name" = "prov_name", +"code" = "prov_code", +"area" = "area", +"nameToabbre" = { +year_from <- "prov_name" +paste0(year_num, "_nickname") +}, +"codeToabbre" = { +year_from <- "prov_code" +paste0(year_num, "_nickname") +}, +"abbreToname" = { +year_from <- paste0(year_num, "_nickname") +"prov_name" +}, +"abbreTocode" = { +year_from <- paste0(year_num, "_nickname") +"prov_code" +}, +"abbreToarea" = { +year_from <- paste0(year_num, "_nickname") +"area" +} ) +ls_index <- c(year_from, year_to) +} +} else { +# 2 Section of prefectural-level converting +if (to_dialect != "none") { +# 2-1 If convert language zone +year_from <- if (is.numeric(data_input[1])) +paste0(year_from, "_code") +else +paste0(year_from, "_name") +year_to <- if (to_dialect == "dia_group") +"pref_language_all" +else if (to_dialect == "dia_sub_group") +"dia_sub_language_all" +ls_index <- c(year_from, year_to) +} else { +# 2-2 If not convert language zone +year_from <- if (is.numeric(data_input[1])) +paste0(year_from, "_code") +else +paste0(year_from, "_name") +region_data <- region_data[!duplicated(region_data$`2019_code`), ] +year_to <- switch( +convert_to, +"code" = paste0(year_to, "_code"), +"area" = "area", +"name" = paste0(year_to, "_name"), +"rank" = paste0(year_to, "_rank") +) +ls_index <- c(year_from, year_to) +# Using the Municipal codes for within region codes +if (zhixiashi) { +region_zhixiashi <- subset(region_data, zhixiashi) +# Gathering all the needed fields +region_sname <- region_zhixiashi[grep("_sname$", names(region_zhixiashi))] +region_name <- region_zhixiashi[grep("_name$", names(region_zhixiashi))] +region_code <- region_zhixiashi[grep("_code$", names(region_zhixiashi))] +region_remain <- region_zhixiashi[!grepl( +"(_code$|_sname$|_name$|language$|_all$|_nickname$|dia_sub_group$|freq$)",names(region_zhixiashi))] +# Replacing prefectural names and codes with provincial ones +region_name2 <- as.data.frame(matrix( +rep(region_zhixiashi$prov_name, ncol(region_name)), +ncol = ncol(region_name) +)) +names(region_name2) <- names(region_name) +region_sname2 <- as.data.frame(matrix( +rep(region_zhixiashi$prov_sname, ncol(region_sname)), +ncol = ncol(region_sname) +)) +names(region_sname2) <- names(region_sname) +region_code2 <- as.data.frame(matrix( +rep(region_zhixiashi$prov_code, ncol(region_code)), +ncol = ncol(region_code) +)) +names(region_code2) <- names(region_code) +region_zhixiashi <- cbind(region_name2, +region_code2, +region_sname2, +region_remain) +region_zhixiashi <- unique(region_zhixiashi[, order(names(region_zhixiashi))]) +region_province <- unique(region_data[!grepl("language$|_all$|_nickname$|dia_sub_group$|freq$", names(region_data))]) +region_province <- region_province[, order(names(region_province))] +region_data <- rbind(region_zhixiashi, region_province) +} +} +} +# Convert the input to a data.frame for later merging +data_input <- as.data.frame(data_input) +names(data_input) <- ls_index[1] +data_output <- unique(region_data[ls_index]) +if (incomplete_name) { +data_input[[ls_index[1]]] <- substr(data_input[[ls_index[1]]], 1, 2) +data_output[[ls_index[1]]] <- substr(data_output[[ls_index[1]]], 1, 2) +} +# Indexing to find the first match quickly and directly +index <- match(data_input[[ls_index[1]]], data_output[[ls_index[1]]]) +data_output <- data_output[index, year_to, drop = TRUE] +# Because '2pinyin' can not be used as a variable name +if (to_pinyin) { +# Predefined mapping for special cases with Chinese characters +special_cases <- c( +"\u9655\u897f" = "shaan_xi", +"\u5185\u8499" = "inner_mongolia", +"\u897f\u85cf" = "tibet", +"\u6fb3\u95e8" = "macao", +"\u9999\u6e2f" = "hong_kong" +) +# Extract the first two characters of each entry in data_output +first_two_chars <- substr(data_output, 1, 2) +# Apply special cases mapping +special_pinyin <- special_cases[first_two_chars] +# Use py function where no special case is matched +data_output <- ifelse(is.na(special_pinyin), +py( +char = first_two_chars, +dic = pydic(method = "toneless", dic = "pinyin2") +), +special_pinyin) +} +return(data_output) +} +# Function to validate input +validate_input <- function(data_input, +year_from, +province, +to_dialect, +convert_to, +zhixiashi, +to_pinyin) { +# Simplified data type checks +if (!(is.character(data_input[1]) || is.numeric(data_input[1]))) { +stop("Invalid input: only region names or division codes are valid.") +} +# Simplify type and value checks +if (!is.numeric(year_from) || +!all(c(province, zhixiashi, to_pinyin) %in% c(TRUE, FALSE))) { +stop("Invalid input: Parameters must be of the correct data type.") +} +# Consolidated conversion checks +valid_conversions <- if (province && to_dialect == "none") { +c( +"name", +"code", +"area", +"rank", +"nameToabbre", +"codeToabbre", +"abbreToname", +"abbreTocode", +"abbreToarea" +) +} else { +c("name", "code", "area", "rank") +} +if (!(convert_to %in% valid_conversions)) { +stop("Invalid input: please choose a valid converting method.") +} +# Specific scenario checks +if (to_pinyin && convert_to == "code" && to_dialect == "none") { +stop("Invalid input: cannot translate administrative codes to pinyin.") +} +} +regioncode(c("东城区", "梅州市")) +load("R/sysdata.rda") +p_load(pinyin) +library(pacman) +p_load(pinyin) +regioncode(c("东城区", "梅州市")) +regioncode(c("北京市", "天津市"), zhixiashi = TRUE) +regioncode(c("河北省", "河南省"), province = TRUE) library(regioncode) library(regioncode) -library(regioncode) -load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda") -View(region_table) -names(region_data) -names(region_table) -load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda") -library(regioncode) -load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda") -names(region_data) -save(corruption, region_data, file = here::here("R", "sysdata.rda")) -load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda") -knitr::opts_chunk$set(message = FALSE, warning = FALSE) -if(!require(regioncode)) install.packages("regioncode") -library(regioncode) data("corruption") -# Original 2019 version -corruption$prefecture_id -# 1999 version -regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default set -year_from = 2019, -year_to = 1989) -# The original name -corruption$prefecture -# Codes to name +# Conversion to the 1989 version regioncode(data_input = corruption$prefecture_id, -convert_to = "name", -year_from = 2019, -year_to = 1989) -# Name to codes of the same year -regioncode(data_input = corruption$prefecture, -convert_to = "code", -year_from = 2019, -year_to = 2019) -# Name to name of a different year -regioncode(data_input = corruption$prefecture, -convert_to = "name", +convert_to = "code", # default setting year_from = 2019, year_to = 1989) -tibble( -preference = corruption$prefecture, -rank = regioncode(data_input = corruption$prefecture, -year_from = 2011, -year_to = 1989, -convert_to="rank") -) -tidyr::tibble( -preference = corruption$prefecture, -rank = regioncode(data_input = corruption$prefecture, -year_from = 2011, -year_to = 1989, -convert_to="rank") -) -load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda") -region_data |> names() -knitr::opts_chunk$set(message = FALSE, warning = FALSE) -if(!require(regioncode)) install.packages("regioncode") +# Comparison tibble( code2019 = corruption$prefecture_id, code1989 = regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default set +convert_to = "code", # default setting year_from = 2019, year_to = 1989), -name1989 = regioncode(data_input = corruption$prefecture_id, -convert_to = "name", # default set -year_from = 2019, -year_to = 1989) -) -library(tidyverse) -tibble( -code2019 = corruption$prefecture_id, -code1989 = regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default set +name2019 = regioncode(data_input = corruption$prefecture_id, +convert_to = "name", # default setting year_from = 2019, -year_to = 1989), +year_to = 2019), name1989 = regioncode(data_input = corruption$prefecture_id, -convert_to = "name", # default set +convert_to = "name", # default setting year_from = 2019, year_to = 1989) ) -temp2 <- corruption$prefecture_id -# 1989 version -temp <- regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default set -year_from = 2019, -year_to = 1989) -View(region_data) -usethis::git_vaccinate() #Adds .DS_Store, .Rproj.user, .Rdata, .Rhistory, and .httr-oauth to your global (a.k.a. user-level) .gitignore. This is good practice as it decreases the chance that you will accidentally leak credentials to GitHub. -library(regioncode) knitr::opts_chunk$set(message = FALSE, warning = FALSE) if(!require(regioncode)) install.packages("regioncode") library(regioncode) -library(tidyverse) -library(regioncode) -data("corruption") -# Original 2019 version -corruption$prefecture_id -# 1989 version -temp <- regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default set -year_from = 2019, -year_to = 1989) -# tibble( -# code2019 = corruption$prefecture_id, -# code1989 = regioncode(data_input = corruption$prefecture_id, -# convert_to = "code", # default set -# year_from = 2019, -# year_to = 1989), -# name1989 = regioncode(data_input = corruption$prefecture_id, -# convert_to = "name", # default set -# year_from = 2019, -# year_to = 1989) -# ) -load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda") -# Original 2019 version -corruption$prefecture_id -# 1989 version -temp <- regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default set +library(dplyr) +# Conversion to the 1989 version +regioncode(data_input = corruption$prefecture_id, +convert_to = "code", # default setting year_from = 2019, year_to = 1989) +# Comparison tibble( code2019 = corruption$prefecture_id, code1989 = regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default set +convert_to = "code", # default setting year_from = 2019, year_to = 1989), +name2019 = regioncode(data_input = corruption$prefecture_id, +convert_to = "name", # default setting +year_from = 2019, +year_to = 2019), name1989 = regioncode(data_input = corruption$prefecture_id, -convert_to = "name", # default set +convert_to = "name", # default setting year_from = 2019, year_to = 1989) ) +# Original name tibble( -code2019 = corruption$prefecture_id, -code1989 = regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default set +id = corruption$prefecture_id, +name = corruption$prefecture +) +# Codes to name +regioncode(data_input = corruption$prefecture_id, +convert_to = "name", year_from = 2019, -year_to = 1989), -name2019 = regioncode(data_input = corruption$prefecture_id, -convert_to = "name", # default set +year_to = 1989) +# Name to codes of the same year +regioncode(data_input = corruption$prefecture, +convert_to = "code", year_from = 2019, -year_to = 2019), -name1989 = regioncode(data_input = corruption$prefecture_id, -convert_to = "name", # default set +year_to = 2019) +# Name to name of a different year +regioncode(data_input = corruption$prefecture, +convert_to = "name", year_from = 2019, year_to = 1989) -) -?regioncode # Original full names corruption$prefecture -# Convert to incomplete names in 1989 -fake_incomplete <- regioncode(data_input = corruption$prefecture, -convert_to = "name", -year_from = 2019, -year_to = 1989, -incomplete_name = "to") +fake_incomplete <- corruption$prefecture +index_incomplete <- sample(seq(length(corruption$prefecture)), 7) +fake_incomplete[index_incomplete] <- fake_incomplete[index_incomplete] |> +substr(start = 1, stop = 2) fake_incomplete -# Convert to full names in 2008 -fake_full <- regioncode(data_input = fake_incomplete, +# Conversion to full names in 2008 +regioncode(data_input = fake_incomplete, convert_to = "name", -year_from = 1989, -year_to = 2008, -incomplete_name = "to") -fake_full -names_municipality <- c("北京", "天津", "上海", "重庆") -names_municipality <- c("北京", "天津", "上海", "重庆") -regioncode(data_input = names_municipality, -year_from = 2019, -year_to = 2019, -convert_to = "code", -zhixiashi = FALSE) -regioncode(data_input = names_municipality, -year_from = 2019, -year_to = 2019, -convert_to = "code", -zhixiashi = TRUE) -regioncode(data_input = names_municipality, -year_from = 2019, -year_to = 2019, -convert_to = "code", -zhixiashi = TRUE) -names_municipality <- c("北京", "海淀区", "上海", "静安区") -regioncode(data_input = names_municipality, year_from = 2019, -year_to = 2019, -convert_to = "code", -zhixiashi = FALSE) -regioncode(data_input = names_municipality, -year_from = 2019, -year_to = 2019, -convert_to = "code", -zhixiashi = TRUE) -library(reprex) -names_municipality <- c("北京", # Beijing, a municipality +year_to = 2008, +incomplete_name = TRUE) +names_municipality <- c("北京市", # Beijing, a municipality "海淀区", # A district of Beijing -"上海", # Shanghai, a municipality +"上海市", # Shanghai, a municipality "静安区", # A district of Shanghai "济南市") # A prefecture of Shandong # When `zhixiashi` is FALSE, only the districts are recognized @@ -223,290 +437,76 @@ year_from = 2019, year_to = 2019, convert_to = "code", zhixiashi = FALSE) -# When `zhixiashi` is TRUE, muncipalities are +# When `zhixiashi` is TRUE, municipalities are recognized regioncode(data_input = names_municipality, year_from = 2019, year_to = 2019, convert_to = "code", zhixiashi = TRUE) -reprex( -{ -regioncode(data_input = names_municipality, -year_from = 2019, -year_to = 2019, -convert_to = "code", -zhixiashi = FALSE) -# When `zhixiashi` is TRUE, muncipalities are -regioncode(data_input = names_municipality, -year_from = 2019, -year_to = 2019, -convert_to = "code", -zhixiashi = TRUE) -} -) -reprex( -{ -names_municipality <- c("北京", # Beijing, a municipality -"海淀区", # A district of Beijing -"上海", # Shanghai, a municipality -"静安区", # A district of Shanghai -"济南市") # A prefecture of Shandong -regioncode(data_input = names_municipality, -year_from = 2019, -year_to = 2019, -convert_to = "code", -zhixiashi = FALSE) -# When `zhixiashi` is TRUE, muncipalities are -regioncode(data_input = names_municipality, -year_from = 2019, -year_to = 2019, -convert_to = "code", -zhixiashi = TRUE) -} -) -names_municipality <- c("北京", # Beijing, a municipality -"海淀区", # A district of Beijing -"上海", # Shanghai, a municipality -"静安区", # A district of Shanghai -"济南市") # A prefecture of Shandong -regioncode(data_input = names_municipality, -year_from = 2019, -year_to = 2019, -convert_to = "code", -zhixiashi = FALSE) -reprex( -{ -library(regioncode) -names_municipality <- c("北京", # Beijing, a municipality -"海淀区", # A district of Beijing -"上海", # Shanghai, a municipality -"静安区", # A district of Shanghai -"济南市") # A prefecture of Shandong -regioncode(data_input = names_municipality, -year_from = 2019, -year_to = 2019, -convert_to = "code", -zhixiashi = FALSE) -# When `zhixiashi` is TRUE, muncipalities are -regioncode(data_input = names_municipality, -year_from = 2019, -year_to = 2019, -convert_to = "code", -zhixiashi = TRUE) -} -) -?reprex tibble( -province = corruption$province_id, -prov_name = regioncode(data_input = corruption$province_id, -convert_to = "name", +city = corruption$prefecture, +rank1989 = regioncode(data_input = corruption$prefecture, year_from = 2019, year_to = 1989, -province = TRUE), -prov_abbre = regioncode(data_input = corruption$province_id, -convert_to = "codeToabbre", +convert_to="rank"), +rank2014 = regioncode(data_input = corruption$prefecture, year_from = 2019, -year_to = 1989, -province = TRUE) +year_to = 2014, +convert_to = "rank") ) -source("~/.active-rstudio-document", echo=TRUE) tibble( -province = corruption$province_id, -prov_name = regioncode(data_input = corruption$province_id, -convert_to = "name", -year_from = 2019, -year_to = 2019, -province = TRUE), -prov_abbre = regioncode(data_input = corruption$province_id, -convert_to = "codeToabbre", -year_from = 2019, -year_to = 1989, -province = TRUE) -) -reprex( -{ -library(regioncode) -regioncode(data_input = corruption$province_id, -convert_to = "name", -year_from = 2019, -year_to = 1989, -province = TRUE) -} -) -regioncode(data_input = corruption$prefecture, +city = corruption$prefecture, +cityPY = regioncode(data_input = corruption$prefecture, year_from = 2019, year_to = 1989, convert_to = "name", to_pinyin = TRUE -) -regioncode(data_input = corruption$prefecture, -year_from = 2019, -year_to = 1989, -convert_to = "name", -incomplete_name = "to", -to_pinyin = TRUE -) -regioncode(data_input = corruption$prefecture, +), +areaPY = regioncode(data_input = corruption$prefecture, year_from = 2019, year_to = 1989, convert_to = "area", to_pinyin = TRUE ) -regioncode(data_input = c("山西", "陕西", "内蒙古", "香港"), -year_from = 2019, -year_to = 2008, -convert_to = "name", -incomplete_name = "from", -to_pinyin = TRUE -) -regioncode(data_input = c("山西省", "陕西省", "内蒙古自治区"), -year_from = 2019, -year_to = 2008, -convert_to = "name", -to_pinyin = TRUE -) -reprex( -{ -library(regioncode) -regioncode(data_input = c("山西", "陕西", "内蒙古", "香港"), -year_from = 2019, -year_to = 2008, -convert_to = "name", -incomplete_name = "from", -to_pinyin = TRUE -) -} -) -# Regions with special spelling -regioncode(data_input = c("山西", "陕西", "内蒙古", "香港", "澳门"), -year_from = 2019, -year_to = 2008, -convert_to = "name", -incomplete_name = "from", -province = TRUE, -to_pinyin = TRUE ) # Regions with special spelling regioncode(data_input = c("山西", "陕西", "内蒙古", "香港", "澳门"), year_from = 2019, year_to = 2008, convert_to = "name", -incomplete_name = "both", +incomplete_name = TRUE, province = TRUE, to_pinyin = TRUE ) -library(regioncode) -library(regioncode) -knitr::opts_chunk$set(message = FALSE, warning = FALSE) -if(!require(regioncode)) install.packages("regioncode") -library(regioncode) -library(tidyverse) -library(regioncode) -data("corruption") -# Conversion to the 1989 version -regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default setting -year_from = 2019, -year_to = 1989) -# Comparison tibble( -code2019 = corruption$prefecture_id, -code1989 = regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default setting -year_from = 2019, -year_to = 1989), -name2019 = regioncode(data_input = corruption$prefecture_id, -convert_to = "name", # default setting +province = corruption$province_id, +prov_name = regioncode(data_input = corruption$province_id, +convert_to = "name", year_from = 2019, -year_to = 2019), -name1989 = regioncode(data_input = corruption$prefecture_id, -convert_to = "name", # default setting +year_to = 1989, +province = TRUE), +prov_abbre = regioncode(data_input = corruption$province_id, +convert_to = "codeToabbre", year_from = 2019, -year_to = 1989) +year_to = 1989, +province = TRUE) ) -# Conversion to the 1989 version -regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default setting -year_from = 2019, -year_to = 1989) -corruption$prefecture_id -regioncode(data_input = corruption$prefecture_id, -convert_to = "code", # default setting -year_from = 2019, -year_to = 1989) -regioncode(data_input = corruption$prefecture_id, -convert_to = "name", # default setting +regioncode(data_input = corruption$prefecture, year_from = 2019, -year_to = 2019) -regioncode(data_input = corruption$prefecture_id, -convert_to = "name", # default setting +year_to = 1989, +convert_to = "area") +tibble( +city = corruption$prefecture, +dialectGroup = regioncode(data_input = corruption$prefecture, year_from = 2019, -year_to = 1989) -?switch -?case_when -`%>%` -?`%>%` -library(regioncode) -?bind_rows -library(regioncode) -knitr::opts_chunk$set(message = FALSE, warning = FALSE) -if(!require(regioncode)) install.packages("regioncode") -library(regioncode) -library(tidyverse) -# Regions with special spelling -regioncode(data_input = c("山西", "陕西", "内蒙古", "香港", "澳门"), +year_to = 1989, +to_dialect = "dia_group"), +dialectSubGroup = regioncode(data_input = corruption$prefecture, year_from = 2019, -year_to = 2008, -convert_to = "name", -incomplete_name = TRUE, -province = TRUE, -to_pinyin = TRUE +year_to = 1989, +to_dialect = "dia_sub_group") ) -?ends_with -?pull -library(pkgdown) -build_site() -library(regioncode) -devtools::install(build_vignettes = TRUE) -pkgdown::build_site() -devtools::install(build_vignettes = TRUE) -pkgdown::build_site() -devtools::release() -devtools::install(build_vignettes = TRUE) -pkgdown::build_site() -devtools::release() -prefecture_gini <- readRDS("D:/Desktop/prefecture_gini.rds") -library(tidyverse) -select(prefecture_gini, year, city_code_a2_2, overall_city_80) -select(prefecture_gini, year, city_code_a2_2, overall_city_80) |> rio::export("nies_gini.xlsx") -devtools::install(build_vignettes = TRUE) -devtools::check() -library(regioncode) -?devtools::check() -devtools::check(manual = TRUE) -?match -if (!requireNamespace("pacman", quietly = TRUE)) { -install.packages("pacman") -} -library(pacman) -p_load(pinyin) -load("R/sysdata.rda") -names(region_data) -identical(region_data$1986_sname...1, region_data$1986_sname...8) -region_data$1986_sname...1 == region_data$1986_sname...8 -region_data$`1986_sname...1` == region_data$`1986_sname...8` -identical(region_data$`1986_sname...1`, region_data$`1986_sname...8`) -region_data <- region_data |> select(-`1986_sname...8`) |> rename(`1986_sname` = `1986_sname...1`) -p_load(pinyin, -dplyr) -region_data <- region_data |> select(-`1986_sname...8`) |> rename(`1986_sname` = `1986_sname...1`) -save.image("R/sysdata.rda") -year_from = 1999 -year_to = 2015 -convert_to = "code" -incomplete_name = FALSE -zhixiashi = FALSE -to_dialect = "none" -to_pinyin = FALSE -province = FALSE -data_input <- c("东城区", "梅州市") +pkgbuild::check_build_tools(debug = TRUE) +library(devtools) +detach("package:devtools", unload = TRUE) +install.packages("devtools") diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index 87d60a9..6c40b3f 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -1,9 +1,9 @@ D:/Seafile/WW_research/01_Research/R_package/regioncode/.Rbuildignore="63CD865F" D:/Seafile/WW_research/01_Research/R_package/regioncode/DESCRIPTION="25AF9A25" D:/Seafile/WW_research/01_Research/R_package/regioncode/NAMESPACE="96D5EBCA" +D:/Seafile/WW_research/01_Research/R_package/regioncode/NEWS.md="00FBA817" D:/Seafile/WW_research/01_Research/R_package/regioncode/R/globals.R="56D29EEB" D:/Seafile/WW_research/01_Research/R_package/regioncode/R/regioncode.R="78A4CF23" -D:/Seafile/WW_research/01_Research/R_package/regioncode/R/test.R="9501F140" D:/Seafile/WW_research/01_Research/R_package/regioncode/dev/citylevel2021.R="7C97FCDC" D:/Seafile/WW_research/01_Research/R_package/regioncode/vignettes/regioncode-vignette.Rmd="48AAAEC1" D:/Seafile/WW_research/01_Research/R_package/regioncode/vignettes/s_regioncode.bib="8CF3C239" diff --git a/DESCRIPTION b/DESCRIPTION index 69d922c..90d4238 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,7 +18,6 @@ BugReports: https://github.com/sammo3182/regioncode/issues Depends: R(>= 3.6.0) Imports: - dplyr, pinyin Suggests: rmarkdown, diff --git a/NEWS.md b/NEWS.md index 9a3c3af..a7e3d47 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# 0.2.2 + +- Vectorize the merging functions + # 0.2.1 - Corrected the region codes of Xinjiang diff --git a/R/test.R b/dev/test.R similarity index 100% rename from R/test.R rename to dev/test.R diff --git a/vignettes/regioncode-vignette.html b/vignettes/regioncode-vignette.html index b2b159c..7ac77b6 100644 --- a/vignettes/regioncode-vignette.html +++ b/vignettes/regioncode-vignette.html @@ -12,7 +12,7 @@ - + regioncode: One-Step Solution for Chinese Region Conversions @@ -364,7 +364,7 @@

regioncode: One-Step Solution for Chinese Region Conversions

HU Yue, YE Xinyi

-

2024-03-08

+

2024-05-23

@@ -383,9 +383,10 @@

2024-03-08

Why regioncode?

The Chinese government assigns unique geocodes to each county, city (prefecture), and provincial-level administrative unit. These -“administrative division codes” are consistently adjusted and -updated to align with national and regional development plans (民政部 2022). However, these adjustments may -pose challenges for researchers conducting longitudinal studies or +“administrative division codes” are consistently adjusted +and updated to align with national and regional development plans +(民政部 2022). However, these adjustments +may pose challenges for researchers conducting longitudinal studies or merging geo-based data from different years. For instance, inconsistencies between map data and statistical data can result in erroneous outputs when rendering statistical data on a Chinese map.

@@ -549,8 +550,8 @@

Incomplete Naming of Prefectures

substr(start = 1, stop = 2) fake_incomplete -
##  [1] "济南"   "泰州"   "松江区" "宜昌"   "来宾"   "怀化市" "莆田"   "宜宾"  
-##  [9] "定安"   "襄阳市"
+
##  [1] "济南"   "泰州市" "松江"   "宜昌市" "来宾"   "怀化"   "莆田"   "宜宾市"
+##  [9] "定安"   "襄阳"
# Conversion to full names in 2008
 regioncode(data_input = fake_incomplete, 
            convert_to = "name",
@@ -758,9 +759,9 @@ 

Pinyin

province = TRUE, to_pinyin = TRUE )
-
##             山西                                                    
+
##             <NA>             陕西             内蒙             香港 
 ##        "shan_xi"       "shaan_xi" "inner_mongolia"      "hong_kong" 
-##                  
+##             澳门 
 ##          "macao"
@@ -798,7 +799,7 @@

Provinces

## 5 450000 广西壮族自治区 桂 ## 6 430000 湖南省 湘 ## 7 350000 福建省 闽 -## 8 510000 四川省 蜀 +## 8 510000 四川省 川 ## 9 460000 海南省 琼 ## 10 420000 湖北省 鄂