From 0e710219b746b8904ac8c94a2e5a0f05e0bc26b6 Mon Sep 17 00:00:00 2001
From: Yue Hu <sammo3182@sina.com>
Date: Thu, 23 May 2024 20:32:33 +0800
Subject: [PATCH] fixed #43

---
 .Rhistory                          | 846 ++++++++++++++---------------
 .Rproj.user/shared/notebooks/paths |   2 +-
 DESCRIPTION                        |   1 -
 NEWS.md                            |   4 +
 {R => dev}/test.R                  |   0
 vignettes/regioncode-vignette.html |  21 +-
 6 files changed, 439 insertions(+), 435 deletions(-)
 rename {R => dev}/test.R (100%)

diff --git a/.Rhistory b/.Rhistory
index 7b4b671..ab45ddc 100644
--- a/.Rhistory
+++ b/.Rhistory
@@ -1,220 +1,434 @@
-convert_to = "name",
-incomplete_name = "to",
-province = TRUE
+prov_data <- unique(region_data[, c("prov_code", paste0("199", 8:9, "_nickname"), "area")])
+# Because province nicknames changed in 1999
+year_num <- ifelse(as.numeric(year_from) < 1999, 1998, 1999)
+year_from <- ifelse (is.numeric(data_input[1]), "prov_code", "prov_name")
+year_to <- switch(
+convert_to,
+"name" = "prov_name",
+"code" = "prov_code",
+"area" = "area",
+"nameToabbre" = {
+year_from <- "prov_name"
+paste0(year_num, "_nickname")
+},
+"codeToabbre" = {
+year_from <- "prov_code"
+paste0(year_num, "_nickname")
+},
+"abbreToname" = {
+year_from <- paste0(year_num, "_nickname")
+"prov_name"
+},
+"abbreTocode" = {
+year_from <- paste0(year_num, "_nickname")
+"prov_code"
+},
+"abbreToarea" = {
+year_from <- paste0(year_num, "_nickname")
+"area"
+}
 )
-library(here)
-region_table$prov_name[region_table$prov_sname == "宁夏"]
-load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda")
-region_table$prov_name[region_table$prov_sname == "宁夏"]
-region_table$prov_name[region_table$prov_sname == "宁夏"] <- "宁夏回族自治区"
-save(region_table, corruption, file = here("R", "sysdata.rda"))
-library(regioncode)
-regioncode::regioncode(
-"宁夏回族自治区",
-year_from = 2015,
+ls_index <- c(year_from, year_to)
+}
+?switch
+province = FALSE
+year_from = 1999
+year_to = 2015
+convert_to = "code"
+incomplete_name = FALSE
+zhixiashi = FALSE
+to_dialect = "none"
+to_pinyin = FALSE
+province = FALSE
+# 2 Section of prefectural-level converting
+if (to_dialect != "none") {
+# 2-1 If convert language zone
+year_from <- if (is.numeric(data_input[1]))
+paste0(year_from, "_code")
+else
+paste0(year_from, "_name")
+year_to <- if (to_dialect == "dia_group")
+"pref_language_all"
+else if (to_dialect == "dia_sub_group")
+"dia_sub_language_all"
+ls_index <- c(year_from, year_to)
+} else {
+# 2-2 If not convert language zone
+year_from <- if (is.numeric(data_input[1]))
+paste0(year_from, "_code")
+else
+paste0(year_from, "_name")
+region_data <- region_data[!duplicated(region_data$`2019_code`), ]
+year_to <- switch(
+convert_to,
+"code" = paste0(year_to, "_code"),
+"area" = "area",
+"name" = paste0(year_to, "_name"),
+"rank" = paste0(year_to, "_rank")
+)
+ls_index <- c(year_from, year_to)
+# Using the Municipal codes for within region codes
+if (zhixiashi) {
+region_zhixiashi <- subset(region_data, zhixiashi)
+# Gathering all the needed fields
+region_sname <- region_zhixiashi[grep("_sname$", names(region_zhixiashi))]
+region_name <- region_zhixiashi[grep("_name$", names(region_zhixiashi))]
+region_code <- region_zhixiashi[grep("_code$", names(region_zhixiashi))]
+region_remain <- region_zhixiashi[!grepl(
+"(_code$|_sname$|_name$|language$|_all$|_nickname$|dia_sub_group$|freq$)",
+names(region_zhixiashi)
+)]
+# Replacing prefectural names and codes with provincial ones
+region_name2 <- as.data.frame(matrix(
+rep(region_zhixiashi$prov_name, ncol(region_name)),
+ncol = ncol(region_name)
+))
+names(region_name2) <- names(region_name)
+region_sname2 <- as.data.frame(matrix(
+rep(region_zhixiashi$prov_sname, ncol(region_sname)),
+ncol = ncol(region_sname)
+))
+names(region_sname2) <- names(region_sname)
+region_code2 <- as.data.frame(matrix(
+rep(region_zhixiashi$prov_code, ncol(region_code)),
+ncol = ncol(region_code)
+))
+names(region_code2) <- names(region_code)
+region_zhixiashi <- cbind(region_name2, region_code2, region_sname2, region_remain)
+region_zhixiashi <- unique(region_zhixiashi[, order(names(region_zhixiashi))])
+region_province <- unique(region_data[!grepl("language$|_all$|_nickname$|dia_sub_group$|freq$",
+names(region_data))])
+region_province <- region_province[, order(names(region_province))]
+region_data <- rbind(region_zhixiashi, region_province)
+}
+}
+View(index_municipal)
+data_input <- as.data.frame(data_input)
+names(data_input) <- ls_index[1]
+data_output <- region_data[ls_index]
+data_output <- unique(data_output)
+?merge
+?py
+data_input <- as.data.frame(data_input)
+names(data_input) <- ls_index[1]
+data_output <- unique(region_data[ls_index])
+if (incomplete_name) {
+data_input[[ls_index[1]]] <- substr(data_input[[ls_index[1]]], 1, 2)
+data_output[[ls_index[1]]] <- substr(data_output[[ls_index[1]]], 1, 2)
+}
+# Indexing to find the first match quickly and directly
+index <- match(data_input[[ls_index[1]]], data_output[[ls_index[1]]])
+data_output <- data_output[index, year_to]
+class(data_output)
+data_input <- as.data.frame(data_input)
+names(data_input) <- ls_index[1]
+data_output <- unique(region_data[ls_index])
+if (incomplete_name) {
+data_input[[ls_index[1]]] <- substr(data_input[[ls_index[1]]], 1, 2)
+data_output[[ls_index[1]]] <- substr(data_output[[ls_index[1]]], 1, 2)
+}
+# Indexing to find the first match quickly and directly
+index <- match(data_input[[ls_index[1]]], data_output[[ls_index[1]]])
+data_output <- data_output[index, year_to, drop = TRUE]
+regioncode <- function(data_input,
+year_from = 1999,
 year_to = 2015,
-convert_to = "name",
-incomplete_name = "to",
-province = TRUE
+convert_to = "code",
+incomplete_name = FALSE,
+zhixiashi = FALSE,
+to_dialect = "none",
+to_pinyin = FALSE,
+province = FALSE) {
+validate_input(data_input,
+year_from,
+province,
+to_dialect,
+convert_to,
+zhixiashi,
+to_pinyin)
+if (province) {
+zhixiashi <- FALSE
+# 1 Section of province-level converting
+if (to_dialect != "none") {
+# 1-1 If convert language zone
+year_from <- if (is.numeric(data_input[1]))
+"prov_code"
+else
+"prov_name"
+ls_index <- if (to_dialect == "dia_super") {
+year_to <- "prov_language"
+c(year_from, year_to)
+}
+} else {
+# 1-2 If not convert language zone
+prov_data <- unique(region_data[, c("prov_code", paste0("199", 8:9, "_nickname"), "area")])
+# Because province nicknames changed in 1999
+year_num <- ifelse(as.numeric(year_from) < 1999, 1998, 1999)
+year_from <- if (is.numeric(data_input[1]))
+"prov_code"
+else
+"prov_name"
+year_to <- switch(
+convert_to,
+"name" = "prov_name",
+"code" = "prov_code",
+"area" = "area",
+"nameToabbre" = {
+year_from <- "prov_name"
+paste0(year_num, "_nickname")
+},
+"codeToabbre" = {
+year_from <- "prov_code"
+paste0(year_num, "_nickname")
+},
+"abbreToname" = {
+year_from <- paste0(year_num, "_nickname")
+"prov_name"
+},
+"abbreTocode" = {
+year_from <- paste0(year_num, "_nickname")
+"prov_code"
+},
+"abbreToarea" = {
+year_from <- paste0(year_num, "_nickname")
+"area"
+}
 )
+ls_index <- c(year_from, year_to)
+}
+} else {
+# 2 Section of prefectural-level converting
+if (to_dialect != "none") {
+# 2-1 If convert language zone
+year_from <- if (is.numeric(data_input[1]))
+paste0(year_from, "_code")
+else
+paste0(year_from, "_name")
+year_to <- if (to_dialect == "dia_group")
+"pref_language_all"
+else if (to_dialect == "dia_sub_group")
+"dia_sub_language_all"
+ls_index <- c(year_from, year_to)
+} else {
+# 2-2 If not convert language zone
+year_from <- if (is.numeric(data_input[1]))
+paste0(year_from, "_code")
+else
+paste0(year_from, "_name")
+region_data <- region_data[!duplicated(region_data$`2019_code`), ]
+year_to <- switch(
+convert_to,
+"code" = paste0(year_to, "_code"),
+"area" = "area",
+"name" = paste0(year_to, "_name"),
+"rank" = paste0(year_to, "_rank")
+)
+ls_index <- c(year_from, year_to)
+# Using the Municipal codes for within region codes
+if (zhixiashi) {
+region_zhixiashi <- subset(region_data, zhixiashi)
+# Gathering all the needed fields
+region_sname <- region_zhixiashi[grep("_sname$", names(region_zhixiashi))]
+region_name <- region_zhixiashi[grep("_name$", names(region_zhixiashi))]
+region_code <- region_zhixiashi[grep("_code$", names(region_zhixiashi))]
+region_remain <- region_zhixiashi[!grepl(
+"(_code$|_sname$|_name$|language$|_all$|_nickname$|dia_sub_group$|freq$)",names(region_zhixiashi))]
+# Replacing prefectural names and codes with provincial ones
+region_name2 <- as.data.frame(matrix(
+rep(region_zhixiashi$prov_name, ncol(region_name)),
+ncol = ncol(region_name)
+))
+names(region_name2) <- names(region_name)
+region_sname2 <- as.data.frame(matrix(
+rep(region_zhixiashi$prov_sname, ncol(region_sname)),
+ncol = ncol(region_sname)
+))
+names(region_sname2) <- names(region_sname)
+region_code2 <- as.data.frame(matrix(
+rep(region_zhixiashi$prov_code, ncol(region_code)),
+ncol = ncol(region_code)
+))
+names(region_code2) <- names(region_code)
+region_zhixiashi <- cbind(region_name2,
+region_code2,
+region_sname2,
+region_remain)
+region_zhixiashi <- unique(region_zhixiashi[, order(names(region_zhixiashi))])
+region_province <- unique(region_data[!grepl("language$|_all$|_nickname$|dia_sub_group$|freq$", names(region_data))])
+region_province <- region_province[, order(names(region_province))]
+region_data <- rbind(region_zhixiashi, region_province)
+}
+}
+}
+# Convert the input to a data.frame for later merging
+data_input <- as.data.frame(data_input)
+names(data_input) <- ls_index[1]
+data_output <- unique(region_data[ls_index])
+if (incomplete_name) {
+data_input[[ls_index[1]]] <- substr(data_input[[ls_index[1]]], 1, 2)
+data_output[[ls_index[1]]] <- substr(data_output[[ls_index[1]]], 1, 2)
+}
+# Indexing to find the first match quickly and directly
+index <- match(data_input[[ls_index[1]]], data_output[[ls_index[1]]])
+data_output <- data_output[index, year_to, drop = TRUE]
+# Because '2pinyin' can not be used as a variable name
+if (to_pinyin) {
+# Predefined mapping for special cases with Chinese characters
+special_cases <- c(
+"\u9655\u897f" = "shaan_xi",
+"\u5185\u8499" = "inner_mongolia",
+"\u897f\u85cf" = "tibet",
+"\u6fb3\u95e8" = "macao",
+"\u9999\u6e2f" = "hong_kong"
+)
+# Extract the first two characters of each entry in data_output
+first_two_chars <- substr(data_output, 1, 2)
+# Apply special cases mapping
+special_pinyin <- special_cases[first_two_chars]
+# Use py function where no special case is matched
+data_output <- ifelse(is.na(special_pinyin),
+py(
+char = first_two_chars,
+dic = pydic(method = "toneless", dic = "pinyin2")
+),
+special_pinyin)
+}
+return(data_output)
+}
+# Function to validate input
+validate_input <- function(data_input,
+year_from,
+province,
+to_dialect,
+convert_to,
+zhixiashi,
+to_pinyin) {
+# Simplified data type checks
+if (!(is.character(data_input[1]) || is.numeric(data_input[1]))) {
+stop("Invalid input: only region names or division codes are valid.")
+}
+# Simplify type and value checks
+if (!is.numeric(year_from) ||
+!all(c(province, zhixiashi, to_pinyin) %in% c(TRUE, FALSE))) {
+stop("Invalid input: Parameters must be of the correct data type.")
+}
+# Consolidated conversion checks
+valid_conversions <- if (province && to_dialect == "none") {
+c(
+"name",
+"code",
+"area",
+"rank",
+"nameToabbre",
+"codeToabbre",
+"abbreToname",
+"abbreTocode",
+"abbreToarea"
+)
+} else {
+c("name", "code", "area", "rank")
+}
+if (!(convert_to %in% valid_conversions)) {
+stop("Invalid input: please choose a valid converting method.")
+}
+# Specific scenario checks
+if (to_pinyin && convert_to == "code" && to_dialect == "none") {
+stop("Invalid input: cannot translate administrative codes to pinyin.")
+}
+}
+regioncode(c("东城区", "梅州市"))
+load("R/sysdata.rda")
+p_load(pinyin)
+library(pacman)
+p_load(pinyin)
+regioncode(c("东城区", "梅州市"))
+regioncode(c("北京市", "天津市"), zhixiashi = TRUE)
+regioncode(c("河北省", "河南省"), province = TRUE)
 library(regioncode)
 library(regioncode)
-library(regioncode)
-load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda")
-View(region_table)
-names(region_data)
-names(region_table)
-load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda")
-library(regioncode)
-load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda")
-names(region_data)
-save(corruption, region_data, file = here::here("R", "sysdata.rda"))
-load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda")
-knitr::opts_chunk$set(message = FALSE, warning = FALSE)
-if(!require(regioncode)) install.packages("regioncode")
-library(regioncode)
 data("corruption")
-# Original 2019 version
-corruption$prefecture_id
-# 1999 version
-regioncode(data_input = corruption$prefecture_id,
-convert_to = "code", # default set
-year_from = 2019,
-year_to = 1989)
-# The original name
-corruption$prefecture
-# Codes to name
+# Conversion to the 1989 version
 regioncode(data_input = corruption$prefecture_id,
-convert_to = "name",
-year_from = 2019,
-year_to = 1989)
-# Name to codes of the same year
-regioncode(data_input = corruption$prefecture,
-convert_to = "code",
-year_from = 2019,
-year_to = 2019)
-# Name to name of a different year
-regioncode(data_input = corruption$prefecture,
-convert_to = "name",
+convert_to = "code", # default setting
 year_from = 2019,
 year_to = 1989)
-tibble(
-preference = corruption$prefecture,
-rank = regioncode(data_input = corruption$prefecture,
-year_from = 2011,
-year_to = 1989,
-convert_to="rank")
-)
-tidyr::tibble(
-preference = corruption$prefecture,
-rank = regioncode(data_input = corruption$prefecture,
-year_from = 2011,
-year_to = 1989,
-convert_to="rank")
-)
-load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda")
-region_data |> names()
-knitr::opts_chunk$set(message = FALSE, warning = FALSE)
-if(!require(regioncode)) install.packages("regioncode")
+# Comparison
 tibble(
 code2019 = corruption$prefecture_id,
 code1989 = regioncode(data_input = corruption$prefecture_id,
-convert_to = "code", # default set
+convert_to = "code", # default setting
 year_from = 2019,
 year_to = 1989),
-name1989 = regioncode(data_input = corruption$prefecture_id,
-convert_to = "name", # default set
-year_from = 2019,
-year_to = 1989)
-)
-library(tidyverse)
-tibble(
-code2019 = corruption$prefecture_id,
-code1989 = regioncode(data_input = corruption$prefecture_id,
-convert_to = "code", # default set
+name2019 = regioncode(data_input = corruption$prefecture_id,
+convert_to = "name", # default setting
 year_from = 2019,
-year_to = 1989),
+year_to = 2019),
 name1989 = regioncode(data_input = corruption$prefecture_id,
-convert_to = "name", # default set
+convert_to = "name", # default setting
 year_from = 2019,
 year_to = 1989)
 )
-temp2 <- corruption$prefecture_id
-# 1989 version
-temp <- regioncode(data_input = corruption$prefecture_id,
-convert_to = "code", # default set
-year_from = 2019,
-year_to = 1989)
-View(region_data)
-usethis::git_vaccinate() #Adds .DS_Store, .Rproj.user, .Rdata, .Rhistory, and .httr-oauth to your global (a.k.a. user-level) .gitignore. This is good practice as it decreases the chance that you will accidentally leak credentials to GitHub.
-library(regioncode)
 knitr::opts_chunk$set(message = FALSE, warning = FALSE)
 if(!require(regioncode)) install.packages("regioncode")
 library(regioncode)
-library(tidyverse)
-library(regioncode)
-data("corruption")
-# Original 2019 version
-corruption$prefecture_id
-# 1989 version
-temp <- regioncode(data_input = corruption$prefecture_id,
-convert_to = "code", # default set
-year_from = 2019,
-year_to = 1989)
-# tibble(
-#   code2019 = corruption$prefecture_id,
-#   code1989 = regioncode(data_input = corruption$prefecture_id,
-#            convert_to = "code", # default set
-#            year_from = 2019,
-#            year_to = 1989),
-#   name1989 = regioncode(data_input = corruption$prefecture_id,
-#            convert_to = "name", # default set
-#            year_from = 2019,
-#            year_to = 1989)
-# )
-load("D:/Seafile/WW_research/01_Research/R_package/regioncode/R/sysdata.rda")
-# Original 2019 version
-corruption$prefecture_id
-# 1989 version
-temp <- regioncode(data_input = corruption$prefecture_id,
-convert_to = "code", # default set
+library(dplyr)
+# Conversion to the 1989 version
+regioncode(data_input = corruption$prefecture_id,
+convert_to = "code", # default setting
 year_from = 2019,
 year_to = 1989)
+# Comparison
 tibble(
 code2019 = corruption$prefecture_id,
 code1989 = regioncode(data_input = corruption$prefecture_id,
-convert_to = "code", # default set
+convert_to = "code", # default setting
 year_from = 2019,
 year_to = 1989),
+name2019 = regioncode(data_input = corruption$prefecture_id,
+convert_to = "name", # default setting
+year_from = 2019,
+year_to = 2019),
 name1989 = regioncode(data_input = corruption$prefecture_id,
-convert_to = "name", # default set
+convert_to = "name", # default setting
 year_from = 2019,
 year_to = 1989)
 )
+# Original name
 tibble(
-code2019 = corruption$prefecture_id,
-code1989 = regioncode(data_input = corruption$prefecture_id,
-convert_to = "code", # default set
+id = corruption$prefecture_id,
+name = corruption$prefecture
+)
+# Codes to name
+regioncode(data_input = corruption$prefecture_id,
+convert_to = "name",
 year_from = 2019,
-year_to = 1989),
-name2019 = regioncode(data_input = corruption$prefecture_id,
-convert_to = "name", # default set
+year_to = 1989)
+# Name to codes of the same year
+regioncode(data_input = corruption$prefecture,
+convert_to = "code",
 year_from = 2019,
-year_to = 2019),
-name1989 = regioncode(data_input = corruption$prefecture_id,
-convert_to = "name", # default set
+year_to = 2019)
+# Name to name of a different year
+regioncode(data_input = corruption$prefecture,
+convert_to = "name",
 year_from = 2019,
 year_to = 1989)
-)
-?regioncode
 # Original full names
 corruption$prefecture
-# Convert to incomplete names in 1989
-fake_incomplete <- regioncode(data_input = corruption$prefecture,
-convert_to = "name",
-year_from = 2019,
-year_to = 1989,
-incomplete_name = "to")
+fake_incomplete <- corruption$prefecture
+index_incomplete <- sample(seq(length(corruption$prefecture)), 7)
+fake_incomplete[index_incomplete] <- fake_incomplete[index_incomplete] |>
+substr(start = 1, stop = 2)
 fake_incomplete
-# Convert to full names in 2008
-fake_full <- regioncode(data_input = fake_incomplete,
+# Conversion to full names in 2008
+regioncode(data_input = fake_incomplete,
 convert_to = "name",
-year_from = 1989,
-year_to = 2008,
-incomplete_name = "to")
-fake_full
-names_municipality <- c("北京", "天津", "上海", "重庆")
-names_municipality <- c("北京", "天津", "上海", "重庆")
-regioncode(data_input = names_municipality,
-year_from = 2019,
-year_to = 2019,
-convert_to = "code",
-zhixiashi = FALSE)
-regioncode(data_input = names_municipality,
-year_from = 2019,
-year_to = 2019,
-convert_to = "code",
-zhixiashi = TRUE)
-regioncode(data_input = names_municipality,
-year_from = 2019,
-year_to = 2019,
-convert_to = "code",
-zhixiashi = TRUE)
-names_municipality <- c("北京", "海淀区", "上海", "静安区")
-regioncode(data_input = names_municipality,
 year_from = 2019,
-year_to = 2019,
-convert_to = "code",
-zhixiashi = FALSE)
-regioncode(data_input = names_municipality,
-year_from = 2019,
-year_to = 2019,
-convert_to = "code",
-zhixiashi = TRUE)
-library(reprex)
-names_municipality <- c("北京", # Beijing, a municipality
+year_to = 2008,
+incomplete_name = TRUE)
+names_municipality <- c("北京市", # Beijing, a municipality
 "海淀区", # A district of Beijing
-"上海", # Shanghai, a municipality
+"上海市", # Shanghai, a municipality
 "静安区", # A district of Shanghai
 "济南市") # A prefecture of Shandong
 # When `zhixiashi` is FALSE, only the districts are recognized
@@ -223,290 +437,76 @@ year_from = 2019,
 year_to = 2019,
 convert_to = "code",
 zhixiashi = FALSE)
-# When `zhixiashi` is TRUE, muncipalities are
+# When `zhixiashi` is TRUE, municipalities are recognized
 regioncode(data_input = names_municipality,
 year_from = 2019,
 year_to = 2019,
 convert_to = "code",
 zhixiashi = TRUE)
-reprex(
-{
-regioncode(data_input = names_municipality,
-year_from = 2019,
-year_to = 2019,
-convert_to = "code",
-zhixiashi = FALSE)
-# When `zhixiashi` is TRUE, muncipalities are
-regioncode(data_input = names_municipality,
-year_from = 2019,
-year_to = 2019,
-convert_to = "code",
-zhixiashi = TRUE)
-}
-)
-reprex(
-{
-names_municipality <- c("北京", # Beijing, a municipality
-"海淀区", # A district of Beijing
-"上海", # Shanghai, a municipality
-"静安区", # A district of Shanghai
-"济南市") # A prefecture of Shandong
-regioncode(data_input = names_municipality,
-year_from = 2019,
-year_to = 2019,
-convert_to = "code",
-zhixiashi = FALSE)
-# When `zhixiashi` is TRUE, muncipalities are
-regioncode(data_input = names_municipality,
-year_from = 2019,
-year_to = 2019,
-convert_to = "code",
-zhixiashi = TRUE)
-}
-)
-names_municipality <- c("北京", # Beijing, a municipality
-"海淀区", # A district of Beijing
-"上海", # Shanghai, a municipality
-"静安区", # A district of Shanghai
-"济南市") # A prefecture of Shandong
-regioncode(data_input = names_municipality,
-year_from = 2019,
-year_to = 2019,
-convert_to = "code",
-zhixiashi = FALSE)
-reprex(
-{
-library(regioncode)
-names_municipality <- c("北京", # Beijing, a municipality
-"海淀区", # A district of Beijing
-"上海", # Shanghai, a municipality
-"静安区", # A district of Shanghai
-"济南市") # A prefecture of Shandong
-regioncode(data_input = names_municipality,
-year_from = 2019,
-year_to = 2019,
-convert_to = "code",
-zhixiashi = FALSE)
-# When `zhixiashi` is TRUE, muncipalities are
-regioncode(data_input = names_municipality,
-year_from = 2019,
-year_to = 2019,
-convert_to = "code",
-zhixiashi = TRUE)
-}
-)
-?reprex
 tibble(
-province = corruption$province_id,
-prov_name = regioncode(data_input = corruption$province_id,
-convert_to = "name",
+city = corruption$prefecture,
+rank1989 = regioncode(data_input = corruption$prefecture,
 year_from = 2019,
 year_to = 1989,
-province = TRUE),
-prov_abbre = regioncode(data_input = corruption$province_id,
-convert_to = "codeToabbre",
+convert_to="rank"),
+rank2014 = regioncode(data_input = corruption$prefecture,
 year_from = 2019,
-year_to = 1989,
-province = TRUE)
+year_to = 2014,
+convert_to = "rank")
 )
-source("~/.active-rstudio-document", echo=TRUE)
 tibble(
-province = corruption$province_id,
-prov_name = regioncode(data_input = corruption$province_id,
-convert_to = "name",
-year_from = 2019,
-year_to = 2019,
-province = TRUE),
-prov_abbre = regioncode(data_input = corruption$province_id,
-convert_to = "codeToabbre",
-year_from = 2019,
-year_to = 1989,
-province = TRUE)
-)
-reprex(
-{
-library(regioncode)
-regioncode(data_input = corruption$province_id,
-convert_to = "name",
-year_from = 2019,
-year_to = 1989,
-province = TRUE)
-}
-)
-regioncode(data_input = corruption$prefecture,
+city = corruption$prefecture,
+cityPY = regioncode(data_input = corruption$prefecture,
 year_from = 2019,
 year_to = 1989,
 convert_to = "name",
 to_pinyin = TRUE
-)
-regioncode(data_input = corruption$prefecture,
-year_from = 2019,
-year_to = 1989,
-convert_to = "name",
-incomplete_name = "to",
-to_pinyin = TRUE
-)
-regioncode(data_input = corruption$prefecture,
+),
+areaPY = regioncode(data_input = corruption$prefecture,
 year_from = 2019,
 year_to = 1989,
 convert_to = "area",
 to_pinyin = TRUE
 )
-regioncode(data_input = c("山西", "陕西", "内蒙古", "香港"),
-year_from = 2019,
-year_to = 2008,
-convert_to = "name",
-incomplete_name = "from",
-to_pinyin = TRUE
-)
-regioncode(data_input = c("山西省", "陕西省", "内蒙古自治区"),
-year_from = 2019,
-year_to = 2008,
-convert_to = "name",
-to_pinyin = TRUE
-)
-reprex(
-{
-library(regioncode)
-regioncode(data_input = c("山西", "陕西", "内蒙古", "香港"),
-year_from = 2019,
-year_to = 2008,
-convert_to = "name",
-incomplete_name = "from",
-to_pinyin = TRUE
-)
-}
-)
-# Regions with special spelling
-regioncode(data_input = c("山西", "陕西", "内蒙古", "香港", "澳门"),
-year_from = 2019,
-year_to = 2008,
-convert_to = "name",
-incomplete_name = "from",
-province = TRUE,
-to_pinyin = TRUE
 )
 # Regions with special spelling
 regioncode(data_input = c("山西", "陕西", "内蒙古", "香港", "澳门"),
 year_from = 2019,
 year_to = 2008,
 convert_to = "name",
-incomplete_name = "both",
+incomplete_name = TRUE,
 province = TRUE,
 to_pinyin = TRUE
 )
-library(regioncode)
-library(regioncode)
-knitr::opts_chunk$set(message = FALSE, warning = FALSE)
-if(!require(regioncode)) install.packages("regioncode")
-library(regioncode)
-library(tidyverse)
-library(regioncode)
-data("corruption")
-# Conversion to the 1989 version
-regioncode(data_input = corruption$prefecture_id,
-convert_to = "code", # default setting
-year_from = 2019,
-year_to = 1989)
-# Comparison
 tibble(
-code2019 = corruption$prefecture_id,
-code1989 = regioncode(data_input = corruption$prefecture_id,
-convert_to = "code", # default setting
-year_from = 2019,
-year_to = 1989),
-name2019 = regioncode(data_input = corruption$prefecture_id,
-convert_to = "name", # default setting
+province = corruption$province_id,
+prov_name = regioncode(data_input = corruption$province_id,
+convert_to = "name",
 year_from = 2019,
-year_to = 2019),
-name1989 = regioncode(data_input = corruption$prefecture_id,
-convert_to = "name", # default setting
+year_to = 1989,
+province = TRUE),
+prov_abbre = regioncode(data_input = corruption$province_id,
+convert_to = "codeToabbre",
 year_from = 2019,
-year_to = 1989)
+year_to = 1989,
+province = TRUE)
 )
-# Conversion to the 1989 version
-regioncode(data_input = corruption$prefecture_id,
-convert_to = "code", # default setting
-year_from = 2019,
-year_to = 1989)
-corruption$prefecture_id
-regioncode(data_input = corruption$prefecture_id,
-convert_to = "code", # default setting
-year_from = 2019,
-year_to = 1989)
-regioncode(data_input = corruption$prefecture_id,
-convert_to = "name", # default setting
+regioncode(data_input = corruption$prefecture,
 year_from = 2019,
-year_to = 2019)
-regioncode(data_input = corruption$prefecture_id,
-convert_to = "name", # default setting
+year_to = 1989,
+convert_to = "area")
+tibble(
+city = corruption$prefecture,
+dialectGroup = regioncode(data_input = corruption$prefecture,
 year_from = 2019,
-year_to = 1989)
-?switch
-?case_when
-`%>%`
-?`%>%`
-library(regioncode)
-?bind_rows
-library(regioncode)
-knitr::opts_chunk$set(message = FALSE, warning = FALSE)
-if(!require(regioncode)) install.packages("regioncode")
-library(regioncode)
-library(tidyverse)
-# Regions with special spelling
-regioncode(data_input = c("山西", "陕西", "内蒙古", "香港", "澳门"),
+year_to = 1989,
+to_dialect = "dia_group"),
+dialectSubGroup = regioncode(data_input = corruption$prefecture,
 year_from = 2019,
-year_to = 2008,
-convert_to = "name",
-incomplete_name = TRUE,
-province = TRUE,
-to_pinyin = TRUE
+year_to = 1989,
+to_dialect = "dia_sub_group")
 )
-?ends_with
-?pull
-library(pkgdown)
-build_site()
-library(regioncode)
-devtools::install(build_vignettes = TRUE)
-pkgdown::build_site()
-devtools::install(build_vignettes = TRUE)
-pkgdown::build_site()
-devtools::release()
-devtools::install(build_vignettes = TRUE)
-pkgdown::build_site()
-devtools::release()
-prefecture_gini <- readRDS("D:/Desktop/prefecture_gini.rds")
-library(tidyverse)
-select(prefecture_gini, year, city_code_a2_2, overall_city_80)
-select(prefecture_gini, year, city_code_a2_2, overall_city_80) |> rio::export("nies_gini.xlsx")
-devtools::install(build_vignettes = TRUE)
-devtools::check()
-library(regioncode)
-?devtools::check()
-devtools::check(manual = TRUE)
-?match
-if (!requireNamespace("pacman", quietly = TRUE)) {
-install.packages("pacman")
-}
-library(pacman)
-p_load(pinyin)
-load("R/sysdata.rda")
-names(region_data)
-identical(region_data$1986_sname...1, region_data$1986_sname...8)
-region_data$1986_sname...1 == region_data$1986_sname...8
-region_data$`1986_sname...1` == region_data$`1986_sname...8`
-identical(region_data$`1986_sname...1`, region_data$`1986_sname...8`)
-region_data <- region_data |> select(-`1986_sname...8`) |> rename(`1986_sname` = `1986_sname...1`)
-p_load(pinyin,
-dplyr)
-region_data <- region_data |> select(-`1986_sname...8`) |> rename(`1986_sname` = `1986_sname...1`)
-save.image("R/sysdata.rda")
-year_from = 1999
-year_to = 2015
-convert_to = "code"
-incomplete_name = FALSE
-zhixiashi = FALSE
-to_dialect = "none"
-to_pinyin = FALSE
-province = FALSE
-data_input <- c("东城区", "梅州市")
+pkgbuild::check_build_tools(debug = TRUE)
+library(devtools)
+detach("package:devtools", unload = TRUE)
+install.packages("devtools")
diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths
index 87d60a9..6c40b3f 100644
--- a/.Rproj.user/shared/notebooks/paths
+++ b/.Rproj.user/shared/notebooks/paths
@@ -1,9 +1,9 @@
 D:/Seafile/WW_research/01_Research/R_package/regioncode/.Rbuildignore="63CD865F"
 D:/Seafile/WW_research/01_Research/R_package/regioncode/DESCRIPTION="25AF9A25"
 D:/Seafile/WW_research/01_Research/R_package/regioncode/NAMESPACE="96D5EBCA"
+D:/Seafile/WW_research/01_Research/R_package/regioncode/NEWS.md="00FBA817"
 D:/Seafile/WW_research/01_Research/R_package/regioncode/R/globals.R="56D29EEB"
 D:/Seafile/WW_research/01_Research/R_package/regioncode/R/regioncode.R="78A4CF23"
-D:/Seafile/WW_research/01_Research/R_package/regioncode/R/test.R="9501F140"
 D:/Seafile/WW_research/01_Research/R_package/regioncode/dev/citylevel2021.R="7C97FCDC"
 D:/Seafile/WW_research/01_Research/R_package/regioncode/vignettes/regioncode-vignette.Rmd="48AAAEC1"
 D:/Seafile/WW_research/01_Research/R_package/regioncode/vignettes/s_regioncode.bib="8CF3C239"
diff --git a/DESCRIPTION b/DESCRIPTION
index 69d922c..90d4238 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -18,7 +18,6 @@ BugReports: https://github.com/sammo3182/regioncode/issues
 Depends:
   R(>= 3.6.0)
 Imports:
-  dplyr,
   pinyin
 Suggests:
     rmarkdown,
diff --git a/NEWS.md b/NEWS.md
index 9a3c3af..a7e3d47 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+# 0.2.2
+
+- Vectorize the merging functions
+
 # 0.2.1
 
 - Corrected the region codes of Xinjiang
diff --git a/R/test.R b/dev/test.R
similarity index 100%
rename from R/test.R
rename to dev/test.R
diff --git a/vignettes/regioncode-vignette.html b/vignettes/regioncode-vignette.html
index b2b159c..7ac77b6 100644
--- a/vignettes/regioncode-vignette.html
+++ b/vignettes/regioncode-vignette.html
@@ -12,7 +12,7 @@
 
 <meta name="author" content="HU Yue, YE Xinyi" />
 
-<meta name="date" content="2024-03-08" />
+<meta name="date" content="2024-05-23" />
 
 <title>regioncode: One-Step Solution for Chinese Region Conversions</title>
 
@@ -364,7 +364,7 @@
 <h1 class="title toc-ignore">regioncode: One-Step Solution for Chinese
 Region Conversions</h1>
 <h4 class="author">HU Yue, YE Xinyi</h4>
-<h4 class="date">2024-03-08</h4>
+<h4 class="date">2024-05-23</h4>
 
 
 
@@ -383,9 +383,10 @@ <h4 class="date">2024-03-08</h4>
 <h1>Why <code>regioncode</code>?</h1>
 <p>The Chinese government assigns unique geocodes to each county, city
 (prefecture), and provincial-level administrative unit. These
-“administrative division codes” are consistently <a href="https://www.mca.gov.cn/article/sj/xzqh/1980/">adjusted and
-updated</a> to align with national and regional development plans <span class="citation">(民政部 2022)</span>. However, these adjustments may
-pose challenges for researchers conducting longitudinal studies or
+“administrative division codes” are consistently <a href="https://www.mca.gov.cn/mzsj/xzqh/2022/202202xzqh.html">adjusted
+and updated</a> to align with national and regional development plans
+<span class="citation">(民政部 2022)</span>. However, these adjustments
+may pose challenges for researchers conducting longitudinal studies or
 merging geo-based data from different years. For instance,
 inconsistencies between map data and statistical data can result in
 erroneous outputs when rendering statistical data on a Chinese map.</p>
@@ -549,8 +550,8 @@ <h2>Incomplete Naming of Prefectures</h2>
 <span id="cb15-6"><a href="#cb15-6" tabindex="-1"></a>  <span class="fu">substr</span>(<span class="at">start =</span> <span class="dv">1</span>, <span class="at">stop =</span> <span class="dv">2</span>)</span>
 <span id="cb15-7"><a href="#cb15-7" tabindex="-1"></a></span>
 <span id="cb15-8"><a href="#cb15-8" tabindex="-1"></a>fake_incomplete</span></code></pre></div>
-<pre><code>##  [1] &quot;济南&quot;   &quot;泰州&quot;   &quot;松江区&quot; &quot;宜昌&quot;   &quot;来宾&quot;   &quot;怀化市&quot; &quot;莆田&quot;   &quot;宜宾&quot;  
-##  [9] &quot;定安&quot;   &quot;襄阳市&quot;</code></pre>
+<pre><code>##  [1] &quot;济南&quot;   &quot;泰州市&quot; &quot;松江&quot;   &quot;宜昌市&quot; &quot;来宾&quot;   &quot;怀化&quot;   &quot;莆田&quot;   &quot;宜宾市&quot;
+##  [9] &quot;定安&quot;   &quot;襄阳&quot;</code></pre>
 <div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" tabindex="-1"></a><span class="co"># Conversion to full names in 2008</span></span>
 <span id="cb17-2"><a href="#cb17-2" tabindex="-1"></a><span class="fu">regioncode</span>(<span class="at">data_input =</span> fake_incomplete, </span>
 <span id="cb17-3"><a href="#cb17-3" tabindex="-1"></a>           <span class="at">convert_to =</span> <span class="st">&quot;name&quot;</span>,</span>
@@ -758,9 +759,9 @@ <h2>Pinyin</h2>
 <span id="cb27-7"><a href="#cb27-7" tabindex="-1"></a>           <span class="at">province =</span> <span class="cn">TRUE</span>,</span>
 <span id="cb27-8"><a href="#cb27-8" tabindex="-1"></a>           <span class="at">to_pinyin =</span> <span class="cn">TRUE</span></span>
 <span id="cb27-9"><a href="#cb27-9" tabindex="-1"></a>           )</span></code></pre></div>
-<pre><code>##             山西                                                    
+<pre><code>##             &lt;NA&gt;             陕西             内蒙             香港 
 ##        &quot;shan_xi&quot;       &quot;shaan_xi&quot; &quot;inner_mongolia&quot;      &quot;hong_kong&quot; 
-##                  
+##             澳门 
 ##          &quot;macao&quot;</code></pre>
 </div>
 <div id="provinces" class="section level2">
@@ -798,7 +799,7 @@ <h2>Provinces</h2>
 ##  5   450000 广西壮族自治区 桂        
 ##  6   430000 湖南省         湘        
 ##  7   350000 福建省         闽        
-##  8   510000 四川省         蜀        
+##  8   510000 四川省         川        
 ##  9   460000 海南省         琼        
 ## 10   420000 湖北省         鄂</code></pre>
 </div>