Always read and write in UTF-8

* Helpers read_lines and write_lines do the right thing
* readLines() and writeLines() throw errors to prevent accidental re-use in the future
* Warn if package encoding is not UTF-8

Fixes #564. Fixes #592
r-lib · Aug 17, 2017 · c5f33cb · c5f33cb
1 parent f4a171d
commit c5f33cb
Show file tree

Hide file tree

Showing 18 changed files with 91 additions and 47 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -38,6 +38,7 @@ Suggests:
 LinkingTo: 
     Rcpp
 VignetteBuilder: knitr
+Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.0.1.9000
 Remotes: 

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,9 @@
 # roxygen2 6.0.1.9000
 
+* roxygen2 now always reads and writes using UTF-8 encoding. If used with a
+  package that does not have `Encoding: UTF-8` in the DESCRIPTION, you'll
+  now get a warning (#564, #592).
+
 * Usage for data objects now correctly generated, avoiding double escaping
   other components of usage (#562).
 

diff --git a/R/enc.R b/R/enc.R
diff --git a/R/parse.R b/R/parse.R
@@ -4,15 +4,15 @@ parse_package <- function(base_path, load_code, registry, global_options = list(
 
   files <- package_files(base_path)
   parsed <- lapply(files, parse_blocks, env = env, registry = registry,
-                   global_options = global_options, fileEncoding = desc$Encoding %||% "UTF-8")
+                   global_options = global_options)
   blocks <- unlist(parsed, recursive = FALSE)
 
   list(env = env, blocks = blocks)
 }
 
 parse_text <- function(text, registry = default_tags(), global_options = list()) {
   file <- tempfile()
-  writeLines(text, file)
+  write_lines(text, file)
   on.exit(unlink(file))
 
   env <- new.env(parent = parent.env(globalenv()))
@@ -25,9 +25,9 @@ parse_text <- function(text, registry = default_tags(), global_options = list())
   list(env = env, blocks = blocks)
 }
 
-parse_blocks <- function(file, env, registry, global_options = list(), fileEncoding = "UTF-8") {
+parse_blocks <- function(file, env, registry, global_options = list()) {
 
-  lines <- read_lines_enc(file, file_encoding = fileEncoding)
+  lines <- read_lines(file)
   parsed <- parse(text = lines, keep.source = TRUE, srcfile = srcfilecopy(file, lines, isFile = TRUE))
   if (length(parsed) == 0) return()
 

diff --git a/R/rd.R b/R/rd.R
@@ -345,7 +345,7 @@ topic_add_examples <- function(topic, block, base_path) {
       next
     }
 
-    code <- readLines(path)
+    code <- read_lines(path)
     examples <- escape_examples(code)
 
     topic$add_simple_field("examples", examples)

diff --git a/R/roxygenize.R b/R/roxygenize.R
@@ -43,6 +43,11 @@ roxygenize <- function(package.dir = ".",
   dir.create(man_path, recursive = TRUE, showWarnings = FALSE)
   update_roxygen_version(base_path)
 
+  encoding <- desc::desc_get("Encoding", file = base_path)[[1]]
+  if (!identical(encoding, "UTF-8")) {
+    warning("roxygen2 requires Encoding: UTF-8", call. = FALSE)
+  }
+
   options <- load_options(base_path)
   roclets <- roclets %||% options$roclets
 

diff --git a/R/safety.R b/R/safety.R
@@ -14,17 +14,17 @@ first_time <- function(path) {
 made_by_roxygen <- function(path) {
   if (!file.exists(path)) return(TRUE)
 
-  first <- readLines(path, n = 1)
+  first <- read_lines(path, n = 1)
   check_made_by(first)
 }
 
 add_made_by_roxygen <- function(path, comment) {
   if (!file.exists(path)) stop("Can't find ", path, call. = FALSE)
 
-  lines <- readLines(path, warn = FALSE)
+  lines <- read_lines(path)
   if (check_made_by(lines[1])) return()
 
-  writeLines(c(made_by(comment), lines), path)
+  write_lines(c(made_by(comment), lines), path)
 }
 
 check_made_by <- function(first) {

diff --git a/R/utils-io.R b/R/utils-io.R
@@ -0,0 +1,16 @@
+readLines <- function(...) stop("Use read_lines!")
+writeLines <- function(...) stop("Use write_lines!")
+
+read_lines <- function(path, n = -1L) {
+  con <- file(path, open = "r", encoding = "utf-8")
+  on.exit(close(con))
+
+  base::readLines(con, n = n, warn = FALSE)
+}
+
+write_lines <- function(text, path) {
+  con <- file(path, open = "w", encoding = "utf-8")
+  on.exit(close(con))
+
+  base::writeLines(text, con)
+}
diff --git a/R/utils.R b/R/utils.R
@@ -80,7 +80,7 @@ write_if_different <- function(path, contents, check = TRUE) {
     FALSE
   } else {
     cat(sprintf('Writing %s\n', name))
-    writeLines(contents, path, useBytes = TRUE)
+    write_lines(contents, path)
     TRUE
   }
 }
@@ -113,7 +113,7 @@ ignore_files <- function(rfiles, path) {
   rfiles_relative <- sub("^[/]*", "", rfiles_relative)
 
   # Remove any files that match any perl-compatible regexp
-  patterns <- readLines(rbuildignore, warn = FALSE)
+  patterns <- read_lines(rbuildignore)
   patterns <- patterns[patterns != ""]
   matches <- lapply(patterns, grepl, rfiles_relative, perl = TRUE)
   matches <- Reduce("|", matches)

diff --git a/tests/testthat/test-Rbuildignore.R b/tests/testthat/test-Rbuildignore.R
@@ -6,15 +6,14 @@ test_that("roxygen ignores files with matching pattern in .Rbuildignore", {
 
   expect_equal(basename(package_files(test_pkg)), c("a.R", "ignore_me.R"))
 
-  #writeLines("^R/ignore_me.R$", file.path(test_pkg, ".Rbuildignore"))
-  writeChar("^R/ignore_me.R$\n", file.path(test_pkg, ".Rbuildignore"), eos = NULL)
+  write_lines("^R/ignore_me.R$\n", file.path(test_pkg, ".Rbuildignore"))
   expect_equal(basename(package_files(test_pkg)), "a.R")
 })
 
 test_that("roxygen works with empty lines in .Rbuildignore", {
   test_pkg <- temp_copy_pkg(test_path("testRbuildignore"))
   on.exit(unlink(test_pkg, recursive = TRUE))
 
-  writeChar("^R/ignore_me.R$\n\n.nonexistentfile", file.path(test_pkg, ".Rbuildignore"), eos = NULL)
+  write_lines("^R/ignore_me.R$\n\n.nonexistentfile", file.path(test_pkg, ".Rbuildignore"))
   expect_equal(basename(package_files(test_pkg)), "a.R")
 })
diff --git a/tests/testthat/test-nonASCII.R b/tests/testthat/test-nonASCII.R
diff --git a/tests/testthat/test-utf8.R b/tests/testthat/test-utf8.R
@@ -0,0 +1,35 @@
+context("nonASCII")
+
+test_that("can generate nonASCII document", {
+  test_pkg <- temp_copy_pkg(test_path('testNonASCII'))
+  on.exit(unlink(test_pkg, recursive = TRUE), add = TRUE)
+
+  expect_output(roxygenise(test_pkg, roclets = "rd"), "printChineseMsg[.]Rd")
+
+  rd_path <- file.path(test_pkg, "man", "printChineseMsg.Rd")
+  expect_true(file.exists(rd_path))
+  rd <- read_lines(rd_path)
+
+  expect_true(any(grepl("\u6211\u7231\u4e2d\u6587", rd)))
+  expect_true(any(grepl("\u4e2d\u6587\u6ce8\u91ca", rd)))
+
+  # Shouldn't change again
+  expect_output(roxygenise(test_pkg, roclets = "rd"), NA)
+})
+
+
+test_that("unicode escapes are ok", {
+  test_pkg <- temp_copy_pkg(test_path('testUtf8Escape'))
+  on.exit(unlink(test_pkg, recursive = TRUE), add = TRUE)
+
+  expect_output(roxygenise(test_pkg, roclets = "rd"), "a[.]Rd")
+
+  rd_path <- file.path(test_pkg, "man", "a.Rd")
+  expect_true(file.exists(rd_path))
+  rd <- read_lines(rd_path)
+
+  expect_true(any(grepl("7\u00b0C", rd)))
+
+  # Shouldn't change again
+  expect_output(roxygenise(test_pkg, roclets = "rd"), NA)
+})
diff --git a/tests/testthat/testEagerData/DESCRIPTION b/tests/testthat/testEagerData/DESCRIPTION
@@ -5,3 +5,4 @@ Description:
 Author: Hadley <h.wickham@gmail.com>
 Maintainer: Hadley <h.wickham@gmail.com>
 Version: 0.1
+Encoding: UTF-8
diff --git a/tests/testthat/testLazyData/DESCRIPTION b/tests/testthat/testLazyData/DESCRIPTION
@@ -6,3 +6,4 @@ Author: Hadley <h.wickham@gmail.com>
 Maintainer: Hadley <h.wickham@gmail.com>
 Version: 0.1
 LazyData: TRUE
+Encoding: UTF-8
diff --git a/tests/testthat/testNonASCII/DESCRIPTION b/tests/testthat/testNonASCII/DESCRIPTION
@@ -4,5 +4,5 @@ License: GPL-2
 Description:
 Author: Shrektan <shrektan@126.com>
 Maintainer: Shrektan <shrektan@126.com>
-Encoding: GB2312
+Encoding: UTF-8
 Version: 0.1
diff --git a/tests/testthat/testNonASCII/R/a.r b/tests/testthat/testNonASCII/R/a.r
@@ -1,9 +1,6 @@
-# This script is intended to be saved in GB2312 to test if non UTF-8 encoding is
-# supported.
-
-#' 中文注释
+#' 中文注释
 #'
-#' @note 我爱中文。
+#' @note 我爱中文。
 printChineseMsg <- function() {
-  message("我是GB2312的中文字符。")
+  message("我是UTF8的中文字符。")
 }
diff --git a/tests/testthat/testUtf8Escape/DESCRIPTION b/tests/testthat/testUtf8Escape/DESCRIPTION
@@ -0,0 +1,8 @@
+Package: testUtf8Escape
+Title: Check that utf8 escapes are round tripped ok
+License: GPL-2
+Description:
+Author: Hadley <hadley@rstudio.com>
+Maintainer: Hadley <hadley@rstudio.com>
+Encoding: UTF-8
+Version: 0.1
diff --git a/tests/testthat/testUtf8Escape/R/a.r b/tests/testthat/testUtf8Escape/R/a.r
@@ -0,0 +1,4 @@
+#' Title
+#'
+#' @param b Some label
+a <- function(b = '7°C') 1