Skip to content

Commit

Permalink
Always read and write in UTF-8
Browse files Browse the repository at this point in the history
* Helpers read_lines and write_lines do the right thing
* readLines() and writeLines() throw errors to prevent accidental re-use in the future
* Warn if package encoding is not utf-8

Fixes #564. Fixes #592
  • Loading branch information
hadley committed Aug 17, 2017
1 parent f4a171d commit c5f33cb
Show file tree
Hide file tree
Showing 18 changed files with 91 additions and 47 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Suggests:
LinkingTo:
Rcpp
VignetteBuilder: knitr
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 6.0.1.9000
Remotes:
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# roxygen2 6.0.1.9000

* roxygen2 now always reads and writes using UTF-8 encoding. If used with a
package that does not have `Encoding: UTF-8` in the DESCRIPTION, you'll
now get a warning (#564, #592).

* Usage for data objects now correctly generated, avoiding double escaping
other components of usage (#562).

Expand Down
8 changes: 0 additions & 8 deletions R/enc.R

This file was deleted.

8 changes: 4 additions & 4 deletions R/parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ parse_package <- function(base_path, load_code, registry, global_options = list(

files <- package_files(base_path)
parsed <- lapply(files, parse_blocks, env = env, registry = registry,
global_options = global_options, fileEncoding = desc$Encoding %||% "UTF-8")
global_options = global_options)
blocks <- unlist(parsed, recursive = FALSE)

list(env = env, blocks = blocks)
}

parse_text <- function(text, registry = default_tags(), global_options = list()) {
file <- tempfile()
writeLines(text, file)
write_lines(text, file)
on.exit(unlink(file))

env <- new.env(parent = parent.env(globalenv()))
Expand All @@ -25,9 +25,9 @@ parse_text <- function(text, registry = default_tags(), global_options = list())
list(env = env, blocks = blocks)
}

parse_blocks <- function(file, env, registry, global_options = list(), fileEncoding = "UTF-8") {
parse_blocks <- function(file, env, registry, global_options = list()) {

lines <- read_lines_enc(file, file_encoding = fileEncoding)
lines <- read_lines(file)
parsed <- parse(text = lines, keep.source = TRUE, srcfile = srcfilecopy(file, lines, isFile = TRUE))
if (length(parsed) == 0) return()

Expand Down
2 changes: 1 addition & 1 deletion R/rd.R
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ topic_add_examples <- function(topic, block, base_path) {
next
}

code <- readLines(path)
code <- read_lines(path)
examples <- escape_examples(code)

topic$add_simple_field("examples", examples)
Expand Down
5 changes: 5 additions & 0 deletions R/roxygenize.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ roxygenize <- function(package.dir = ".",
dir.create(man_path, recursive = TRUE, showWarnings = FALSE)
update_roxygen_version(base_path)

encoding <- desc::desc_get("Encoding", file = base_path)[[1]]
if (!identical(encoding, "UTF-8")) {
warning("roxygen2 requires Encoding: UTF-8", call. = FALSE)
}

options <- load_options(base_path)
roclets <- roclets %||% options$roclets

Expand Down
6 changes: 3 additions & 3 deletions R/safety.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@ first_time <- function(path) {
made_by_roxygen <- function(path) {
if (!file.exists(path)) return(TRUE)

first <- readLines(path, n = 1)
first <- read_lines(path, n = 1)
check_made_by(first)
}

add_made_by_roxygen <- function(path, comment) {
if (!file.exists(path)) stop("Can't find ", path, call. = FALSE)

lines <- readLines(path, warn = FALSE)
lines <- read_lines(path)
if (check_made_by(lines[1])) return()

writeLines(c(made_by(comment), lines), path)
write_lines(c(made_by(comment), lines), path)
}

check_made_by <- function(first) {
Expand Down
16 changes: 16 additions & 0 deletions R/utils-io.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Deliberately mask base::readLines() and base::writeLines() so that any
# unqualified call fails immediately and points the caller at the helpers
# read_lines() / write_lines() instead.
readLines <- function(...) {
  stop("Use read_lines!")
}

writeLines <- function(...) {
  stop("Use write_lines!")
}

# Read the lines of a UTF-8 encoded text file.
#
# `path` is the file to read; `n` is the maximum number of lines to read
# (the default, -1L, reads every line). Returns a character vector with one
# element per line; non-ASCII elements are marked as UTF-8.
#
# The bytes are read as-is and merely *marked* as UTF-8. Opening the
# connection with `encoding = "utf-8"` would instead re-encode every line into
# the session's native encoding, which loses characters that encoding cannot
# represent (e.g. CJK text under a Windows code-page locale).
read_lines <- function(path, n = -1L) {
  base::readLines(path, n = n, encoding = "UTF-8", warn = FALSE)
}

# Write a character vector to a file as UTF-8, one element per line.
#
# `text` is the character vector to write; `path` is the destination file,
# which is created or truncated.
#
# The text is converted to UTF-8 up front and written with `useBytes = TRUE`
# on a connection that does no re-encoding. Without `useBytes`,
# base::writeLines() first translates UTF-8-marked strings into the session's
# native encoding, which loses characters in non-UTF-8 locales before they
# ever reach the file.
write_lines <- function(text, path) {
  con <- file(path, open = "w")
  on.exit(close(con), add = TRUE)

  base::writeLines(enc2utf8(text), con, useBytes = TRUE)
}
4 changes: 2 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ write_if_different <- function(path, contents, check = TRUE) {
FALSE
} else {
cat(sprintf('Writing %s\n', name))
writeLines(contents, path, useBytes = TRUE)
write_lines(contents, path)
TRUE
}
}
Expand Down Expand Up @@ -113,7 +113,7 @@ ignore_files <- function(rfiles, path) {
rfiles_relative <- sub("^[/]*", "", rfiles_relative)

# Remove any files that match any perl-compatible regexp
patterns <- readLines(rbuildignore, warn = FALSE)
patterns <- read_lines(rbuildignore)
patterns <- patterns[patterns != ""]
matches <- lapply(patterns, grepl, rfiles_relative, perl = TRUE)
matches <- Reduce("|", matches)
Expand Down
5 changes: 2 additions & 3 deletions tests/testthat/test-Rbuildignore.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@ test_that("roxygen ignores files with matching pattern in .Rbuildignore", {

expect_equal(basename(package_files(test_pkg)), c("a.R", "ignore_me.R"))

#writeLines("^R/ignore_me.R$", file.path(test_pkg, ".Rbuildignore"))
writeChar("^R/ignore_me.R$\n", file.path(test_pkg, ".Rbuildignore"), eos = NULL)
write_lines("^R/ignore_me.R$\n", file.path(test_pkg, ".Rbuildignore"))
expect_equal(basename(package_files(test_pkg)), "a.R")
})

test_that("roxygen works with empty lines in .Rbuildignore", {
test_pkg <- temp_copy_pkg(test_path("testRbuildignore"))
on.exit(unlink(test_pkg, recursive = TRUE))

writeChar("^R/ignore_me.R$\n\n.nonexistentfile", file.path(test_pkg, ".Rbuildignore"), eos = NULL)
write_lines("^R/ignore_me.R$\n\n.nonexistentfile", file.path(test_pkg, ".Rbuildignore"))
expect_equal(basename(package_files(test_pkg)), "a.R")
})
19 changes: 0 additions & 19 deletions tests/testthat/test-nonASCII.R

This file was deleted.

35 changes: 35 additions & 0 deletions tests/testthat/test-utf8.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
context("nonASCII")

test_that("can generate nonASCII document", {
  pkg_dir <- temp_copy_pkg(test_path('testNonASCII'))
  on.exit(unlink(pkg_dir, recursive = TRUE), add = TRUE)

  # The first run should report the Rd file in its output
  expect_output(roxygenise(pkg_dir, roclets = "rd"), "printChineseMsg[.]Rd")

  rd_file <- file.path(pkg_dir, "man", "printChineseMsg.Rd")
  expect_true(file.exists(rd_file))

  # The generated Rd must contain the non-ASCII text
  rd_lines <- read_lines(rd_file)
  rd_mentions <- function(pattern) any(grepl(pattern, rd_lines))
  expect_true(rd_mentions("\u6211\u7231\u4e2d\u6587"))
  expect_true(rd_mentions("\u4e2d\u6587\u6ce8\u91ca"))

  # A second run should produce no output
  expect_output(roxygenise(pkg_dir, roclets = "rd"), NA)
})


test_that("unicode escapes are ok", {
  pkg_dir <- temp_copy_pkg(test_path('testUtf8Escape'))
  on.exit(unlink(pkg_dir, recursive = TRUE), add = TRUE)

  # The first run should report the Rd file in its output
  expect_output(roxygenise(pkg_dir, roclets = "rd"), "a[.]Rd")

  rd_file <- file.path(pkg_dir, "man", "a.Rd")
  expect_true(file.exists(rd_file))

  # The degree sign in the default argument must appear in the generated Rd
  rd_lines <- read_lines(rd_file)
  has_degree <- grepl("7\u00b0C", rd_lines)
  expect_true(any(has_degree))

  # A second run should produce no output
  expect_output(roxygenise(pkg_dir, roclets = "rd"), NA)
})
1 change: 1 addition & 0 deletions tests/testthat/testEagerData/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ Description:
Author: Hadley <h.wickham@gmail.com>
Maintainer: Hadley <h.wickham@gmail.com>
Version: 0.1
Encoding: UTF-8
1 change: 1 addition & 0 deletions tests/testthat/testLazyData/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ Author: Hadley <h.wickham@gmail.com>
Maintainer: Hadley <h.wickham@gmail.com>
Version: 0.1
LazyData: TRUE
Encoding: UTF-8
2 changes: 1 addition & 1 deletion tests/testthat/testNonASCII/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ License: GPL-2
Description:
Author: Shrektan <shrektan@126.com>
Maintainer: Shrektan <shrektan@126.com>
Encoding: GB2312
Encoding: UTF-8
Version: 0.1
9 changes: 3 additions & 6 deletions tests/testthat/testNonASCII/R/a.r
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
# This script is intended to be saved in GB2312 to test if non UTF-8 encoding is
# supported.

#' 中文注释
#' 中文注释
#'
#' @note 我爱中文。
#' @note 我爱中文。
printChineseMsg <- function() {
message("我是GB2312的中文字符")
message("我是UTF8的中文字符")
}
8 changes: 8 additions & 0 deletions tests/testthat/testUtf8Escape/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Package: testUtf8Escape
Title: Check that utf8 escapes are round tripped ok
License: GPL-2
Description:
Author: Hadley <hadley@rstudio.com>
Maintainer: Hadley <hadley@rstudio.com>
Encoding: UTF-8
Version: 0.1
4 changes: 4 additions & 0 deletions tests/testthat/testUtf8Escape/R/a.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Test fixture: the default value of `b` contains a non-ASCII character (the
# degree sign) so the test suite can check it appears in the generated Rd file.
#' Title
#'
#' @param b Some label
a <- function(b = '7°C') 1

0 comments on commit c5f33cb

Please sign in to comment.