Skip to content

Commit

Permalink
correctly parse non-ASCII characters of R files in Windows (#532)
Browse files Browse the repository at this point in the history
* add support for non-ASCII chars in roxygenize

* ensure Rd files are written as UTF-8
  • Loading branch information
shrektan authored and hadley committed Dec 2, 2016
1 parent 9b34848 commit 8d879e9
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 5 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@
* The new `_PACKAGE` sentinel now also works from `roxygenise()`; before
it only worked from `devtools::document()` (#439, @krlmlr).

* `roxygen2::roxygenise()` now parses non-ASCII documentation correctly
(as long as the R files are UTF-8 encoded, or their encoding is specified
in the `Encoding` field of DESCRIPTION) (#532, @shrektan).

## Extension

* Deprecated `register.preref.parser()` and `register.preref.parsers()`
Expand Down
10 changes: 7 additions & 3 deletions R/parse.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
parse_package <- function(base_path, load_code, registry, global_options = list()) {
env <- load_code(base_path)
desc <- read_pkg_description(base_path)

files <- package_files(base_path)
parsed <- lapply(files, parse_blocks, env = env, registry = registry,
global_options = global_options)
global_options = global_options, fileEncoding = desc$Encoding %||% "UTF-8")
blocks <- unlist(parsed, recursive = FALSE)

list(env = env, blocks = blocks)
Expand All @@ -24,8 +25,11 @@ parse_text <- function(text, registry = default_tags(), global_options = list())
list(env = env, blocks = blocks)
}

parse_blocks <- function(file, env, registry, global_options = list()) {
parsed <- parse(file = file, keep.source = TRUE)
parse_blocks <- function(file, env, registry, global_options = list(), fileEncoding = "UTF-8") {

con <- file(file, encoding = fileEncoding)
on.exit(close(con), add = TRUE)
parsed <- parse(con, keep.source = TRUE, srcfile = srcfile(file, encoding = fileEncoding))
if (length(parsed) == 0) return()

refs <- utils::getSrcref(parsed)
Expand Down
7 changes: 6 additions & 1 deletion R/source.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,17 @@ source_package <- function(path) {

load_pkg_dependencies(path)

desc <- read_pkg_description(path)
paths <- package_files(path)
lapply(paths, sys.source, envir = env, keep.source = FALSE)
lapply(paths, sys_source, envir = env, fileEncoding = desc$Encoding %||% "UTF-8")

env
}

# Evaluate the R script `file` inside `envir`, reading it with the encoding
# given by `fileEncoding` and without retaining source references.
sys_source <- function(file, envir = baseenv(), fileEncoding = "UTF-8") {
  source(
    file = file,
    local = envir,
    keep.source = FALSE,
    encoding = fileEncoding
  )
}

# Assume that the package has already been loaded by other means
# (e.g. build and reload)
loaded_package <- function(path) {
Expand Down
4 changes: 3 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ write_if_different <- function(path, contents, check = TRUE) {
FALSE
} else {
cat(sprintf('Writing %s\n', name))
writeLines(contents, path)
con <- file(path, encoding = "UTF-8")
on.exit(close(con), add = TRUE)
writeLines(contents, con)
TRUE
}
}
Expand Down
16 changes: 16 additions & 0 deletions tests/testthat/test-nonASCII.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
context("nonASCII")

test_that("can generate nonASCII document", {
  test_pkg <- temp_copy_pkg('testNonASCII')
  # Remove the copied fixture package when the test finishes; add = TRUE so
  # any exit handler registered earlier is kept rather than replaced.
  on.exit(unlink(test_pkg, recursive = TRUE), add = TRUE)

  rd_path <- file.path(test_pkg, "man", "printChineseMsg.Rd")

  expect_output(roxygenize(test_pkg), "printChineseMsg[.]Rd")
  expect_true(file.exists(rd_path))

  cnChar <- readLines(rd_path, encoding = "UTF-8")

  # testthat does not parse this test file as UTF-8, so the expected Chinese
  # strings are written as Unicode escapes rather than literal characters.
  expect_true(any(grepl("\u6211\u7231\u4e2d\u6587", cnChar)))
  expect_true(any(grepl("\u4e2d\u6587\u6ce8\u91ca", cnChar)))
})
8 changes: 8 additions & 0 deletions tests/testthat/testNonASCII/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Package: testNonASCII
Title: Test that roxygenize handles non-ASCII sources in a non-UTF-8 encoding
License: GPL-2
Description:
Author: Shrektan <shrektan@126.com>
Maintainer: Shrektan <shrektan@126.com>
Encoding: GB2312
Version: 0.1
9 changes: 9 additions & 0 deletions tests/testthat/testNonASCII/R/a.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# This script is intended to be saved in GB2312 to test if non UTF-8 encoding is
# supported.

#' 中文注释
#'
#' @note 我爱中文。
printChineseMsg <- function() {
message("我是GB2312的中文字符。")
}

0 comments on commit 8d879e9

Please sign in to comment.