Skip to content
Merged
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ Collate:
'spaces_inside_linter.R'
'spaces_left_parentheses_linter.R'
'sprintf_linter.R'
'strings_as_factors_linter.R'
'system_file_linter.R'
'trailing_blank_lines_linter.R'
'trailing_whitespace_linter.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ export(single_quotes_linter)
export(spaces_inside_linter)
export(spaces_left_parentheses_linter)
export(sprintf_linter)
export(strings_as_factors_linter)
export(system_file_linter)
export(todo_comment_linter)
export(trailing_blank_lines_linter)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ function calls. (#850, #851, @renkun-ken)
* `consecutive_stopifnot_linter()` Require consecutive calls to `stopifnot()` to be unified into one
* `ifelse_censor_linter()` Require usage of `pmax()` / `pmin()` where appropriate, e.g. `ifelse(x > y, x, y)` is `pmax(x, y)`
* `system_file_linter()` Require file paths to be constructed by `system.file()` instead of calling `file.path()` directly
* `strings_as_factors_linter()` Check for code designed to work before and after the new `stringsAsFactors = FALSE` default
* `inner_combine_linter` Require inputs to vectorized functions to be combined first rather than later, e.g. `as.Date(c(x, y))` over `c(as.Date(x), as.Date(y))`
* `assignment_linter()` now lints right assignment (`->` and `->>`) and gains two arguments. `allow_cascading_assign` (`TRUE` by default) toggles whether to lint `<<-` and `->>`; `allow_right_assign` toggles whether to lint `->` and `->>` (#915, @michaelchirico)
* `infix_spaces_linter()` gains argument `exclude_operators` to disable lints on selected infix operators. By default, all "low-precedence" operators throw lints; see `?infix_spaces_linter` for an enumeration of these. (#914 @michaelchirico)
Expand Down
78 changes: 78 additions & 0 deletions R/strings_as_factors_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#' Identify cases where stringsAsFactors should be supplied explicitly
#'
#' Designed for code bases written for versions of R before 4.0 seeking to upgrade to R >= 4.0, where
#' one of the biggest pain points will surely be the flipping of the
#' default value of `stringsAsFactors` from `TRUE` to `FALSE`.
#'
#' It's not always possible to tell statically whether the change will break
#' existing code because R is dynamically typed -- e.g. in `data.frame(x)`
#' if `x` is a string, this code will be affected, but if `x` is a number,
#' this code will be unaffected. However, in `data.frame(x = 'a')`, the
#' output will unambiguously be affected. We can instead supply
#' `stringsAsFactors = TRUE`, which will make this code backwards-compatible.
#'
#' See <https://developer.r-project.org/Blog/public/2020/02/16/stringsasfactors>.
#'
#' @evalRd rd_tags("strings_as_factors_linter")
#' @seealso [linters] for a complete list of linters available in lintr.
#' @export
strings_as_factors_linter <- function() {
Linter(function(source_file) {
if (length(source_file$xml_parsed_content) == 0L) {
return(list())
}

xml <- source_file$xml_parsed_content

atomic_const_cond <- "STR_CONST"
# like data.frame('a') and data.frame(c('a', 'b'))
c_cond <- xp_and(
"expr[SYMBOL_FUNCTION_CALL[text() = 'c']]",
"expr[STR_CONST]",
"not(expr[SYMBOL or expr])"
)
# save and re-use for rep_cond which satisfies the same condition
atomic_or_c_cond <- sprintf("%s or (%s)", atomic_const_cond, c_cond)
# like data.frame(rep(c('a', 'b'), 10L))
rep_cond <- xp_and(
"SYMBOL_FUNCTION_CALL[text() = 'rep']",
sprintf("following-sibling::expr[1][%s]", atomic_or_c_cond)
)

# like data.frame(character()) or data.frame(as.character(1))
known_character_funs <- c(
"character", "as.character", "paste", "sprintf",
"format", "formatC", "prettyNum", "toString", "encodeString"
)
character_cond <- sprintf(
"expr[SYMBOL_FUNCTION_CALL[%s]]",
xp_text_in_table(known_character_funs)
)

expr_cond <- xp_and(
'expr[SYMBOL_FUNCTION_CALL[text() = "data.frame"]]',
sprintf(
"expr[((%s) or (expr[%s]) or (%s)) and not(%s)]",
atomic_or_c_cond,
rep_cond,
character_cond,
"preceding-sibling::SYMBOL_SUB[1][text() = 'row.names']"
),
'not(SYMBOL_SUB[text() = "stringsAsFactors"])'
)
bad_expr <- xml2::xml_find_all(xml, sprintf("//expr[%s]", expr_cond))

return(lapply(
bad_expr,
xml_nodes_to_lint,
source_file = source_file,
lint_message = paste(
"This code relies on the default value of stringsAsFactors,",
"which changed in version R 4.0. Please supply an explicit value for",
"stringsAsFactors for this code to work with versions of R both before",
"and after this switch."
),
type = "warning"
))
})
}
1 change: 1 addition & 0 deletions inst/lintr/linters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ single_quotes_linter,style consistency readability default
spaces_inside_linter,style readability default
spaces_left_parentheses_linter,style readability default
sprintf_linter,correctness common_mistakes
strings_as_factors_linter,robustness
system_file_linter,consistency readability best_practices
T_and_F_symbol_linter,style readability robustness consistency best_practices default
todo_comment_linter,style configurable
Expand Down
3 changes: 2 additions & 1 deletion man/linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/robustness_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/strings_as_factors_linter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

73 changes: 73 additions & 0 deletions tests/testthat/test-strings_as_factors_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
test_that("strings_as_factors_linter skips allowed usages", {
expect_lint("data.frame(1:3)", NULL, strings_as_factors_linter())
expect_lint("data.frame(x = 1:3)", NULL, strings_as_factors_linter())

expect_lint("data.frame(x = 'a', stringsAsFactors = TRUE)", NULL, strings_as_factors_linter())
expect_lint("data.frame(x = 'a', stringsAsFactors = FALSE)", NULL, strings_as_factors_linter())
expect_lint("data.frame(x = c('a', 'b'), stringsAsFactors = FALSE)", NULL, strings_as_factors_linter())

# strings in argument names to c() don't get linted
# TODO(michaelchirico): fix this; currently generates a false positive.
# expect_lint("data.frame(x = c('a b' = 1L, 'b c' = 2L))", NULL, strings_as_factors_linter())

# characters supplied to row.names are not affected
expect_lint("data.frame(x = 1:3, row.names = c('a', 'b', 'c'))", NULL, strings_as_factors_linter())

# ambiguous cases passes
expect_lint("data.frame(x = c(xx, 'a'))", NULL, strings_as_factors_linter())
expect_lint("data.frame(x = c(foo(y), 'a'))", NULL, strings_as_factors_linter())
})

test_that("strings_as_factors_linter blocks simple disallowed usages", {
msg <- "This code relies on the default value of stringsAsFactors"

expect_lint("data.frame('a')", msg, strings_as_factors_linter())
expect_lint("data.frame(c('a', 'b'))", msg, strings_as_factors_linter())
expect_lint("data.frame(x = 1:5, y = 'b')", msg, strings_as_factors_linter())
expect_lint("data.frame(x = 1:5, y, z = 'b')", msg, strings_as_factors_linter())

# catch row.names when combined with a character vector
expect_lint("data.frame(x = c('c', 'd', 'e'), row.names = c('a', 'b', 'c'))", msg, strings_as_factors_linter())
expect_lint("data.frame(row.names = c('c', 'd', 'e'), x = c('a', 'b', 'c'))", msg, strings_as_factors_linter())

# when everything is a literal, type promotion means the column is character
expect_lint("data.frame(x = c(TRUE, 'a'))", msg, strings_as_factors_linter())
})

test_that("strings_as_factors_linters catches rep(char) usages", {
msg <- "This code relies on the default value of stringsAsFactors"

expect_lint("data.frame(rep('a', 10L))", msg, strings_as_factors_linter())
expect_lint("data.frame(rep(c('a', 'b'), 10L))", msg, strings_as_factors_linter())

# literal char, not mixed or non-char
expect_lint("data.frame(rep(1L, 10L))", NULL, strings_as_factors_linter())
expect_lint("data.frame(rep(c(x, 'a'), 10L))", NULL, strings_as_factors_linter())
# however, type promotion of literals is caught
expect_lint("data.frame(rep(c(TRUE, 'a'), 10L))", msg, strings_as_factors_linter())
})

test_that("strings_as_factors_linter catches character(), as.character() usages", {
msg <- "This code relies on the default value of stringsAsFactors"

expect_lint("data.frame(a = character())", msg, strings_as_factors_linter())
expect_lint("data.frame(a = character(1L))", msg, strings_as_factors_linter())
expect_lint("data.frame(a = as.character(x))", msg, strings_as_factors_linter())

# but not for row.names
expect_lint("data.frame(a = 1:10, row.names = as.character(1:10))", NULL, strings_as_factors_linter())
})

test_that("strings_as_factors_linter catches more functions with string output", {
msg <- "This code relies on the default value of stringsAsFactors"

expect_lint("data.frame(a = paste(1, 2, 3))", msg, strings_as_factors_linter())
expect_lint("data.frame(a = sprintf('%d', 1:10))", msg, strings_as_factors_linter())
expect_lint("data.frame(a = format(x, just = 'right'))", msg, strings_as_factors_linter())
expect_lint("data.frame(a = formatC(x, format = '%d'))", msg, strings_as_factors_linter())
expect_lint("data.frame(a = prettyNum(x, big.mark = ','))", msg, strings_as_factors_linter())
expect_lint("data.frame(a = toString(x))", msg, strings_as_factors_linter())
expect_lint("data.frame(a = encodeString(x))", msg, strings_as_factors_linter())
# but not for row.names
expect_lint("data.frame(a = 1:10, row.names = paste(1:10))", NULL, strings_as_factors_linter())
})