diff --git a/r/R/metadata.R b/r/R/metadata.R index ba73f0857881d..61e412be62450 100644 --- a/r/R/metadata.R +++ b/r/R/metadata.R @@ -107,15 +107,34 @@ safe_r_metadata <- function(metadata, on_save = FALSE) { # and mutate the `types_removed` variable outside of it. check_r_metadata_types_recursive <- function(x) { allowed_types <- c("character", "double", "integer", "logical", "complex", "list", "NULL") + # Pull out the attributes so we can also check them + x_attrs <- attributes(x) + if (is.list(x)) { + # Add special handling for some base R classes that are list but + # their [[ methods leads to infinite recursion. + # We unclass here and then reapply attributes after. + x <- unclass(x) + types <- map_chr(x, typeof) - x[types == "list"] <- map(x[types == "list"], check_r_metadata_types_recursive) ok <- types %in% allowed_types if (!all(ok)) { # Record the invalid types, then remove the offending elements types_removed <<- c(types_removed, setdiff(types, allowed_types)) x <- x[ok] + if ("names" %in% names(x_attrs)) { + # Also prune from the attributes since we'll re-add later + x_attrs[["names"]] <- x_attrs[["names"]][ok] + } } + # For the rest, recurse + x <- map(x, check_r_metadata_types_recursive) + } + + # attributes() of a named list will return a list with a "names" attribute, + # so it will recurse indefinitely. + if (!is.null(x_attrs) && !identical(x_attrs, list(names = names(x)))) { + attributes(x) <- check_r_metadata_types_recursive(x_attrs) } x } diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index 175e7ef3b6b73..06aa1535e0a36 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -149,6 +149,15 @@ arbitrary\040code\040was\040just\040executed ) }) +test_that("R metadata processing doesn't choke on packageVersion() output", { + metadata <- list(version = packageVersion("base")) + expect_identical(safe_r_metadata(metadata), metadata) + + df <- example_data[1:6] + attr(df, "version") <- packageVersion("base") + expect_equal_data_frame(Table$create(df), df) +}) + test_that("Complex or unsafe attributes are pruned from R metadata, if they exist", { tab <- Table$create(example_data[1:6]) bad <- new.env() @@ -161,18 +170,24 @@ i Type: \"environment\" > If you trust the source, you can set `options(arrow.unsafe_metadata = TRUE)` to preserve them.", fixed = TRUE ) + # Try hiding it even further, in attributes + bad_meta <- list(attributes = structure(list(), hidden_attr = bad)) + tab$metadata <- list(r = rawToChar(serialize(bad_meta, NULL, ascii = TRUE))) + expect_warning( + as.data.frame(tab), + "Potentially unsafe or invalid elements have been discarded from R metadata. +i Type: \"environment\" +> If you trust the source, you can set `options(arrow.unsafe_metadata = TRUE)` to preserve them.", + fixed = TRUE + ) + # You can set an option to allow them through. # It still warns, just differently, and it doesn't prune the attributes withr::local_options(list("arrow.unsafe_metadata" = TRUE)) expect_warning( - expect_warning( - as.data.frame(tab), - "R metadata may have unsafe or invalid elements + as.data.frame(tab), + "R metadata may have unsafe or invalid elements i Type: \"environment\"" - ), - # This particular example ultimately fails because it's not a list - "Invalid metadata$r", - fixed = TRUE ) })