Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-43748: [R] Handle package_version in safe_r_metadata #43895

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion r/R/metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,34 @@ safe_r_metadata <- function(metadata, on_save = FALSE) {
# and mutate the `types_removed` variable outside of it.
check_r_metadata_types_recursive <- function(x) {
allowed_types <- c("character", "double", "integer", "logical", "complex", "list", "NULL")
# Pull out the attributes so we can also check them
x_attrs <- attributes(x)

if (is.list(x)) {
# Add special handling for some base R classes that are list but
# their [[ methods leads to infinite recursion.
# We unclass here and then reapply attributes after.
x <- unclass(x)

types <- map_chr(x, typeof)
x[types == "list"] <- map(x[types == "list"], check_r_metadata_types_recursive)
ok <- types %in% allowed_types
if (!all(ok)) {
# Record the invalid types, then remove the offending elements
types_removed <<- c(types_removed, setdiff(types, allowed_types))
x <- x[ok]
if ("names" %in% names(x_attrs)) {
# Also prune from the attributes since we'll re-add later
x_attrs[["names"]] <- x_attrs[["names"]][ok]
}
}
# For the rest, recurse
x <- map(x, check_r_metadata_types_recursive)
}

# attributes() of a named list will return a list with a "names" attribute,
# so it will recurse indefinitely.
if (!is.null(x_attrs) && !identical(x_attrs, list(names = names(x)))) {
attributes(x) <- check_r_metadata_types_recursive(x_attrs)
}
x
}
Expand Down
29 changes: 22 additions & 7 deletions r/tests/testthat/test-metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,15 @@ arbitrary\040code\040was\040just\040executed
)
})

test_that("R metadata processing doesn't choke on packageVersion() output", {
metadata <- list(version = packageVersion("base"))
expect_identical(safe_r_metadata(metadata), metadata)

df <- example_data[1:6]
attr(df, "version") <- packageVersion("base")
expect_equal_data_frame(Table$create(df), df)
})

test_that("Complex or unsafe attributes are pruned from R metadata, if they exist", {
tab <- Table$create(example_data[1:6])
bad <- new.env()
Expand All @@ -161,18 +170,24 @@ i Type: \"environment\"
> If you trust the source, you can set `options(arrow.unsafe_metadata = TRUE)` to preserve them.",
fixed = TRUE
)
# Try hiding it even further, in attributes
bad_meta <- list(attributes = structure(list(), hidden_attr = bad))
tab$metadata <- list(r = rawToChar(serialize(bad_meta, NULL, ascii = TRUE)))
expect_warning(
as.data.frame(tab),
"Potentially unsafe or invalid elements have been discarded from R metadata.
i Type: \"environment\"
> If you trust the source, you can set `options(arrow.unsafe_metadata = TRUE)` to preserve them.",
fixed = TRUE
)

# You can set an option to allow them through.
# It still warns, just differently, and it doesn't prune the attributes
withr::local_options(list("arrow.unsafe_metadata" = TRUE))
expect_warning(
expect_warning(
as.data.frame(tab),
"R metadata may have unsafe or invalid elements
as.data.frame(tab),
"R metadata may have unsafe or invalid elements
i Type: \"environment\""
),
# This particular example ultimately fails because it's not a list
"Invalid metadata$r",
fixed = TRUE
)
})

Expand Down
Loading