Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug833 asdt list #835

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 118 additions & 16 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -1532,7 +1532,14 @@ as.matrix.data.table = function(x,...)

as.data.table.matrix = function(x, keep.rownames=FALSE)
{
if (keep.rownames) return(data.table(rn=rownames(x), x, keep.rownames=FALSE))
if (keep.rownames) {
## matrices and arrays may not have a rownames attribute
if (is.null(rownames(x))) {
warning("keep.rownames has been set to TRUE but rownames(x) is NULL. rn column will be NA_character_")
return(data.table(rn=rep(NA_character_, nrow(x)), x, keep.rownames=FALSE))
} else
return(data.table(rn=rownames(x), x, keep.rownames=FALSE))
}
d <- dim(x)
nrows <- d[1L]
ir <- seq_len(nrows)
Expand All @@ -1559,6 +1566,37 @@ as.data.table.matrix = function(x, keep.rownames=FALSE)
alloc.col(value)
}


## Convert an array to a data.table.
##
## Arguments:
##   x              the array to convert.
##   keep.rownames  if TRUE, the result is built by data.table() with a leading
##                  'rn' column holding rownames(x), or NA_character_ (with a
##                  warning) when rownames(x) is NULL.
##
## When keep.rownames is not set, the array is reshaped to a two-dimensional
## matrix that keeps the first dimension as rows and collapses every remaining
## dimension into columns, and is then handed to as.data.table.matrix().
as.data.table.array = function(x, keep.rownames=FALSE)
{
  if (keep.rownames) {
    ## matrices and arrays may not have a rownames attribute
    rn <- rownames(x)
    if (is.null(rn)) {
      warning("keep.rownames has been set to TRUE but rownames(x) is NULL. rn column will be NA_character_")
      rn <- rep(NA_character_, nrow(x))
    }
    return(data.table(rn=rn, x, keep.rownames=FALSE))
  }

  ## Work on a deep copy: setattr() below changes 'dim' by reference and the
  ## caller's array must stay untouched.
  x <- copy(x)

  ## Reshape to a matrix: first dimension stays, the rest are multiplied out.
  extents <- dim(x)
  setattr(x, "dim", c(extents[1L], prod(extents[-1L])))
  result <- as.data.table.matrix(x, keep.rownames=keep.rownames)

  # --- TODO
  # --- as.data.frame preserves 'dim' on the column of a single-dim array.
  # --- Doing the same here would be consistent, but it causes print(result)
  # --- to complain, so it stays disabled:
  # ------------
  # if (length(extents) == 1) {
  #   setattr(result[[1+keep.rownames]], "dim", extents)
  # }

  return(result)
}

as.data.table.data.frame = function(x, keep.rownames=FALSE)
{
if (keep.rownames) return(data.table(rn=rownames(x), x, keep.rownames=FALSE))
Expand All @@ -1573,24 +1611,88 @@ as.data.table.data.frame = function(x, keep.rownames=FALSE)
alloc.col(ans)
}

## Convert a list to a data.table.
##
## Arguments:
##   x              the list to convert. An empty list yields null.data.table().
##   keep.rownames  present for signature parity with the other as.data.table
##                  methods; this method does not read it.
##   bind.using     "cbind" (default) or "rbind". Only consulted when at least
##                  one element of x has a 'dim' attribute.
##
## Behaviour:
##   * No element has 'dim': each element becomes one column. Shorter elements
##     are recycled to the longest length, with a warning when the recycling
##     leaves a remainder. Unnamed lists get column names V1, V2, ...
##   * Some element has 'dim' (#833): elements are combined with cbind/rbind.
##     There is no recycling here; differing sizes are an error.
##   * A warning is issued only when x actually contains a nested list (a list
##     element without 'dim'), because as.data.frame handles those differently.
as.data.table.list = function(x, keep.rownames=FALSE, bind.using=c("cbind", "rbind")) {
  if (!length(x)) return( null.data.table() )

  ## Implementing #833
  ## Number of dimensions of every element; 0 when the element has no 'dim'.
  dims <- vapply(x, function(xi) length(dim(xi)), 0)
  hasdim <- dims != 0

  ## Check for nested lists.
  ## Throw warning to notify of lack of compatibility with as.data.frame.
  ## The test must guard the warning: it is only relevant when a list element
  ## without 'dim' is present (elements with 'dim', e.g. data.frames, are
  ## handled by the bind branch below).
  if (any(vapply(x, is.list, NA) & !hasdim))
    warning("as.data.table(x) and as.data.frame(X) differ in how they handle nested lists.\nNamely, as.data.table will allow for a list to be a single element in a column.\nIf you would like to force compatibility please use:\n as.data.table(as.data.frame(x))")

  ## If any element has dim, then convert to data.table using rbind/cbind.
  ## For consistency with as.data.frame, as.data.table should fail on
  ## inconsistent dimensions. There is no recycling allowed in this situation.
  ## (Note that as.data.frame fails where cbind/rbind only issues a warning.)
  if (any(hasdim)) {
    bind.using <- match.arg(bind.using)
    bind.func <- match.fun(bind.using)

    ## should be all of the same class for rbind
    if (bind.using == "rbind") {
      classes <- lapply(x, function(xi) sapply(xi, class))
      ## duplicated()[-1L] is all TRUE exactly when every class equals the first
      if (!all(duplicated(unlist(classes, use.names=FALSE))[-1L]))
        warning ("bind.using=\"rbind\" in as.data.table is an experimental feature intended for list of matricies. It appears that the elements of 'x' are of mixed classes and coercian is likely.")
    }

    ## setattr() below modifies by reference, so work on a copy of the input
    x <- copy(x)

    ## Before checking nrows/ncols, arrays must be converted to matrices.
    ## Accomplished by modifying the 'dim' attribute: dimension 'd' is kept as
    ## the first dimension and all other dimensions are multiplied together.
    ## NOTE(review): for "rbind" this puts the original column count first,
    ## while the size check below uses ncol(); confirm this is the intended
    ## layout for the experimental rbind path.
    if (any(dims > 2)) {
      d <- if (bind.using == "cbind") 1 else 2
      for (i in which(dims > 2)) {
        dim.i <- attr(x[[i]], "dim")
        setattr( x[[i]], "dim", c(dim.i[d], prod(dim.i[-d])) )
      }
    }

    ## Check for consistent length/rows/cols.
    ## 'L' stores the "size" of each element: its length when it has no 'dim',
    ## otherwise nrow() for cbind or ncol() for rbind.
    L <- vector("integer", length=length(x))
    L[!hasdim] <- vapply(x[!hasdim], length, 0L)
    n_row_or_col <- if (bind.using == "cbind") nrow else ncol
    L[hasdim] <- vapply(x[hasdim], n_row_or_col, 0L)
    ## All sizes are equal exactly when min, max and mean coincide
    if (any(range(L) != mean(L)))
      stop(sprintf("arguments imply differing number of %s: %s\n\nMore Info: 'x', the list provided to as.data.table(), has at least one element that is two-dimensional. For consistency with as.data.frame, there is no recycling when the number of %1$s / length of each element of x are not all the same.\nYou may want to consider: do.call(cbind, x)", if (bind.using == "cbind") "rows" else "cols", paste(L, collapse=", ")))

    ## We cannot simply c/rbind since this will force coercion.
    ## The first element in the list must be a data.table.
    return(do.call(bind.func, c(list(as.data.table(x[[1L]])), x[-1L])))
  }

  ## Proceed as "normal" (prior to implementing #833)
  n = vapply(x, length, 0L)
  mn = max(n)
  x = copy(x)

  if (any(n<mn))
    for (i in which(n<mn)) {
      if (!is.null(x[[i]])) {# avoids warning when a list element is NULL
        # Implementing FR #4813 - recycle with warning when nr %% nrows[i] != 0L
        if (mn %% n[i] != 0)
          warning("Item ", i, " is of size ", n[i], " but maximum size is ", mn, " (recycled leaving a remainder of ", mn%%n[i], " items)")
        x[[i]] = rep(x[[i]], length.out=mn)
      }
    }

  if (is.null(names(x)))
    setattr(x,"names",paste("V",seq_len(length(x)),sep=""))
  setattr(x,"row.names",.set_row_names(mn))
  setattr(x,"class",c("data.table","data.frame"))
  alloc.col(x)
}

as.data.table.data.table = function(x, keep.rownames=FALSE) return(x)
Expand Down
101 changes: 101 additions & 0 deletions inst/tests/test-as.data.frame-as.data.table-same.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
## testthat context label for the expectations defined in this file
context("as.data.table and as.data.frame should yield comparable results")

## Compare as.data.table(x) with as.data.frame(x).
##
## The data.table result is converted back with as.data.frame() and compared
## to as.data.frame(x, stringsAsFactors=FALSE) using identical().
##
## Arguments:
##   x             object to convert with both functions.
##   quietly       if TRUE, suppress the message emitted when both conversions
##                 throw an error.
##   check.names   if FALSE (default), column names and row names are blanked
##                 on both sides before comparing.
##   showWarnings  if TRUE (default), warn when all values and dimensions match
##                 but identical() still reports a difference.
##
## Returns (invisibly) TRUE when both conversions fail, or when the results are
## identical or equal in all values and dimensions; FALSE when exactly one
## conversion fails. Otherwise the result of testthat::expect_equal() is
## returned so the detailed difference is reported.
as.dt_as.df_are.same <- function(x, quietly=FALSE, check.names=FALSE, showWarnings=TRUE) {

  x.dt <- try(as.data.table(x), silent=TRUE)
  x.df <- try(as.data.frame(x, stringsAsFactors=FALSE), silent=TRUE)

  ## Check for errors
  iserr.dt <- inherits(x.dt, "try-error")
  iserr.df <- inherits(x.df, "try-error")
  if (iserr.dt && iserr.df) {
    if (!quietly)
      message("both of as.data.frame(x) and as.data.table(x) threw an error")
    return(invisible(TRUE))
  }
  if (iserr.dt) {
    warning("DT failed but DF did not")
    return(invisible(FALSE))
  }
  if (iserr.df) {
    warning("DF failed but DT did not")
    return(invisible(FALSE))
  }

  ## will compare x.dt_as.df to x.df
  x.dt_as.df <- as.data.frame(x.dt)

  if (!check.names) {
    ## Use replacement functions rather than setattr(): the conversions above
    ## may hand back 'x' itself, and renaming by reference would then alter
    ## the caller's object. x.dt is not used past this point, so it is left
    ## alone. Each object's names are sized by its own column count.
    names(x.df) <- rep(NA_character_, ncol(x.df))
    rownames(x.df) <- NULL
    names(x.dt_as.df) <- rep(NA_character_, ncol(x.dt_as.df))
    rownames(x.dt_as.df) <- NULL
  }

  ret <- identical(x.dt_as.df, x.df)

  ## If identical() fails, check the actual values.
  ## Perhaps it is just an attribute that is different.
  if (!ret && identical(dim(x.dt_as.df), dim(x.df))) {
    ## isTRUE() keeps an NA comparison result from aborting the 'if'
    if (isTRUE(all(x.dt_as.df == x.df))) {
      ## try clearing the 'dim' attribute of the first column (by reference)
      setattr(x.df[[1]], "dim", NULL)
      ## if still not identical, throw warning, but return TRUE
      if (showWarnings && !identical(x.df, x.dt_as.df))
        warning("All values and dimensions are the same between DF and DT.\nHowever, some differences remain, perhaps in attirbute or attirbute of a column")
      return(invisible(TRUE))
    }
  }

  ## If FALSE, use expect_equal for the detailed output
  if (!ret)
    return(testthat::expect_equal(x.dt_as.df, x.df, info="comparing as.data.frame(x) to as.data.frame(as.data.table(x))"))

  return(invisible(ret))
}


###############################################################################
## as.* conversions
## Matrices, arrays of one to four dimensions, and lists mixing arrays,
## matrices and vectors must convert the same way through as.data.table()
## and as.data.frame().
test_that("convert matrices to data.table/data.frame", {
  mat_num  <- matrix(1:12, ncol=3)
  mat_char <- matrix(LETTERS[1:12], ncol=3)

  A <- array(1:30, dim=c(3, 2, 5))
  ## 3 * 2 * 5 * 2 = 60 cells, so supply exactly 60 values
  B <- array(-(1:60), dim=c(3, 2, 5, 2))
  A_onedim     <- array(1:3, dim=c(3))
  A_twodim     <- array(1:6, dim=c(3, 2))
  A_twodim_1x6 <- array(1:6, dim=c(1, 6))

  ## character versions of A and B with the same dimensions
  A.char <- copy(A)
  A.char[] <- c(LETTERS, letters)[A]
  B.char <- copy(B)
  B.char[] <- vapply(seq_along(B),
                     function(i) paste(sample(LETTERS, 3), collapse=""),
                     character(1))

  ## t(mat_char) has 3 rows, matching A, B and 101:103; mat_char has 4 rows
  list_of_arrays           <- list(A, B, 101:103)
  list_of_arrays_mats      <- list(A, B, t(mat_char), 101:103)
  list_of_arrays_mats2     <- list(B.char, A, B, t(mat_char), 101:103)
  list_of_one_dim_arrays   <- list(A_onedim, A_onedim)
  list_of_arrays_mats_fail <- list(A, B, mat_char, 101:103) # failure expected for this one

  expect_error(as.data.table(list_of_arrays_mats_fail), regexp="imply differing number", info="list of matrix and vectors. as.data.table should fail")

  expect_true( as.dt_as.df_are.same( mat_char ) )
  expect_true( as.dt_as.df_are.same( mat_num ) )
  expect_true( as.dt_as.df_are.same( A ) )
  expect_true( as.dt_as.df_are.same( B ) )
  expect_true( as.dt_as.df_are.same( A_onedim ) )
  expect_true( as.dt_as.df_are.same( A_twodim ) )
  expect_true( as.dt_as.df_are.same( A_twodim_1x6 ) )
  expect_true( as.dt_as.df_are.same( A.char ) )
  expect_true( as.dt_as.df_are.same( B.char ) )
  expect_true( as.dt_as.df_are.same( list_of_arrays ) )
  expect_true( as.dt_as.df_are.same( list_of_arrays_mats ) )
  expect_true( as.dt_as.df_are.same( list_of_arrays_mats2 ) )
  expect_true( as.dt_as.df_are.same( list_of_one_dim_arrays, showWarnings=FALSE ) )

})