Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

:= could change calling body and variable in calling scope #3891

Merged
merged 9 commits into from
Sep 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,8 @@

42. `DT[...,by={...}]` now handles expressions in `{`, [#3156](https://github.com/Rdatatable/data.table/issues/3156). Thanks to @tdhock for the report.

43. `:=` could change a `data.table` creation statement in the body of the function calling it, or a variable in calling scope, [#3890](https://github.com/Rdatatable/data.table/issues/3890). Many thanks to @kirillmayantsev for the detailed reports.

## NOTES

1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
Expand Down
7 changes: 4 additions & 3 deletions R/as.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -161,15 +161,16 @@ as.data.table.list = function(x,
# TODO: port this as.data.table.list() to C and use MAYBE_REFERENCED(x) || ALTREP(x) to save some copies.
# That saving used to be done by CcopyNamedInList but the copies happened again as well, so removing CcopyNamedInList is
# not worse than before, and gets us in a better centralized place to port as.data.table.list to C and use MAYBE_REFERENCED
# again in future.
# again in future, for #617.
}
if (identical(x,list())) vector("list", nrow) else rep(x, length.out=nrow) # new objects don't need copy
}
vnames = character(ncol)
k = 1L
n_null = 0L
for(i in seq_len(n)) {
xi = x[[i]]
if (is.null(xi)) next
if (is.null(xi)) { n_null = n_null+1L; next }
if (eachnrow[i]>1L && nrow%%eachnrow[i]!=0L) # in future: eachnrow[i]!=nrow
warning("Item ", i, " has ", eachnrow[i], " rows but longest item has ", nrow, "; recycled with remainder.")
if (eachnrow[i]==0L && nrow>0L && is.atomic(xi)) # is.atomic to ignore list() since list() is a common way to initialize; let's not insist on list(NULL)
Expand All @@ -183,7 +184,7 @@ as.data.table.list = function(x,
}
} else {
nm = names(x)[i]
vnames[k] = if (length(nm) && !is.na(nm) && nm!="") nm else paste0("V",i) # i (not k) tested by 2058.14 to be the same as the past for now
vnames[k] = if (length(nm) && !is.na(nm) && nm!="") nm else paste0("V",i-n_null) # i (not k) tested by 2058.14 to be the same as the past for now
ans[[k]] = recycle(xi, nrow)
k = k+1L
}
Expand Down
21 changes: 4 additions & 17 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -1281,24 +1281,11 @@ replace_dot_alias = function(e) {
))
jval = lapply(jval, `[`, 0L)
if (is.atomic(jval)) {
setattr(jval,"names",NULL)
jval = data.table(jval) # TO DO: should this be setDT(list(jval)) instead?
} else {
if (is.null(jvnames)) jvnames=names(jval)
lenjval = vapply(jval, length, 0L)
nulljval = vapply(jval, is.null, FALSE)
if (lenjval[1L]==0L || any(lenjval != lenjval[1L])) {
jval = as.data.table.list(jval) # does the vector expansion to create equal length vectors, and drops any NULL items
jvnames = jvnames[!nulljval] # fix for #1477
} else {
# all columns same length and at least 1 row; avoid copy. TODO: remove when as.data.table.list is ported to C
setDT(jval)
}
setattr(jval,"names",NULL) # discard names of named vectors otherwise each cell in the column would have a name
jval = list(jval)
}
if (is.null(jvnames)) jvnames = character(length(jval)-length(bynames))
ww = which(jvnames=="")
if (any(ww)) jvnames[ww] = paste0("V",ww)
setnames(jval, jvnames)
if (!is.null(jvnames) && !all(jvnames=="")) setattr(jval, 'names', jvnames) # e.g. jvnames=="N" for DT[,.N,]
jval = as.data.table.list(jval, .named=NULL)
}

if (is.data.table(jval)) {
Expand Down
29 changes: 24 additions & 5 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -15149,6 +15149,9 @@ L = list(1:3, 4:6, 7:9, 10:12)
names(L) = c("","foo","","foo")
test(2058.17, as.data.table(L),
setnames(data.table(1:3, 4:6, 7:9, 10:12),c("","foo","","foo"))) # no
L = list(1:3, NULL, 4:6)
test(2058.18, length(L), 3L)
test(2058.19, as.data.table(L), data.table(V1=1:3, V2=4:6)) # V2 not V3 # no

# rbindlist improved error message, #3638
DT = data.table(a=1)
Expand Down Expand Up @@ -15823,20 +15826,36 @@ test(2086.05, DT[ , sum(a), by = character()], data.table(V1=55L))
test(2086.06, DT[ , sum(a), keyby = character()], data.table(V1=55L))

# simple queries can create tables with columns sharing the same address, #3766
# these tests were new in 1.12.4 dev. In late stage before release 2087.1 was changed to avoid the share for #3890
# when #617 is done it will change back to being the same address
x = data.table(a=1L, b=c(1L, 4L, 2L, 3L), c=4:1)
test(2087.1, x[a == 1L, .(b, b2=b)][ , identical(address(b), address(b2))])
test(2087.01, x[a == 1L, .(b, b2=b)][ , address(b)!=address(b2)])
# setkey detects and copies shared address columns, #3496
x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE)
x$b = x$a
setDT(x)
test(2087.2, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), key="a"),
output='Found and copied 1 column with a shared memory address')
test(2087.02, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), key="a"),
output='Found and copied 1 column with a shared memory address')
x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE)
x$b = x$a
x$c = x$a
setDT(x)
test(2087.3, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), c=paste0(1:2), key="a"),
output='Found and copied 2 columns with a shared memory address')
test(2087.03, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), c=paste0(1:2), key="a"),
output='Found and copied 2 columns with a shared memory address')
# follow-up from #3890; function body and variable in calling scope
f = function(flag=FALSE) {
dt1 = data.table(a = 1)
dt2 = data.table(a = 1)
dt3 = dt1[, .(a, b = 0)] # (**)
if (flag) dt3[dt2, b := 999, on = "a"]
gsub(" ","",as.character(body(f)[[4L]])) # (**) above; remove spaces just to isolate from potential future formatting changes in R
}
test(2087.10, f(TRUE), c("=","dt3","dt1[,.(a,b=0)]")) # was "dt1[,.(a,b=999)]" in v1.12.2
value = 0
dt1 = data.table(a = 1)
dt2 = dt1[, .(a, b = ..value)]
dt2[1, b := 999]
test(2087.11, value, 0) # was 999 in v1.12.2

# clear '.data.table.locked' even when is.null(irows), #2245
x = data.table(a=c(0.85, -0.38, 1.19), b=c(0.56, 0.63, -1.30))
Expand Down