diff --git a/NEWS.md b/NEWS.md index 11a6e6999..14f676a86 100644 --- a/NEWS.md +++ b/NEWS.md @@ -321,6 +321,8 @@ 42. `DT[...,by={...}]` now handles expressions in `{`, [#3156](https://github.com/Rdatatable/data.table/issues/3156). Thanks to @tdhock for the report. +43. `:=` could change a `data.table` creation statement in the body of the function calling it, or a variable in calling scope, [#3890](https://github.com/Rdatatable/data.table/issues/3890). Many thanks to @kirillmayantsev for the detailed reports. + ## NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/R/as.data.table.R b/R/as.data.table.R index c077361be..71cbc310b 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -161,15 +161,16 @@ as.data.table.list = function(x, # TODO: port this as.data.table.list() to C and use MAYBE_REFERENCED(x) || ALTREP(x) to save some copies. # That saving used to be done by CcopyNamedInList but the copies happened again as well, so removing CcopyNamedInList is # not worse than before, and gets us in a better centralized place to port as.data.table.list to C and use MAYBE_REFERENCED - # again in future. + # again in future, for #617. } if (identical(x,list())) vector("list", nrow) else rep(x, length.out=nrow) # new objects don't need copy } vnames = character(ncol) k = 1L + n_null = 0L for(i in seq_len(n)) { xi = x[[i]] - if (is.null(xi)) next + if (is.null(xi)) { n_null = n_null+1L; next } if (eachnrow[i]>1L && nrow%%eachnrow[i]!=0L) # in future: eachnrow[i]!=nrow warning("Item ", i, " has ", eachnrow[i], " rows but longest item has ", nrow, "; recycled with remainder.") if (eachnrow[i]==0L && nrow>0L && is.atomic(xi)) # is.atomic to ignore list() since list() is a common way to initialize; let's not insist on list(NULL) @@ -183,7 +184,7 @@ as.data.table.list = function(x, } } else { nm = names(x)[i] - vnames[k] = if (length(nm) && !is.na(nm) && nm!="") nm else paste0("V",i) # i (not k) tested by 2058.14 to be the same as the past for now + vnames[k] = if (length(nm) && !is.na(nm) && nm!="") nm else paste0("V",i-n_null) # i (not k) tested by 2058.14 to be the same as the past for now ans[[k]] = recycle(xi, nrow) k = k+1L } diff --git a/R/data.table.R b/R/data.table.R index 5ad029232..a3ae2cc0d 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1281,24 +1281,11 @@ replace_dot_alias = function(e) { )) jval = lapply(jval, `[`, 0L) if (is.atomic(jval)) { - setattr(jval,"names",NULL) - jval = data.table(jval) # TO DO: should this be setDT(list(jval)) instead? - } else { - if (is.null(jvnames)) jvnames=names(jval) - lenjval = vapply(jval, length, 0L) - nulljval = vapply(jval, is.null, FALSE) - if (lenjval[1L]==0L || any(lenjval != lenjval[1L])) { - jval = as.data.table.list(jval) # does the vector expansion to create equal length vectors, and drops any NULL items - jvnames = jvnames[!nulljval] # fix for #1477 - } else { - # all columns same length and at least 1 row; avoid copy. TODO: remove when as.data.table.list is ported to C - setDT(jval) - } + setattr(jval,"names",NULL) # discard names of named vectors otherwise each cell in the column would have a name + jval = list(jval) } - if (is.null(jvnames)) jvnames = character(length(jval)-length(bynames)) - ww = which(jvnames=="") - if (any(ww)) jvnames[ww] = paste0("V",ww) - setnames(jval, jvnames) + if (!is.null(jvnames) && !all(jvnames=="")) setattr(jval, 'names', jvnames) # e.g. jvnames=="N" for DT[,.N,] + jval = as.data.table.list(jval, .named=NULL) } if (is.data.table(jval)) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c80a64b83..ce766eb14 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15149,6 +15149,9 @@ L = list(1:3, 4:6, 7:9, 10:12) names(L) = c("","foo","","foo") test(2058.17, as.data.table(L), setnames(data.table(1:3, 4:6, 7:9, 10:12),c("","foo","","foo"))) # no +L = list(1:3, NULL, 4:6) +test(2058.18, length(L), 3L) +test(2058.19, as.data.table(L), data.table(V1=1:3, V2=4:6)) # V2 not V3 # no # rbindlist improved error message, #3638 DT = data.table(a=1) @@ -15823,20 +15826,36 @@ test(2086.05, DT[ , sum(a), by = character()], data.table(V1=55L)) test(2086.06, DT[ , sum(a), keyby = character()], data.table(V1=55L)) # simple queries can create tables with columns sharing the same address, #3766 +# these tests were new in 1.12.4 dev. In late stage before release 2087.1 was changed to avoid the share for #3890 +# when #617 is done it will change back to being the same address x = data.table(a=1L, b=c(1L, 4L, 2L, 3L), c=4:1) -test(2087.1, x[a == 1L, .(b, b2=b)][ , identical(address(b), address(b2))]) +test(2087.01, x[a == 1L, .(b, b2=b)][ , address(b)!=address(b2)]) # setkey detects and copies shared address columns, #3496 x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE) x$b = x$a setDT(x) -test(2087.2, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), key="a"), - output='Found and copied 1 column with a shared memory address') +test(2087.02, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), key="a"), + output='Found and copied 1 column with a shared memory address') x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE) x$b = x$a x$c = x$a setDT(x) -test(2087.3, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), c=paste0(1:2), key="a"), - output='Found and copied 2 columns with a shared memory address') +test(2087.03, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), c=paste0(1:2), key="a"), + output='Found and copied 2 columns with a shared memory address') +# follow-up from #3890; function body and variable in calling scope +f = function(flag=FALSE) { + dt1 = data.table(a = 1) + dt2 = data.table(a = 1) + dt3 = dt1[, .(a, b = 0)] # (**) + if (flag) dt3[dt2, b := 999, on = "a"] + gsub(" ","",as.character(body(f)[[4L]])) # (**) above; remove spaces just to isolate from potential future formatting changes in R +} +test(2087.10, f(TRUE), c("=","dt3","dt1[,.(a,b=0)]")) # was "dt1[,.(a,b=999)]" in v1.12.2 +value = 0 +dt1 = data.table(a = 1) +dt2 = dt1[, .(a, b = ..value)] +dt2[1, b := 999] +test(2087.11, value, 0) # was 999 in v1.12.2 # clear '.data.table.locked' even when is.null(irows), #2245 x = data.table(a=c(0.85, -0.38, 1.19), b=c(0.56, 0.63, -1.30))