Rdatatable · mattdowle · Aug 27, 2019 · Aug 27, 2019 · Aug 27, 2019 · Aug 27, 2019
@@ -234,7 +234,9 @@
 
 30. `groupingsets` functions now properly handle alone special symbols when using an empty set to group by, [#3653](https://github.com/Rdatatable/data.table/issues/3653). Thanks to @Henrik-P for the report.
 
-31. Some operations in `j` could leave the output with `.data.table.locked=TRUE`, preventing mutation of the table downstream, [#2245](https://github.com/Rdatatable/data.table/issues/2245). Thanks @grayskripko for raising.
+31. `setkey()` could return incorrect results on a `data.table` created by `setDT()` from a `data.frame` in which several columns share the same memory address (for example after `x$b = x$a`), [#3496](https://github.com/Rdatatable/data.table/issues/3496) and [#3766](https://github.com/Rdatatable/data.table/issues/3766). Thanks @kirillmayantsev and @alex46015 for reporting, and @jaapwalhout and @Atrebas for helping to debug and isolate the issue.
+
+32. `x[, round(.SD, 1)]` and similar operations on the whole of `.SD` could return a locked result, incorrectly preventing `:=` on the result, [#2245](https://github.com/Rdatatable/data.table/issues/2245). Thanks @grayskripko for raising.
 
 #### NOTES
 

@@ -1265,6 +1265,8 @@ replace_order = function(isub, verbose, env) {
     }
 
     jval = eval(jsub, SDenv, parent.frame())
+    .Call(Csetattrib, jval, '.data.table.locked', NULL) # in case jval inherits .SD's lock, #1341 #2245. Use .Call not setattr() to avoid bumping jval's MAYBE_REFERENCED.
+
     # copy 'jval' when required
     # More speedup - only check + copy if irows is NULL
     # Temp fix for #921 - check address and copy *after* evaluating 'jval'
@@ -1280,11 +1282,6 @@ replace_order = function(isub, verbose, env) {
         jval = copy(jval) # fix for #1212
       }
     }
-    # #2245 jval can be data.table even if is.null(irows); unlock then as well
-    if (is.data.table(jval)) {
-      setattr(jval, '.data.table.locked', NULL) # fix for #1341
-      if (!truelength(jval)) alloc.col(jval)
-    }
 
     if (!is.null(lhs)) {
       # TODO?: use set() here now that it can add new columns. Then remove newnames and alloc logic above.

@@ -15771,9 +15771,25 @@ test(2086.04, DT[ , sum(a), keyby = list()], data.table(V1=55L))
 test(2086.05, DT[ , sum(a), by = character()], data.table(V1=55L))
 test(2086.06, DT[ , sum(a), keyby = character()], data.table(V1=55L))
 
-## #2245 unset .data.table.locked even if is.null(irows)
-x <- data.table(a=c(0.85, -0.38, 1.19), b=c(0.56, 0.63, -1.30))
-test(2087, x[ , round(.SD, 1)][ , c := 8], data.table(a=c(.8, -.4, 1.2), b=c(.6,.6,-1.3), c=8))
+# simple queries can create tables with columns sharing the same address, #3766
+x = data.table(a=1L, b=c(1L, 4L, 2L, 3L), c=4:1)
+test(2087.1, x[a == 1L, .(b, b2=b)][ , identical(address(b), address(b2))])
+# setkey detects and copies shared address columns, #3496
+x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE)
+x$b = x$a
+setDT(x)
+test(2087.2, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), key="a"),
+             output='Found and copied 1 column with a shared memory address')
+x = data.frame(a=paste0(2:1), stringsAsFactors=FALSE)
+x$b = x$a
+x$c = x$a
+setDT(x)
+test(2087.3, setkey(x, a, verbose=TRUE), data.table(a=paste0(1:2), b=paste0(1:2), c=paste0(1:2), key="a"),
+             output='Found and copied 2 columns with a shared memory address')
+
+# clear '.data.table.locked' even when is.null(irows), #2245
+x = data.table(a=c(0.85, -0.38, 1.19), b=c(0.56, 0.63, -1.30))
+test(2088, x[, round(.SD, 1)][, c:=8.88], data.table(a=c(.8, -.4, 1.2), b=c(.6,.6,-1.3), c=8.88))
 
 
 ###################################

@@ -473,12 +473,12 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
          (TYPEOF(values)!=VECSXP && i>0) // assigning the same values to a second column. Have to ensure a copy #2540
          ) {
         if (verbose) {
-          Rprintf("RHS for item %d has been duplicated because NAMED is %d, but then is being plonked. length(values)==%d; length(cols)==%d)\n",
-                  i+1, NAMED(thisvalue), length(values), length(cols));
+          Rprintf("RHS for item %d has been duplicated because NAMED==%d MAYBE_SHARED==%d, but then is being plonked. length(values)==%d; length(cols)==%d)\n",
+                  i+1, NAMED(thisvalue), MAYBE_SHARED(thisvalue), length(values), length(cols));
         }
         thisvalue = duplicate(thisvalue);   // PROTECT not needed as assigned as element to protected list below.
       } else {
-        if (verbose) Rprintf("Direct plonk of unnamed RHS, no copy.\n");  // e.g. DT[,a:=as.character(a)] as tested by 754.3
+        if (verbose) Rprintf("Direct plonk of unnamed RHS, no copy. NAMED==%d, MAYBE_SHARED==%d\n", NAMED(thisvalue), MAYBE_SHARED(thisvalue));  // e.g. DT[,a:=as.character(a)] as tested by 754.3
       }
       SET_VECTOR_ELT(dt, coln, thisvalue);                 // plonk new column in as it's already the correct length
       setAttrib(thisvalue, R_NamesSymbol, R_NilValue);     // clear names such as  DT[,a:=mapvector[a]]