Improved := verbose messages. Closes #1808

Rdatatable · Oct 19, 2016 · 8c9cfc8 · 8c9cfc8
1 parent df9cd4d
commit 8c9cfc8
Show file tree

Hide file tree

Showing 4 changed files with 35 additions and 14 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -248,6 +248,8 @@
   73. Joins on key columns in the presence of `on=` argument were slightly slower as it was unnecesarily running a check to ensure orderedness. This is now fixed, [#1825](https://github.com/Rdatatable/data.table/issues/1825). Thanks @sz-cgt. See that post for updated benchmark.
 
   74. `keyby=` now runs j in the order that the groups appear in the sorted result rather than first appearance order, [#606](https://github.com/Rdatatable/data.table/issues/606). This only makes a difference in very rare usage where j does something depending on an earlier group's result, perhaps by using `<<-`. If j is required to be run in first appearance order, then use `by=` whose behaviour is unchanged. Now we have this option. No existing tests affected. New tests added.
+
+  75. `:=` verbose messages have been corrected and improved, [#1808](https://github.com/Rdatatable/data.table/issues/1808). Thanks to @franknarf1 for reproducible examples. Tests added.
 
 #### NOTES
 

diff --git a/R/data.table.R b/R/data.table.R
@@ -1243,9 +1243,13 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
                         # We need this short circuit at all just for convenience. Otherwise users may need to
                         # fix errors in their RHS when called on empty edge cases, even when the result won't be
                         # used anyway (so it would be annoying to have to fix it.)
+                        if (verbose) {
+                            cat("No rows match i. No new columns to add so not evaluating RHS of :=\n")
+                            cat("Assigning to 0 row subset of",nrow(x),"rows\n")
+                        }
                         .global$print = address(x)
                         return(invisible(x))
-                    }   
+                    }
                 } else {
                     # Adding new column(s). TO DO: move after the first eval in case the jsub has an error.
                     newnames=setdiff(lhs,names(x))
@@ -1451,12 +1455,11 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
                 if (!truelength(jval)) alloc.col(jval)
             }
         }
-
-        if (!is.null(lhs)) {   # *** TO DO ***: use set() here now that it can add new column(s) and remove newnames and alloc logic above
-            if (verbose) cat("Assigning to ",if (is.null(irows)) "all " else paste(length(irows),"row subset of "), nrow(x)," rows\n",sep="")
+        if (!is.null(lhs)) {
+            # TODO?: use set() here now that it can add new columns. Then remove newnames and alloc logic above.
             .Call(Cassign,x,irows,cols,newnames,jval,verbose)
             return(suppPrint(x))
-        }        
+        }
         if ((is.call(jsub) && is.list(jval) && jsub[[1L]] != "get" && !is.object(jval)) || !missing(by)) {
             # is.call: selecting from a list column should return list
             # is.object: for test 168 and 168.1 (S4 object result from ggplot2::qplot). Just plain list results should result in data.table

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
@@ -7060,8 +7060,8 @@ test(1575.2, X[Y, on=c(a="a")], error="not found in i")
 X = data.table(x=5:1, y=6:10)
 setattr(X, 'index', integer(0))
 setattr(attr(X, 'index'), 'x', 5:1) # auto indexed attribute as created from v1.9.4
-ans = capture.output(X[, z := 1:5, verbose=TRUE])
-test(1576, ans[4], "Dropping index 'x' as it doesn't have '__' at the beginning of index name. It is very likely created using v1.9.4 of data.table.")
+test(1576, X[, z := 1:5, verbose=TRUE],
+    output = "Dropping index 'x' as.*beginning of its name.*very likely created by v1.9.4 of data.table")
 
 # fix for #1408
 X = fread("a|b|c|d
@@ -9288,6 +9288,16 @@ test(1726.3, DT[, {ans=mean(val)+lastGrp; lastGrp<<-min(val); .(ans, .GRP)}, by=
 test(1726.4, lastGrp, 7L)
 rm(lastGrp)
 
+# better := verbose messages, #1808
+DT = data.table(a = 1:10)
+test(1727.1, DT[a < 5, a := 5L, verbose=TRUE], output="Assigning to 4 row subset of 10 rows")
+test(1727.2, DT[a < 5, a := 5L, verbose=TRUE], output="No rows match i.*Assigning to 0 row subset of 10 rows")
+test(1727.3, DT[0, d:=1, verbose=TRUE], data.table(a=c(rep(5L,5L),6:10), d=NA_real_),
+             output = "Assigning to 0 row subset of 10 rows.*Added 1 new column initialized with all-NA")            
+test(1727.4, DT[.(a=11L), on="a", c("f","g"):=.(1L,"dummy"), verbose=TRUE],
+             data.table(a=c(rep(5L,5L),6:10), d=NA_real_, f=NA_integer_, g=NA_character_),
+             output = "Assigning to 0 row subset of 10 rows.*Added 2 new columns initialized with all-NA")
+
 
 ##########################
 

diff --git a/src/assign.c b/src/assign.c
@@ -321,24 +321,30 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v
     nrow = length(VECTOR_ELT(dt,0));
     if (isNull(rows)) {
         targetlen = nrow;
+        if (verbose) Rprintf("Assigning to all %d rows\n", nrow);
         // fast way to assign to whole column, without creating 1:nrow(x) vector up in R, or here in C
     } else {
         if (isReal(rows)) {
             rows = PROTECT(rows = coerceVector(rows, INTSXP));
             protecti++;
             warning("Coerced i from numeric to integer. Please pass integer for efficiency; e.g., 2L rather than 2");
-        }    
+        }
         if (!isInteger(rows))
             error("i is type '%s'. Must be integer, or numeric is coerced with warning. If i is a logical subset, simply wrap with which(), and take the which() outside the loop if possible for efficiency.", type2char(TYPEOF(rows)));
         targetlen = length(rows);
-        Rboolean anyToDo = FALSE;
-        for (i=0;i<targetlen;i++) {
+        int numToDo = 0;
+        for (i=0; i<targetlen; i++) {
             if ((INTEGER(rows)[i]<0 && INTEGER(rows)[i]!=NA_INTEGER) || INTEGER(rows)[i]>nrow)
                 error("i[%d] is %d which is out of range [1,nrow=%d].",i+1,INTEGER(rows)[i],nrow);
-            if (INTEGER(rows)[i]>=1) anyToDo = TRUE;
+            if (INTEGER(rows)[i]>=1) numToDo++;
+        }
+        if (verbose) Rprintf("Assigning to %d row subset of %d rows\n", numToDo, nrow);
+        // TODO: include in message if any rows are assigned several times (e.g. by=.EACHI with dups in i)
+        if (numToDo==0) {
+            if (!length(newcolnames)) return(dt); // all items of rows either 0 or NA. !length(newcolnames) for #759
+            if (verbose) Rprintf("Added %d new column%s initialized with all-NA\n",
+                                 length(newcolnames), (length(newcolnames)>1)?"s":"");
         }
-        // added !length(newcolnames) for #759 fix
-        if (!anyToDo && !length(newcolnames)) return(dt);  // all items of rows either 0 or NA, nothing to do.  
     }
     if (!length(cols)) {
         warning("length(LHS)==0; no columns to delete or assign RHS to.");
@@ -636,7 +642,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v
                     if (*tc1!='_' || *(tc1+1)!='_') {
                     	// fix for #1396
                     	if (verbose) {
-                    		Rprintf("Dropping index '%s' as it doesn't have '__' at the beginning of index name. It is very likely created using v1.9.4 of data.table.\n", c1);
+                    		Rprintf("Dropping index '%s' as it doesn't have '__' at the beginning of its name. It was very likely created by v1.9.4 of data.table.\n", c1);
                     	}
                     	setAttrib(index, a, R_NilValue);
                     	i = LENGTH(cols);