Skip to content

Commit

Permalink
Improved := verbose messages. Closes #1808
Browse files Browse the repository at this point in the history
  • Loading branch information
mattdowle committed Oct 19, 2016
1 parent df9cd4d commit 8c9cfc8
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 14 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@
73. Joins on key columns in the presence of `on=` argument were slightly slower as it was unnecesarily running a check to ensure orderedness. This is now fixed, [#1825](https://github.com/Rdatatable/data.table/issues/1825). Thanks @sz-cgt. See that post for updated benchmark.

74. `keyby=` now runs j in the order that the groups appear in the sorted result rather than first appearance order, [#606](https://github.com/Rdatatable/data.table/issues/606). This only makes a difference in very rare usage where j does something depending on an earlier group's result, perhaps by using `<<-`. If j is required to be run in first appearance order, then use `by=` whose behaviour is unchanged. Now we have this option. No existing tests affected. New tests added.

75. `:=` verbose messages have been corrected and improved, [#1808](https://github.com/Rdatatable/data.table/issues/1808). Thanks to @franknarf1 for reproducible examples. Tests added.

#### NOTES

Expand Down
13 changes: 8 additions & 5 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -1243,9 +1243,13 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
# We need this short circuit at all just for convenience. Otherwise users may need to
# fix errors in their RHS when called on empty edge cases, even when the result won't be
# used anyway (so it would be annoying to have to fix it.)
if (verbose) {
cat("No rows match i. No new columns to add so not evaluating RHS of :=\n")
cat("Assigning to 0 row subset of",nrow(x),"rows\n")
}
.global$print = address(x)
return(invisible(x))
}
}
} else {
# Adding new column(s). TO DO: move after the first eval in case the jsub has an error.
newnames=setdiff(lhs,names(x))
Expand Down Expand Up @@ -1451,12 +1455,11 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
if (!truelength(jval)) alloc.col(jval)
}
}

if (!is.null(lhs)) { # *** TO DO ***: use set() here now that it can add new column(s) and remove newnames and alloc logic above
if (verbose) cat("Assigning to ",if (is.null(irows)) "all " else paste(length(irows),"row subset of "), nrow(x)," rows\n",sep="")
if (!is.null(lhs)) {
# TODO?: use set() here now that it can add new columns. Then remove newnames and alloc logic above.
.Call(Cassign,x,irows,cols,newnames,jval,verbose)
return(suppPrint(x))
}
}
if ((is.call(jsub) && is.list(jval) && jsub[[1L]] != "get" && !is.object(jval)) || !missing(by)) {
# is.call: selecting from a list column should return list
# is.object: for test 168 and 168.1 (S4 object result from ggplot2::qplot). Just plain list results should result in data.table
Expand Down
14 changes: 12 additions & 2 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -7060,8 +7060,8 @@ test(1575.2, X[Y, on=c(a="a")], error="not found in i")
X = data.table(x=5:1, y=6:10)
setattr(X, 'index', integer(0))
setattr(attr(X, 'index'), 'x', 5:1) # auto indexed attribute as created from v1.9.4
ans = capture.output(X[, z := 1:5, verbose=TRUE])
test(1576, ans[4], "Dropping index 'x' as it doesn't have '__' at the beginning of index name. It is very likely created using v1.9.4 of data.table.")
test(1576, X[, z := 1:5, verbose=TRUE],
output = "Dropping index 'x' as.*beginning of its name.*very likely created by v1.9.4 of data.table")

# fix for #1408
X = fread("a|b|c|d
Expand Down Expand Up @@ -9288,6 +9288,16 @@ test(1726.3, DT[, {ans=mean(val)+lastGrp; lastGrp<<-min(val); .(ans, .GRP)}, by=
test(1726.4, lastGrp, 7L)
rm(lastGrp)

# better := verbose messages, #1808
DT = data.table(a = 1:10)
test(1727.1, DT[a < 5, a := 5L, verbose=TRUE], output="Assigning to 4 row subset of 10 rows")
test(1727.2, DT[a < 5, a := 5L, verbose=TRUE], output="No rows match i.*Assigning to 0 row subset of 10 rows")
test(1727.3, DT[0, d:=1, verbose=TRUE], data.table(a=c(rep(5L,5L),6:10), d=NA_real_),
output = "Assigning to 0 row subset of 10 rows.*Added 1 new column initialized with all-NA")
test(1727.4, DT[.(a=11L), on="a", c("f","g"):=.(1L,"dummy"), verbose=TRUE],
data.table(a=c(rep(5L,5L),6:10), d=NA_real_, f=NA_integer_, g=NA_character_),
output = "Assigning to 0 row subset of 10 rows.*Added 2 new columns initialized with all-NA")


##########################

Expand Down
20 changes: 13 additions & 7 deletions src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -321,24 +321,30 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v
nrow = length(VECTOR_ELT(dt,0));
if (isNull(rows)) {
targetlen = nrow;
if (verbose) Rprintf("Assigning to all %d rows\n", nrow);
// fast way to assign to whole column, without creating 1:nrow(x) vector up in R, or here in C
} else {
if (isReal(rows)) {
rows = PROTECT(rows = coerceVector(rows, INTSXP));
protecti++;
warning("Coerced i from numeric to integer. Please pass integer for efficiency; e.g., 2L rather than 2");
}
}
if (!isInteger(rows))
error("i is type '%s'. Must be integer, or numeric is coerced with warning. If i is a logical subset, simply wrap with which(), and take the which() outside the loop if possible for efficiency.", type2char(TYPEOF(rows)));
targetlen = length(rows);
Rboolean anyToDo = FALSE;
for (i=0;i<targetlen;i++) {
int numToDo = 0;
for (i=0; i<targetlen; i++) {
if ((INTEGER(rows)[i]<0 && INTEGER(rows)[i]!=NA_INTEGER) || INTEGER(rows)[i]>nrow)
error("i[%d] is %d which is out of range [1,nrow=%d].",i+1,INTEGER(rows)[i],nrow);
if (INTEGER(rows)[i]>=1) anyToDo = TRUE;
if (INTEGER(rows)[i]>=1) numToDo++;
}
if (verbose) Rprintf("Assigning to %d row subset of %d rows\n", numToDo, nrow);
// TODO: include in message if any rows are assigned several times (e.g. by=.EACHI with dups in i)
if (numToDo==0) {
if (!length(newcolnames)) return(dt); // all items of rows either 0 or NA. !length(newcolnames) for #759
if (verbose) Rprintf("Added %d new column%s initialized with all-NA\n",
length(newcolnames), (length(newcolnames)>1)?"s":"");
}
// added !length(newcolnames) for #759 fix
if (!anyToDo && !length(newcolnames)) return(dt); // all items of rows either 0 or NA, nothing to do.
}
if (!length(cols)) {
warning("length(LHS)==0; no columns to delete or assign RHS to.");
Expand Down Expand Up @@ -636,7 +642,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v
if (*tc1!='_' || *(tc1+1)!='_') {
// fix for #1396
if (verbose) {
Rprintf("Dropping index '%s' as it doesn't have '__' at the beginning of index name. It is very likely created using v1.9.4 of data.table.\n", c1);
Rprintf("Dropping index '%s' as it doesn't have '__' at the beginning of its name. It was very likely created by v1.9.4 of data.table.\n", c1);
}
setAttrib(index, a, R_NilValue);
i = LENGTH(cols);
Expand Down

0 comments on commit 8c9cfc8

Please sign in to comment.