Skip to content

Commit

Permalink
Add group_by_. Deprecate regroup. #352
Browse files Browse the repository at this point in the history
  • Loading branch information
hadley committed Sep 25, 2014
1 parent 7ada534 commit bdc8b2c
Show file tree
Hide file tree
Showing 11 changed files with 70 additions and 68 deletions.
13 changes: 6 additions & 7 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ S3method(format,src_local)
S3method(format,src_sql)
S3method(format_v,character)
S3method(format_v,default)
S3method(group_by_,data.frame)
S3method(group_by_,data.table)
S3method(group_by_,rowwise_df)
S3method(group_by_,tbl_cube)
S3method(group_by_,tbl_sql)
S3method(group_size,data.frame)
S3method(group_size,grouped_df)
S3method(group_size,grouped_dt)
Expand Down Expand Up @@ -171,13 +176,6 @@ S3method(print,tbl_df)
S3method(print,tbl_dt)
S3method(print,tbl_sql)
S3method(query,DBIConnection)
S3method(regroup,data.frame)
S3method(regroup,data.table)
S3method(regroup,grouped_dt)
S3method(regroup,rowwise_df)
S3method(regroup,tbl_cube)
S3method(regroup,tbl_dt)
S3method(regroup,tbl_sql)
S3method(rename_,data.frame)
S3method(rename_,data.table)
S3method(rename_,grouped_df)
Expand Down Expand Up @@ -344,6 +342,7 @@ export(funs_q)
export(get_cluster)
export(glimpse)
export(group_by)
export(group_by_)
export(group_size)
export(grouped_df)
export(grouped_dt)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# dplyr 0.2.0.9000

* `regroup()` is deprecated. Please use the more flexible `group_by_()`
instead.

* You can now program with dplyr - every function that does non-standard
evaluation (NSE) has a standard evaluation (SE) version ending in `_`.
This is powered by the new lazyeval package which provides all the tools
Expand Down
55 changes: 34 additions & 21 deletions R/group-by.r
Original file line number Diff line number Diff line change
Expand Up @@ -56,30 +56,49 @@
#'
#' # Duplicate groups are silently dropped
#' groups(group_by(by_cyl, cyl, cyl))
#' @aliases regroup
group_by <- function(.data, ..., add = FALSE) {
  # Non-standard-evaluation front end: capture the grouping expressions
  # lazily and hand them to the standard-evaluation generic, which performs
  # the dispatch. Nothing else is computed here.
  group_by_(.data, .dots = lazyeval::lazy_dots(...), add = add)
}

#' @export
#' @rdname group_by
group_by_ <- function(.data, ..., .dots, add = FALSE) {
  # S3 generic: pick the method from the class of `.data`.
  UseMethod("group_by_", .data)
}

#' Prepare for grouping.
#'
#' Performs the standard operations that should happen before individual
#' methods process the data: mutating the tbl to add any new grouping columns,
#' and updating the groups (appending to the existing ones when \code{add} is
#' \code{TRUE}).
#'
#' @return A list
#' \item{data}{Modified tbl}
#' \item{groups}{Modified groups}
#' @noRd
group_by_prepare <- function(.data, ..., .dots, add = FALSE) {
  new_groups <- lazyeval::all_dots(.dots, ..., env = parent.frame())

  # A grouping term has to be materialised as a column first when it is
  # either a call rather than a bare name, or was supplied with an explicit
  # name. Each element is a lazy object, so the expression lives in `$expr`.
  is_name <- vapply(new_groups, function(x) is.name(x$expr), logical(1))
  has_name <- names2(new_groups) != ""

  needs_mutate <- has_name | !is_name
  if (any(needs_mutate)) {
    .data <- mutate_(.data, .dots = new_groups[needs_mutate])
  }

  # Once we've done the mutate, we no longer need lazy objects, and
  # can instead just use symbols
  new_groups <- lazyeval::auto_name(new_groups)
  group_syms <- lapply(names(new_groups), as.name)
  if (add) {
    # Keep the existing grouping and append the new variables to it.
    group_syms <- c(groups(.data), group_syms)
  }
  # Duplicate groups are silently dropped.
  group_syms <- group_syms[!duplicated(group_syms)]

  list(data = .data, groups = group_syms)
}

#' Get/set the grouping variables for tbl.
Expand All @@ -91,24 +110,18 @@ group_by <- function(.data, ..., add = FALSE) {
#' @param x data \code{\link{tbl}}
#' @param value a list of symbols
#' @export
#' @seealso \code{\link{group_by}} for a version that does non-standard
#' evaluation to save typing
#' @examples
#' grouped <- group_by(mtcars, cyl)
#' groups(grouped)
#' grouped <- regroup(grouped, list(quote(vs)))
#' groups(grouped)
#' groups(ungroup(grouped))
groups <- function(x) {
  # S3 generic: pick the method from the class of `x`.
  UseMethod("groups", x)
}

#' @export
#' @rdname groups
regroup <- function(x, value) {
  # Deprecated shim kept for backward compatibility. It is no longer an S3
  # generic: it warns, then forwards the list of grouping symbols to
  # group_by_(), which does the dispatch.
  .Deprecated("group_by_")
  stopifnot(is.list(value))

  group_by_(x, .dots = value)
}

#' @export
Expand Down
15 changes: 3 additions & 12 deletions R/grouped-dt.r
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,9 @@ n_groups.grouped_dt <- function(x) {
}

#' @export
regroup.data.table <- function(x, value) {
grouped_dt(x, unname(value))
}

#' @export
regroup.tbl_dt <- function(x, value) {
grouped_dt(x, unname(value))
}

#' @export
regroup.grouped_dt <- function(x, value) {
grouped_dt(x, unname(value))
group_by_.data.table <- function(.data, ..., .dots, add = FALSE) {
  # The first formal is `.data` so the method signature agrees with the
  # group_by_() generic.
  # group_by_prepare() returns the (possibly mutated) table in `$data` and
  # the final grouping symbols in `$groups`.
  prepared <- group_by_prepare(.data, ..., .dots = .dots, add = add)
  grouped_dt(prepared$data, prepared$groups)
}

#' @export
Expand Down
11 changes: 7 additions & 4 deletions R/manip-cube.r
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,15 @@ find_index <- function(x, names) {
}

#' @export
regroup.tbl_cube <- function(x, value) {
nms <- names(x$dims)
group_by_.tbl_cube <- function(.data, ..., .dots, add = FALSE) {
  # The first formal is `.data` so the method signature agrees with the
  # group_by_() generic.
  groups <- group_by_prepare(.data, ..., .dots = .dots, add = add)

  # Convert symbols to indices: evaluating each grouping symbol in a list
  # that maps dimension name -> position yields that dimension's index.
  nms <- names(groups$data$dims)
  nms_list <- as.list(setNames(seq_along(nms), nms))

  # Spell out `$groups` in full rather than relying on `$` partial matching.
  groups$data$groups <- unlist(lapply(groups$groups, eval, nms_list))
  groups$data
}

#' @export
Expand Down
9 changes: 4 additions & 5 deletions R/manip-sql.r
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,9 @@ mutate_.tbl_sql <- function(.data, ..., .dots) {
}

#' @export
regroup.tbl_sql <- function(x, value) {
if (!all_apply(value, is.name)) {
stop("May only group by variable names, not expressions", call. = FALSE)
}
group_by_.tbl_sql <- function(.data., ..., .dots, add = FALSE) {
groups <- group_by_prepare(.data., ..., .dots = .dots, add = add)
x <- groups$data

# Effect of group_by on previous operations:
# * select: none
Expand All @@ -108,5 +107,5 @@ regroup.tbl_sql <- function(x, value) {
if (needed) {
x <- collapse(update(x, order_by = NULL))
}
update(x, group_by = unname(value), order_by = arrange)
update(x, group_by = groups$groups, order_by = arrange)
}
7 changes: 5 additions & 2 deletions R/rowwise.r
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ n_groups.rowwise_df <- function(x) {
}

#' @export
regroup.rowwise_df <- function(x, value) {
group_by_.rowwise_df <- function(.data, ..., .dots, add = FALSE) {
  warning("Grouping rowwise data frame strips rowwise nature", call. = FALSE)
  # Drop the rowwise structure first; the ungrouped object is what must be
  # handed on, so the formal and the local have to share the name `.data`.
  .data <- ungroup(.data)

  groups <- group_by_prepare(.data, ..., .dots = .dots, add = add)
  grouped_df(groups$data, groups$groups)
}
5 changes: 3 additions & 2 deletions R/tbl-data-frame.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Grouping methods ------------------------------------------------------------

#' @export
regroup.data.frame <- function(x, value) {
grouped_df(x, value)
group_by_.data.frame <- function(.data, ..., .dots, add = FALSE) {
  # The first formal is `.data` so the method signature agrees with the
  # group_by_() generic.
  # group_by_prepare() returns the (possibly mutated) data in `$data` and
  # the final grouping symbols in `$groups`.
  prepared <- group_by_prepare(.data, ..., .dots = .dots, add = add)
  grouped_df(prepared$data, prepared$groups)
}

#' @export
Expand Down
4 changes: 4 additions & 0 deletions man/group_by.Rd
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
\name{group_by}
\alias{group_by}
\alias{group_by_}
\alias{regroup}
\title{Group a tbl by one or more variables.}
\usage{
group_by(.data, ..., add = FALSE)

group_by_(.data, ..., .dots, add = FALSE)
}
\arguments{
\item{.data}{a tbl}
Expand Down
9 changes: 0 additions & 9 deletions man/groups.Rd
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
\name{groups}
\alias{groups}
\alias{regroup}
\alias{ungroup}
\title{Get/set the grouping variables for tbl.}
\usage{
groups(x)

regroup(x, value)

ungroup(x)
}
\arguments{
Expand All @@ -24,12 +21,6 @@ inline way of removing existing grouping.
\examples{
grouped <- group_by(mtcars, cyl)
groups(grouped)
grouped <- regroup(grouped, list(quote(vs)))
groups(grouped)
groups(ungroup(grouped))
}
\seealso{
\code{\link{group_by}} for a version that does non-standard
evaluation to save typing
}

7 changes: 1 addition & 6 deletions tests/testthat/test-group-by.r
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,6 @@ df_var <- data.frame(
srcs <- temp_srcs(c("df", "dt"))
var_tbls <- temp_load(srcs, df_var)

group_by_ <- function(x, vars) {
call <- as.call(c(quote(group_by), quote(x), lapply(vars, as.symbol)))
eval(call)
}

test_that("local group_by preserves variable types", {
for(var in names(df_var)) {
expected <- data.frame(unique(df_var[[var]]), n = 1L,
Expand Down Expand Up @@ -175,7 +170,7 @@ test_that("data.table invalid .selfref issue (#475)", {
})

test_that("there can be 0 groups (#486)", {
data <- regroup(data.frame(a = numeric(0), g = character(0)), list(quote(g)))
data <- data.frame(a = numeric(0), g = character(0)) %>% group_by(g)
expect_equal(length(data$a), 0L)
expect_equal(length(data$g), 0L)
expect_equal(attr(data, "group_sizes"), integer(0))
Expand Down

0 comments on commit bdc8b2c

Please sign in to comment.