Skip to content

Commit

Permalink
Lots of changes
Browse files Browse the repository at this point in the history
  • Loading branch information
hadley committed Aug 20, 2008
1 parent e752979 commit 6058c3b
Show file tree
Hide file tree
Showing 19 changed files with 396 additions and 601 deletions.
13 changes: 7 additions & 6 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@

Reshape 0.8:

* preserve.na now renamed to na.rm to be consistent with other R functions
* raw names for columns
* margins now displayed with (all) instead of NA
* extend melt.array to deal with case where there are partial dimnames - Thanks to Roberto Ugoccioni
* add the Smiths dataset to the package
* fixed bug when displaying margins with multiple result variables
* melt.array now uses type.convert on dimnames to convert to appropriate type
* preserve.na now renamed to na.rm to be consistent with other R functions
* raw names for columns
* margins now displayed with (all) instead of NA
* extend melt.array to deal with case where there are partial dimnames - Thanks to Roberto Ugoccioni
* add the Smiths dataset to the package
* fixed bug when displaying margins with multiple result variables

Reshape 0.7.4
* only display all levels of a categorical variable when requested
Expand Down
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
Package: reshape
Package: reshape2
Type: Package
Title: Flexibly reshape data.
Version: 0.1
Version: 1.0
Date: 2008-05-01
Author: Hadley Wickham <h.wickham@gmail.com>
Maintainer: Hadley Wickham <h.wickham@gmail.com>
Description: Reshape lets you flexibly restructure and aggregate data using just two functions: melt and cast.
URL: http://had.co.nz/reshape
Depends: R (>= 2.5.1), plyr
Depends: R (>= 2.6.1), plyr
Suggests: RUnit
License: MIT
LazyData: true
7 changes: 6 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
Reshape 1.0 (2008-XX-XX) --------------------------

* stamp removed - if you were using it, ddply from the plyr package should be uniformly better

* major speed improvements
* formula now more flexible
* can contain functions
* -(a + b) works like ... but excludes variables a & b

* includes tests to try and stop me from breaking it quite so often during development

* add.all.combinations argument renamed to drop to be more consistent with other R functions, and should work much more reliably

* tweaks to cast and recast to make it easier to pass in formulas created elsewhere
* allow user to specify column used for values, guessing if necessary
* improve error messages when melt or casting parameters incorrectly specified
* by default, treat character and factor variables as id variables (i.e. integer variables no longer default to being id vars)
* helpful error message if value column missing when calling cast
* ... now passed on to melt in melt.list (thanks to Charles Naylor)
* fix add.all.combinations to work properly again for a wider range of input
44 changes: 22 additions & 22 deletions R/cast.r
Original file line number Diff line number Diff line change
Expand Up @@ -89,32 +89,32 @@
#X lattice::xyplot(`1` ~ `2` | variable, cast(ff_d, ... ~ rep), aspect="iso")
cast <- function(data, formula = ... ~ variable, fun.aggregate=NULL, ..., margins=FALSE, subset=TRUE, df=FALSE, fill=NA, add.missing=FALSE, value = guess_value(data)) {
  # Resolve the aggregation function (if supplied) and normalise the formula
  # to a character string, which is what cast_parse_formula() is given below.
  if (!is.null(fun.aggregate)) fun.aggregate <- match.fun(fun.aggregate)
  if (is.formula(formula)) formula <- deparse(formula)
  if (!is.character(formula)) formula <- as.character(formula)

  # Evaluate `subset` with the columns of `data` in scope and filter the rows.
  # This must happen exactly once: after `subset` has been reassigned,
  # substitute(subset) no longer yields the caller's expression, so repeating
  # these two lines would re-apply the old row filter to the filtered data.
  subset <- eval(substitute(subset), data, parent.frame())
  data <- data[subset, , drop=FALSE]
  variables <- cast_parse_formula(formula, names(data))

  # Rename the column chosen as the value column to "value".
  if (any(names(data) == value)) names(data)[names(data) == value] <- "value"

  # All variables named by the formula, minus the "result_variable" placeholder.
  v <- unlist(variables)
  v <- v[v != "result_variable"]
  if (add.missing) data[v] <- lapply(data[v], as.factor)

  # NOTE(review): match.call()[[4]] picks the fun.aggregate expression by
  # position, which presumably relies on data and formula both being
  # supplied -- confirm before relying on this branch.
  if (length(fun.aggregate) > 1)
    fun.aggregate <- do.call(funstofun, as.list(match.call()[[4]])[-1])

  if (!is.null(variables$l)) {
    # The formula has an `l` part: cast each group defined by those
    # variables separately.
    res <- nested.by(data, data[variables$l], function(x) {
      castd(x, variables$m, fun.aggregate, margins=margins, df=df, fill=fill, add.missing=add.missing, ...)
    })
  } else {
    res <- castd(data, variables$m, fun.aggregate, margins=margins, df=df, fill=fill, add.missing=add.missing, ...)
  }
  #attr(res, "formula") <- formula
  #attr(res, "data") <- deparse(substitute(data))
  res
}

58 changes: 29 additions & 29 deletions R/dimnames.r
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
# @value object of type \code{\link{cast_matrix}}
# @keyword internal
cast_matrix <- function(m, dimnames) {
  # Attach the rich dimension names and tag the object as a cast_matrix.
  # The class is prepended exactly once so "cast_matrix" is not repeated in
  # the class vector.
  rdimnames(m) <- dimnames
  class(m) <- c("cast_matrix", class(m))

  # The ordinary dimnames are the row names of each rdimnames element.
  dimnames(m) <- lapply(rdimnames(m), rownames)

  m
}

# Dimension names
Expand All @@ -32,23 +32,23 @@ rdimnames <- function(x) attr(x, "rdimnames")
df
}
value <- lapply(value, name)
attr(x, "rdimnames") <- value
attr(x, "idvars") <- colnames(value[[1]])
x
attr(x, "rdimnames") <- value
attr(x, "idvars") <- colnames(value[[1]])
x
}
# Getter: the second element of the "rdimnames" attribute.
rcolnames <- function(x) rdimnames(x)[[2]]
# Setter: replace the second element of rdimnames, leaving the first as is.
"rcolnames<-" <- function(x, value) {
  dn <- rdimnames(x)
  dn[[2]] <- value
  rdimnames(x) <- dn
  x
}
# Getter: the first element of the "rdimnames" attribute.
rrownames <- function(x) rdimnames(x)[[1]]
# Setter: replace the first element of rdimnames, leaving the second as is.
"rrownames<-" <- function(x, value) {
  dn <- rdimnames(x)
  dn[[1]] <- value
  rdimnames(x) <- dn
  x
}

# Convert cast matrix into a data frame
Expand All @@ -58,31 +58,31 @@ rrownames <- function(x) rdimnames(x)[[1]]
# @argument Argument required to match generic
# @keyword internal
as.data.frame.cast_matrix <- function(x, row.names, optional, ...) {
  # Drop the class so the object is handled as a plain matrix below.
  unx <- unclass(x)

  # Column labels come from the row names of the column rdimnames; the row
  # rdimnames become the leading columns of the data frame.
  colnames(unx) <- rownames(rcolnames(x))
  r.df <- data.frame(rrownames(x), unx, check.names=FALSE)
  class(r.df) <- c("cast_df", "data.frame")
  attr(r.df, "idvars") <- attr(x, "idvars")
  attr(r.df, "rdimnames") <- attr(x, "rdimnames")
  # seq_len() rather than 1:nrow() so a zero-row result does not fail
  # (1:0 would produce two row names for zero rows).
  rownames(r.df) <- seq_len(nrow(r.df))

  r.df
}

# Convert cast data.frame into a matrix
#
# @keyword internal
as.matrix.cast_df <- function(x, ...) {
ids <- attr(x, "idvars")
ids <- attr(x, "idvars")
mat <- as.matrix.data.frame(x[, setdiff(names(x), ids)])

rownames(mat) <- rownames(rrownames(x))
colnames(mat) <- rownames(rcolnames(x))
attr(mat, "idvars") <- attr(x, "idvars")
attr(mat, "rdimnames") <- attr(x, "rdimnames")
rownames(mat) <- rownames(rrownames(x))
colnames(mat) <- rownames(rcolnames(x))
attr(mat, "idvars") <- attr(x, "idvars")
attr(mat, "rdimnames") <- attr(x, "rdimnames")

class(mat) <- c("cast_matrix", class(mat))

Expand Down
16 changes: 8 additions & 8 deletions R/factors.r
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@
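# @arguments vector of level indices describing how to combine the levels: one entry per level, or fewer entries to merge the remaining levels into the other level. See examples for more details.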
# @arguments label for other level
# @keyword manip
#X df <- data.frame(a = LETTERS[sample(5, 15, replace=TRUE)], y = rnorm(15))
#X df <- data.frame(a = LETTERS[sample(5, 15, replace=TRUE)], y = rnorm(15))
#X combine_factor(df$a, c(1,2,2,1,2))
#X combine_factor(df$a, c(1:4, 1))
#X (f <- reorder_factor(df$a, tapply(df$y, df$a, mean)))
#X percent <- tapply(abs(df$y), df$a, sum)
#X combine_factor(f, c(order(percent)[1:3]))
combine_factor <- function(fac, variable=levels(fac), other.label="Other") {
  n <- length(levels(fac))
  if (length(variable) < n) {
    # Fewer entries than levels: the first length(variable) levels keep a
    # code of their own and every remaining level shares one extra code,
    # which is labelled other.label.
    # seq_len() keeps this correct when variable is empty (everything then
    # becomes other.label); seq(1, 0) would count down and yield c(1, 0).
    # NOTE(review): codes are assigned by level position, while labels are
    # taken from levels(fac)[variable]; these only agree when variable is
    # 1, 2, ..., k -- confirm the intended behaviour for other index vectors.
    nvar <- c(seq_len(length(variable)), rep(length(variable)+1, n - length(variable)))
    factor(nvar[as.numeric(fac)], labels=c(levels(fac)[variable], other.label))
  } else {
    # One entry per level: levels that share a value in variable are merged;
    # labels come from the levels at the first occurrence of each value.
    factor(variable[as.numeric(fac)], labels=levels(fac)[!duplicated(variable)])
  }
}
44 changes: 22 additions & 22 deletions R/formula.r
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,23 @@
#X cast_parse_formula("a + b ~ c ~ . | ...", letters[1:6])
cast_parse_formula <- function(formula = "... ~ variable", varnames) {
  # check_formula(formula, varnames)
  parts <- parse_expression(formula)

  # Variables named explicitly in the formula; every other name in varnames
  # (apart from "value") forms the remainder.
  vars <- lapply(parts, get_vars)
  remainder <- lapply(setdiff(varnames, c(unlist(vars), "value")), as.name)

  # Splice the remainder into x at the position flagged by is.negated().
  # NOTE(review): is.negated() is defined elsewhere; presumably it marks the
  # placeholder that stands for "all remaining variables" -- confirm.
  replace.remainder <- function(x) {
    rem <- is.negated(x)
    if (all(!rem)) return(x)
    pos <- which(rem)
    c(x[seq_along(x) < pos], remainder, x[seq_along(x) > pos])
  }

  list(
    m = lapply(parts$m, replace.remainder),
    l = rev(replace.remainder(parts$l))
  )
}


Expand All @@ -42,10 +42,10 @@ cast_parse_formula <- function(formula = "... ~ variable", varnames) {
# @arguments vector of variable names
# @keyword internal
check_formula <- function(formula, varnames) {
  vars <- unlist(all.vars.character(formula))

  # Every variable must be a known column or one of the special placeholders.
  unknown <- setdiff(vars, c(".", "...","result_variable",varnames))
  if (length(unknown) > 0) stop("Casting formula contains variables not found in molten data: ", paste(unknown, collapse=", "), call. = FALSE)

  # "." may appear several times; any other name may be used only once.
  vars <- vars[vars != "."]
  if (anyDuplicated(vars) > 0) stop("Variable names repeated", call. = FALSE)
}
Loading

0 comments on commit 6058c3b

Please sign in to comment.