Skip to content

Commit

Permalink
mutate failed to deep copy data that ends up in a list column. closes #…
Browse files Browse the repository at this point in the history
  • Loading branch information
romainfrancois committed Feb 2, 2016
1 parent 17de1a0 commit 8691bce
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 8 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# dplyr 0.4.3.9000

* `mutate` failed to deep copy data that ends up in a list column (#1643).

* `mutate` handles adding a factor that is all `NA` (#1645).

* `bind_rows` handles promotion to strings (#1538).
Expand Down
97 changes: 90 additions & 7 deletions inst/include/dplyr/Gatherer.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ namespace dplyr {

SEXP collect(){
int ngroups = gdf.ngroups() ;
if( first_non_na == ngroups ) return data ;
typename Data::group_iterator git = gdf.group_begin() ;
int i = 0 ;
for(; i<first_non_na; i++) ++git ;
++git; i++ ;
for(; i<ngroups; i++, ++git){
SlicingIndex indices = *git ;
Shield<SEXP> subset( proxy.get( indices ) ) ;
Expand All @@ -38,16 +40,16 @@ namespace dplyr {

private:

inline void grab(SEXP data, const SlicingIndex& indices){
int n = Rf_length(data) ;
if( is<LogicalVector>(data) && all(is_na(LogicalVector(data))).is_true() ){
inline void grab(SEXP subset, const SlicingIndex& indices){
int n = Rf_length(subset) ;
if( is<LogicalVector>(subset) && all(is_na(LogicalVector(subset))).is_true() ){
grab_rep( Vector<RTYPE>::get_na(), indices ) ;
} else {
check_type(data) ;
check_type(subset) ;
if(n == indices.size() ){
grab_along( data, indices ) ;
grab_along( subset, indices ) ;
} else if( n == 1) {
grab_rep( Rcpp::internal::r_vector_start<RTYPE>(data)[0], indices ) ;
grab_rep( Rcpp::internal::r_vector_start<RTYPE>(subset)[0], indices ) ;
} else {
stop ( "incompatible size (%d), expecting %d (the group size) or 1",
n, indices.size()) ;
Expand Down Expand Up @@ -83,6 +85,87 @@ namespace dplyr {

} ;

template <typename Data, typename Subsets>
class ListGatherer : public Gatherer {
public:
typedef GroupedCallProxy<Data,Subsets> Proxy ;

ListGatherer( List first, SlicingIndex& indices, Proxy& proxy_, const Data& gdf_, int first_non_na_ ) :
gdf(gdf_), proxy(proxy_), data(gdf.nrows()), first_non_na(first_non_na_)
{
if( first_non_na < gdf.ngroups() ){
perhaps_duplicate(first) ;
grab( first, indices ) ;
}

copy_most_attributes( data, first ) ;
}

SEXP collect(){
int ngroups = gdf.ngroups() ;
if( first_non_na == ngroups ) return data ;
typename Data::group_iterator git = gdf.group_begin() ;
int i = 0 ;
for(; i<first_non_na; i++) ++git ;
++git; i++ ;
for(; i<ngroups; i++, ++git){
SlicingIndex indices = *git ;
List subset( proxy.get(indices) ) ;
perhaps_duplicate(subset) ;
grab(subset, indices);
}
return data ;
}

private:

inline void perhaps_duplicate( List& x ){
int n = x.size() ;
for( int i=0; i<n; i++){
SEXP xi = x[i] ;
if( IS_DPLYR_SHRINKABLE_VECTOR(xi) ) {
x[i] = Rf_duplicate(xi) ;
} else if( TYPEOF(xi) == VECSXP ){
List lxi(xi) ;
perhaps_duplicate( lxi ) ;
}
}
}

inline void grab(const List& subset, const SlicingIndex& indices){
int n = subset.size() ;

if(n == indices.size() ){
grab_along( subset, indices ) ;
} else if( n == 1) {
grab_rep( subset[0], indices ) ;
} else {
stop ( "incompatible size (%d), expecting %d (the group size) or 1",
n, indices.size()) ;
}
}

void grab_along( const List& subset, const SlicingIndex& indices ){
int n = indices.size();
for( int j=0; j<n; j++){
data[ indices[j] ] = subset[j] ;
}
}

void grab_rep( SEXP value, const SlicingIndex& indices ){
int n = indices.size();
for( int j=0; j<n; j++){
data[ indices[j] ] = value ;
}
}

const Data& gdf ;
Proxy& proxy ;
List data ;
int first_non_na ;

} ;

template <typename Data, typename Subsets>
class FactorGatherer : public Gatherer {
public:
Expand Down Expand Up @@ -224,7 +307,7 @@ namespace dplyr {
case REALSXP: return new GathererImpl<REALSXP,Data,Subsets> ( first, indices, proxy, gdf, i ) ;
case LGLSXP: return new GathererImpl<LGLSXP,Data,Subsets> ( first, indices, proxy, gdf, i ) ;
case STRSXP: return new GathererImpl<STRSXP,Data,Subsets> ( first, indices, proxy, gdf, i ) ;
case VECSXP: return new GathererImpl<VECSXP,Data,Subsets> ( first, indices, proxy, gdf, i ) ;
case VECSXP: return new ListGatherer<Data,Subsets> ( List(first), indices, proxy, gdf, i ) ;
case CPLXSXP: return new GathererImpl<CPLXSXP,Data,Subsets> ( first, indices, proxy, gdf, i ) ;
default: break ;
}
Expand Down
2 changes: 1 addition & 1 deletion inst/include/dplyr/Result/GroupedCallProxy.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ namespace dplyr {
HybridCall hybrid_eval( call, indices, subsets, env ) ;
return hybrid_eval.eval() ;
}

int n = proxies.size() ;
for( int i=0; i<n; i++){
proxies[i].set( subsets.get(proxies[i].symbol, indices ) ) ;
}

return call.eval(env) ;
} else if( TYPEOF(call) == SYMSXP ) {
if(subsets.count(call)){
Expand Down
6 changes: 6 additions & 0 deletions inst/include/dplyr/Result/RowwiseSubset.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ namespace dplyr {
object(x), output(1), start( Rcpp::internal::r_vector_start<RTYPE>(object) )
{
copy_most_attributes( output, x) ;
SET_DPLYR_SHRINKABLE_VECTOR( (SEXP)output) ;
}

~RowwiseSubsetTemplate(){
UNSET_DPLYR_SHRINKABLE_VECTOR( (SEXP)output) ;
}

virtual SEXP get( const SlicingIndex& indices ) {
Expand Down Expand Up @@ -68,6 +73,7 @@ namespace dplyr {
case REALSXP: return new RowwiseSubsetTemplate<REALSXP>(x) ;
case LGLSXP: return new RowwiseSubsetTemplate<LGLSXP>(x) ;
case STRSXP: return new RowwiseSubsetTemplate<STRSXP>(x) ;
case CPLXSXP: return new RowwiseSubsetTemplate<CPLXSXP>(x) ;
case VECSXP: return new RowwiseSubsetTemplate<VECSXP>(x) ;
}
return 0 ;
Expand Down
15 changes: 15 additions & 0 deletions tests/testthat/test-mutate.r
Original file line number Diff line number Diff line change
Expand Up @@ -572,3 +572,18 @@ test_that("Adding a Column of NA to a Grouped Table gives expected results (#164
expect_is( res$prediction, "factor")
expect_equal( levels(res$prediction), character() )
})

test_that("Deep copies are performed when needed (#1463)", {

res <- data.frame(prob = c(F,T)) %>%
rowwise %>%
mutate(model = list(x=prob) )
expect_equal(unlist(res$model), c(FALSE,TRUE))

res <- data.frame(x=1:4, g=c(1,1,1,2)) %>%
group_by(g) %>%
mutate(model = list(y=x) )
expect_equal(res$model[[1]], 1:3)
expect_equal(res$model[[4]], 4)

})

0 comments on commit 8691bce

Please sign in to comment.