-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
Copy pathGroupedDataFrame.h
133 lines (105 loc) · 3.73 KB
/
GroupedDataFrame.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#ifndef dplyr_tools_GroupedDataFrame_H
#define dplyr_tools_GroupedDataFrame_H
namespace Rcpp {
class GroupedDataFrame ;
/**
 * Forward cursor over the groups of a GroupedDataFrame.
 *
 * Only construction, pre-increment and dereference are declared here; the
 * class offers no end-of-range test, so the caller decides how many times to
 * advance.
 *
 * The object keeps a reference to the GroupedDataFrame it was built from, so
 * that GroupedDataFrame has to outlive the iterator.
 */
class GroupedDataFrameIndexIterator {
public:
    /** Builds a cursor for gdf_ (defined out of line). */
    GroupedDataFrameIndexIterator( const GroupedDataFrame& gdf_ ) ;

    /** Yields a SlicingIndex for the current position (defined out of line). */
    SlicingIndex operator*() const ;

    /** Moves to the next position and returns *this (defined out of line). */
    GroupedDataFrameIndexIterator& operator++() ;

    // NOTE: declaration order below is also the initialization order.
    int i ;                          // current position
    const GroupedDataFrame& gdf ;    // the grouped data frame being walked
    ListOf<IntegerVector> indices ;  // one IntegerVector per position
} ;
class GroupedDataFrame {
public:
typedef GroupedDataFrameIndexIterator group_iterator ;
GroupedDataFrame( SEXP x):
data_(x),
group_sizes(),
biggest_group_size(0),
symbols( data_.attr("vars") ),
labels()
{
// handle lazyness
bool is_lazy = Rf_isNull( data_.attr( "group_sizes") ) || Rf_isNull( data_.attr( "labels") ) ;
if( is_lazy ){
data_ = build_index_cpp( data_) ;
}
group_sizes = data_.attr( "group_sizes" );
biggest_group_size = data_.attr( "biggest_group_size" ) ;
labels = data_.attr( "labels" );
if( !is_lazy ){
// check consistency of the groups
int rows_in_groups = sum(group_sizes) ;
if( data_.nrows() != rows_in_groups ){
std::stringstream s ;
s << "corrupt 'grouped_df', contains "
<< data_.nrows()
<< " rows, and "
<< rows_in_groups
<< " rows in groups" ;
stop(s.str()) ;
}
}
}
group_iterator group_begin() const {
return GroupedDataFrameIndexIterator( *this ) ;
}
SEXP symbol( int i) const {
return symbols[i] ;
}
DataFrame& data() {
return data_ ;
}
const DataFrame& data() const {
return data_ ;
}
inline int ngroups() const {
return group_sizes.size() ;
}
inline int nvars() const {
return labels.size() ;
}
inline int nrows() const {
return data_.nrows() ;
}
inline SEXP label(int i) const {
return labels[i];
}
inline int max_group_size() const{
return biggest_group_size ;
}
inline bool has_group(SEXP g) const {
SEXP symb = as_symbol(g) ;
int n = symbols.size() ;
for( int i=0; i<n; i++){
if( symbols[i] == symb ) return true ;
}
return false ;
}
inline const IntegerVector& get_group_sizes() const {
return group_sizes ;
}
private:
DataFrame data_ ;
IntegerVector group_sizes ;
int biggest_group_size ;
ListOf<Symbol> symbols ;
DataFrame labels ;
} ;
/**
 * Specialization of is<> for GroupedDataFrame: true when x inherits from
 * "grouped_df" and carries a non-NULL "vars" attribute.
 */
template <>
inline bool is<GroupedDataFrame>( SEXP x){
    // The attribute is only looked up for objects of the right class.
    if( !Rf_inherits(x, "grouped_df" ) ){
        return false ;
    }
    SEXP vars = Rf_getAttrib(x, Rf_install("vars") ) ;
    return vars != R_NilValue ;
}
/**
 * Starts at position 0, binds to gdf_ and takes the list of per-group indices
 * from the "indices" attribute of its data frame.
 */
inline GroupedDataFrameIndexIterator::GroupedDataFrameIndexIterator( const GroupedDataFrame& gdf_ )
    : i(0),
      gdf(gdf_),
      // gdf_ and the member gdf name the same object
      indices(gdf_.data().attr("indices"))
{
}
/** Pre-increment: advances the position by one and returns this iterator. */
inline GroupedDataFrameIndexIterator& GroupedDataFrameIndexIterator::operator++(){
    ++i ;
    return *this ;
}
/**
 * Builds a SlicingIndex from the indices stored at the current position and
 * the position itself. The position is not range-checked here.
 */
inline SlicingIndex GroupedDataFrameIndexIterator::operator*() const {
    SlicingIndex current( indices[i], i ) ;
    return current ;
}
}
#endif