Skip to content

Commit

Permalink
fix #628 (update the condition of using subset in bagging).
Browse files Browse the repository at this point in the history
  • Loading branch information
guolinke committed Jun 17, 2017
1 parent 03ba730 commit 75121bc
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
4 changes: 4 additions & 0 deletions include/LightGBM/dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,10 @@ class Dataset {
return feature_groups_[group]->bin_data_.get();
}

/// Reports the sparsity flag stored on one feature group.
/// @param group index into feature_groups_; it is used directly, with no range check
/// @return the value of that group's is_sparse_ member
inline bool FeatureGroupIsSparse(int group) const {
  const auto& selected_group = feature_groups_[group];
  return selected_group->is_sparse_;
}

inline BinIterator* FeatureIterator(int i) const {
const int group = feature2group_[i];
const int sub_feature = feature2subfeature_[i];
Expand Down
11 changes: 10 additions & 1 deletion src/boosting/gbdt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,17 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_
left_write_pos_buf_.resize(num_threads_);
right_write_pos_buf_.resize(num_threads_);
double average_bag_rate = new_config->bagging_fraction / new_config->bagging_freq;
int sparse_group = 0;
for (int i = 0; i < train_data->num_feature_groups(); ++i) {
if (train_data_->FeatureGroupIsSparse(i)) {
++sparse_group;
}
}
is_use_subset_ = false;
if (average_bag_rate <= 0.5) {
const int group_threshold_usesubset = 100;
const int sparse_group_threshold_usesubset = train_data->num_feature_groups() / 4;
if (average_bag_rate <= 0.5
&& (train_data->num_feature_groups() < group_threshold_usesubset || sparse_group < sparse_group_threshold_usesubset)) {
tmp_subset_.reset(new Dataset(bag_data_cnt_));
tmp_subset_->CopyFeatureMapperFrom(train_data);
is_use_subset_ = true;
Expand Down

0 comments on commit 75121bc

Please sign in to comment.