Small cleanup for histogram routines.
- Extract hist train param from GPU hist.
- Make histogram const after construction.
trivialfis committed Jul 31, 2023
1 parent 7579905 commit 5753869
Showing 13 changed files with 159 additions and 84 deletions.
1 change: 1 addition & 0 deletions R-package/src/Makevars.in
@@ -68,6 +68,7 @@ OBJECTS= \
$(PKGROOT)/src/tree/updater_quantile_hist.o \
$(PKGROOT)/src/tree/updater_refresh.o \
$(PKGROOT)/src/tree/updater_sync.o \
+$(PKGROOT)/src/tree/hist/param.o \
$(PKGROOT)/src/linear/linear_updater.o \
$(PKGROOT)/src/linear/updater_coordinate.o \
$(PKGROOT)/src/linear/updater_shotgun.o \
1 change: 1 addition & 0 deletions R-package/src/Makevars.win
@@ -68,6 +68,7 @@ OBJECTS= \
$(PKGROOT)/src/tree/updater_quantile_hist.o \
$(PKGROOT)/src/tree/updater_refresh.o \
$(PKGROOT)/src/tree/updater_sync.o \
+$(PKGROOT)/src/tree/hist/param.o \
$(PKGROOT)/src/linear/linear_updater.o \
$(PKGROOT)/src/linear/updater_coordinate.o \
$(PKGROOT)/src/linear/updater_shotgun.o \
4 changes: 3 additions & 1 deletion include/xgboost/linalg.h
@@ -574,7 +574,9 @@ template <typename Container, typename... S,
std::enable_if_t<!common::detail::IsSpan<Container>::value &&
!std::is_pointer_v<Container>> * = nullptr>
auto MakeTensorView(Context const *ctx, Container &data, S &&...shape) { // NOLINT
-using T = typename Container::value_type;
+using T = std::conditional_t<std::is_const_v<Container>,
+                             std::add_const_t<typename Container::value_type>,
+                             typename Container::value_type>;
std::size_t in_shape[sizeof...(S)];
detail::IndexToArr(in_shape, std::forward<S>(shape)...);
return TensorView<T, sizeof...(S)>{data, in_shape, ctx->gpu_id};
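Note: the conditional alias above exists so that passing a const container yields a TensorView over const elements instead of silently dropping the qualifier. A minimal standalone sketch of the same trick (the Check helper is hypothetical, not part of XGBoost):

#include <type_traits>
#include <vector>

template <typename Container>
void Check(Container &data) {
  // Propagate the container's const-ness onto the element type.
  using T = std::conditional_t<std::is_const_v<Container>,
                               std::add_const_t<typename Container::value_type>,
                               typename Container::value_type>;
  static_assert(std::is_const_v<T> == std::is_const_v<Container>);
  (void)data;
}

int main() {
  std::vector<float> v(4);
  std::vector<float> const &cv = v;
  Check(v);   // T deduced as float
  Check(cv);  // T deduced as float const; the old value_type alias lost the const here
}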
41 changes: 22 additions & 19 deletions src/common/hist_util.cc
@@ -81,11 +81,11 @@ void InitilizeHistByZeroes(GHistRow hist, size_t begin, size_t end) {
/*!
* \brief Increment hist as dst += add in range [begin, end)
*/
-void IncrementHist(GHistRow dst, const GHistRow add, size_t begin, size_t end) {
-  double* pdst = reinterpret_cast<double*>(dst.data());
+void IncrementHist(GHistRow dst, ConstGHistRow add, std::size_t begin, std::size_t end) {
+  double *pdst = reinterpret_cast<double *>(dst.data());
const double *padd = reinterpret_cast<const double *>(add.data());

-for (size_t i = 2 * begin; i < 2 * end; ++i) {
+for (std::size_t i = 2 * begin; i < 2 * end; ++i) {
pdst[i] += padd[i];
}
}
@@ -207,18 +207,23 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,

const size_t size = row_indices.Size();
const size_t *rid = row_indices.begin;
-auto const *pgh = reinterpret_cast<const float *>(gpair.data());
+auto const *p_gpair = reinterpret_cast<const float *>(gpair.data());
const BinIdxType *gradient_index = gmat.index.data<BinIdxType>();

auto const &row_ptr = gmat.row_ptr.data();
auto base_rowid = gmat.base_rowid;
-const uint32_t *offsets = gmat.index.Offset();
-auto get_row_ptr = [&](size_t ridx) {
+uint32_t const *offsets = gmat.index.Offset();
+// There's no feature-based compression if missing value is present.
+if (kAnyMissing) {
+  CHECK(!offsets);
+} else {
+  CHECK(offsets);
+}
+
+auto get_row_ptr = [&](bst_row_t ridx) {
return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
};
-auto get_rid = [&](size_t ridx) {
-  return kFirstPage ? ridx : (ridx - base_rowid);
-};
+auto get_rid = [&](bst_row_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };

const size_t n_features =
get_row_ptr(row_indices.begin[0] + 1) - get_row_ptr(row_indices.begin[0]);
@@ -228,7 +233,7 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
// So we need to multiply each row-index/bin-index by 2
// to work with gradient pairs as a singe row FP array

-for (size_t i = 0; i < size; ++i) {
+for (std::size_t i = 0; i < size; ++i) {
const size_t icol_start =
kAnyMissing ? get_row_ptr(rid[i]) : get_rid(rid[i]) * n_features;
const size_t icol_end =
@@ -246,7 +251,7 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
kAnyMissing ? get_row_ptr(rid[i + Prefetch::kPrefetchOffset] + 1)
: icol_start_prefetch + n_features;

-PREFETCH_READ_T0(pgh + two * rid[i + Prefetch::kPrefetchOffset]);
+PREFETCH_READ_T0(p_gpair + two * rid[i + Prefetch::kPrefetchOffset]);
for (size_t j = icol_start_prefetch; j < icol_end_prefetch;
j += Prefetch::GetPrefetchStep<uint32_t>()) {
PREFETCH_READ_T0(gradient_index + j);
@@ -255,12 +260,12 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
const BinIdxType *gr_index_local = gradient_index + icol_start;

// The trick with pgh_t buffer helps the compiler to generate faster binary.
-const float pgh_t[] = {pgh[idx_gh], pgh[idx_gh + 1]};
+const float pgh_t[] = {p_gpair[idx_gh], p_gpair[idx_gh + 1]};
for (size_t j = 0; j < row_size; ++j) {
-const uint32_t idx_bin = two * (static_cast<uint32_t>(gr_index_local[j]) +
-                                (kAnyMissing ? 0 : offsets[j]));
+const uint32_t idx_bin =
+    two * (static_cast<uint32_t>(gr_index_local[j]) + (kAnyMissing ? 0 : offsets[j]));
auto hist_local = hist_data + idx_bin;
*(hist_local) += pgh_t[0];
*(hist_local + 1) += pgh_t[1];
}
}
@@ -281,12 +286,10 @@ void ColsWiseBuildHistKernel(Span<GradientPair const> gpair,
auto const &row_ptr = gmat.row_ptr.data();
auto base_rowid = gmat.base_rowid;
const uint32_t *offsets = gmat.index.Offset();
-auto get_row_ptr = [&](size_t ridx) {
+auto get_row_ptr = [&](bst_row_t ridx) {
return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
};
-auto get_rid = [&](size_t ridx) {
-  return kFirstPage ? ridx : (ridx - base_rowid);
-};
+auto get_rid = [&](bst_row_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };

const size_t n_features = gmat.cut.Ptrs().size() - 1;
const size_t n_columns = n_features;
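Note: the pgh → p_gpair rename touches the kernel's core layout trick: gradient pairs are walked through a reinterpret_cast as a flat float array, so row i's gradient sits at index 2*i and its hessian at 2*i+1 (hence the index doubling mentioned in the comments). A minimal sketch of that assumption, with a simplified stand-in for xgboost::GradientPair:

#include <cstddef>
#include <vector>

struct GradientPair { float grad; float hess; };  // stand-in: two contiguous floats

int main() {
  std::vector<GradientPair> gpair{{1.f, 2.f}, {3.f, 4.f}};
  auto const *p_gpair = reinterpret_cast<float const *>(gpair.data());
  for (std::size_t i = 0; i < gpair.size(); ++i) {
    float g = p_gpair[2 * i];      // gradient of row i
    float h = p_gpair[2 * i + 1];  // hessian of row i
    (void)g; (void)h;
  }
}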
3 changes: 2 additions & 1 deletion src/common/hist_util.h
@@ -362,6 +362,7 @@ bst_bin_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(std::size_t begin, std::size_t
}

using GHistRow = Span<xgboost::GradientPairPrecise>;
+using ConstGHistRow = Span<xgboost::GradientPairPrecise const>;

/*!
* \brief fill a histogram by zeros
@@ -371,7 +372,7 @@ void InitilizeHistByZeroes(GHistRow hist, size_t begin, size_t end);
/*!
* \brief Increment hist as dst += add in range [begin, end)
*/
-void IncrementHist(GHistRow dst, const GHistRow add, size_t begin, size_t end);
+void IncrementHist(GHistRow dst, ConstGHistRow add, std::size_t begin, std::size_t end);

/*!
* \brief Copy hist from src to dst in range [begin, end)
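Note: the point of the new alias is that a Span over const elements lets the compiler reject writes through the source histogram. A sketch with std::span (C++20) standing in for xgboost's Span and std::pair for GradientPairPrecise:

#include <cstddef>
#include <span>
#include <utility>

using Pair = std::pair<double, double>;      // stand-in for GradientPairPrecise
using HistRow = std::span<Pair>;             // mutable destination
using ConstHistRow = std::span<Pair const>;  // read-only source

void Increment(HistRow dst, ConstHistRow add, std::size_t begin, std::size_t end) {
  for (std::size_t i = begin; i < end; ++i) {
    dst[i].first += add[i].first;    // grad
    dst[i].second += add[i].second;  // hess
    // add[i].first = 0.0;           // would not compile: source elements are const
  }
}

int main() {
  Pair d[2]{{0, 0}, {0, 0}}, a[2]{{1, 2}, {3, 4}};
  Increment(HistRow{d}, ConstHistRow{a}, 0, 2);  // d now equals a
}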
9 changes: 4 additions & 5 deletions src/common/threading_utils.h
@@ -136,19 +136,18 @@ class BlockedSpace2d {
// Wrapper to implement nested parallelism with simple omp parallel for
template <typename Func>
void ParallelFor2d(const BlockedSpace2d& space, int nthreads, Func func) {
-const size_t num_blocks_in_space = space.Size();
+std::size_t n_blocks_in_space = space.Size();
CHECK_GE(nthreads, 1);

dmlc::OMPException exc;
#pragma omp parallel num_threads(nthreads)
{
exc.Run([&]() {
size_t tid = omp_get_thread_num();
-size_t chunck_size =
-    num_blocks_in_space / nthreads + !!(num_blocks_in_space % nthreads);
+size_t chunck_size = n_blocks_in_space / nthreads + !!(n_blocks_in_space % nthreads);

-size_t begin = chunck_size * tid;
-size_t end = std::min(begin + chunck_size, num_blocks_in_space);
+std::size_t begin = chunck_size * tid;
+std::size_t end = std::min(begin + chunck_size, n_blocks_in_space);
for (auto i = begin; i < end; i++) {
func(space.GetFirstDimension(i), space.GetRange(i));
}
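Note: the chunk-size expression is a ceiling division: n / nthreads rounds down, and !!(n % nthreads) adds one exactly when there is a remainder, so every block gets covered. A standalone illustration with hypothetical sizes:

#include <algorithm>
#include <cstddef>
#include <iostream>

int main() {
  std::size_t n_blocks = 10, nthreads = 4;
  std::size_t chunk = n_blocks / nthreads + !!(n_blocks % nthreads);  // 2 + 1 == 3
  for (std::size_t tid = 0; tid < nthreads; ++tid) {
    std::size_t begin = chunk * tid;
    std::size_t end = std::min(begin + chunk, n_blocks);
    std::cout << "thread " << tid << ": [" << begin << ", " << end << ")\n";
    // prints [0, 3), [3, 6), [6, 9), [9, 10)
  }
}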
10 changes: 5 additions & 5 deletions src/tree/hist/evaluate_splits.h
@@ -65,7 +65,7 @@ class HistEvaluator {
* pseudo-category for missing value but here we just do a complete scan to avoid
* making specialized histogram bin.
*/
-void EnumerateOneHot(common::HistogramCuts const &cut, const common::GHistRow &hist,
+void EnumerateOneHot(common::HistogramCuts const &cut, common::ConstGHistRow hist,
bst_feature_t fidx, bst_node_t nidx,
TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator,
SplitEntry *p_best) const {
@@ -143,7 +143,7 @@
*/
template <int d_step>
void EnumeratePart(common::HistogramCuts const &cut, common::Span<size_t const> sorted_idx,
-common::GHistRow const &hist, bst_feature_t fidx, bst_node_t nidx,
+common::ConstGHistRow hist, bst_feature_t fidx, bst_node_t nidx,
TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator,
SplitEntry *p_best) {
static_assert(d_step == +1 || d_step == -1, "Invalid step.");
@@ -214,7 +214,7 @@
// Returns the sum of gradients corresponding to the data points that contains
// a non-missing value for the particular feature fid.
template <int d_step>
-GradStats EnumerateSplit(common::HistogramCuts const &cut, const common::GHistRow &hist,
+GradStats EnumerateSplit(common::HistogramCuts const &cut, common::ConstGHistRow hist,
bst_feature_t fidx, bst_node_t nidx,
TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator,
SplitEntry *p_best) const {
@@ -510,7 +510,7 @@ class HistMultiEvaluator {

template <bst_bin_t d_step>
bool EnumerateSplit(common::HistogramCuts const &cut, bst_feature_t fidx,
-common::Span<common::GHistRow const> hist,
+common::Span<common::ConstGHistRow> hist,
linalg::VectorView<GradientPairPrecise const> parent_sum, double parent_gain,
SplitEntryContainer<std::vector<GradientPairPrecise>> *p_best) const {
auto const &cut_ptr = cut.Ptrs();
@@ -651,7 +651,7 @@ class HistMultiEvaluator {
auto entry = &tloc_candidates[n_threads * nidx_in_set + tidx];
auto best = &entry->split;
auto parent_sum = stats_.Slice(entry->nid, linalg::All());
-std::vector<common::GHistRow> node_hist;
+std::vector<common::ConstGHistRow> node_hist;
for (auto t_hist : hist) {
node_hist.push_back((*t_hist)[entry->nid]);
}
34 changes: 34 additions & 0 deletions src/tree/hist/param.cc
@@ -0,0 +1,34 @@
/**
* Copyright 2021-2023, XGBoost Contributors
*/
#include "param.h"

#include <string> // for string

#include "../../collective/communicator-inl.h" // for GetRank, Broadcast
#include "xgboost/json.h" // for Object, Json
#include "xgboost/tree_model.h" // for RegTree

namespace xgboost::tree {
DMLC_REGISTER_PARAMETER(HistMakerTrainParam);

void HistMakerTrainParam::CheckTreesSynchronized(RegTree const* local_tree) const {
if (!this->debug_synchronize) {
return;
}

std::string s_model;
Json model{Object{}};
int rank = collective::GetRank();
if (rank == 0) {
local_tree->SaveModel(&model);
}
Json::Dump(model, &s_model, std::ios::binary);
collective::Broadcast(&s_model, 0);

RegTree ref_tree{}; // rank 0 tree
auto j_ref_tree = Json::Load(StringView{s_model});
ref_tree.LoadModel(j_ref_tree);
CHECK(*local_tree == ref_tree);
}
} // namespace xgboost::tree
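Note: the check is broadcast-and-compare: rank 0 serializes its tree, the broadcast hands that serialization to every rank, and each rank rebuilds the reference tree and CHECKs equality with its local one. A single-process sketch with a stub broadcast (not the real xgboost::collective API):

#include <cassert>
#include <string>

// Stub: a real collective would return the root rank's payload on every rank.
std::string Broadcast(std::string const &payload, int /*root*/) { return payload; }

int main() {
  std::string local_tree = R"({"nodes": 3})";  // stand-in for a serialized RegTree
  std::string ref = Broadcast(local_tree, /*root=*/0);
  assert(local_tree == ref);  // mirrors CHECK(*local_tree == ref_tree)
}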
20 changes: 20 additions & 0 deletions src/tree/hist/param.h
@@ -0,0 +1,20 @@
/**
* Copyright 2021-2023, XGBoost Contributors
*/
#pragma once
#include "xgboost/parameter.h"
#include "xgboost/tree_model.h" // for RegTree

namespace xgboost::tree {
struct HistMakerTrainParam : public XGBoostParameter<HistMakerTrainParam> {
bool debug_synchronize;
void CheckTreesSynchronized(RegTree const* local_tree) const;

// declare parameters
DMLC_DECLARE_PARAMETER(HistMakerTrainParam) {
DMLC_DECLARE_FIELD(debug_synchronize)
.set_default(false)
.describe("Check if all distributed tree are identical after tree construction.");
}
};
} // namespace xgboost::tree
29 changes: 20 additions & 9 deletions src/tree/updater_approx.cc
@@ -11,17 +11,17 @@
#include "../common/random.h"
#include "../data/gradient_index.h"
#include "common_row_partitioner.h"
#include "constraints.h"
#include "driver.h"
#include "hist/evaluate_splits.h"
#include "hist/histogram.h"
#include "hist/param.h"
#include "hist/sampler.h" // for SampleGradient
#include "param.h"
#include "param.h" // for HistMakerTrainParam
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/json.h"
#include "xgboost/linalg.h"
#include "xgboost/task.h" // for ObjInfo
#include "xgboost/task.h" // for ObjInfo
#include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h" // for TreeUpdater

@@ -43,6 +43,7 @@ auto BatchSpec(TrainParam const &p, common::Span<float> hess) {
class GloablApproxBuilder {
protected:
TrainParam const *param_;
+HistMakerTrainParam const *hist_param_{nullptr};
std::shared_ptr<common::ColumnSampler> col_sampler_;
HistEvaluator evaluator_;
HistogramBuilder<CPUExpandEntry> histogram_builder_;
@@ -169,10 +170,12 @@
}

public:
-explicit GloablApproxBuilder(TrainParam const *param, MetaInfo const &info, Context const *ctx,
+explicit GloablApproxBuilder(TrainParam const *param, HistMakerTrainParam const *hist_param,
+                             MetaInfo const &info, Context const *ctx,
std::shared_ptr<common::ColumnSampler> column_sampler,
ObjInfo const *task, common::Monitor *monitor)
: param_{param},
+hist_param_{hist_param},
col_sampler_{std::move(column_sampler)},
evaluator_{ctx, param_, info, col_sampler_},
ctx_{ctx},
@@ -260,16 +263,23 @@ class GlobalApproxUpdater : public TreeUpdater {
std::shared_ptr<common::ColumnSampler> column_sampler_ =
std::make_shared<common::ColumnSampler>();
ObjInfo const *task_;
+HistMakerTrainParam hist_param_;

public:
explicit GlobalApproxUpdater(Context const *ctx, ObjInfo const *task)
: TreeUpdater(ctx), task_{task} {
monitor_.Init(__func__);
}

-void Configure(Args const &) override {}
-void LoadConfig(Json const &) override {}
-void SaveConfig(Json *) const override {}
+void Configure(Args const &args) override { hist_param_.UpdateAllowUnknown(args); }
+void LoadConfig(Json const &in) override {
+  auto const &config = get<Object const>(in);
+  FromJson(config.at("hist_train_param"), &hist_param_);
+}
+void SaveConfig(Json *p_out) const override {
+  auto &out = *p_out;
+  out["hist_train_param"] = ToJson(hist_param_);
+}

void InitData(TrainParam const &param, HostDeviceVector<GradientPair> const *gpair,
linalg::Matrix<GradientPair> *sampled) {
@@ -284,8 +294,8 @@
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *m,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree *> &trees) override {
-pimpl_ = std::make_unique<GloablApproxBuilder>(param, m->Info(), ctx_, column_sampler_, task_,
-                                               &monitor_);
+pimpl_ = std::make_unique<GloablApproxBuilder>(param, &hist_param_, m->Info(), ctx_,
+                                               column_sampler_, task_, &monitor_);

linalg::Matrix<GradientPair> h_gpair;
// Obtain the hessian values for weighted sketching
@@ -300,6 +310,7 @@
std::size_t t_idx = 0;
for (auto p_tree : trees) {
this->pimpl_->UpdateTree(m, s_gpair, hess, p_tree, &out_position[t_idx]);
+hist_param_.CheckTreesSynchronized(p_tree);
++t_idx;
}
}
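Note: the Configure/LoadConfig/SaveConfig trio persists the new parameter under the "hist_train_param" key, so a model saved with debug_synchronize set restores it on load. A minimal sketch of that round-trip contract, with std::map standing in for xgboost::Json:

#include <cassert>
#include <map>
#include <string>

int main() {
  using Json = std::map<std::string, std::string>;  // stand-in for xgboost::Json
  bool debug_synchronize = true;

  Json out;                                              // SaveConfig
  out["hist_train_param"] = debug_synchronize ? "1" : "0";

  bool restored = out.at("hist_train_param") == "1";     // LoadConfig
  assert(restored == debug_synchronize);
}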
(Diffs for the remaining 3 of the 13 changed files were not loaded.)
