diff --git a/src/common/stats.cuh b/src/common/stats.cuh index 14fd192bc822..b95f6866ca5c 100644 --- a/src/common/stats.cuh +++ b/src/common/stats.cuh @@ -15,6 +15,7 @@ #include // std::size_t #include // std::distance #include // std::numeric_limits +#include // std::is_floating_point,std::iterator_traits #include "cuda_context.cuh" // CUDAContext #include "device_helpers.cuh" @@ -32,33 +33,40 @@ template struct QuantileSegmentOp { SegIt seg_begin; ValIt val; - AlphaIt alpha; + AlphaIt alpha_it; Span d_results; - XGBOOST_DEVICE void operator()(std::size_t i) { - std::size_t seg_idx = i; + static_assert(std::is_floating_point::value_type>::value, + "Invalid value for quantile."); + static_assert(std::is_floating_point::value_type>::value, + "Invalid alpha."); + + XGBOOST_DEVICE void operator()(std::size_t seg_idx) { std::size_t begin = seg_begin[seg_idx]; auto n = static_cast(seg_begin[seg_idx + 1] - begin); + double a = alpha_it[seg_idx]; + if (n == 0) { - d_results[i] = std::numeric_limits::quiet_NaN(); + d_results[seg_idx] = std::numeric_limits::quiet_NaN(); return; } - if (alpha[seg_idx] <= (1 / (n + 1))) { - d_results[i] = val[begin]; + if (a <= (1 / (n + 1))) { + d_results[seg_idx] = val[begin]; return; } - if (alpha[seg_idx] >= (n / (n + 1))) { - d_results[i] = val[common::LastOf(seg_idx, seg_begin)]; + if (a >= (n / (n + 1))) { + d_results[seg_idx] = val[common::LastOf(seg_idx, seg_begin)]; return; } - double x = alpha[seg_idx] * static_cast(n + 1); + double x = a * static_cast(n + 1); double k = std::floor(x) - 1; double d = (x - 1) - k; auto v0 = val[begin + static_cast(k)]; auto v1 = val[begin + static_cast(k) + 1]; + d_results[seg_idx] = v0 + d * (v1 - v0); } }; @@ -93,24 +101,28 @@ struct WeightedQuantileSegOp { Span d_weight_cdf; Span d_sorted_idx; Span d_results; + static_assert(std::is_floating_point::value_type>::value, + "Invalid alpha."); + static_assert(std::is_floating_point::value_type>::value, + "Invalid value for quantile."); - XGBOOST_DEVICE void operator()(std::size_t i) { - std::size_t seg_idx = i; + XGBOOST_DEVICE void operator()(std::size_t seg_idx) { std::size_t begin = seg_beg[seg_idx]; auto n = static_cast(seg_beg[seg_idx + 1] - begin); if (n == 0) { - d_results[i] = std::numeric_limits::quiet_NaN(); + d_results[seg_idx] = std::numeric_limits::quiet_NaN(); return; } - auto leaf_cdf = d_weight_cdf.subspan(begin, static_cast(n)); - auto leaf_sorted_idx = d_sorted_idx.subspan(begin, static_cast(n)); - float thresh = leaf_cdf.back() * alpha_it[seg_idx]; - - std::size_t idx = thrust::lower_bound(thrust::seq, leaf_cdf.data(), - leaf_cdf.data() + leaf_cdf.size(), thresh) - - leaf_cdf.data(); + auto seg_cdf = d_weight_cdf.subspan(begin, static_cast(n)); + auto seg_sorted_idx = d_sorted_idx.subspan(begin, static_cast(n)); + double a = alpha_it[seg_idx]; + double thresh = seg_cdf.back() * a; + + std::size_t idx = + thrust::lower_bound(thrust::seq, seg_cdf.data(), seg_cdf.data() + seg_cdf.size(), thresh) - + seg_cdf.data(); idx = std::min(idx, static_cast(n - 1)); - d_results[i] = val_begin[leaf_sorted_idx[idx]]; + d_results[seg_idx] = val_begin[seg_sorted_idx[idx]]; } }; @@ -151,8 +163,8 @@ void SegmentedQuantile(Context const* ctx, AlphaIt alpha_it, SegIt seg_begin, Se quantiles->Resize(n_segments); auto d_results = quantiles->DeviceSpan(); - thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), n_segments, - detail::MakeQSegOp(seg_begin, val, alpha_it, d_results)); + dh::LaunchN(n_segments, ctx->CUDACtx()->Stream(), + detail::MakeQSegOp(seg_begin, val, alpha_it, d_results)); } /** diff --git a/src/objective/quantile_obj.cu b/src/objective/quantile_obj.cu index 95dd97f3b101..bbfb6cc8bb42 100644 --- a/src/objective/quantile_obj.cu +++ b/src/objective/quantile_obj.cu @@ -135,16 +135,16 @@ class QuantileRegression : public ObjFunction { thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { return i * d_labels.Shape(0); }); CHECK_EQ(d_labels.Shape(1), 1); - auto val_it = dh::MakeTransformIterator( - thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { - auto sample_idx = i % d_labels.Shape(0); - return d_labels(sample_idx, 0); - }); + auto val_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { + auto sample_idx = i % d_labels.Shape(0); + return d_labels(sample_idx, 0); + }); auto n = d_labels.Size() * d_alpha.size(); CHECK_EQ(base_score->Size(), d_alpha.size()); if (info.weights_.Empty()) { - common::SegmentedQuantile(ctx_, dh::tcbegin(d_alpha), seg_it, seg_it + d_alpha.size() + 1, - val_it, val_it + n, base_score->Data()); + common::SegmentedQuantile(ctx_, d_alpha.data(), seg_it, seg_it + d_alpha.size() + 1, val_it, + val_it + n, base_score->Data()); sw = info.num_row_; } else { info.weights_.SetDevice(ctx_->gpu_id); @@ -154,9 +154,9 @@ class QuantileRegression : public ObjFunction { auto sample_idx = i % d_labels.Shape(0); return d_weights[sample_idx]; }); - common::SegmentedWeightedQuantile(ctx_, dh::tcbegin(d_alpha), seg_it, - seg_it + d_alpha.size() + 1, val_it, val_it + n, - weight_it, weight_it + n, base_score->Data()); + common::SegmentedWeightedQuantile(ctx_, d_alpha.data(), seg_it, seg_it + d_alpha.size() + 1, + val_it, val_it + n, weight_it, weight_it + n, + base_score->Data()); sw = dh::Reduce(ctx_->CUDACtx()->CTP(), dh::tcbegin(d_weights), dh::tcend(d_weights), 0.0, thrust::plus{}); } diff --git a/tests/cpp/common/test_stats.cu b/tests/cpp/common/test_stats.cu index a03bd15f970e..8643e75a721f 100644 --- a/tests/cpp/common/test_stats.cu +++ b/tests/cpp/common/test_stats.cu @@ -3,78 +3,153 @@ */ #include -#include -#include +#include // std::size_t +#include // std::pair +#include // std::vector #include "../../../src/common/linalg_op.cuh" // ElementWiseTransformDevice #include "../../../src/common/stats.cuh" -#include "../../../src/common/stats.h" -#include "xgboost/base.h" -#include "xgboost/context.h" -#include "xgboost/host_device_vector.h" -#include "xgboost/linalg.h" +#include "xgboost/base.h" // XGBOOST_DEVICE +#include "xgboost/context.h" // Context +#include "xgboost/host_device_vector.h" // HostDeviceVector +#include "xgboost/linalg.h" // Tensor namespace xgboost { namespace common { namespace { class StatsGPU : public ::testing::Test { private: - linalg::Tensor arr_{ - {1.f, 2.f, 3.f, 4.f, 5.f, - 2.f, 4.f, 5.f, 3.f, 1.f}, - {10}, 0}; - linalg::Tensor indptr_{{0, 5, 10}, {3}, 0}; - HostDeviceVector resutls_; + linalg::Tensor arr_{{1.f, 2.f, 3.f, 4.f, 5.f, 2.f, 4.f, 5.f, 3.f, 1.f}, {10}, 0}; + linalg::Tensor indptr_{{0, 5, 10}, {3}, 0}; + HostDeviceVector results_; using TestSet = std::vector>; Context ctx_; void Check(float expected) { - auto const& h_results = resutls_.HostVector(); + auto const& h_results = results_.HostVector(); ASSERT_EQ(h_results.size(), indptr_.Size() - 1); ASSERT_EQ(h_results.front(), expected); - EXPECT_EQ(h_results.back(), expected); + ASSERT_EQ(h_results.back(), expected); } public: void SetUp() override { ctx_.gpu_id = 0; } + + void WeightedMulti() { + // data for one segment + std::vector seg{1.f, 2.f, 3.f, 4.f, 5.f}; + auto seg_size = seg.size(); + + // 3 segments + std::vector data; + data.insert(data.cend(), seg.begin(), seg.end()); + data.insert(data.cend(), seg.begin(), seg.end()); + data.insert(data.cend(), seg.begin(), seg.end()); + linalg::Tensor arr{data.cbegin(), data.cend(), {data.size()}, 0}; + auto d_arr = arr.View(0); + + auto key_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { return i * seg_size; }); + auto val_it = + dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); }); + + // one alpha for each segment + HostDeviceVector alphas{0.0f, 0.5f, 1.0f}; + alphas.SetDevice(0); + auto d_alphas = alphas.ConstDeviceSpan(); + auto w_it = thrust::make_constant_iterator(0.1f); + SegmentedWeightedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it, + val_it + d_arr.Size(), w_it, w_it + d_arr.Size(), &results_); + + auto const& h_results = results_.HostVector(); + ASSERT_EQ(1.0f, h_results[0]); + ASSERT_EQ(3.0f, h_results[1]); + ASSERT_EQ(5.0f, h_results[2]); + } + void Weighted() { auto d_arr = arr_.View(0); auto d_key = indptr_.View(0); - auto key_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), - [=] __device__(size_t i) { return d_key(i); }); - auto val_it = dh::MakeTransformIterator( - thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) { return d_arr(i); }); + auto key_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { return d_key(i); }); + auto val_it = + dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); }); linalg::Tensor weights{{10}, 0}; linalg::ElementWiseTransformDevice(weights.View(0), - [=] XGBOOST_DEVICE(size_t, float) { return 1.0; }); + [=] XGBOOST_DEVICE(std::size_t, float) { return 1.0; }); auto w_it = weights.Data()->ConstDevicePointer(); for (auto const& pair : TestSet{{0.0f, 1.0f}, {0.5f, 3.0f}, {1.0f, 5.0f}}) { SegmentedWeightedQuantile(&ctx_, pair.first, key_it, key_it + indptr_.Size(), val_it, - val_it + arr_.Size(), w_it, w_it + weights.Size(), &resutls_); + val_it + arr_.Size(), w_it, w_it + weights.Size(), &results_); this->Check(pair.second); } } + void NonWeightedMulti() { + // data for one segment + std::vector seg{20.f, 15.f, 50.f, 40.f, 35.f}; + auto seg_size = seg.size(); + + // 3 segments + std::vector data; + data.insert(data.cend(), seg.begin(), seg.end()); + data.insert(data.cend(), seg.begin(), seg.end()); + data.insert(data.cend(), seg.begin(), seg.end()); + linalg::Tensor arr{data.cbegin(), data.cend(), {data.size()}, 0}; + auto d_arr = arr.View(0); + + auto key_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { return i * seg_size; }); + auto val_it = + dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); }); + + // one alpha for each segment + HostDeviceVector alphas{0.1f, 0.2f, 0.4f}; + alphas.SetDevice(0); + auto d_alphas = alphas.ConstDeviceSpan(); + SegmentedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it, + val_it + d_arr.Size(), &results_); + + auto const& h_results = results_.HostVector(); + EXPECT_EQ(15.0f, h_results[0]); + EXPECT_EQ(16.0f, h_results[1]); + ASSERT_EQ(26.0f, h_results[2]); + } + void NonWeighted() { auto d_arr = arr_.View(0); auto d_key = indptr_.View(0); - auto key_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), - [=] __device__(size_t i) { return d_key(i); }); - auto val_it = dh::MakeTransformIterator( - thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) { return d_arr(i); }); + auto key_it = dh::MakeTransformIterator( + thrust::make_counting_iterator(0ul), [=] __device__(std::size_t i) { return d_key(i); }); + auto val_it = + dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); }); for (auto const& pair : TestSet{{0.0f, 1.0f}, {0.5f, 3.0f}, {1.0f, 5.0f}}) { SegmentedQuantile(&ctx_, pair.first, key_it, key_it + indptr_.Size(), val_it, - val_it + arr_.Size(), &resutls_); + val_it + arr_.Size(), &results_); this->Check(pair.second); } } }; } // anonymous namespace -TEST_F(StatsGPU, Quantile) { this->NonWeighted(); } -TEST_F(StatsGPU, WeightedQuantile) { this->Weighted(); } +TEST_F(StatsGPU, Quantile) { + this->NonWeighted(); + this->NonWeightedMulti(); +} + +TEST_F(StatsGPU, WeightedQuantile) { + this->Weighted(); + this->WeightedMulti(); +} } // namespace common } // namespace xgboost