Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BREAKING] prevent multi-gpu usage #4749

Merged
merged 7 commits into from
Aug 12, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions include/xgboost/generic_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ struct GenericParameter : public dmlc::Parameter<GenericParameter> {
.describe("The primary GPU device ordinal.");
DMLC_DECLARE_FIELD(n_gpus)
.set_default(0)
.set_lower_bound(-1)
.describe("Deprecated, please use distributed training with one "
"process per GPU. "
"Number of GPUs to use for multi-gpu algorithms.");
.set_range(0, 1)
.describe("Deprecated. Single process multi-GPU training is no longer supported. "
"Please switch to distributed training with one process per GPU. "
"This can be done using Dask or Spark.");
}
};
} // namespace xgboost
Expand Down
11 changes: 9 additions & 2 deletions src/learner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -580,8 +580,15 @@ class LearnerImpl : public Learner {
}
gbm_->Configure(args);

if (this->gbm_->UseGPU() && cfg_.find("n_gpus") == cfg_.cend()) {
generic_param_.n_gpus = 1;
if (this->gbm_->UseGPU()) {
if (cfg_.find("n_gpus") == cfg_.cend()) {
generic_param_.n_gpus = 1;
}
if (generic_param_.n_gpus != 1) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed for this PR. But after removing n_gpus, the UseGPU function should follow ;-).

LOG(FATAL) << "Single process multi-GPU training is no longer supported. "
"Please switch to distributed GPU training with one process per GPU. "
"This can be done using Dask or Spark.";
}
}
}

Expand Down
14 changes: 0 additions & 14 deletions tests/cpp/common/test_gpu_hist_util.cu
Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,5 @@ TEST(gpu_hist_util, DeviceSketch_ExternalMemory) {
TestDeviceSketch(GPUSet::Range(0, 1), true);
}

#if defined(XGBOOST_USE_NCCL)
// Multi-GPU sketch test: runs DeviceSketch across every visible device,
// without external memory. Requires more than one GPU to be visible.
TEST(gpu_hist_util, MGPU_DeviceSketch) {
  auto visible = GPUSet::AllVisible();
  CHECK_GT(visible.Size(), 1);  // test is meaningless on a single GPU
  TestDeviceSketch(visible, false);
}

// Multi-GPU sketch test, external-memory variant: same as MGPU_DeviceSketch
// but with the external-memory (sparse page) code path enabled.
TEST(gpu_hist_util, MGPU_DeviceSketch_ExternalMemory) {
  auto visible = GPUSet::AllVisible();
  CHECK_GT(visible.Size(), 1);  // test is meaningless on a single GPU
  TestDeviceSketch(visible, true);
}
#endif

} // namespace common
} // namespace xgboost
43 changes: 0 additions & 43 deletions tests/cpp/linear/test_linear.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,47 +24,4 @@ TEST(Linear, GPUCoordinate) {

delete mat;
}

#if defined(XGBOOST_USE_NCCL)
// Multi-GPU coordinate-descent linear updater test.
//
// The original body duplicated the same ~15 lines verbatim for two primary
// GPU ordinals (0 and 1); the shared logic is factored into a local lambda.
TEST(Linear, MGPU_GPUCoordinate) {
  // Driver: builds a 10x10 DMatrix, runs one gpu_coord_descent update over
  // all visible devices (n_gpus == -1) with the given primary ordinal, and
  // checks the resulting bias.
  auto run_test = [](int gpu_id) {
    auto mat = xgboost::CreateDMatrix(10, 10, 0);
    auto lparam = CreateEmptyGenericParam(gpu_id, -1);
    lparam.n_gpus = -1;  // use every visible GPU
    auto updater = std::unique_ptr<xgboost::LinearUpdater>(
        xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
    updater->Configure({{"eta", "1."}});
    xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
        (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
    xgboost::gbm::GBLinearModel model;
    model.param.num_feature = (*mat)->Info().num_col_;
    model.param.num_output_group = 1;
    model.LazyInitModel();
    updater->Update(&gpair, (*mat).get(), &model, gpair.Size());

    // With eta = 1 and a constant gradient of -5 (hessian 1), a single
    // update drives the bias to exactly 5.
    ASSERT_EQ(model.bias()[0], 5.0f);
    delete mat;
  };

  run_test(0);  // primary device 0
  run_test(1);  // primary device 1
}
#endif

} // namespace xgboost
29 changes: 0 additions & 29 deletions tests/cpp/metric/test_elementwise_metric.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,32 +101,3 @@ TEST(Metric, DeclareUnifiedTest(PoissionNegLogLik)) {
1.1280f, 0.001f);
delete metric;
}

#if defined(XGBOOST_USE_NCCL) && defined(__CUDACC__)
// Multi-GPU RMSE metric test: evaluates the metric with two different
// primary GPU ordinals and expects identical results.
//
// Fix: the metric was held by raw pointer with a trailing `delete`; any
// failing ASSERT_STREQ returns from the test body early and leaked it.
// std::unique_ptr releases it on every exit path.
TEST(Metric, MGPU_RMSE) {
  {
    auto lparam = xgboost::CreateEmptyGenericParam(0, -1);
    std::unique_ptr<xgboost::Metric> metric(
        xgboost::Metric::Create("rmse", &lparam));
    metric->Configure({});
    ASSERT_STREQ(metric->Name(), "rmse");
    EXPECT_NEAR(GetMetricEval(metric.get(), {0}, {0}), 0, 1e-10);
    EXPECT_NEAR(GetMetricEval(metric.get(),
                              {0.1f, 0.9f, 0.1f, 0.9f},
                              {   0,    0,    1,    1}),
                0.6403f, 0.001f);
  }

  {
    auto lparam = xgboost::CreateEmptyGenericParam(1, -1);
    std::unique_ptr<xgboost::Metric> metric(
        xgboost::Metric::Create("rmse", &lparam));
    // Configure was missing here while the first scope calls it; added for
    // consistency (an empty-args Configure, same as above).
    metric->Configure({});
    ASSERT_STREQ(metric->Name(), "rmse");
    EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}), 0, 1e-10);
    EXPECT_NEAR(GetMetricEval(metric.get(),
                              {0.1f, 0.9f, 0.1f, 0.9f},
                              {   0,    0,    1,    1}),
                0.6403f, 0.001f);
  }
}
#endif
156 changes: 44 additions & 112 deletions tests/cpp/predictor/test_gpu_predictor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,13 @@
#include "gtest/gtest.h"
#include "../helpers.h"

#if defined(XGBOOST_USE_NCCL)
namespace {

// Asserts that an XGBoost C-API call succeeded (returned 0); on failure the
// assertion message includes the last C-API error string.
inline void CheckCAPICall(int ret) {
  ASSERT_EQ(ret, 0) << XGBGetLastError();
}

} // namespace anonymous
#endif

const std::map<std::string, std::string>&
QueryBoosterConfigurationArguments(BoosterHandle handle) {
Expand All @@ -46,26 +44,28 @@ TEST(gpu_predictor, Test) {
gpu_predictor->Configure({}, {});
cpu_predictor->Configure({}, {});

int n_row = 5;
int n_col = 5;
for (size_t i = 1; i < 33; i *= 2) {
int n_row = i, n_col = i;
auto dmat = CreateDMatrix(n_row, n_col, 0);

gbm::GBTreeModel model = CreateTestModel();
model.param.num_feature = n_col;
auto dmat = CreateDMatrix(n_row, n_col, 0);

// Test predict batch
HostDeviceVector<float> gpu_out_predictions;
HostDeviceVector<float> cpu_out_predictions;
gpu_predictor->PredictBatch((*dmat).get(), &gpu_out_predictions, model, 0);
cpu_predictor->PredictBatch((*dmat).get(), &cpu_out_predictions, model, 0);
std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.HostVector();
std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.HostVector();
float abs_tolerance = 0.001;
for (int i = 0; i < gpu_out_predictions.Size(); i++) {
ASSERT_NEAR(gpu_out_predictions_h[i], cpu_out_predictions_h[i], abs_tolerance);
}
gbm::GBTreeModel model = CreateTestModel();
model.param.num_feature = n_col;

// Test predict batch
HostDeviceVector<float> gpu_out_predictions;
HostDeviceVector<float> cpu_out_predictions;

gpu_predictor->PredictBatch((*dmat).get(), &gpu_out_predictions, model, 0);
cpu_predictor->PredictBatch((*dmat).get(), &cpu_out_predictions, model, 0);

delete dmat;
std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.HostVector();
std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.HostVector();
float abs_tolerance = 0.001;
for (int j = 0; j < gpu_out_predictions.Size(); j++) {
ASSERT_NEAR(gpu_out_predictions_h[j], cpu_out_predictions_h[j], abs_tolerance);
}
delete dmat;
}
}

TEST(gpu_predictor, ExternalMemoryTest) {
Expand All @@ -74,25 +74,35 @@ TEST(gpu_predictor, ExternalMemoryTest) {
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
gpu_predictor->Configure({}, {});
gbm::GBTreeModel model = CreateTestModel();
int n_col = 3;
model.param.num_feature = n_col;
model.param.num_feature = 3;
const int n_classes = 3;
model.param.num_output_group = n_classes;
std::vector<std::unique_ptr<DMatrix>> dmats;
dmlc::TemporaryDirectory tmpdir;
std::string filename = tmpdir.path + "/big.libsvm";
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(32, 64, filename);

// Test predict batch
HostDeviceVector<float> out_predictions;
gpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_);
for (const auto& v : out_predictions.HostVector()) {
ASSERT_EQ(v, 1.5);
std::string file0 = tmpdir.path + "/big_0.libsvm";
std::string file1 = tmpdir.path + "/big_1.libsvm";
std::string file2 = tmpdir.path + "/big_2.libsvm";
dmats.push_back(CreateSparsePageDMatrix(9, 64UL, file0));
dmats.push_back(CreateSparsePageDMatrix(128, 128UL, file1));
dmats.push_back(CreateSparsePageDMatrix(1024, 1024UL, file2));

for (const auto& dmat: dmats) {
// Test predict batch
HostDeviceVector<float> out_predictions;
gpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_ * n_classes);
const std::vector<float> &host_vector = out_predictions.ConstHostVector();
for (int i = 0; i < host_vector.size() / n_classes; i++) {
ASSERT_EQ(host_vector[i * n_classes], 1.5);
ASSERT_EQ(host_vector[i * n_classes + 1], 0.);
ASSERT_EQ(host_vector[i * n_classes + 2], 0.);
}
}
}

#if defined(XGBOOST_USE_NCCL)
// Test whether pickling preserves predictor parameters
TEST(gpu_predictor, MGPU_PicklingTest) {
int const ngpu = GPUSet::AllVisible().Size();
TEST(gpu_predictor, PicklingTest) {
int const ngpu = 1;

dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
Expand Down Expand Up @@ -153,12 +163,6 @@ TEST(gpu_predictor, MGPU_PicklingTest) {
ASSERT_EQ(kwargs.at("n_gpus"), std::to_string(ngpu).c_str());
}

{ // Change n_gpus and query again
CheckCAPICall(XGBoosterSetParam(bst2, "n_gpus", "1"));
const auto& kwargs = QueryBoosterConfigurationArguments(bst2);
ASSERT_EQ(kwargs.at("n_gpus"), "1");
}

{ // Change predictor and query again
CheckCAPICall(XGBoosterSetParam(bst2, "predictor", "cpu_predictor"));
const auto& kwargs = QueryBoosterConfigurationArguments(bst2);
Expand All @@ -167,77 +171,5 @@ TEST(gpu_predictor, MGPU_PicklingTest) {

CheckCAPICall(XGBoosterFree(bst2));
}

// multi-GPU predictor test
// Multi-GPU predictor test: batch predictions from gpu_predictor running on
// all visible devices must match cpu_predictor output, for square matrices
// of growing size (1x1 up to 32x32).
TEST(gpu_predictor, MGPU_Test) {
  auto cpu_lparam = CreateEmptyGenericParam(0, 0);
  auto gpu_lparam = CreateEmptyGenericParam(0, -1);  // -1: all visible GPUs

  std::unique_ptr<Predictor> gpu_predictor(
      Predictor::Create("gpu_predictor", &gpu_lparam));
  std::unique_ptr<Predictor> cpu_predictor(
      Predictor::Create("cpu_predictor", &cpu_lparam));

  cpu_predictor->Configure({}, {});

  for (size_t dim = 1; dim < 33; dim *= 2) {
    const int rows = dim;
    const int cols = dim;
    auto dmat = CreateDMatrix(rows, cols, 0);

    gbm::GBTreeModel model = CreateTestModel();
    model.param.num_feature = cols;

    // Predict the same batch on both backends.
    HostDeviceVector<float> from_gpu;
    HostDeviceVector<float> from_cpu;
    gpu_predictor->PredictBatch((*dmat).get(), &from_gpu, model, 0);
    cpu_predictor->PredictBatch((*dmat).get(), &from_cpu, model, 0);

    std::vector<float>& gpu_h = from_gpu.HostVector();
    std::vector<float>& cpu_h = from_cpu.HostVector();
    const float abs_tolerance = 0.001f;
    for (size_t k = 0; k < from_gpu.Size(); ++k) {
      ASSERT_NEAR(gpu_h[k], cpu_h[k], abs_tolerance);
    }
    delete dmat;
  }
}

// multi-GPU predictor external memory test
// Multi-GPU predictor test on external-memory (sparse page) matrices of
// several sizes, with a 3-class model: for every row the class-0 prediction
// is 1.5 and the remaining class outputs are 0.
TEST(gpu_predictor, MGPU_ExternalMemoryTest) {
  auto gpu_lparam = CreateEmptyGenericParam(0, -1);  // -1: all visible GPUs

  std::unique_ptr<Predictor> gpu_predictor(
      Predictor::Create("gpu_predictor", &gpu_lparam));
  gpu_predictor->Configure({}, {});

  gbm::GBTreeModel model = CreateTestModel();
  model.param.num_feature = 3;
  const int n_classes = 3;
  model.param.num_output_group = n_classes;

  // Three sparse-page matrices of increasing size, backed by temp files.
  dmlc::TemporaryDirectory tmpdir;
  std::vector<std::unique_ptr<DMatrix>> dmats;
  dmats.push_back(CreateSparsePageDMatrix(9, 64UL, tmpdir.path + "/big_0.libsvm"));
  dmats.push_back(CreateSparsePageDMatrix(128, 128UL, tmpdir.path + "/big_1.libsvm"));
  dmats.push_back(CreateSparsePageDMatrix(1024, 1024UL, tmpdir.path + "/big_2.libsvm"));

  for (const auto& dmat : dmats) {
    // Test predict batch.
    HostDeviceVector<float> preds;
    gpu_predictor->PredictBatch(dmat.get(), &preds, model, 0);
    EXPECT_EQ(preds.Size(), dmat->Info().num_row_ * n_classes);
    const std::vector<float>& host = preds.ConstHostVector();
    for (size_t row = 0; row < host.size() / n_classes; ++row) {
      ASSERT_EQ(host[row * n_classes], 1.5);
      ASSERT_EQ(host[row * n_classes + 1], 0.);
      ASSERT_EQ(host[row * n_classes + 2], 0.);
    }
  }
}
#endif // defined(XGBOOST_USE_NCCL)
} // namespace predictor
} // namespace xgboost
4 changes: 2 additions & 2 deletions tests/cpp/test_learner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,10 @@ TEST(Learner, IO) {
std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "auto"},
Arg{"predictor", "gpu_predictor"},
Arg{"n_gpus", "-1"}});
Arg{"n_gpus", "1"}});
learner->UpdateOneIter(0, p_dmat.get());
ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
ASSERT_EQ(learner->GetGenericParameter().n_gpus, -1);
ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);

dmlc::TemporaryDirectory tempdir;
const std::string fname = tempdir.path + "/model.bst";
Expand Down
8 changes: 0 additions & 8 deletions tests/cpp/tree/test_gpu_hist.cu
Original file line number Diff line number Diff line change
Expand Up @@ -415,13 +415,5 @@ TEST(GpuHist, TestHistogramIndex) {
TestHistogramIndexImpl(1);
}

#if defined(XGBOOST_USE_NCCL)
// Multi-GPU histogram-index test: requires more than one visible device and
// runs the shared implementation with n_gpus == -1 (all visible GPUs).
TEST(GpuHist, MGPU_TestHistogramIndex) {
  auto visible = GPUSet::AllVisible();
  CHECK_GT(visible.Size(), 1);  // needs at least two devices
  TestHistogramIndexImpl(-1);
}
#endif

} // namespace tree
} // namespace xgboost
Loading