Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BREAKING] prevent multi-gpu usage #4749

Merged
merged 7 commits into from
Aug 12, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions include/xgboost/generic_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ struct GenericParameter : public dmlc::Parameter<GenericParameter> {
.describe("The primary GPU device ordinal.");
DMLC_DECLARE_FIELD(n_gpus)
.set_default(0)
.set_lower_bound(-1)
.describe("Deprecated, please use distributed training with one "
"process per GPU. "
"Number of GPUs to use for multi-gpu algorithms.");
.set_range(0, 1)
.describe("Deprecated. Single process multi-GPU training is no longer supported. "
"Please switch to distributed training with one process per GPU. "
"This can be done using Dask or Spark.");
}
};
} // namespace xgboost
Expand Down
11 changes: 9 additions & 2 deletions src/learner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -580,8 +580,15 @@ class LearnerImpl : public Learner {
}
gbm_->Configure(args);

if (this->gbm_->UseGPU() && cfg_.find("n_gpus") == cfg_.cend()) {
generic_param_.n_gpus = 1;
if (this->gbm_->UseGPU()) {
if (cfg_.find("n_gpus") == cfg_.cend()) {
generic_param_.n_gpus = 1;
}
if (generic_param_.n_gpus != 1) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed for this PR. But after removing n_gpus, the UseGPU function should follow ;-).

LOG(FATAL) << "Single process multi-GPU training is no longer supported. "
"Please switch to distributed GPU training with one process per GPU. "
"This can be done using Dask or Spark.";
}
}
}

Expand Down
14 changes: 0 additions & 14 deletions tests/cpp/common/test_gpu_hist_util.cu
Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,5 @@ TEST(gpu_hist_util, DeviceSketch_ExternalMemory) {
TestDeviceSketch(GPUSet::Range(0, 1), true);
}

#if defined(XGBOOST_USE_NCCL)
// Multi-GPU sketch test: runs DeviceSketch across every visible device,
// without external memory. Requires more than one GPU to be visible.
TEST(gpu_hist_util, MGPU_DeviceSketch) {
  auto visible = GPUSet::AllVisible();
  CHECK_GT(visible.Size(), 1);  // test is meaningless on a single GPU
  TestDeviceSketch(visible, false);
}

// Multi-GPU sketch test, external-memory variant: same as MGPU_DeviceSketch
// but with the external-memory (sparse page) code path enabled.
TEST(gpu_hist_util, MGPU_DeviceSketch_ExternalMemory) {
  auto visible = GPUSet::AllVisible();
  CHECK_GT(visible.Size(), 1);  // test is meaningless on a single GPU
  TestDeviceSketch(visible, true);
}
#endif

} // namespace common
} // namespace xgboost
43 changes: 0 additions & 43 deletions tests/cpp/linear/test_linear.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,47 +24,4 @@ TEST(Linear, GPUCoordinate) {

delete mat;
}

#if defined(XGBOOST_USE_NCCL)
// Multi-GPU coordinate-descent linear updater test.
//
// The original body duplicated the same ~15 lines verbatim for two primary
// GPU ordinals (0 and 1); the shared logic is factored into a local lambda.
TEST(Linear, MGPU_GPUCoordinate) {
  // Driver: builds a 10x10 DMatrix, runs one gpu_coord_descent update over
  // all visible devices (n_gpus == -1) with the given primary ordinal, and
  // checks the resulting bias.
  auto run_test = [](int gpu_id) {
    auto mat = xgboost::CreateDMatrix(10, 10, 0);
    auto lparam = CreateEmptyGenericParam(gpu_id, -1);
    lparam.n_gpus = -1;  // use every visible GPU
    auto updater = std::unique_ptr<xgboost::LinearUpdater>(
        xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
    updater->Configure({{"eta", "1."}});
    xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
        (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
    xgboost::gbm::GBLinearModel model;
    model.param.num_feature = (*mat)->Info().num_col_;
    model.param.num_output_group = 1;
    model.LazyInitModel();
    updater->Update(&gpair, (*mat).get(), &model, gpair.Size());

    // With eta = 1 and a constant gradient of -5 (hessian 1), a single
    // update drives the bias to exactly 5.
    ASSERT_EQ(model.bias()[0], 5.0f);
    delete mat;
  };

  run_test(0);  // primary device 0
  run_test(1);  // primary device 1
}
#endif

} // namespace xgboost
29 changes: 0 additions & 29 deletions tests/cpp/metric/test_elementwise_metric.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,32 +101,3 @@ TEST(Metric, DeclareUnifiedTest(PoissionNegLogLik)) {
1.1280f, 0.001f);
delete metric;
}

#if defined(XGBOOST_USE_NCCL) && defined(__CUDACC__)
// Multi-GPU RMSE metric test: evaluates the metric with two different
// primary GPU ordinals and expects identical results.
//
// Fix: the metric was held by raw pointer with a trailing `delete`; any
// failing ASSERT_STREQ returns from the test body early and leaked it.
// std::unique_ptr releases it on every exit path.
TEST(Metric, MGPU_RMSE) {
  {
    auto lparam = xgboost::CreateEmptyGenericParam(0, -1);
    std::unique_ptr<xgboost::Metric> metric(
        xgboost::Metric::Create("rmse", &lparam));
    metric->Configure({});
    ASSERT_STREQ(metric->Name(), "rmse");
    EXPECT_NEAR(GetMetricEval(metric.get(), {0}, {0}), 0, 1e-10);
    EXPECT_NEAR(GetMetricEval(metric.get(),
                              {0.1f, 0.9f, 0.1f, 0.9f},
                              {   0,    0,    1,    1}),
                0.6403f, 0.001f);
  }

  {
    auto lparam = xgboost::CreateEmptyGenericParam(1, -1);
    std::unique_ptr<xgboost::Metric> metric(
        xgboost::Metric::Create("rmse", &lparam));
    // Configure was missing here while the first scope calls it; added for
    // consistency (an empty-args Configure, same as above).
    metric->Configure({});
    ASSERT_STREQ(metric->Name(), "rmse");
    EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}), 0, 1e-10);
    EXPECT_NEAR(GetMetricEval(metric.get(),
                              {0.1f, 0.9f, 0.1f, 0.9f},
                              {   0,    0,    1,    1}),
                0.6403f, 0.001f);
  }
}
#endif
156 changes: 44 additions & 112 deletions tests/cpp/predictor/test_gpu_predictor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,13 @@
#include "gtest/gtest.h"
#include "../helpers.h"

#if defined(XGBOOST_USE_NCCL)
namespace {

// Asserts that an XGBoost C-API call succeeded (returned 0); on failure the
// assertion message includes the last C-API error string.
inline void CheckCAPICall(int ret) {
  ASSERT_EQ(ret, 0) << XGBGetLastError();
}

} // namespace anonymous
#endif

const std::map<std::string, std::string>&
QueryBoosterConfigurationArguments(BoosterHandle handle) {
Expand All @@ -46,26 +44,28 @@ TEST(gpu_predictor, Test) {
gpu_predictor->Configure({}, {});
cpu_predictor->Configure({}, {});

int n_row = 5;
int n_col = 5;
for (size_t i = 1; i < 33; i *= 2) {
int n_row = i, n_col = i;
auto dmat = CreateDMatrix(n_row, n_col, 0);

gbm::GBTreeModel model = CreateTestModel();
model.param.num_feature = n_col;
auto dmat = CreateDMatrix(n_row, n_col, 0);

// Test predict batch
HostDeviceVector<float> gpu_out_predictions;
HostDeviceVector<float> cpu_out_predictions;
gpu_predictor->PredictBatch((*dmat).get(), &gpu_out_predictions, model, 0);
cpu_predictor->PredictBatch((*dmat).get(), &cpu_out_predictions, model, 0);
std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.HostVector();
std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.HostVector();
float abs_tolerance = 0.001;
for (int i = 0; i < gpu_out_predictions.Size(); i++) {
ASSERT_NEAR(gpu_out_predictions_h[i], cpu_out_predictions_h[i], abs_tolerance);
}
gbm::GBTreeModel model = CreateTestModel();
model.param.num_feature = n_col;

// Test predict batch
HostDeviceVector<float> gpu_out_predictions;
HostDeviceVector<float> cpu_out_predictions;

gpu_predictor->PredictBatch((*dmat).get(), &gpu_out_predictions, model, 0);
cpu_predictor->PredictBatch((*dmat).get(), &cpu_out_predictions, model, 0);

delete dmat;
std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.HostVector();
std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.HostVector();
float abs_tolerance = 0.001;
for (int j = 0; j < gpu_out_predictions.Size(); j++) {
ASSERT_NEAR(gpu_out_predictions_h[j], cpu_out_predictions_h[j], abs_tolerance);
}
delete dmat;
}
}

TEST(gpu_predictor, ExternalMemoryTest) {
Expand All @@ -74,25 +74,35 @@ TEST(gpu_predictor, ExternalMemoryTest) {
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
gpu_predictor->Configure({}, {});
gbm::GBTreeModel model = CreateTestModel();
int n_col = 3;
model.param.num_feature = n_col;
model.param.num_feature = 3;
const int n_classes = 3;
model.param.num_output_group = n_classes;
std::vector<std::unique_ptr<DMatrix>> dmats;
dmlc::TemporaryDirectory tmpdir;
std::string filename = tmpdir.path + "/big.libsvm";
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(32, 64, filename);

// Test predict batch
HostDeviceVector<float> out_predictions;
gpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_);
for (const auto& v : out_predictions.HostVector()) {
ASSERT_EQ(v, 1.5);
std::string file0 = tmpdir.path + "/big_0.libsvm";
std::string file1 = tmpdir.path + "/big_1.libsvm";
std::string file2 = tmpdir.path + "/big_2.libsvm";
dmats.push_back(CreateSparsePageDMatrix(9, 64UL, file0));
dmats.push_back(CreateSparsePageDMatrix(128, 128UL, file1));
dmats.push_back(CreateSparsePageDMatrix(1024, 1024UL, file2));

for (const auto& dmat: dmats) {
// Test predict batch
HostDeviceVector<float> out_predictions;
gpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_ * n_classes);
const std::vector<float> &host_vector = out_predictions.ConstHostVector();
for (int i = 0; i < host_vector.size() / n_classes; i++) {
ASSERT_EQ(host_vector[i * n_classes], 1.5);
ASSERT_EQ(host_vector[i * n_classes + 1], 0.);
ASSERT_EQ(host_vector[i * n_classes + 2], 0.);
}
}
}

#if defined(XGBOOST_USE_NCCL)
// Test whether pickling preserves predictor parameters
TEST(gpu_predictor, MGPU_PicklingTest) {
int const ngpu = GPUSet::AllVisible().Size();
TEST(gpu_predictor, PicklingTest) {
int const ngpu = 1;

dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
Expand Down Expand Up @@ -153,12 +163,6 @@ TEST(gpu_predictor, MGPU_PicklingTest) {
ASSERT_EQ(kwargs.at("n_gpus"), std::to_string(ngpu).c_str());
}

{ // Change n_gpus and query again
CheckCAPICall(XGBoosterSetParam(bst2, "n_gpus", "1"));
const auto& kwargs = QueryBoosterConfigurationArguments(bst2);
ASSERT_EQ(kwargs.at("n_gpus"), "1");
}

{ // Change predictor and query again
CheckCAPICall(XGBoosterSetParam(bst2, "predictor", "cpu_predictor"));
const auto& kwargs = QueryBoosterConfigurationArguments(bst2);
Expand All @@ -167,77 +171,5 @@ TEST(gpu_predictor, MGPU_PicklingTest) {

CheckCAPICall(XGBoosterFree(bst2));
}

// multi-GPU predictor test
// Multi-GPU predictor test: batch predictions from gpu_predictor running on
// all visible devices must match cpu_predictor output, for square matrices
// of growing size (1x1 up to 32x32).
TEST(gpu_predictor, MGPU_Test) {
  auto cpu_lparam = CreateEmptyGenericParam(0, 0);
  auto gpu_lparam = CreateEmptyGenericParam(0, -1);  // -1: all visible GPUs

  std::unique_ptr<Predictor> gpu_predictor(
      Predictor::Create("gpu_predictor", &gpu_lparam));
  std::unique_ptr<Predictor> cpu_predictor(
      Predictor::Create("cpu_predictor", &cpu_lparam));

  cpu_predictor->Configure({}, {});

  for (size_t dim = 1; dim < 33; dim *= 2) {
    const int rows = dim;
    const int cols = dim;
    auto dmat = CreateDMatrix(rows, cols, 0);

    gbm::GBTreeModel model = CreateTestModel();
    model.param.num_feature = cols;

    // Predict the same batch on both backends.
    HostDeviceVector<float> from_gpu;
    HostDeviceVector<float> from_cpu;
    gpu_predictor->PredictBatch((*dmat).get(), &from_gpu, model, 0);
    cpu_predictor->PredictBatch((*dmat).get(), &from_cpu, model, 0);

    std::vector<float>& gpu_h = from_gpu.HostVector();
    std::vector<float>& cpu_h = from_cpu.HostVector();
    const float abs_tolerance = 0.001f;
    for (size_t k = 0; k < from_gpu.Size(); ++k) {
      ASSERT_NEAR(gpu_h[k], cpu_h[k], abs_tolerance);
    }
    delete dmat;
  }
}

// multi-GPU predictor external memory test
// Multi-GPU predictor test on external-memory (sparse page) matrices of
// several sizes, with a 3-class model: for every row the class-0 prediction
// is 1.5 and the remaining class outputs are 0.
TEST(gpu_predictor, MGPU_ExternalMemoryTest) {
  auto gpu_lparam = CreateEmptyGenericParam(0, -1);  // -1: all visible GPUs

  std::unique_ptr<Predictor> gpu_predictor(
      Predictor::Create("gpu_predictor", &gpu_lparam));
  gpu_predictor->Configure({}, {});

  gbm::GBTreeModel model = CreateTestModel();
  model.param.num_feature = 3;
  const int n_classes = 3;
  model.param.num_output_group = n_classes;

  // Three sparse-page matrices of increasing size, backed by temp files.
  dmlc::TemporaryDirectory tmpdir;
  std::vector<std::unique_ptr<DMatrix>> dmats;
  dmats.push_back(CreateSparsePageDMatrix(9, 64UL, tmpdir.path + "/big_0.libsvm"));
  dmats.push_back(CreateSparsePageDMatrix(128, 128UL, tmpdir.path + "/big_1.libsvm"));
  dmats.push_back(CreateSparsePageDMatrix(1024, 1024UL, tmpdir.path + "/big_2.libsvm"));

  for (const auto& dmat : dmats) {
    // Test predict batch.
    HostDeviceVector<float> preds;
    gpu_predictor->PredictBatch(dmat.get(), &preds, model, 0);
    EXPECT_EQ(preds.Size(), dmat->Info().num_row_ * n_classes);
    const std::vector<float>& host = preds.ConstHostVector();
    for (size_t row = 0; row < host.size() / n_classes; ++row) {
      ASSERT_EQ(host[row * n_classes], 1.5);
      ASSERT_EQ(host[row * n_classes + 1], 0.);
      ASSERT_EQ(host[row * n_classes + 2], 0.);
    }
  }
}
#endif // defined(XGBOOST_USE_NCCL)
} // namespace predictor
} // namespace xgboost
4 changes: 2 additions & 2 deletions tests/cpp/test_learner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,10 @@ TEST(Learner, IO) {
std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "auto"},
Arg{"predictor", "gpu_predictor"},
Arg{"n_gpus", "-1"}});
Arg{"n_gpus", "1"}});
learner->UpdateOneIter(0, p_dmat.get());
ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
ASSERT_EQ(learner->GetGenericParameter().n_gpus, -1);
ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);

dmlc::TemporaryDirectory tempdir;
const std::string fname = tempdir.path + "/model.bst";
Expand Down
8 changes: 0 additions & 8 deletions tests/cpp/tree/test_gpu_hist.cu
Original file line number Diff line number Diff line change
Expand Up @@ -415,13 +415,5 @@ TEST(GpuHist, TestHistogramIndex) {
TestHistogramIndexImpl(1);
}

#if defined(XGBOOST_USE_NCCL)
// Multi-GPU histogram-index test: requires more than one visible device and
// runs the shared implementation with n_gpus == -1 (all visible GPUs).
TEST(GpuHist, MGPU_TestHistogramIndex) {
  auto visible = GPUSet::AllVisible();
  CHECK_GT(visible.Size(), 1);  // needs at least two devices
  TestHistogramIndexImpl(-1);
}
#endif

} // namespace tree
} // namespace xgboost
Loading