From e3bb7e4e2fee333b1d1055e9ee6bdd10480cf15c Mon Sep 17 00:00:00 2001
From: Rong Ou
Date: Wed, 7 Aug 2019 12:51:34 -0700
Subject: [PATCH 1/7] prevent multi-gpu usage

---
 include/xgboost/generic_parameters.h        |   2 +-
 src/learner.cc                              |  10 +-
 tests/cpp/linear/test_linear.cu             |  43 ------
 tests/cpp/metric/test_elementwise_metric.cc |  29 ----
 tests/cpp/predictor/test_gpu_predictor.cu   | 152 +++++++++-----------
 tests/cpp/test_learner.cc                   |   4 +-
 tests/cpp/tree/test_gpu_hist.cu             |   8 --
 tests/python-gpu/test_gpu_linear.py         |  12 --
 tests/python-gpu/test_gpu_updaters.py       |  11 --
 tests/python-gpu/test_large_sizes.py        |  17 +--
 tests/python-gpu/test_pickling.py           |   2 -
 11 files changed, 83 insertions(+), 207 deletions(-)

diff --git a/include/xgboost/generic_parameters.h b/include/xgboost/generic_parameters.h
index 83c98bed3f8c..9d28c97b57e4 100644
--- a/include/xgboost/generic_parameters.h
+++ b/include/xgboost/generic_parameters.h
@@ -40,7 +40,7 @@ struct GenericParameter : public dmlc::Parameter<GenericParameter> {
       .describe("The primary GPU device ordinal.");
   DMLC_DECLARE_FIELD(n_gpus)
       .set_default(0)
-      .set_lower_bound(-1)
+      .set_lower_bound(0)
       .describe("Deprecated, please use distributed training with one "
                 "process per GPU. "
                 "Number of GPUs to use for multi-gpu algorithms.");

diff --git a/src/learner.cc b/src/learner.cc
index c0fa70384e61..0fdaff7714fb 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -580,8 +580,14 @@ class LearnerImpl : public Learner {
     }
     gbm_->Configure(args);
 
-    if (this->gbm_->UseGPU() && cfg_.find("n_gpus") == cfg_.cend()) {
-      generic_param_.n_gpus = 1;
+    if (this->gbm_->UseGPU()) {
+      if (cfg_.find("n_gpus") == cfg_.cend()) {
+        generic_param_.n_gpus = 1;
+      }
+      if (generic_param_.n_gpus != 1) {
+        LOG(FATAL) << "Multi-GPU training is no longer supported. "
+                      "Please use distributed GPU training with one process per GPU.";
+      }
    }
  }
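Note: with this guard in place, any single-process configuration that requests more than one GPU now fails at configure time; in Python the LOG(FATAL) surfaces as an XGBoostError raised through the C API. A minimal sketch of the user-visible behavior, assuming a GPU-enabled build of the Python package (the data below is made up for illustration):

    import numpy as np
    import xgboost as xgb

    X = np.random.rand(100, 10)
    y = np.random.randint(2, size=100)
    dtrain = xgb.DMatrix(X, label=y)

    # Still fine: for GPU algorithms the learner defaults n_gpus to 1.
    xgb.train({'tree_method': 'gpu_hist'}, dtrain, num_boost_round=5)

    # Now fatal: any n_gpus other than 1 with a GPU algorithm hits the
    # LOG(FATAL) added above and raises xgboost.core.XGBoostError.
    xgb.train({'tree_method': 'gpu_hist', 'n_gpus': 2}, dtrain,
              num_boost_round=5)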
diff --git a/tests/cpp/linear/test_linear.cu b/tests/cpp/linear/test_linear.cu
index 127ddc383412..9fba4735e2d3 100644
--- a/tests/cpp/linear/test_linear.cu
+++ b/tests/cpp/linear/test_linear.cu
@@ -24,47 +24,4 @@ TEST(Linear, GPUCoordinate) {
 
   delete mat;
 }
-
-#if defined(XGBOOST_USE_NCCL)
-TEST(Linear, MGPU_GPUCoordinate) {
-  {
-    auto mat = xgboost::CreateDMatrix(10, 10, 0);
-    auto lparam = CreateEmptyGenericParam(0, -1);
-    lparam.n_gpus = -1;
-    auto updater = std::unique_ptr<xgboost::LinearUpdater>(
-        xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
-    updater->Configure({{"eta", "1."}});
-    xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
-        (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
-    xgboost::gbm::GBLinearModel model;
-    model.param.num_feature = (*mat)->Info().num_col_;
-    model.param.num_output_group = 1;
-    model.LazyInitModel();
-    updater->Update(&gpair, (*mat).get(), &model, gpair.Size());
-
-    ASSERT_EQ(model.bias()[0], 5.0f);
-    delete mat;
-  }
-
-  {
-    auto lparam = CreateEmptyGenericParam(1, -1);
-    lparam.n_gpus = -1;
-    auto mat = xgboost::CreateDMatrix(10, 10, 0);
-    auto updater = std::unique_ptr<xgboost::LinearUpdater>(
-        xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
-    updater->Configure({{"eta", "1."}});
-    xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
-        (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
-    xgboost::gbm::GBLinearModel model;
-    model.param.num_feature = (*mat)->Info().num_col_;
-    model.param.num_output_group = 1;
-    model.LazyInitModel();
-    updater->Update(&gpair, (*mat).get(), &model, gpair.Size());
-
-    ASSERT_EQ(model.bias()[0], 5.0f);
-    delete mat;
-  }
-}
-#endif
-
 }  // namespace xgboost
\ No newline at end of file

diff --git a/tests/cpp/metric/test_elementwise_metric.cc b/tests/cpp/metric/test_elementwise_metric.cc
index 071ab5c48eb6..c38b81a7e6fa 100644
--- a/tests/cpp/metric/test_elementwise_metric.cc
+++ b/tests/cpp/metric/test_elementwise_metric.cc
@@ -101,32 +101,3 @@ TEST(Metric, DeclareUnifiedTest(PoissionNegLogLik)) {
               1.1280f, 0.001f);
   delete metric;
 }
-
-#if defined(XGBOOST_USE_NCCL) && defined(__CUDACC__)
-TEST(Metric, MGPU_RMSE) {
-  {
-    auto lparam = xgboost::CreateEmptyGenericParam(0, -1);
-    xgboost::Metric * metric = xgboost::Metric::Create("rmse", &lparam);
-    metric->Configure({});
-    ASSERT_STREQ(metric->Name(), "rmse");
-    EXPECT_NEAR(GetMetricEval(metric, {0}, {0}), 0, 1e-10);
-    EXPECT_NEAR(GetMetricEval(metric,
-                              {0.1f, 0.9f, 0.1f, 0.9f},
-                              {  0,    0,    1,    1}),
-                0.6403f, 0.001f);
-    delete metric;
-  }
-
-  {
-    auto lparam = xgboost::CreateEmptyGenericParam(1, -1);
-    xgboost::Metric * metric = xgboost::Metric::Create("rmse", &lparam);
-    ASSERT_STREQ(metric->Name(), "rmse");
-    EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
-    EXPECT_NEAR(GetMetricEval(metric,
-                              {0.1f, 0.9f, 0.1f, 0.9f},
-                              {  0,    0,    1,    1}),
-                0.6403f, 0.001f);
-    delete metric;
-  }
-}
-#endif
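Note: the 0.6403 constant retained in the surviving single-GPU RMSE assertions is just the root mean squared error of those four predictions; a quick arithmetic check (NumPy, outside the test suite):

    import numpy as np

    preds  = np.array([0.1, 0.9, 0.1, 0.9])
    labels = np.array([0.0, 0.0, 1.0, 1.0])

    # mean squared error = (0.01 + 0.81 + 0.81 + 0.01) / 4 = 0.41
    rmse = np.sqrt(np.mean((preds - labels) ** 2))
    print(rmse)  # 0.6403...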
diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu
index 9bca7be930d1..10a6837e67ea 100644
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -68,6 +68,41 @@ TEST(gpu_predictor, Test) {
   delete dmat;
 }
 
+TEST(gpu_predictor, MoreTest) {
+  auto cpu_lparam = CreateEmptyGenericParam(0, 0);
+  auto gpu_lparam = CreateEmptyGenericParam(0, 1);
+
+  std::unique_ptr<Predictor> gpu_predictor =
+      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam));
+  std::unique_ptr<Predictor> cpu_predictor =
+      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam));
+
+  cpu_predictor->Configure({}, {});
+
+  for (size_t i = 1; i < 33; i *= 2) {
+    int n_row = i, n_col = i;
+    auto dmat = CreateDMatrix(n_row, n_col, 0);
+
+    gbm::GBTreeModel model = CreateTestModel();
+    model.param.num_feature = n_col;
+
+    // Test predict batch
+    HostDeviceVector<bst_float> gpu_out_predictions;
+    HostDeviceVector<bst_float> cpu_out_predictions;
+
+    gpu_predictor->PredictBatch((*dmat).get(), &gpu_out_predictions, model, 0);
+    cpu_predictor->PredictBatch((*dmat).get(), &cpu_out_predictions, model, 0);
+
+    std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.HostVector();
+    std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.HostVector();
+    float abs_tolerance = 0.001;
+    for (int j = 0; j < gpu_out_predictions.Size(); j++) {
+      ASSERT_NEAR(gpu_out_predictions_h[j], cpu_out_predictions_h[j], abs_tolerance);
+    }
+    delete dmat;
+  }
+}
+
 TEST(gpu_predictor, ExternalMemoryTest) {
   auto lparam = CreateEmptyGenericParam(0, 1);
   std::unique_ptr<Predictor> gpu_predictor =
@@ -89,10 +124,43 @@ TEST(gpu_predictor, ExternalMemoryTest) {
   }
 }
 
-#if defined(XGBOOST_USE_NCCL)
+TEST(gpu_predictor, MoreExternalMemoryTest) {
+  auto gpu_lparam = CreateEmptyGenericParam(0, 1);
+
+  std::unique_ptr<Predictor> gpu_predictor =
+      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam));
+  gpu_predictor->Configure({}, {});
+
+  gbm::GBTreeModel model = CreateTestModel();
+  model.param.num_feature = 3;
+  const int n_classes = 3;
+  model.param.num_output_group = n_classes;
+  std::vector<std::unique_ptr<DMatrix>> dmats;
+  dmlc::TemporaryDirectory tmpdir;
+  std::string file0 = tmpdir.path + "/big_0.libsvm";
+  std::string file1 = tmpdir.path + "/big_1.libsvm";
+  std::string file2 = tmpdir.path + "/big_2.libsvm";
+  dmats.push_back(CreateSparsePageDMatrix(9, 64UL, file0));
+  dmats.push_back(CreateSparsePageDMatrix(128, 128UL, file1));
+  dmats.push_back(CreateSparsePageDMatrix(1024, 1024UL, file2));
+
+  for (const auto& dmat: dmats) {
+    // Test predict batch
+    HostDeviceVector<bst_float> out_predictions;
+    gpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
+    EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_ * n_classes);
+    const std::vector<float> &host_vector = out_predictions.ConstHostVector();
+    for (int i = 0; i < host_vector.size() / n_classes; i++) {
+      ASSERT_EQ(host_vector[i * n_classes], 1.5);
+      ASSERT_EQ(host_vector[i * n_classes + 1], 0.);
+      ASSERT_EQ(host_vector[i * n_classes + 2], 0.);
+    }
+  }
+}
+
 // Test whether pickling preserves predictor parameters
-TEST(gpu_predictor, MGPU_PicklingTest) {
-  int const ngpu = GPUSet::AllVisible().Size();
+TEST(gpu_predictor, PicklingTest) {
+  int const ngpu = 1;
 
   dmlc::TemporaryDirectory tempdir;
   const std::string tmp_file = tempdir.path + "/simple.libsvm";
@@ -153,12 +221,6 @@ TEST(gpu_predictor, PicklingTest) {
     ASSERT_EQ(kwargs.at("n_gpus"), std::to_string(ngpu).c_str());
   }
 
-  {  // Change n_gpus and query again
-    CheckCAPICall(XGBoosterSetParam(bst2, "n_gpus", "1"));
-    const auto& kwargs = QueryBoosterConfigurationArguments(bst2);
-    ASSERT_EQ(kwargs.at("n_gpus"), "1");
-  }
-
   {  // Change predictor and query again
     CheckCAPICall(XGBoosterSetParam(bst2, "predictor", "cpu_predictor"));
     const auto& kwargs = QueryBoosterConfigurationArguments(bst2);
@@ -167,77 +229,5 @@ TEST(gpu_predictor, PicklingTest) {
 
   CheckCAPICall(XGBoosterFree(bst2));
 }
-
-// multi-GPU predictor test
-TEST(gpu_predictor, MGPU_Test) {
-  auto cpu_lparam = CreateEmptyGenericParam(0, 0);
-  auto gpu_lparam = CreateEmptyGenericParam(0, -1);
-
-  std::unique_ptr<Predictor> gpu_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam));
-  std::unique_ptr<Predictor> cpu_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam));
-
-  cpu_predictor->Configure({}, {});
-
-  for (size_t i = 1; i < 33; i *= 2) {
-    int n_row = i, n_col = i;
-    auto dmat = CreateDMatrix(n_row, n_col, 0);
-
-    gbm::GBTreeModel model = CreateTestModel();
-    model.param.num_feature = n_col;
-
-    // Test predict batch
-    HostDeviceVector<bst_float> gpu_out_predictions;
-    HostDeviceVector<bst_float> cpu_out_predictions;
-
-    gpu_predictor->PredictBatch((*dmat).get(), &gpu_out_predictions, model, 0);
-    cpu_predictor->PredictBatch((*dmat).get(), &cpu_out_predictions, model, 0);
-
-    std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.HostVector();
-    std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.HostVector();
-    float abs_tolerance = 0.001;
-    for (int j = 0; j < gpu_out_predictions.Size(); j++) {
-      ASSERT_NEAR(gpu_out_predictions_h[j], cpu_out_predictions_h[j], abs_tolerance);
-    }
-    delete dmat;
-  }
-}
-
-// multi-GPU predictor external memory test
-TEST(gpu_predictor, MGPU_ExternalMemoryTest) {
-  auto gpu_lparam = CreateEmptyGenericParam(0, -1);
-
-  std::unique_ptr<Predictor> gpu_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam));
-  gpu_predictor->Configure({}, {});
-
-  gbm::GBTreeModel model = CreateTestModel();
-  model.param.num_feature = 3;
-  const int n_classes = 3;
-  model.param.num_output_group = n_classes;
-  std::vector<std::unique_ptr<DMatrix>> dmats;
-  dmlc::TemporaryDirectory tmpdir;
-  std::string file0 = tmpdir.path + "/big_0.libsvm";
-  std::string file1 = tmpdir.path + "/big_1.libsvm";
-  std::string file2 = tmpdir.path + "/big_2.libsvm";
-  dmats.push_back(CreateSparsePageDMatrix(9, 64UL, file0));
-  dmats.push_back(CreateSparsePageDMatrix(128, 128UL, file1));
-  dmats.push_back(CreateSparsePageDMatrix(1024, 1024UL, file2));
-
-  for (const auto& dmat: dmats) {
-    // Test predict batch
-    HostDeviceVector<bst_float> out_predictions;
-    gpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
-    EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_ * n_classes);
-    const std::vector<float> &host_vector = out_predictions.ConstHostVector();
-    for (int i = 0; i < host_vector.size() / n_classes; i++) {
-      ASSERT_EQ(host_vector[i * n_classes], 1.5);
-      ASSERT_EQ(host_vector[i * n_classes + 1], 0.);
-      ASSERT_EQ(host_vector[i * n_classes + 2], 0.);
-    }
-  }
-}
-#endif  // defined(XGBOOST_USE_NCCL)
 }  // namespace predictor
 }  // namespace xgboost
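Note: the consolidated tests keep the invariant the deleted MGPU variants checked — GPU and CPU predictors agreeing within a small tolerance — but on a single device. The same parity can be spot-checked from Python (a sketch, assuming a GPU build; the parameter names are the 0.90-era ones used throughout this series):

    import numpy as np
    import xgboost as xgb

    X = np.random.rand(32, 32)
    y = np.random.rand(32)
    dtrain = xgb.DMatrix(X, label=y)

    bst = xgb.train({'tree_method': 'gpu_hist', 'n_gpus': 1}, dtrain,
                    num_boost_round=10)

    bst.set_param({'predictor': 'gpu_predictor'})
    gpu_preds = bst.predict(dtrain)
    bst.set_param({'predictor': 'cpu_predictor'})
    cpu_preds = bst.predict(dtrain)

    # Mirrors the ASSERT_NEAR(..., 0.001) in the C++ test.
    np.testing.assert_allclose(gpu_preds, cpu_preds, atol=1e-3)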
diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc
index fa2a21d4fa68..9261bb96f89c 100644
--- a/tests/cpp/test_learner.cc
+++ b/tests/cpp/test_learner.cc
@@ -168,10 +168,10 @@ TEST(Learner, IO) {
   std::unique_ptr<Learner> learner {Learner::Create(mat)};
   learner->SetParams({Arg{"tree_method", "auto"},
                       Arg{"predictor", "gpu_predictor"},
-                      Arg{"n_gpus", "-1"}});
+                      Arg{"n_gpus", "1"}});
   learner->UpdateOneIter(0, p_dmat.get());
   ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
-  ASSERT_EQ(learner->GetGenericParameter().n_gpus, -1);
+  ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);
 
   dmlc::TemporaryDirectory tempdir;
   const std::string fname = tempdir.path + "/model.bst";

diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu
index 002a04301126..869859fee416 100644
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@@ -415,13 +415,5 @@ TEST(GpuHist, TestHistogramIndex) {
   TestHistogramIndexImpl(1);
 }
 
-#if defined(XGBOOST_USE_NCCL)
-TEST(GpuHist, MGPU_TestHistogramIndex) {
-  auto devices = GPUSet::AllVisible();
-  CHECK_GT(devices.Size(), 1);
-  TestHistogramIndexImpl(-1);
-}
-#endif
-
 }  // namespace tree
 }  // namespace xgboost

diff --git a/tests/python-gpu/test_gpu_linear.py b/tests/python-gpu/test_gpu_linear.py
index 47d6f92cf1a3..79e5919b6fe1 100644
--- a/tests/python-gpu/test_gpu_linear.py
+++ b/tests/python-gpu/test_gpu_linear.py
@@ -29,15 +29,3 @@ def test_gpu_coordinate(self):
                 param, 150, self.datasets, scale_features=True)
             test_linear.assert_regression_result(results, 1e-2)
             test_linear.assert_classification_result(results)
-
-    @pytest.mark.mgpu
-    @pytest.mark.skipif(**tm.no_sklearn())
-    def test_gpu_coordinate_mgpu(self):
-        parameters = self.common_param.copy()
-        parameters['n_gpus'] = [-1]
-        parameters['gpu_id'] = [1]
-        for param in test_linear.parameter_combinations(parameters):
-            results = test_linear.run_suite(
-                param, 150, self.datasets, scale_features=True)
-            test_linear.assert_regression_result(results, 1e-2)
-            test_linear.assert_classification_result(results)

diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py
index 5039093b4b85..dbc7b10209fa 100644
--- a/tests/python-gpu/test_gpu_updaters.py
+++ b/tests/python-gpu/test_gpu_updaters.py
@@ -36,17 +36,6 @@ def test_gpu_hist(self):
             cpu_results = run_suite(param, select_datasets=datasets)
             assert_gpu_results(cpu_results, gpu_results)
 
-    @pytest.mark.mgpu
-    def test_gpu_hist_mgpu(self):
-        variable_param = {'n_gpus': [-1], 'max_depth': [2, 10],
-                          'max_leaves': [255, 4],
-                          'max_bin': [2, 256],
-                          'grow_policy': ['lossguide'], 'debug_synchronize': [True]}
-        for param in parameter_combinations(variable_param):
-            param['tree_method'] = 'gpu_hist'
-            gpu_results = run_suite(param, select_datasets=datasets)
-            assert_results_non_increasing(gpu_results, 1e-2)
-
     @pytest.mark.mgpu
     def test_specified_gpu_id_gpu_update(self):
         variable_param = {'n_gpus': [1],
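Note: the surviving test_specified_gpu_id_gpu_update covers the one configuration that remains legal besides the default — a single GPU that is not device 0. In user code that is simply (a sketch; assumes a machine with at least two visible GPUs):

    import numpy as np
    import xgboost as xgb

    X = np.random.rand(1000, 20)
    y = np.random.randint(2, size=1000)
    dtrain = xgb.DMatrix(X, label=y)

    params = {
        'tree_method': 'gpu_hist',
        'n_gpus': 1,   # the only value still allowed for GPU training
        'gpu_id': 1,   # but any visible device ordinal may be chosen
        'objective': 'binary:logistic',
    }
    bst = xgb.train(params, dtrain, num_boost_round=10)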
diff --git a/tests/python-gpu/test_large_sizes.py b/tests/python-gpu/test_large_sizes.py
index 9241587b151f..421964285fd6 100644
--- a/tests/python-gpu/test_large_sizes.py
+++ b/tests/python-gpu/test_large_sizes.py
@@ -25,7 +25,7 @@ def eprint(*args, **kwargs):
 # reduced to fit onto 1 gpu but still be large
 rows3 = 5000  # small
 rows2 = 4360032  # medium
-rows1 = 42360032  # large
+rows1 = 32360032  # large
 # rows1 = 152360032  # can do this for multi-gpu test (very large)
 rowslist = [rows1, rows2, rows3]
 
@@ -67,15 +67,6 @@ def test_large(self):
                      'objective': 'binary:logistic',
                      'max_bin': max_bin,
                      'eval_metric': 'auc'}
-        ag_param3 = {'max_depth': max_depth,
-                     'tree_method': 'gpu_hist',
-                     'nthread': 0,
-                     'eta': 1,
-                     'verbosity': 3,
-                     'n_gpus': -1,
-                     'objective': 'binary:logistic',
-                     'max_bin': max_bin,
-                     'eval_metric': 'auc'}
         ag_res = {}
         ag_resb = {}
         ag_res2 = {}
@@ -93,9 +84,3 @@ def test_large(self):
             xgb.train(ag_param2, ag_dtrain, num_rounds, [(ag_dtrain, 'train')],
                       evals_result=ag_res2)
             print("Time to Train: %s seconds" % (str(time.time() - tmp)))
-
-            tmp = time.time()
-            eprint("gpu_hist updater all gpus")
-            xgb.train(ag_param3, ag_dtrain, num_rounds, [(ag_dtrain, 'train')],
-                      evals_result=ag_res3)
-            print("Time to Train: %s seconds" % (str(time.time() - tmp)))

diff --git a/tests/python-gpu/test_pickling.py b/tests/python-gpu/test_pickling.py
index 691f4a56ba72..9c077e3155a9 100644
--- a/tests/python-gpu/test_pickling.py
+++ b/tests/python-gpu/test_pickling.py
@@ -35,8 +35,6 @@ def test_pickling(self):
         x, y = build_dataset()
         train_x = xgb.DMatrix(x, label=y)
         param = {'tree_method': 'gpu_hist',
-                 'gpu_id': 0,
-                 'n_gpus': -1,
                  'verbosity': 1}
         bst = xgb.train(param, train_x)
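Note: with 'gpu_id' and 'n_gpus' dropped from the parameter dict, test_pickling reduces to a plain round trip, the same pattern the C++ PicklingTest exercises through the C API. A sketch of that flow (the real test builds its data with a build_dataset helper; the inline random data here is an assumption):

    import pickle
    import numpy as np
    import xgboost as xgb

    X = np.random.rand(100, 10)
    y = np.random.randint(2, size=100)
    dtrain = xgb.DMatrix(X, label=y)

    bst = xgb.train({'tree_method': 'gpu_hist', 'verbosity': 1}, dtrain)

    # Boosters are picklable; configuration such as the predictor choice
    # survives the round trip.
    restored = pickle.loads(pickle.dumps(bst))
    assert np.array_equal(restored.predict(dtrain), bst.predict(dtrain))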
From e1f462eda009008a41730a9900b1b286c556d05c Mon Sep 17 00:00:00 2001
From: Rong Ou
Date: Wed, 7 Aug 2019 15:59:01 -0700
Subject: [PATCH 2/7] fix build

---
 tests/cpp/predictor/test_gpu_predictor.cu | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu
index 10a6837e67ea..eecd55e75d96 100644
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -12,7 +12,6 @@
 #include "gtest/gtest.h"
 #include "../helpers.h"
 
-#if defined(XGBOOST_USE_NCCL)
 namespace {
 
 inline void CheckCAPICall(int ret) {
@@ -20,7 +19,6 @@ inline void CheckCAPICall(int ret) {
 }
 
 }  // namespace anonymous
-#endif
 
 const std::map<std::string, std::string>&
 QueryBoosterConfigurationArguments(BoosterHandle handle) {

From a2ca8077a5d6105e407fb697db88bcfa40acdba6 Mon Sep 17 00:00:00 2001
From: Rong Ou
Date: Wed, 7 Aug 2019 16:57:14 -0700
Subject: [PATCH 3/7] fix distributed test

---
 tests/distributed/distributed_gpu.py | 26 --------------------------
 tests/distributed/runtests-gpu.sh    | 20 +-------------------
 2 files changed, 1 insertion(+), 45 deletions(-)

diff --git a/tests/distributed/distributed_gpu.py b/tests/distributed/distributed_gpu.py
index 0099051104b9..6665cd142d14 100644
--- a/tests/distributed/distributed_gpu.py
+++ b/tests/distributed/distributed_gpu.py
@@ -66,27 +66,6 @@ def params_basic_1x4(rank):
     }), 20
 
 
-def params_basic_2x2(rank):
-    return dict(base_params, **{
-        'n_gpus': 2,
-        'gpu_id': 2*rank,
-    }), 20
-
-
-def params_basic_4x1(rank):
-    return dict(base_params, **{
-        'n_gpus': 4,
-        'gpu_id': rank,
-    }), 20
-
-
-def params_basic_asym(rank):
-    return dict(base_params, **{
-        'n_gpus': 1 if rank == 0 else 3,
-        'gpu_id': rank,
-    }), 20
-
-
 rf_update_params = {
     'subsample': 0.5,
     'colsample_bynode': 0.5
@@ -103,11 +82,6 @@ def wrapped_params_fun(rank):
 
 params_rf_1x4 = wrap_rf(params_basic_1x4)
 
-params_rf_2x2 = wrap_rf(params_basic_2x2)
-
-params_rf_4x1 = wrap_rf(params_basic_4x1)
-
-params_rf_asym = wrap_rf(params_basic_asym)
 
 test_name = sys.argv[1]

diff --git a/tests/distributed/runtests-gpu.sh b/tests/distributed/runtests-gpu.sh
index 950704f9850b..e942efd25b80 100755
--- a/tests/distributed/runtests-gpu.sh
+++ b/tests/distributed/runtests-gpu.sh
@@ -8,23 +8,5 @@ submit="timeout 30 python ../../dmlc-core/tracker/dmlc-submit"
 echo -e "\n ====== 1. Basic distributed-gpu test with Python: 4 workers; 1 GPU per worker ====== \n"
 $submit --num-workers=4 python distributed_gpu.py basic_1x4 || exit 1
 
-echo -e "\n ====== 2. Basic distributed-gpu test with Python: 2 workers; 2 GPUs per worker ====== \n"
-$submit --num-workers=2 python distributed_gpu.py basic_2x2 || exit 1
-
-echo -e "\n ====== 3. Basic distributed-gpu test with Python: 2 workers; Rank 0: 1 GPU, Rank 1: 3 GPUs ====== \n"
-$submit --num-workers=2 python distributed_gpu.py basic_asym || exit 1
-
-echo -e "\n ====== 4. Basic distributed-gpu test with Python: 1 worker; 4 GPUs per worker ====== \n"
-$submit --num-workers=1 python distributed_gpu.py basic_4x1 || exit 1
-
-echo -e "\n ====== 5. RF distributed-gpu test with Python: 4 workers; 1 GPU per worker ====== \n"
+echo -e "\n ====== 2. RF distributed-gpu test with Python: 4 workers; 1 GPU per worker ====== \n"
 $submit --num-workers=4 python distributed_gpu.py rf_1x4 || exit 1
-
-echo -e "\n ====== 6. RF distributed-gpu test with Python: 2 workers; 2 GPUs per worker ====== \n"
-$submit --num-workers=2 python distributed_gpu.py rf_2x2 || exit 1
-
-echo -e "\n ====== 7. RF distributed-gpu test with Python: 2 workers; Rank 0: 1 GPU, Rank 1: 3 GPUs ====== \n"
-$submit --num-workers=2 python distributed_gpu.py rf_asym || exit 1
-
-echo -e "\n ====== 8. RF distributed-gpu test with Python: 1 worker; 4 GPUs per worker ====== \n"
-$submit --num-workers=1 python distributed_gpu.py rf_4x1 || exit 1
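Note: after patch 3 every distributed configuration is strictly one process per GPU, so the surviving basic_1x4/rf_1x4 cases reduce to the worker pattern below. A sketch of what each worker in distributed_gpu.py effectively does (the tracker environment comes from dmlc-submit as in runtests-gpu.sh; the data path here is hypothetical):

    import xgboost as xgb

    xgb.rabit.init()
    rank = xgb.rabit.get_rank()

    dtrain = xgb.DMatrix('train.libsvm')  # hypothetical per-worker shard

    params = {
        'tree_method': 'gpu_hist',
        'objective': 'binary:logistic',
        'n_gpus': 1,     # exactly one GPU per process...
        'gpu_id': rank,  # ...bound to this worker's rank
    }
    bst = xgb.train(params, dtrain, num_boost_round=20)

    xgb.rabit.finalize()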
From 42c573663d30c7c6e1ef124055490e8e425e3b77 Mon Sep 17 00:00:00 2001
From: Rong Ou
Date: Thu, 8 Aug 2019 09:13:34 -0700
Subject: [PATCH 4/7] combine gpu predictor tests

---
 tests/cpp/predictor/test_gpu_predictor.cu | 56 -----------------------
 1 file changed, 56 deletions(-)

diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu
index eecd55e75d96..5cf5817de1e5 100644
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -44,39 +44,6 @@ TEST(gpu_predictor, Test) {
   gpu_predictor->Configure({}, {});
   cpu_predictor->Configure({}, {});
 
-  int n_row = 5;
-  int n_col = 5;
-
-  gbm::GBTreeModel model = CreateTestModel();
-  model.param.num_feature = n_col;
-  auto dmat = CreateDMatrix(n_row, n_col, 0);
-
-  // Test predict batch
-  HostDeviceVector<bst_float> gpu_out_predictions;
-  HostDeviceVector<bst_float> cpu_out_predictions;
-  gpu_predictor->PredictBatch((*dmat).get(), &gpu_out_predictions, model, 0);
-  cpu_predictor->PredictBatch((*dmat).get(), &cpu_out_predictions, model, 0);
-  std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.HostVector();
-  std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.HostVector();
-  float abs_tolerance = 0.001;
-  for (int i = 0; i < gpu_out_predictions.Size(); i++) {
-    ASSERT_NEAR(gpu_out_predictions_h[i], cpu_out_predictions_h[i], abs_tolerance);
-  }
-
-  delete dmat;
-}
-
-TEST(gpu_predictor, MoreTest) {
-  auto cpu_lparam = CreateEmptyGenericParam(0, 0);
-  auto gpu_lparam = CreateEmptyGenericParam(0, 1);
-
-  std::unique_ptr<Predictor> gpu_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam));
-  std::unique_ptr<Predictor> cpu_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam));
-
-  cpu_predictor->Configure({}, {});
-
   for (size_t i = 1; i < 33; i *= 2) {
     int n_row = i, n_col = i;
     auto dmat = CreateDMatrix(n_row, n_col, 0);
@@ -106,29 +73,6 @@ TEST(gpu_predictor, ExternalMemoryTest) {
   std::unique_ptr<Predictor> gpu_predictor =
       std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
   gpu_predictor->Configure({}, {});
-  gbm::GBTreeModel model = CreateTestModel();
-  int n_col = 3;
-  model.param.num_feature = n_col;
-  dmlc::TemporaryDirectory tmpdir;
-  std::string filename = tmpdir.path + "/big.libsvm";
-  std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(32, 64, filename);
-
-  // Test predict batch
-  HostDeviceVector<bst_float> out_predictions;
-  gpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
-  EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_);
-  for (const auto& v : out_predictions.HostVector()) {
-    ASSERT_EQ(v, 1.5);
-  }
-}
-
-TEST(gpu_predictor, MoreExternalMemoryTest) {
-  auto gpu_lparam = CreateEmptyGenericParam(0, 1);
-
-  std::unique_ptr<Predictor> gpu_predictor =
-      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam));
-  gpu_predictor->Configure({}, {});
-
   gbm::GBTreeModel model = CreateTestModel();
   model.param.num_feature = 3;
   const int n_classes = 3;

From 4db36c83a2d59ab8892eaf007071d788f804eb8b Mon Sep 17 00:00:00 2001
From: Rong Ou
Date: Thu, 8 Aug 2019 10:47:20 -0700
Subject: [PATCH 5/7] set upper bound on n_gpus

---
 include/xgboost/generic_parameters.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/xgboost/generic_parameters.h b/include/xgboost/generic_parameters.h
index 9d28c97b57e4..2ee5ed96f5cf 100644
--- a/include/xgboost/generic_parameters.h
+++ b/include/xgboost/generic_parameters.h
@@ -40,7 +40,7 @@ struct GenericParameter : public dmlc::Parameter<GenericParameter> {
       .describe("The primary GPU device ordinal.");
   DMLC_DECLARE_FIELD(n_gpus)
       .set_default(0)
-      .set_lower_bound(0)
+      .set_range(0, 1)
       .describe("Deprecated, please use distributed training with one "
                 "process per GPU. "
                 "Number of GPUs to use for multi-gpu algorithms.");
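Note: moving from set_lower_bound(0) to set_range(0, 1) shifts enforcement forward: out-of-range values are now rejected by dmlc-core parameter validation during configuration, before the learner's own n_gpus check even runs. From Python both paths surface as XGBoostError (a sketch; the exact message text comes from dmlc-core and is only paraphrased in the comment):

    import numpy as np
    import xgboost as xgb

    dtrain = xgb.DMatrix(np.random.rand(10, 4), label=np.random.rand(10))

    try:
        xgb.train({'tree_method': 'gpu_hist', 'n_gpus': 4}, dtrain,
                  num_boost_round=1)
    except xgb.core.XGBoostError as err:
        print(err)  # roughly: value 4 for parameter n_gpus is out of range [0, 1]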
From 31b8a004a2f6384d54e4f32e216b0fdd19db988e Mon Sep 17 00:00:00 2001
From: Rong Ou
Date: Thu, 8 Aug 2019 11:31:26 -0700
Subject: [PATCH 6/7] remove failed mgpu tests

---
 tests/cpp/common/test_gpu_hist_util.cu | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/tests/cpp/common/test_gpu_hist_util.cu b/tests/cpp/common/test_gpu_hist_util.cu
index 2081bd7617f3..5be7d8dd2202 100644
--- a/tests/cpp/common/test_gpu_hist_util.cu
+++ b/tests/cpp/common/test_gpu_hist_util.cu
@@ -88,19 +88,5 @@ TEST(gpu_hist_util, DeviceSketch_ExternalMemory) {
   TestDeviceSketch(GPUSet::Range(0, 1), true);
 }
 
-#if defined(XGBOOST_USE_NCCL)
-TEST(gpu_hist_util, MGPU_DeviceSketch) {
-  auto devices = GPUSet::AllVisible();
-  CHECK_GT(devices.Size(), 1);
-  TestDeviceSketch(devices, false);
-}
-
-TEST(gpu_hist_util, MGPU_DeviceSketch_ExternalMemory) {
-  auto devices = GPUSet::AllVisible();
-  CHECK_GT(devices.Size(), 1);
-  TestDeviceSketch(devices, true);
-}
-#endif
-
 }  // namespace common
 }  // namespace xgboost

From 4fe16ce0feab98a15008d2b6dbd78d90b0bf90de Mon Sep 17 00:00:00 2001
From: Rong Ou
Date: Sun, 11 Aug 2019 23:06:19 -0700
Subject: [PATCH 7/7] clarify error message

---
 include/xgboost/generic_parameters.h | 6 +++---
 src/learner.cc                       | 5 +++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/include/xgboost/generic_parameters.h b/include/xgboost/generic_parameters.h
index 2ee5ed96f5cf..e4cef9cf094c 100644
--- a/include/xgboost/generic_parameters.h
+++ b/include/xgboost/generic_parameters.h
@@ -41,9 +41,9 @@ struct GenericParameter : public dmlc::Parameter<GenericParameter> {
   DMLC_DECLARE_FIELD(n_gpus)
       .set_default(0)
       .set_range(0, 1)
-      .describe("Deprecated, please use distributed training with one "
-                "process per GPU. "
-                "Number of GPUs to use for multi-gpu algorithms.");
+      .describe("Deprecated. Single process multi-GPU training is no longer supported. "
+                "Please switch to distributed training with one process per GPU. "
+                "This can be done using Dask or Spark.");
   }
 };
 }  // namespace xgboost

diff --git a/src/learner.cc b/src/learner.cc
index 0fdaff7714fb..8e694882db74 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -585,8 +585,9 @@ class LearnerImpl : public Learner {
         generic_param_.n_gpus = 1;
       }
       if (generic_param_.n_gpus != 1) {
-        LOG(FATAL) << "Multi-GPU training is no longer supported. "
-                      "Please use distributed GPU training with one process per GPU.";
+        LOG(FATAL) << "Single process multi-GPU training is no longer supported. "
+                      "Please switch to distributed GPU training with one process per GPU. "
+                      "This can be done using Dask or Spark.";
       }
     }
   }
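Note: the final wording points users at Dask or Spark for multi-GPU work. For reference, the one-process-per-GPU workflow it refers to looks roughly like this with the Dask integration that ships in later xgboost releases (a sketch; xgboost.dask, dask_cuda.LocalCUDACluster, and the surrounding names are assumptions relative to this 0.90-era series):

    import dask.array as da
    import xgboost as xgb
    from dask.distributed import Client
    from dask_cuda import LocalCUDACluster

    if __name__ == '__main__':
        # dask_cuda starts one worker process per visible GPU.
        with LocalCUDACluster() as cluster, Client(cluster) as client:
            X = da.random.random((100000, 20), chunks=(10000, 20))
            y = da.random.random(100000, chunks=10000)

            dtrain = xgb.dask.DaskDMatrix(client, X, y)
            output = xgb.dask.train(client, {'tree_method': 'gpu_hist'},
                                    dtrain, num_boost_round=20)
            booster = output['booster']  # trained model gathered on the client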