Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make HostDeviceVector single gpu only #4773

Merged
merged 29 commits into from
Aug 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
62efe25
make HostDeviceVector single gpu only
rongou Aug 14, 2019
23c6272
get non-tests to compile
rongou Aug 14, 2019
c9dccbb
make code compile
rongou Aug 15, 2019
9e1ea22
fixed some tests
rongou Aug 16, 2019
e89cf17
fix more tests
rongou Aug 16, 2019
ab302d2
make n_gpus private
rongou Aug 16, 2019
962155d
fix one gpu predictor test
rongou Aug 16, 2019
57be3a8
Merge remote-tracking branch 'upstream/master' into single-gpu-hdv
rongou Aug 19, 2019
ddbfc8b
fix after merge
rongou Aug 19, 2019
5959ca3
fix gpu predictor external memory code
rongou Aug 19, 2019
4a6f38f
fix cpu compile
rongou Aug 19, 2019
f51a2fa
Merge branch 'master' into single-gpu-hdv
rongou Aug 20, 2019
352fe00
fix mgpu tests
rongou Aug 20, 2019
2b0a981
fix python cpu test
rongou Aug 20, 2019
3797f57
fix
rongou Aug 20, 2019
dac2608
better initialization
rongou Aug 20, 2019
97e74fc
Merge branch 'master' into single-gpu-hdv
rongou Aug 21, 2019
888b202
fix base margin in gpu predictor
rongou Aug 21, 2019
fdda5b3
remove GPUDistribution
rongou Aug 21, 2019
3b55244
remove GPUSet
rongou Aug 21, 2019
4837b3a
fix cpu build
rongou Aug 21, 2019
7124923
remove reference to n_gpus
rongou Aug 21, 2019
f1930c4
Merge branch 'master' into single-gpu-hdv
rongou Aug 22, 2019
974aeed
remove sharding in host device vector
rongou Aug 23, 2019
f02f084
fix windows build
rongou Aug 23, 2019
ab700bd
clean up hist_util
rongou Aug 23, 2019
2ffcf54
clean up gpu coordinate updater
rongou Aug 23, 2019
3107dca
clean up gpu predictor
rongou Aug 23, 2019
d6520e4
more clean up of gpu predictor
rongou Aug 24, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions demo/c-api/c-api-demo.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,12 @@ int main(int argc, char** argv) {
// https://xgboost.readthedocs.io/en/latest/parameter.html
safe_xgboost(XGBoosterSetParam(booster, "tree_method", use_gpu ? "gpu_hist" : "hist"));
if (use_gpu) {
// set the number of GPUs and the first GPU to use;
// set the GPU to use;
// this is not necessary, but provided here as an illustration
safe_xgboost(XGBoosterSetParam(booster, "n_gpus", "1"));
safe_xgboost(XGBoosterSetParam(booster, "gpu_id", "0"));
} else {
// avoid evaluating objective and metric on a GPU
safe_xgboost(XGBoosterSetParam(booster, "n_gpus", "0"));
safe_xgboost(XGBoosterSetParam(booster, "gpu_id", "-1"));
}

safe_xgboost(XGBoosterSetParam(booster, "objective", "binary:logistic"));
Expand Down
13 changes: 8 additions & 5 deletions include/xgboost/generic_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ struct GenericParameter : public dmlc::Parameter<GenericParameter> {
// number of threads to use if OpenMP is enabled
// if equals 0, use system default
int nthread;
// primary device.
// primary device, -1 means no gpu.
int gpu_id;
// number of devices to use, -1 implies using all available devices.
int n_gpus;
// declare parameters
DMLC_DECLARE_PARAMETER(GenericParameter) {
DMLC_DECLARE_FIELD(seed).set_default(0).describe(
Expand All @@ -36,15 +34,20 @@ struct GenericParameter : public dmlc::Parameter<GenericParameter> {
DMLC_DECLARE_FIELD(nthread).set_default(0).describe(
"Number of threads to use.");
DMLC_DECLARE_FIELD(gpu_id)
.set_default(0)
.set_default(-1)
.set_lower_bound(-1)
.describe("The primary GPU device ordinal.");
DMLC_DECLARE_FIELD(n_gpus)
.set_default(0)
.set_range(0, 1)
.set_range(0, 0)
.describe("Deprecated. Single process multi-GPU training is no longer supported. "
"Please switch to distributed training with one process per GPU. "
"This can be done using Dask or Spark.");
}

private:
// number of devices to use (deprecated).
int n_gpus;
};
} // namespace xgboost

Expand Down
4 changes: 2 additions & 2 deletions plugin/example/custom_obj.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ class MyLogistic : public ObjFunction {
void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
// transform margin value to probability.
std::vector<bst_float> &preds = io_preds->HostVector();
for (size_t i = 0; i < preds.size(); ++i) {
preds[i] = 1.0f / (1.0f + std::exp(-preds[i]));
for (auto& pred : preds) {
pred = 1.0f / (1.0f + std::exp(-pred));
}
}
bst_float ProbToMargin(bst_float base_score) const override {
Expand Down
40 changes: 2 additions & 38 deletions src/common/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,48 +22,12 @@ using RandomThreadLocalStore = dmlc::ThreadLocalStore<RandomThreadLocalEntry>;
GlobalRandomEngine& GlobalRandom() {
return RandomThreadLocalStore::Get()->engine;
}
} // namespace common

#if !defined(XGBOOST_USE_CUDA)
int AllVisibleImpl::AllVisible() {
int AllVisibleGPUs() {
return 0;
}
#endif // !defined(XGBOOST_USE_CUDA)

constexpr GPUSet::GpuIdType GPUSet::kAll;

GPUSet GPUSet::All(GpuIdType gpu_id, GpuIdType n_gpus, int32_t n_rows) {
CHECK_GE(gpu_id, 0) << "gpu_id must be >= 0.";
CHECK_GE(n_gpus, -1) << "n_gpus must be >= -1.";

GpuIdType const n_devices_visible = AllVisible().Size();
CHECK_LE(n_gpus, n_devices_visible);
if (n_devices_visible == 0 || n_gpus == 0 || n_rows == 0) {
LOG(DEBUG) << "Runing on CPU.";
return Empty();
}

GpuIdType const n_available_devices = n_devices_visible - gpu_id;

if (n_gpus == kAll) { // Use all devices starting from `gpu_id'.
CHECK(gpu_id < n_devices_visible)
<< "\ngpu_id should be less than number of visible devices.\ngpu_id: "
<< gpu_id
<< ", number of visible devices: "
<< n_devices_visible;
GpuIdType n_devices =
n_available_devices < n_rows ? n_available_devices : n_rows;
LOG(DEBUG) << "GPU ID: " << gpu_id << ", Number of GPUs: " << n_devices;
return Range(gpu_id, n_devices);
} else { // Use devices in ( gpu_id, gpu_id + n_gpus ).
CHECK_LE(n_gpus, n_available_devices)
<< "Starting from gpu id: " << gpu_id << ", there are only "
<< n_available_devices << " available devices, while n_gpus is set to: "
<< n_gpus;
GpuIdType n_devices = n_gpus < n_rows ? n_gpus : n_rows;
LOG(DEBUG) << "GPU ID: " << gpu_id << ", Number of GPUs: " << n_devices;
return Range(gpu_id, n_devices);
}
}

} // namespace common
} // namespace xgboost
4 changes: 3 additions & 1 deletion src/common/common.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
#include "common.h"

namespace xgboost {
namespace common {

int AllVisibleImpl::AllVisible() {
int AllVisibleGPUs() {
int n_visgpus = 0;
try {
// When compiled with CUDA but running on CPU only device,
Expand All @@ -17,4 +18,5 @@ int AllVisibleImpl::AllVisible() {
return n_visgpus;
}

} // namespace common
} // namespace xgboost
84 changes: 2 additions & 82 deletions src/common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,88 +140,8 @@ class Range {
Iterator begin_;
Iterator end_;
};
} // namespace common

struct AllVisibleImpl {
static int AllVisible();
};
/* \brief set of devices across which HostDeviceVector can be distributed.
*
* Currently implemented as a range, but can be changed later to something else,
* e.g. a bitset
*/
class GPUSet {
public:
using GpuIdType = int;
static constexpr GpuIdType kAll = -1;

explicit GPUSet(int start = 0, int ndevices = 0)
: devices_(start, start + ndevices) {}

static GPUSet Empty() { return GPUSet(); }

static GPUSet Range(GpuIdType start, GpuIdType n_gpus) {
return n_gpus <= 0 ? Empty() : GPUSet{start, n_gpus};
}
/*! \brief n_gpus and num_rows both are upper bounds. */
static GPUSet All(GpuIdType gpu_id, GpuIdType n_gpus,
GpuIdType num_rows = std::numeric_limits<GpuIdType>::max());

static GPUSet AllVisible() {
GpuIdType n = AllVisibleImpl::AllVisible();
return Range(0, n);
}

size_t Size() const {
GpuIdType size = *devices_.end() - *devices_.begin();
GpuIdType res = size < 0 ? 0 : size;
return static_cast<size_t>(res);
}

/*
* By default, we have two configurations of identifying device, one
* is the device id obtained from `cudaGetDevice'. But we sometimes
* store objects that allocated one for each device in a list, which
* requires a zero-based index.
*
* Hence, `DeviceId' converts a zero-based index to actual device id,
* `Index' converts a device id to a zero-based index.
*/
GpuIdType DeviceId(size_t index) const {
GpuIdType result = *devices_.begin() + static_cast<GpuIdType>(index);
CHECK(Contains(result)) << "\nDevice " << result << " is not in GPUSet."
<< "\nIndex: " << index
<< "\nGPUSet: (" << *begin() << ", " << *end() << ")"
<< std::endl;
return result;
}
size_t Index(GpuIdType device) const {
CHECK(Contains(device)) << "\nDevice " << device << " is not in GPUSet."
<< "\nGPUSet: (" << *begin() << ", " << *end() << ")"
<< std::endl;
size_t result = static_cast<size_t>(device - *devices_.begin());
return result;
}

bool IsEmpty() const { return Size() == 0; }

bool Contains(GpuIdType device) const {
return *devices_.begin() <= device && device < *devices_.end();
}

common::Range::Iterator begin() const { return devices_.begin(); } // NOLINT
common::Range::Iterator end() const { return devices_.end(); } // NOLINT

friend bool operator==(const GPUSet& lhs, const GPUSet& rhs) {
return lhs.devices_ == rhs.devices_;
}
friend bool operator!=(const GPUSet& lhs, const GPUSet& rhs) {
return !(lhs == rhs);
}

private:
common::Range devices_;
};

int AllVisibleGPUs();
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_COMMON_H_
18 changes: 1 addition & 17 deletions src/common/device_helpers.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -72,22 +72,6 @@ const T *Raw(const thrust::device_vector<T> &v) { // NOLINT
return raw_pointer_cast(v.data());
}

// if n_devices=-1, then use all visible devices
inline void SynchronizeNDevices(xgboost::GPUSet devices) {
devices = devices.IsEmpty() ? xgboost::GPUSet::AllVisible() : devices;
for (auto const d : devices) {
safe_cuda(cudaSetDevice(d));
safe_cuda(cudaDeviceSynchronize());
}
}

inline void SynchronizeAll() {
for (int device_idx : xgboost::GPUSet::AllVisible()) {
safe_cuda(cudaSetDevice(device_idx));
safe_cuda(cudaDeviceSynchronize());
}
}

inline size_t AvailableMemory(int device_idx) {
size_t device_free = 0;
size_t device_total = 0;
Expand Down Expand Up @@ -119,7 +103,7 @@ inline size_t MaxSharedMemory(int device_idx) {
}

inline void CheckComputeCapability() {
for (int d_idx : xgboost::GPUSet::AllVisible()) {
for (int d_idx = 0; d_idx < xgboost::common::AllVisibleGPUs(); ++d_idx) {
cudaDeviceProp prop;
safe_cuda(cudaGetDeviceProperties(&prop, d_idx));
std::ostringstream oss;
Expand Down
Loading