votca · JensWehner · Jun 1, 2021 · Jun 1, 2021 · Jun 1, 2021 · Jun 1, 2021
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -35,6 +35,7 @@ Version 2022-dev
 -  adapted tokenizer api (#707)
 -  move OpenMP detection to tools (#709)
 -  refactored applications (#710)
+-  add command line option for number of gpus (#711)
 
 Version 2021.1 (released XX.03.21)
 ==================================

diff --git a/include/votca/xtp/cudamatrix.h b/include/votca/xtp/cudamatrix.h
@@ -111,7 +111,7 @@ class CudaMatrix {
 
   void reshape(Index rows, Index cols) {
     assert(rows * cols == size() &&
-           "reshape cannot change the shape of the matrix");
+           "reshape cannot change the size of the matrix only the shape");
     _cols = cols;
     _ld = rows;
   }

diff --git a/include/votca/xtp/cudapipeline.h b/include/votca/xtp/cudapipeline.h
@@ -78,6 +78,7 @@ class CudaPipeline {
   cublasHandle_t _handle;
 
   // Asynchronous stream
+
   cudaStream_t _stream;
 };
 

diff --git a/include/votca/xtp/openmp_cuda.h b/include/votca/xtp/openmp_cuda.h
@@ -68,13 +68,9 @@ namespace xtp {
 class OpenMP_CUDA {
  public:
   OpenMP_CUDA();
-  static Index UsingGPUs() {
-#ifdef USE_CUDA
-    return count_available_gpus();
-#else
-    return 0;
-#endif
-  }
+  static Index UsingGPUs();
+  static Index AvailableGPUs();
+  static void SetNoGPUs(Index number);
 
   // 3c multiply
   void setOperators(const std::vector<Eigen::MatrixXd>& tensor,
@@ -155,6 +151,8 @@ class OpenMP_CUDA {
   bool inside_Parallel_region_;
   Index threadID_parent_;
 
+  static Index number_of_gpus;
+
   Index getParentThreadId(Index OpenmpThreadId) const;
 
   Index getLocalThreadId(Index ParentThreadId) const;

diff --git a/src/libxtp/openmp_cuda.cc b/src/libxtp/openmp_cuda.cc
@@ -23,6 +23,28 @@
 namespace votca {
 namespace xtp {
 
+// The static data member needs this out-of-class definition, see
+// https://stackoverflow.com/questions/9110487/undefined-reference-to-a-static-member
+Index OpenMP_CUDA::number_of_gpus = 0;
+
+Index OpenMP_CUDA::UsingGPUs() { return number_of_gpus; }
+
+Index OpenMP_CUDA::AvailableGPUs() {
+#ifdef USE_CUDA
+  return count_available_gpus();
+#else
+  return 0;
+#endif
+}
+
+void OpenMP_CUDA::SetNoGPUs(Index number) {
+  if (number < 0 || number > AvailableGPUs()) {
+    number_of_gpus = AvailableGPUs();
+  } else {
+    number_of_gpus = number;
+  }
+}
+
 OpenMP_CUDA::OpenMP_CUDA() {
 
   inside_Parallel_region_ = OPENMP::InsideActiveParallelRegion();
@@ -31,7 +53,7 @@ OpenMP_CUDA::OpenMP_CUDA() {
   cpus_.resize(getNumberThreads());
 
 #ifdef USE_CUDA
-  Index no_gpus = count_available_gpus();
+  Index no_gpus = UsingGPUs();
   gpus_.clear();
   if (inside_Parallel_region_) {
     if (threadID_parent_ < no_gpus) {

diff --git a/src/libxtp/xtpapplication.cc b/src/libxtp/xtpapplication.cc
@@ -25,8 +25,8 @@
 #include <votca/tools/propertyiomanipulator.h>
 
 // Local VOTCA includes
+#include "votca/xtp/openmp_cuda.h"
 #include "votca/xtp/version.h"
-#include "votca/xtp/votca_xtp_config.h"
 #include "votca/xtp/xtpapplication.h"
 
 namespace votca {
@@ -79,6 +79,9 @@ bool XtpApplication::EvaluateOptions() {
     StopExecution();
     return true;
   }
+#ifdef USE_CUDA
+  OpenMP_CUDA::SetNoGPUs(OptionsMap()["gpus"].as<Index>());
+#endif
 
   if (OptionsMap().count("description")) {
     CheckRequired("description", "no " + CalculatorType() + " is given");
-Original file line number
+Diff line change
@@ Expand Up / @@ -78,6 +78,7 @@ class CudaPipeline { @@
       cublasHandle_t _handle;
       // Asynchronous stream
       cudaStream_t _stream;
     };
@@ Expand Down @@