Put alpha and beta on the device since they are passed as pointers.

By default, Ginkgo handles use the `POINTER_MODE_DEVICE`, which means all pointers should be allocated on the GPU. This pointer mode type has some advantages as it means to implicit synchronizations with the CPU. For more information see: https://docs.nvidia.com/cuda/cublas/index.html#scalar-parameters.
ginkgo-project · May 14, 2019 · a499e6d · a499e6d
1 parent cc5d861
commit a499e6d
Showing 1 changed file with 45 additions and 31 deletions.
diff --git a/benchmark/spmv/cuda_linops.hpp b/benchmark/spmv/cuda_linops.hpp
@@ -121,17 +121,20 @@ class CuspCsrmp
         auto dense_x = gko::as<gko::matrix::Dense<ValueType>>(x);
         auto db = dense_b->get_const_values();
         auto dx = dense_x->get_values();
-        auto alpha = gko::one<ValueType>();
-        auto beta = gko::zero<ValueType>();
+        gko::Array<ValueType> alpha(this->get_executor(),
+                                    {gko::one<ValueType>()});
+        gko::Array<ValueType> beta(this->get_executor(),
+                                   {gko::zero<ValueType>()});
 
         const auto id = this->get_gpu_exec()->get_device_id();
         gko::device_guard g{id};
         GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrmv_mp(
             this->get_gpu_exec()->get_cusparse_handle(), trans_,
             this->get_size()[0], this->get_size()[1],
-            csr_->get_num_stored_elements(), &alpha, this->get_descr(),
-            csr_->get_const_values(), csr_->get_const_row_ptrs(),
-            csr_->get_const_col_idxs(), db, &beta, dx));
+            csr_->get_num_stored_elements(), alpha.get_data(),
+            this->get_descr(), csr_->get_const_values(),
+            csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
+            beta.get_data(), dx));
     }
 
     gko::size_type get_num_stored_elements() const noexcept
@@ -178,17 +181,20 @@ class CuspCsr
         auto dense_x = gko::as<gko::matrix::Dense<ValueType>>(x);
         auto db = dense_b->get_const_values();
         auto dx = dense_x->get_values();
-        auto alpha = gko::one<ValueType>();
-        auto beta = gko::zero<ValueType>();
+        gko::Array<ValueType> alpha(this->get_executor(),
+                                    {gko::one<ValueType>()});
+        gko::Array<ValueType> beta(this->get_executor(),
+                                   {gko::zero<ValueType>()});
 
         const auto id = this->get_gpu_exec()->get_device_id();
         gko::device_guard g{id};
         GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrmv(
             this->get_gpu_exec()->get_cusparse_handle(), trans_,
             this->get_size()[0], this->get_size()[1],
-            csr_->get_num_stored_elements(), &alpha, this->get_descr(),
-            csr_->get_const_values(), csr_->get_const_row_ptrs(),
-            csr_->get_const_col_idxs(), db, &beta, dx));
+            csr_->get_num_stored_elements(), alpha.get_data(),
+            this->get_descr(), csr_->get_const_values(),
+            csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
+            beta.get_data(), dx));
     }
 
     gko::size_type get_num_stored_elements() const noexcept
@@ -233,18 +239,21 @@ class CuspCsrmm
         auto dense_b = gko::as<gko::matrix::Dense<ValueType>>(b);
         auto dense_x = gko::as<gko::matrix::Dense<ValueType>>(x);
         auto db = dense_b->get_const_values();
+        gko::Array<ValueType> alpha(this->get_executor(),
+                                    {gko::one<ValueType>()});
+        gko::Array<ValueType> beta(this->get_executor(),
+                                   {gko::zero<ValueType>()});
         auto dx = dense_x->get_values();
-        auto alpha = gko::one<ValueType>();
-        auto beta = gko::zero<ValueType>();
 
         const auto id = this->get_gpu_exec()->get_device_id();
         gko::device_guard g{id};
         GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDcsrmm(
             this->get_gpu_exec()->get_cusparse_handle(), trans_,
             this->get_size()[0], dense_b->get_size()[1], this->get_size()[1],
-            csr_->get_num_stored_elements(), &alpha, this->get_descr(),
-            csr_->get_const_values(), csr_->get_const_row_ptrs(),
-            csr_->get_const_col_idxs(), db, dense_b->get_size()[0], &beta, dx,
+            csr_->get_num_stored_elements(), alpha.get_data(),
+            this->get_descr(), csr_->get_const_values(),
+            csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
+            dense_b->get_size()[0], beta.get_data(), dx,
             dense_x->get_size()[0]));
     }
 
@@ -283,20 +292,23 @@ class CuspCsrEx
     {
         csr_->read(data);
         size_t buffer_size;
-        auto alpha = gko::one<ValueType>();
-        auto beta = gko::zero<ValueType>();
+        gko::Array<ValueType> alpha(this->get_executor(),
+                                    {gko::one<ValueType>()});
+        gko::Array<ValueType> beta(this->get_executor(),
+                                   {gko::zero<ValueType>()});
         this->set_size(gko::dim<2>{csr_->get_size()});
 
         const auto id = this->get_gpu_exec()->get_device_id();
         gko::device_guard g{id};
+        // TODO: There is a problem here !
         GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCsrmvEx_bufferSize(
             this->get_gpu_exec()->get_cusparse_handle(), algmode_, trans_,
             this->get_size()[0], this->get_size()[1],
-            csr_->get_num_stored_elements(), &alpha, CUDA_R_64F,
+            csr_->get_num_stored_elements(), alpha.get_data(), CUDA_R_64F,
             this->get_descr(), csr_->get_const_values(), CUDA_R_64F,
             csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), nullptr,
-            CUDA_R_64F, &beta, CUDA_R_64F, nullptr, CUDA_R_64F, CUDA_R_64F,
-            &buffer_size));
+            CUDA_R_64F, beta.get_data(), CUDA_R_64F, nullptr, CUDA_R_64F,
+            CUDA_R_64F, &buffer_size));
         GKO_ASSERT_NO_CUDA_ERRORS(cudaMalloc(&buffer_, buffer_size));
         set_buffer_ = true;
     }
@@ -307,26 +319,24 @@ class CuspCsrEx
         auto dense_x = gko::as<gko::matrix::Dense<ValueType>>(x);
         auto db = dense_b->get_const_values();
         auto dx = dense_x->get_values();
-        auto alpha = gko::one<ValueType>();
-        auto beta = gko::zero<ValueType>();
+        gko::Array<ValueType> alpha(this->get_executor(),
+                                    {gko::one<ValueType>()});
+        gko::Array<ValueType> beta(this->get_executor(),
+                                   {gko::zero<ValueType>()});
 
         const auto id = this->get_gpu_exec()->get_device_id();
         gko::device_guard g{id};
         GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCsrmvEx(
             this->get_gpu_exec()->get_cusparse_handle(), algmode_, trans_,
             this->get_size()[0], this->get_size()[1],
-            csr_->get_num_stored_elements(), &alpha, CUDA_R_64F,
+            csr_->get_num_stored_elements(), alpha.get_data(), CUDA_R_64F,
             this->get_descr(), csr_->get_const_values(), CUDA_R_64F,
             csr_->get_const_row_ptrs(), csr_->get_const_col_idxs(), db,
-            CUDA_R_64F, &beta, CUDA_R_64F, dx, CUDA_R_64F, CUDA_R_64F,
+            CUDA_R_64F, beta.get_data(), CUDA_R_64F, dx, CUDA_R_64F, CUDA_R_64F,
             buffer_));
     }
 
 
-    void apply_impl(const gko::LinOp *, const gko::LinOp *, const gko::LinOp *,
-                    gko::LinOp *) const override
-    {}
-
     gko::size_type get_num_stored_elements() const noexcept
     {
         return csr_->get_num_stored_elements();
@@ -407,13 +417,17 @@ class CuspHybrid
         auto dense_x = gko::as<gko::matrix::Dense<ValueType>>(x);
         auto db = dense_b->get_const_values();
         auto dx = dense_x->get_values();
-        auto alpha = gko::one<ValueType>();
-        auto beta = gko::zero<ValueType>();
+        gko::Array<ValueType> alpha(this->get_executor(),
+                                    {gko::one<ValueType>()});
+        gko::Array<ValueType> beta(this->get_executor(),
+                                   {gko::zero<ValueType>()});
+
         const auto id = this->get_gpu_exec()->get_device_id();
         gko::device_guard g{id};
         GKO_ASSERT_NO_CUSPARSE_ERRORS(
             cusparseDhybmv(this->get_gpu_exec()->get_cusparse_handle(), trans_,
-                           &alpha, this->get_descr(), hyb_, db, &beta, dx));
+                           alpha.get_data(), this->get_descr(), hyb_, db,
+                           beta.get_data(), dx));
     }
 
     CuspHybrid(std::shared_ptr<const gko::Executor> exec,