Review update: remove code duplication, update YML file.

+ Remove code duplication in the CUDA kernels by moving the common code to a .cuh file. + Update the artifact upload paths in the YML file to circumvent the GitLab limits.
ginkgo-project · Sep 16, 2019 · 5b08ab0 · 5b08ab0
1 parent a83c60c
commit 5b08ab0
Show file tree

Hide file tree

Showing 7 changed files with 272 additions and 292 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -57,17 +57,18 @@ stages:
     paths:
       - "build/*/*/*/*/*/CMakeCache.txt"
       - "build/*/*/*/*/*/*.cmake"
-      - "build/*/*/*/*/*/core/test"
-      - "build/*/*/*/*/*/cuda/test"
-      - "build/*/*/*/*/*/omp/test"
-      - "build/*/*/*/*/*/reference/test"
+      - "build/*/*/*/*/*/core/test/*/[a-z_]*"
+      - "build/*/*/*/*/*/cuda/test/*/[a-z_]*"
+      - "build/*/*/*/*/*/omp/test/*/[a-z_]*"
+      - "build/*/*/*/*/*/reference/test/*/[a-z_]*"
       - "build/*/*/*/*/*/core/libginkgo*"
       - "build/*/*/*/*/*/cuda/libginkgo*"
       - "build/*/*/*/*/*/omp/libginkgo*"
       - "build/*/*/*/*/*/reference/libginkgo*"
       - "build/*/*/*/*/*/core/device_hooks/libginkgo*"
       - "build/*/*/*/*/*/*/CTestTestfile.cmake"
       - "build/*/*/*/*/*/*/*/CTestTestfile.cmake"
+      - "build/*/*/*/*/*/*/*/*/CTestTestfile.cmake"
   except:
       - schedules
 # build paths are of the form: build/<cuda_version>/<compiler>/<module(s)>/{debug,release}/{shared,static}/
@@ -140,7 +141,6 @@ build/cuda91/gcc/all/debug/static:
   image: localhost:5000/gko-cuda91-gnu6-llvm40
   variables:
     <<: *default_variables
-    BUILD_OMP: "ON"
     BUILD_CUDA: "ON"
     BUILD_TYPE: Debug
     EXTRA_CMAKE_FLAGS: *cuda_flags_static
@@ -187,7 +187,6 @@ build/cuda92/clang/all/debug/static:
     <<: *default_variables
     C_COMPILER: clang
     CXX_COMPILER: clang++
-    BUILD_OMP: "ON"
     BUILD_CUDA: "ON"
     BUILD_TYPE: Debug
     EXTRA_CMAKE_FLAGS: *cuda_flags_static

diff --git a/cuda/base/device_guard.hpp b/cuda/base/device_guard.hpp
@@ -30,6 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************<GINKGO LICENSE>*******************************/
 
+#ifndef GKO_CUDA_BASE_DEVICE_GUARD_HPP_
+#define GKO_CUDA_BASE_DEVICE_GUARD_HPP_
+
 
 #include <cuda_runtime.h>
 
@@ -72,3 +75,5 @@ class device_guard {
 
 
 }  // namespace gko
+
+#endif
diff --git a/cuda/base/pointer_mode_guard.hpp b/cuda/base/pointer_mode_guard.hpp
@@ -30,6 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************<GINKGO LICENSE>*******************************/
 
+#ifndef GKO_CUDA_BASE_POINTER_MODE_GUARD_HPP_
+#define GKO_CUDA_BASE_POINTER_MODE_GUARD_HPP_
+
 
 #include <cublas_v2.h>
 #include <cuda_runtime.h>
@@ -113,3 +116,5 @@ class cusparse_pointer_mode_guard {
 
 
 }  // namespace gko
+
+#endif
diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu
@@ -702,9 +702,9 @@ void spmv(std::shared_ptr<const CudaExecutor> exec,
                 auto col_idxs = a->get_const_col_idxs();
                 auto alpha = one<ValueType>();
                 auto beta = zero<ValueType>();
-                if (b->get_stride() != 1 || c->get_stride() != 1)
+                if (b->get_stride() != 1 || c->get_stride() != 1) {
                     GKO_NOT_IMPLEMENTED;
-
+                }
                 cusparse::spmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                                a->get_size()[0], a->get_size()[1],
                                a->get_num_stored_elements(), &alpha, descr,

diff --git a/cuda/solver/common_trs_kernels.cuh b/cuda/solver/common_trs_kernels.cuh
@@ -0,0 +1,240 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2019, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#ifndef GKO_CUDA_SOLVER_COMMON_TRS_KERNELS_CUH_
+#define GKO_CUDA_SOLVER_COMMON_TRS_KERNELS_CUH_
+
+
+#include <functional>
+#include <memory>
+
+
+#include <cuda.h>
+#include <cusparse.h>
+
+
+#include <ginkgo/core/base/exception_helpers.hpp>
+#include <ginkgo/core/base/math.hpp>
+
+
+#include "core/matrix/dense_kernels.hpp"
+#include "core/synthesizer/implementation_selection.hpp"
+#include "cuda/base/cusparse_bindings.hpp"
+#include "cuda/base/device_guard.hpp"
+#include "cuda/base/math.hpp"
+#include "cuda/base/pointer_mode_guard.hpp"
+#include "cuda/base/types.hpp"
+
+
+namespace gko {
+namespace kernels {
+namespace cuda {
+namespace {
+
+
+/**
+ * Tells the caller whether transposed copies of b and x are needed: only the
+ * pre-9.2 csrsm path of solve_kernel reads trans_b / trans_x.
+ *
+ * @param exec  the CUDA executor (unused)
+ * @param do_transpose  set to false for CUDA_VERSION >= 9020, true otherwise
+ */
+void should_perform_transpose_kernel(std::shared_ptr<const CudaExecutor> exec,
+                                     bool &do_transpose)
+{
+    // Use #else so that the output is written on every configuration.
+#if (defined(CUDA_VERSION) && (CUDA_VERSION >= 9020))
+    do_transpose = false;
+#else
+    do_transpose = true;
+#endif
+}
+
+
+/** Replaces `solve_struct` with a freshly constructed solver::SolveStruct. */
+void init_struct_kernel(std::shared_ptr<const CudaExecutor> exec,
+                        std::shared_ptr<solver::SolveStruct> &solve_struct)
+{
+    solve_struct = std::make_shared<solver::SolveStruct>();
+}
+
+
+/**
+ * Runs the cuSPARSE analysis step of the triangular solve for `matrix` and
+ * stores its results in `solve_struct`.
+ *
+ * With CUDA_VERSION >= 9020 the csrsm2 interface is used: the workspace size
+ * is queried, the workspace is (re)allocated through `exec` and
+ * csrsm2_analysis is run. With older toolkits csrsm_analysis is called and
+ * `num_rhs` is not used.
+ *
+ * @param exec  executor providing the cuSPARSE handle and the workspace memory
+ * @param matrix  the triangular factor in CSR format
+ * @param solve_struct  holds the matrix descriptor, the solve info and the
+ *                      workspace that are filled in here
+ * @param num_rhs  number of right-hand sides passed to the csrsm2 routines
+ * @param is_upper  if true, the descriptor's fill mode is set to upper
+ *
+ * Value/index type combinations that cusparse::is_supported rejects end in
+ * GKO_NOT_IMPLEMENTED.
+ */
+template <typename ValueType, typename IndexType>
+void generate_kernel(std::shared_ptr<const CudaExecutor> exec,
+                     const matrix::Csr<ValueType, IndexType> *matrix,
+                     solver::SolveStruct *solve_struct,
+                     const gko::size_type num_rhs, bool is_upper)
+{
+    if (!cusparse::is_supported<ValueType, IndexType>::value) {
+        GKO_NOT_IMPLEMENTED;
+    }
+
+    auto handle = exec->get_cusparse_handle();
+    if (is_upper) {
+        GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSetMatFillMode(
+            solve_struct->factor_descr, CUSPARSE_FILL_MODE_UPPER));
+    }
+
+
+#if (defined(CUDA_VERSION) && (CUDA_VERSION >= 9020))
+
+
+    ValueType one = 1.0;
+
+    {
+        cusparse_pointer_mode_guard pm_guard(handle);
+        cusparse::buffer_size_ext(
+            handle, solve_struct->algorithm, CUSPARSE_OPERATION_NON_TRANSPOSE,
+            CUSPARSE_OPERATION_TRANSPOSE, matrix->get_size()[0], num_rhs,
+            matrix->get_num_stored_elements(), &one,
+            solve_struct->factor_descr, matrix->get_const_values(),
+            matrix->get_const_row_ptrs(), matrix->get_const_col_idxs(),
+            nullptr, num_rhs, solve_struct->solve_info, solve_struct->policy,
+            &solve_struct->factor_work_size);
+
+        // Release the previous workspace and clear the pointer right away:
+        // should the allocation below throw, solve_struct must not keep a
+        // dangling pointer that a later generate call would free again.
+        if (solve_struct->factor_work_vec != nullptr) {
+            exec->free(solve_struct->factor_work_vec);
+            solve_struct->factor_work_vec = nullptr;
+        }
+        // NOTE(review): alloc<void *> reserves factor_work_size elements of
+        // pointer size; if factor_work_size is a byte count this
+        // over-allocates - confirm against SolveStruct before changing.
+        solve_struct->factor_work_vec =
+            exec->alloc<void *>(solve_struct->factor_work_size);
+
+        cusparse::csrsm2_analysis(
+            handle, solve_struct->algorithm, CUSPARSE_OPERATION_NON_TRANSPOSE,
+            CUSPARSE_OPERATION_TRANSPOSE, matrix->get_size()[0], num_rhs,
+            matrix->get_num_stored_elements(), &one,
+            solve_struct->factor_descr, matrix->get_const_values(),
+            matrix->get_const_row_ptrs(), matrix->get_const_col_idxs(),
+            nullptr, num_rhs, solve_struct->solve_info, solve_struct->policy,
+            solve_struct->factor_work_vec);
+    }
+
+
+#elif (defined(CUDA_VERSION) && (CUDA_VERSION < 9020))
+
+
+    {
+        cusparse_pointer_mode_guard pm_guard(handle);
+        cusparse::csrsm_analysis(
+            handle, CUSPARSE_OPERATION_NON_TRANSPOSE, matrix->get_size()[0],
+            matrix->get_num_stored_elements(), solve_struct->factor_descr,
+            matrix->get_const_values(), matrix->get_const_row_ptrs(),
+            matrix->get_const_col_idxs(), solve_struct->solve_info);
+    }
+
+
+#endif
+}
+
+
+/**
+ * Solves the triangular system given by `matrix` for the right-hand sides in
+ * `b` and writes the solution to `x`, using the data prepared in
+ * `solve_struct`.
+ *
+ * With CUDA_VERSION >= 9020, `b` is copied into `x` and csrsm2_solve is
+ * called on the values of `x`; `trans_b` and `trans_x` are not accessed.
+ * With older toolkits csrsm_solve is called directly on `b` and `x` when `b`
+ * has stride 1; otherwise `b` and `x` are transposed into `trans_b` and
+ * `trans_x`, the solve runs on those, and `trans_x` is transposed back into
+ * `x`.
+ *
+ * @param exec  executor providing the cuSPARSE handle
+ * @param matrix  the triangular factor in CSR format
+ * @param solve_struct  descriptor, solve info and workspace for the solve
+ * @param trans_b  storage for the transposed right-hand side (pre-9.2 path)
+ * @param trans_x  storage for the transposed solution (pre-9.2 path)
+ * @param b  the right-hand side(s)
+ * @param x  receives the solution
+ *
+ * Value/index type combinations that cusparse::is_supported rejects end in
+ * GKO_NOT_IMPLEMENTED.
+ */
+template <typename ValueType, typename IndexType>
+void solve_kernel(std::shared_ptr<const CudaExecutor> exec,
+                  const matrix::Csr<ValueType, IndexType> *matrix,
+                  const solver::SolveStruct *solve_struct,
+                  matrix::Dense<ValueType> *trans_b,
+                  matrix::Dense<ValueType> *trans_x,
+                  const matrix::Dense<ValueType> *b,
+                  matrix::Dense<ValueType> *x)
+{
+    if (!cusparse::is_supported<ValueType, IndexType>::value) {
+        GKO_NOT_IMPLEMENTED;
+    }
+
+    ValueType one = 1.0;
+    auto handle = exec->get_cusparse_handle();
+
+
+#if (defined(CUDA_VERSION) && (CUDA_VERSION >= 9020))
+
+
+    // csrsm2_solve only receives the buffer of x, so x has to start out as a
+    // copy of the right-hand side.
+    x->copy_from(gko::lend(b));
+    {
+        cusparse_pointer_mode_guard pm_guard(handle);
+        cusparse::csrsm2_solve(
+            handle, solve_struct->algorithm, CUSPARSE_OPERATION_NON_TRANSPOSE,
+            CUSPARSE_OPERATION_TRANSPOSE, matrix->get_size()[0],
+            b->get_stride(), matrix->get_num_stored_elements(), &one,
+            solve_struct->factor_descr, matrix->get_const_values(),
+            matrix->get_const_row_ptrs(), matrix->get_const_col_idxs(),
+            x->get_values(), b->get_stride(), solve_struct->solve_info,
+            solve_struct->policy, solve_struct->factor_work_vec);
+    }
+
+
+#elif (defined(CUDA_VERSION) && (CUDA_VERSION < 9020))
+
+
+    {
+        cusparse_pointer_mode_guard pm_guard(handle);
+        if (b->get_stride() == 1) {
+            // The solve wrapper is handed a non-const pointer to the
+            // right-hand side, hence the const_cast.
+            auto temp_b = const_cast<ValueType *>(b->get_const_values());
+            cusparse::csrsm_solve(
+                handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
+                matrix->get_size()[0], b->get_stride(), &one,
+                solve_struct->factor_descr, matrix->get_const_values(),
+                matrix->get_const_row_ptrs(), matrix->get_const_col_idxs(),
+                solve_struct->solve_info, temp_b, b->get_size()[0],
+                x->get_values(), x->get_size()[0]);
+        } else {
+            // Solve on the transposed copies and transpose the result back
+            // into x afterwards.
+            dense::transpose(exec, trans_b, b);
+            dense::transpose(exec, trans_x, x);
+            cusparse::csrsm_solve(
+                handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
+                matrix->get_size()[0], trans_b->get_size()[0], &one,
+                solve_struct->factor_descr, matrix->get_const_values(),
+                matrix->get_const_row_ptrs(), matrix->get_const_col_idxs(),
+                solve_struct->solve_info, trans_b->get_values(),
+                trans_b->get_size()[1], trans_x->get_values(),
+                trans_x->get_size()[1]);
+            dense::transpose(exec, x, trans_x);
+        }
+    }
+
+
+#endif
+}
+
+
+}  // namespace
+}  // namespace cuda
+}  // namespace kernels
+}  // namespace gko
+
+
+#endif