From c8fdb1108072ba1b7bf7eeeeb579a770dc22973f Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Thu, 15 Oct 2020 11:49:26 -0600 Subject: [PATCH] Add fallback condition to use spmv_native when cuSPARSE won't work Improve SpMV unit test: - generate random complex values with nonzero imaginary component - catch exceptions in spmv --- .../tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 46 +++++++++++-- src/sparse/KokkosSparse_spmv.hpp | 2 +- .../KokkosSparse_spgemm_cuSPARSE_impl.hpp | 16 ++--- src/sparse/impl/KokkosSparse_spmv_spec.hpp | 2 - .../KokkosSparse_sptrsv_cuSPARSE_impl.hpp | 4 +- unit_test/sparse/Test_Sparse_spmv.hpp | 64 +++++++++++++++---- 6 files changed, 101 insertions(+), 33 deletions(-) diff --git a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index e9596fb772..ced3476539 100644 --- a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -50,6 +50,7 @@ #include "cusparse.h" #include "KokkosKernels_SparseUtils_cusparse.hpp" #include "KokkosKernels_Controls.hpp" +#include "KokkosSparse_spmv_impl.hpp" namespace KokkosSparse { namespace Impl { @@ -64,8 +65,18 @@ namespace Impl { const YVector& y) { using KAT = Kokkos::Details::ArithTraits; - std::cout << "It is currently not possible to use the native SpMV implementation" - " when cuSPARSE is enabled" << std::endl; + if (beta == KAT::zero ()) { + KokkosSparse::Impl::spmv_beta (controls, mode, alpha, A, x, beta, y); + } + else if (beta == KAT::one ()) { + KokkosSparse::Impl::spmv_beta (controls, mode, alpha, A, x, beta, y); + } + else if (beta == -KAT::one ()) { + KokkosSparse::Impl::spmv_beta (controls, mode, alpha, A, x, beta, y); + } + else { + KokkosSparse::Impl::spmv_beta (controls, mode, alpha, A, x, beta, y); + } } template @@ -84,9 +95,24 @@ namespace Impl { cusparseHandle_t cusparseHandle = controls.getCusparseHandle(); /* Set the operation mode */ - cusparseOperation_t myCusparseOperation = 
CUSPARSE_OPERATION_NON_TRANSPOSE; - if(mode[0] == Transpose[0]) {myCusparseOperation = CUSPARSE_OPERATION_TRANSPOSE;} - else if(mode[0] == ConjugateTranspose[0]) {myCusparseOperation = CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE;} + cusparseOperation_t myCusparseOperation; + switch(toupper(mode[0])) + { + case 'N': + myCusparseOperation = CUSPARSE_OPERATION_NON_TRANSPOSE; + break; + case 'T': + myCusparseOperation = CUSPARSE_OPERATION_TRANSPOSE; + break; + case 'H': + myCusparseOperation = CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE; + break; + default: + { + std::cerr << "Mode " << mode << " invalid for cuSPARSE SpMV.\n"; + throw std::invalid_argument("Invalid mode"); + } + } #if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION) @@ -239,8 +265,9 @@ namespace Impl { const XVector& x, \ const coefficient_type& beta, \ const YVector& y) { \ - if(controls.isParameter("algorithm") && controls.getParameter("algorithm") == "native") { \ - std::string label = "KokkosSparse::spmv[TPL_CUSPARSE," + Kokkos::ArithTraits::name() + "]"; \ + bool fallback = *mode == 'C' || ((*mode == 'T' || *mode == 'H') && 9000 <= CUDA_VERSION && CUDA_VERSION < 10000); \ + if((controls.isParameter("algorithm") && controls.getParameter("algorithm") == "native") || fallback) { \ + std::string label = "KokkosSparse::spmv[NATIVE," + Kokkos::ArithTraits::name() + "]"; \ Kokkos::Profiling::pushRegion(label); \ spmv_native(controls, mode, alpha, A, x, beta, y); \ Kokkos::Profiling::popRegion(); \ @@ -252,6 +279,11 @@ namespace Impl { } \ } \ }; + +//BMK: cuSPARSE that comes with CUDA 9 does not support transpose or conjugate transpose modes. +//No version of cuSPARSE supports mode C (conjugate, non transpose). +//In those cases, fall back to KokkosKernels native spmv. 
+ #if (9000 <= CUDA_VERSION) KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::CudaSpace, true) diff --git a/src/sparse/KokkosSparse_spmv.hpp b/src/sparse/KokkosSparse_spmv.hpp index e18bc4690f..4c26f5cd6e 100644 --- a/src/sparse/KokkosSparse_spmv.hpp +++ b/src/sparse/KokkosSparse_spmv.hpp @@ -151,7 +151,7 @@ spmv (KokkosKernels::Experimental::Controls controls, KokkosBlas::scal(y_i, beta, y_i); return; } - return Impl::SPMV< + Impl::SPMV< typename AMatrix_Internal::value_type, typename AMatrix_Internal::ordinal_type, typename AMatrix_Internal::device_type, diff --git a/src/sparse/impl/KokkosSparse_spgemm_cuSPARSE_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_cuSPARSE_impl.hpp index f523bfe5f1..420e622c8f 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_cuSPARSE_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_cuSPARSE_impl.hpp @@ -79,9 +79,10 @@ namespace Impl{ #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE - using device1 = typename ain_row_index_view_type::device_type; - using device2 = typename ain_nonzero_index_view_type::device_type; - using idx = typename KernelHandle::nnz_lno_t; + using device1 = typename ain_row_index_view_type::device_type; + using device2 = typename ain_nonzero_index_view_type::device_type; + using idx = typename KernelHandle::nnz_lno_t; + using size_type = typename KernelHandle::size_type; //TODO this is not correct, check memory space. 
@@ -98,11 +99,10 @@ namespace Impl{ throw std::runtime_error ("SpGEMM cuSPARSE backend is not yet supported for this CUDA version\n"); #else - if (std::is_same::value){ - - const idx *a_xadj = (int *)row_mapA.data(); - const idx *b_xadj = (int *)row_mapB.data(); - idx *c_xadj = (int *)row_mapC.data(); + if (std::is_same::value && std::is_same::value){ + const idx *a_xadj = (const idx*) row_mapA.data(); + const idx *b_xadj = (const idx*) row_mapB.data(); + idx *c_xadj = (idx*) row_mapC.data(); const idx *a_adj = entriesA.data(); const idx *b_adj = entriesB.data(); diff --git a/src/sparse/impl/KokkosSparse_spmv_spec.hpp b/src/sparse/impl/KokkosSparse_spmv_spec.hpp index 9d1f44bd2a..b678142dbe 100644 --- a/src/sparse/impl/KokkosSparse_spmv_spec.hpp +++ b/src/sparse/impl/KokkosSparse_spmv_spec.hpp @@ -257,8 +257,6 @@ struct SPMV < AT, AO, AD, AM, AS, { typedef Kokkos::Details::ArithTraits KAT; - typedef Kokkos::Details::ArithTraits KAT; - if (alpha == KAT::zero ()) { if (beta != KAT::one ()) { KokkosBlas::scal (y, beta, y); diff --git a/src/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp b/src/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp index 25e9844940..623df284ea 100644 --- a/src/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp +++ b/src/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp @@ -95,7 +95,7 @@ namespace Impl{ if (!std::is_same::value) sptrsv_handle->allocate_tmp_int_rowmap(row_map.extent(0)); const int* rm = !std::is_same::value ? sptrsv_handle->get_int_rowmap_ptr_copy(row_map) : (const int*)row_map.data(); - const int* ent = entries.data(); + const int* ent = (const int*) entries.data(); const scalar_type* vals = values.data(); if (std::is_same::value) { @@ -297,7 +297,7 @@ namespace Impl{ int nnz = entries.extent_int(0); const int* rm = !std::is_same::value ? 
sptrsv_handle->get_int_rowmap_ptr() : (const int*)row_map.data(); - const int* ent = entries.data(); + const int* ent = (const int*) entries.data(); const scalar_type* vals = values.data(); const scalar_type* bv = rhs.data(); scalar_type* xv = lhs.data(); diff --git a/unit_test/sparse/Test_Sparse_spmv.hpp b/unit_test/sparse/Test_Sparse_spmv.hpp index 4a19137886..e27012991a 100644 --- a/unit_test/sparse/Test_Sparse_spmv.hpp +++ b/unit_test/sparse/Test_Sparse_spmv.hpp @@ -132,9 +132,19 @@ void check_spmv(crsMat_t input_mat, x_vector_type x, y_vector_type y, Kokkos::fence(); sequential_spmv(input_mat, x, expected_y, alpha, beta, mode); - //KokkosKernels::Impl::print_1Dview(expected_y); - KokkosSparse::spmv(&mode, alpha, input_mat, x, beta, y); - //KokkosKernels::Impl::print_1Dview(y); + bool threw = false; + std::string msg; + try + { + KokkosSparse::spmv(&mode, alpha, input_mat, x, beta, y); + Kokkos::fence(); + } + catch(std::exception& e) + { + threw = true; + msg = e.what(); + } + ASSERT_FALSE(threw) << "KokkosSparse::Test::spmv 1D, mode " << mode << ": threw exception:\n" << msg << '\n'; int num_errors = 0; Kokkos::parallel_reduce("KokkosSparse::Test::spmv", my_exec_space(0, y.extent(0)), @@ -165,8 +175,19 @@ void check_spmv_mv(crsMat_t input_mat, x_vector_type x, y_vector_type y, y_vecto Kokkos::fence(); - KokkosSparse::spmv(&mode, alpha, input_mat, x, beta, y); - + bool threw = false; + std::string msg; + try + { + KokkosSparse::spmv(&mode, alpha, input_mat, x, beta, y); + Kokkos::fence(); + } + catch(std::exception& e) + { + threw = true; + msg = e.what(); + } + ASSERT_FALSE(threw) << "KokkosSparse::Test::spmv 2D, mode " << mode << ": threw exception:\n" << msg << '\n'; for (int i = 0; i < numMV; ++i){ auto x_i = Kokkos::subview (x, Kokkos::ALL (), i); @@ -318,6 +339,23 @@ void check_spmv_controls(KokkosKernels::Experimental::Controls controls, } // namespace Test +template +scalar_t randomUpperBound(int mag) +{ + return (scalar_t) mag; +} + +template <> 
+Kokkos::complex randomUpperBound>(int mag) +{ + return Kokkos::complex(mag, mag); +} + +template <> +Kokkos::complex randomUpperBound>(int mag) +{ + return Kokkos::complex(mag, mag); +} template void test_spmv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_variance){ @@ -346,10 +384,10 @@ void test_spmv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_vari typedef typename x_vector_type::value_type ScalarX; typedef typename y_vector_type::value_type ScalarY; - Kokkos::fill_random(input_x,rand_pool,ScalarX(10)); - Kokkos::fill_random(output_y,rand_pool,ScalarY(10)); - Kokkos::fill_random(input_xt,rand_pool,ScalarX(10)); - Kokkos::fill_random(output_yt,rand_pool,ScalarY(10)); + Kokkos::fill_random(input_x,rand_pool,randomUpperBound(10)); + Kokkos::fill_random(output_y,rand_pool,randomUpperBound(10)); + Kokkos::fill_random(input_xt,rand_pool,randomUpperBound(10)); + Kokkos::fill_random(output_yt,rand_pool,randomUpperBound(10)); std::vector nonTransModes = {'N', 'C'}; std::vector transModes = {'T', 'H'}; @@ -385,10 +423,10 @@ void test_spmv_mv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_v ViewTypeY b_yt_copy("B",numRows,numMV); Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,scalar_t(10)); - Kokkos::fill_random(b_y,rand_pool,scalar_t(10)); - Kokkos::fill_random(b_xt,rand_pool,scalar_t(10)); - Kokkos::fill_random(b_yt,rand_pool,scalar_t(10)); + Kokkos::fill_random(b_x,rand_pool,randomUpperBound(10)); + Kokkos::fill_random(b_y,rand_pool,randomUpperBound(10)); + Kokkos::fill_random(b_xt,rand_pool,randomUpperBound(10)); + Kokkos::fill_random(b_yt,rand_pool,randomUpperBound(10)); crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix(numRows,numCols,nnz,row_size_variance, bandwidth);