Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fallback condition to use spmv_native when cuSPARSE won't work #834

Merged
merged 1 commit into from
Oct 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 39 additions & 7 deletions src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
#include "cusparse.h"
#include "KokkosKernels_SparseUtils_cusparse.hpp"
#include "KokkosKernels_Controls.hpp"
#include "KokkosSparse_spmv_impl.hpp"

namespace KokkosSparse {
namespace Impl {
Expand All @@ -64,8 +65,18 @@ namespace Impl {
const YVector& y) {
using KAT = Kokkos::Details::ArithTraits<typename YVector::non_const_value_type>;

std::cout << "It is currently not possible to use the native SpMV implementation"
" when cuSPARSE is enabled" << std::endl;
if (beta == KAT::zero ()) {
KokkosSparse::Impl::spmv_beta<AMatrix, XVector, YVector, 0> (controls, mode, alpha, A, x, beta, y);
}
else if (beta == KAT::one ()) {
KokkosSparse::Impl::spmv_beta<AMatrix, XVector, YVector, 1> (controls, mode, alpha, A, x, beta, y);
}
else if (beta == -KAT::one ()) {
KokkosSparse::Impl::spmv_beta<AMatrix, XVector, YVector, -1> (controls, mode, alpha, A, x, beta, y);
}
else {
KokkosSparse::Impl::spmv_beta<AMatrix, XVector, YVector, 2> (controls, mode, alpha, A, x, beta, y);
}
}

template <class AMatrix, class XVector, class YVector>
Expand All @@ -84,9 +95,24 @@ namespace Impl {
cusparseHandle_t cusparseHandle = controls.getCusparseHandle();

/* Set the operation mode */
cusparseOperation_t myCusparseOperation = CUSPARSE_OPERATION_NON_TRANSPOSE;
if(mode[0] == Transpose[0]) {myCusparseOperation = CUSPARSE_OPERATION_TRANSPOSE;}
else if(mode[0] == ConjugateTranspose[0]) {myCusparseOperation = CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE;}
cusparseOperation_t myCusparseOperation;
switch(toupper(mode[0]))
{
case 'N':
myCusparseOperation = CUSPARSE_OPERATION_NON_TRANSPOSE;
break;
case 'T':
myCusparseOperation = CUSPARSE_OPERATION_TRANSPOSE;
break;
case 'H':
myCusparseOperation = CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE;
break;
default:
{
std::cerr << "Mode " << mode << " invalid for cuSPARSE SpMV.\n";
throw std::invalid_argument("Invalid mode");
}
}

#if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION)

Expand Down Expand Up @@ -239,8 +265,9 @@ namespace Impl {
const XVector& x, \
const coefficient_type& beta, \
const YVector& y) { \
if(controls.isParameter("algorithm") && controls.getParameter("algorithm") == "native") { \
std::string label = "KokkosSparse::spmv[TPL_CUSPARSE," + Kokkos::ArithTraits<SCALAR>::name() + "]"; \
bool fallback = *mode == 'C' || ((*mode == 'T' || *mode == 'H') && 9000 <= CUDA_VERSION && CUDA_VERSION < 10000); \
if((controls.isParameter("algorithm") && controls.getParameter("algorithm") == "native") || fallback) { \
std::string label = "KokkosSparse::spmv[NATIVE," + Kokkos::ArithTraits<SCALAR>::name() + "]"; \
Kokkos::Profiling::pushRegion(label); \
spmv_native(controls, mode, alpha, A, x, beta, y); \
Kokkos::Profiling::popRegion(); \
Expand All @@ -252,6 +279,11 @@ namespace Impl {
} \
} \
};

//BMK: cuSPARSE that comes with CUDA 9 does not support transpose or conjugate transpose modes.
//No version of cuSPARSE supports mode C (conjugate, non transpose).
//In those cases, fall back to KokkosKernels native spmv.

#if (9000 <= CUDA_VERSION)
KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, Kokkos::CudaSpace, true)
KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutRight, Kokkos::CudaSpace, true)
Expand Down
2 changes: 1 addition & 1 deletion src/sparse/KokkosSparse_spmv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ spmv (KokkosKernels::Experimental::Controls controls,
KokkosBlas::scal(y_i, beta, y_i);
return;
}
return Impl::SPMV<
Impl::SPMV<
typename AMatrix_Internal::value_type,
typename AMatrix_Internal::ordinal_type,
typename AMatrix_Internal::device_type,
Expand Down
16 changes: 8 additions & 8 deletions src/sparse/impl/KokkosSparse_spgemm_cuSPARSE_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,10 @@ namespace Impl{

#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE

using device1 = typename ain_row_index_view_type::device_type;
using device2 = typename ain_nonzero_index_view_type::device_type;
using idx = typename KernelHandle::nnz_lno_t;
using device1 = typename ain_row_index_view_type::device_type;
using device2 = typename ain_nonzero_index_view_type::device_type;
using idx = typename KernelHandle::nnz_lno_t;
using size_type = typename KernelHandle::size_type;


//TODO this is not correct, check memory space.
Expand All @@ -98,11 +99,10 @@ namespace Impl{
throw std::runtime_error ("SpGEMM cuSPARSE backend is not yet supported for this CUDA version\n");
#else

if (std::is_same<idx, int>::value){

const idx *a_xadj = (int *)row_mapA.data();
const idx *b_xadj = (int *)row_mapB.data();
idx *c_xadj = (int *)row_mapC.data();
if (std::is_same<idx, int>::value && std::is_same<size_type, int>::value){
const idx *a_xadj = (const idx*) row_mapA.data();
const idx *b_xadj = (const idx*) row_mapB.data();
idx *c_xadj = (idx*) row_mapC.data();

const idx *a_adj = entriesA.data();
const idx *b_adj = entriesB.data();
Expand Down
2 changes: 0 additions & 2 deletions src/sparse/impl/KokkosSparse_spmv_spec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,6 @@ struct SPMV < AT, AO, AD, AM, AS,
{
typedef Kokkos::Details::ArithTraits<coefficient_type> KAT;

typedef Kokkos::Details::ArithTraits<coefficient_type> KAT;

if (alpha == KAT::zero ()) {
if (beta != KAT::one ()) {
KokkosBlas::scal (y, beta, y);
Expand Down
4 changes: 2 additions & 2 deletions src/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ namespace Impl{
if (!std::is_same<size_type, int>::value)
sptrsv_handle->allocate_tmp_int_rowmap(row_map.extent(0));
const int* rm = !std::is_same<size_type, int>::value ? sptrsv_handle->get_int_rowmap_ptr_copy(row_map) : (const int*)row_map.data();
const int* ent = entries.data();
const int* ent = (const int*) entries.data();
const scalar_type* vals = values.data();

if (std::is_same<scalar_type,double>::value) {
Expand Down Expand Up @@ -297,7 +297,7 @@ namespace Impl{
int nnz = entries.extent_int(0);

const int* rm = !std::is_same<size_type, int>::value ? sptrsv_handle->get_int_rowmap_ptr() : (const int*)row_map.data();
const int* ent = entries.data();
const int* ent = (const int*) entries.data();
const scalar_type* vals = values.data();
const scalar_type* bv = rhs.data();
scalar_type* xv = lhs.data();
Expand Down
64 changes: 51 additions & 13 deletions unit_test/sparse/Test_Sparse_spmv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,19 @@ void check_spmv(crsMat_t input_mat, x_vector_type x, y_vector_type y,
Kokkos::fence();

sequential_spmv(input_mat, x, expected_y, alpha, beta, mode);
//KokkosKernels::Impl::print_1Dview(expected_y);
KokkosSparse::spmv(&mode, alpha, input_mat, x, beta, y);
//KokkosKernels::Impl::print_1Dview(y);
bool threw = false;
std::string msg;
try
{
KokkosSparse::spmv(&mode, alpha, input_mat, x, beta, y);
Kokkos::fence();
}
catch(std::exception& e)
{
threw = true;
msg = e.what();
}
ASSERT_FALSE(threw) << "KokkosSparse::Test::spmv 1D, mode " << mode << ": threw exception:\n" << msg << '\n';
int num_errors = 0;
Kokkos::parallel_reduce("KokkosSparse::Test::spmv",
my_exec_space(0, y.extent(0)),
Expand Down Expand Up @@ -165,8 +175,19 @@ void check_spmv_mv(crsMat_t input_mat, x_vector_type x, y_vector_type y, y_vecto

Kokkos::fence();

KokkosSparse::spmv(&mode, alpha, input_mat, x, beta, y);

bool threw = false;
std::string msg;
try
{
KokkosSparse::spmv(&mode, alpha, input_mat, x, beta, y);
Kokkos::fence();
}
catch(std::exception& e)
{
threw = true;
msg = e.what();
}
ASSERT_FALSE(threw) << "KokkosSparse::Test::spmv 2D, mode " << mode << ": threw exception:\n" << msg << '\n';

for (int i = 0; i < numMV; ++i){
auto x_i = Kokkos::subview (x, Kokkos::ALL (), i);
Expand Down Expand Up @@ -318,6 +339,23 @@ void check_spmv_controls(KokkosKernels::Experimental::Controls controls,

} // namespace Test

/// Convert an integer magnitude into a value of the requested scalar type.
///
/// The tests in this file hand the result to Kokkos::fill_random as its
/// range argument. This generic version simply converts \p mag to
/// \c scalar_t; scalar types that need a different construction provide
/// their own explicit specialization.
///
/// \tparam scalar_t  Destination scalar type.
/// \param  mag       Integer magnitude to convert.
/// \return \p mag converted to \c scalar_t.
template <typename scalar_t>
scalar_t randomUpperBound(int mag)
{
  const scalar_t bound = static_cast<scalar_t>(mag);
  return bound;
}

/// Specialization of randomUpperBound for Kokkos::complex<double>.
///
/// Builds a complex value whose real and imaginary parts both equal
/// \p mag (presumably so that random fills also vary the imaginary
/// component -- confirm against Kokkos::fill_random's complex overload).
///
/// Declared \c inline: an explicit full specialization of a function
/// template is an ordinary function, so defining it in a header without
/// \c inline would produce multiple-definition errors as soon as the header
/// is included from more than one translation unit.
///
/// \param  mag  Integer magnitude used for both components.
/// \return Kokkos::complex<double>(mag, mag).
template <>
inline Kokkos::complex<double> randomUpperBound<Kokkos::complex<double>>(int mag)
{
  return Kokkos::complex<double>(mag, mag);
}

/// Specialization of randomUpperBound for Kokkos::complex<float>.
///
/// Builds a complex value whose real and imaginary parts both equal
/// \p mag (presumably so that random fills also vary the imaginary
/// component -- confirm against Kokkos::fill_random's complex overload).
///
/// Declared \c inline: an explicit full specialization of a function
/// template is an ordinary function, so defining it in a header without
/// \c inline would produce multiple-definition errors as soon as the header
/// is included from more than one translation unit.
///
/// \param  mag  Integer magnitude used for both components.
/// \return Kokkos::complex<float>(mag, mag).
template <>
inline Kokkos::complex<float> randomUpperBound<Kokkos::complex<float>>(int mag)
{
  return Kokkos::complex<float>(mag, mag);
}

template <typename scalar_t, typename lno_t, typename size_type, class Device>
void test_spmv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_variance){
Expand Down Expand Up @@ -346,10 +384,10 @@ void test_spmv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_vari
typedef typename x_vector_type::value_type ScalarX;
typedef typename y_vector_type::value_type ScalarY;

Kokkos::fill_random(input_x,rand_pool,ScalarX(10));
Kokkos::fill_random(output_y,rand_pool,ScalarY(10));
Kokkos::fill_random(input_xt,rand_pool,ScalarX(10));
Kokkos::fill_random(output_yt,rand_pool,ScalarY(10));
Kokkos::fill_random(input_x,rand_pool,randomUpperBound<ScalarX>(10));
Kokkos::fill_random(output_y,rand_pool,randomUpperBound<ScalarY>(10));
Kokkos::fill_random(input_xt,rand_pool,randomUpperBound<ScalarX>(10));
Kokkos::fill_random(output_yt,rand_pool,randomUpperBound<ScalarY>(10));

std::vector<char> nonTransModes = {'N', 'C'};
std::vector<char> transModes = {'T', 'H'};
Expand Down Expand Up @@ -385,10 +423,10 @@ void test_spmv_mv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_v
ViewTypeY b_yt_copy("B",numRows,numMV);

Kokkos::Random_XorShift64_Pool<typename Device::execution_space> rand_pool(13718);
Kokkos::fill_random(b_x,rand_pool,scalar_t(10));
Kokkos::fill_random(b_y,rand_pool,scalar_t(10));
Kokkos::fill_random(b_xt,rand_pool,scalar_t(10));
Kokkos::fill_random(b_yt,rand_pool,scalar_t(10));
Kokkos::fill_random(b_x,rand_pool,randomUpperBound<scalar_t>(10));
Kokkos::fill_random(b_y,rand_pool,randomUpperBound<scalar_t>(10));
Kokkos::fill_random(b_xt,rand_pool,randomUpperBound<scalar_t>(10));
Kokkos::fill_random(b_yt,rand_pool,randomUpperBound<scalar_t>(10));


crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix<crsMat_t>(numRows,numCols,nnz,row_size_variance, bandwidth);
Expand Down