From feb1f5576447c8d3f6bae399edad4dd8a2a8d830 Mon Sep 17 00:00:00 2001 From: brian-kelley Date: Wed, 22 May 2024 17:15:01 -0600 Subject: [PATCH] Fix spmv regressions (#2204) * Restore cusparse spmv ALG2 path for imbalanced With correct version cutoffs * spmv: use separate rank-1 and rank-2 tpl subhandles * Remove redundant single-column path in native spmv_mv * Fix unused param warning --- sparse/impl/KokkosSparse_spmv_spec.hpp | 56 ++++----------- sparse/src/KokkosSparse_spmv.hpp | 49 +++---------- sparse/src/KokkosSparse_spmv_handle.hpp | 9 +-- ...kosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp | 47 ++++++------- .../KokkosSparse_spmv_mv_tpl_spec_decl.hpp | 12 ++-- .../tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 68 +++++++++++-------- 6 files changed, 90 insertions(+), 151 deletions(-) diff --git a/sparse/impl/KokkosSparse_spmv_spec.hpp b/sparse/impl/KokkosSparse_spmv_spec.hpp index da02b1af5a..67a2f05639 100644 --- a/sparse/impl/KokkosSparse_spmv_spec.hpp +++ b/sparse/impl/KokkosSparse_spmv_spec.hpp @@ -203,54 +203,24 @@ struct SPMV_MV { typedef typename YVector::non_const_value_type coefficient_type; - static void spmv_mv(const ExecutionSpace& space, Handle* handle, + // TODO: pass handle through to implementation and use tuning parameters + static void spmv_mv(const ExecutionSpace& space, Handle* /* handle */, const char mode[], const coefficient_type& alpha, const AMatrix& A, const XVector& x, const coefficient_type& beta, const YVector& y) { typedef Kokkos::ArithTraits KAT; - // Intercept special case: if x/y have only 1 column and both are - // contiguous, use the more efficient single-vector impl. - // - // We cannot do this if x or y is noncontiguous, because the column subview - // must be LayoutStride which is not ETI'd. - // - // Do not use a TPL even if one is available for the types: - // we don't want the same handle being used in both TPL and non-TPL versions - if (x.extent(1) == size_t(1) && x.span_is_contiguous() && - y.span_is_contiguous()) { - Kokkos::View - x0(x.data(), x.extent(0)); - Kokkos::View - y0(y.data(), y.extent(0)); - if (beta == KAT::zero()) { - spmv_beta(space, handle, mode, alpha, A, x0, beta, y0); - } else if (beta == KAT::one()) { - spmv_beta(space, handle, mode, alpha, A, x0, beta, y0); - } else if (beta == -KAT::one()) { - spmv_beta(space, handle, mode, alpha, A, x0, beta, y0); - } else { - spmv_beta(space, handle, mode, alpha, A, x0, beta, y0); - } + if (alpha == KAT::zero()) { + spmv_alpha_mv( + space, mode, alpha, A, x, beta, y); + } else if (alpha == KAT::one()) { + spmv_alpha_mv( + space, mode, alpha, A, x, beta, y); + } else if (alpha == -KAT::one()) { + spmv_alpha_mv( + space, mode, alpha, A, x, beta, y); } else { - if (alpha == KAT::zero()) { - spmv_alpha_mv( - space, mode, alpha, A, x, beta, y); - } else if (alpha == KAT::one()) { - spmv_alpha_mv( - space, mode, alpha, A, x, beta, y); - } else if (alpha == -KAT::one()) { - spmv_alpha_mv( - space, mode, alpha, A, x, beta, y); - } else { - spmv_alpha_mv( - space, mode, alpha, A, x, beta, y); - } + spmv_alpha_mv( + space, mode, alpha, A, x, beta, y); } } }; diff --git a/sparse/src/KokkosSparse_spmv.hpp b/sparse/src/KokkosSparse_spmv.hpp index b59124df20..336bae4f1d 100644 --- a/sparse/src/KokkosSparse_spmv.hpp +++ b/sparse/src/KokkosSparse_spmv.hpp @@ -40,32 +40,6 @@ struct RANK_ONE {}; struct RANK_TWO {}; } // namespace -namespace Impl { -template -inline constexpr bool spmv_general_tpl_avail() { - constexpr bool isBSR = ::KokkosSparse::Experimental::is_bsr_matrix_v; - if constexpr (!isBSR) { - // CRS - if constexpr (XVector::rank() == 1) - return spmv_tpl_spec_avail::value; - else - return spmv_mv_tpl_spec_avail::value; - } else { - // BSR - if constexpr (XVector::rank() == 1) - return spmv_bsrmatrix_tpl_spec_avail::value; - else - return spmv_mv_bsrmatrix_tpl_spec_avail::value; - } - return false; -} -} // namespace Impl - // clang-format off /// \brief Kokkos sparse matrix-vector multiply. /// Computes y := alpha*Op(A)*x + beta*y, where Op(A) is @@ -248,8 +222,8 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], typename YVector::device_type, Kokkos::MemoryTraits>; // Special case: XVector/YVector are rank-2 but x,y both have one column and - // are contiguous. If a TPL is available for rank-1 vectors but not rank-2, - // take rank-1 subviews of x,y and call the rank-1 version. + // are contiguous. In this case take rank-1 subviews of x,y and call the + // rank-1 version. if constexpr (XVector::rank() == 2) { using XVector_SubInternal = Kokkos::View< typename XVector::const_value_type*, @@ -260,19 +234,12 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], typename YVector::non_const_value_type*, typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, typename YVector::device_type, Kokkos::MemoryTraits>; - if constexpr (!Impl::spmv_general_tpl_avail< - ExecutionSpace, HandleImpl, AMatrix_Internal, - XVector_Internal, YVector_Internal>() && - Impl::spmv_general_tpl_avail< - ExecutionSpace, HandleImpl, AMatrix_Internal, - XVector_SubInternal, YVector_SubInternal>()) { - if (x.extent(1) == size_t(1) && x.span_is_contiguous() && - y.span_is_contiguous()) { - XVector_SubInternal xsub(x.data(), x.extent(0)); - YVector_SubInternal ysub(y.data(), y.extent(0)); - spmv(space, handle->get_impl(), mode, alpha, A, xsub, beta, ysub); - return; - } + if (x.extent(1) == size_t(1) && x.span_is_contiguous() && + y.span_is_contiguous()) { + XVector_SubInternal xsub(x.data(), x.extent(0)); + YVector_SubInternal ysub(y.data(), y.extent(0)); + spmv(space, handle->get_impl(), mode, alpha, A, xsub, beta, ysub); + return; } } diff --git a/sparse/src/KokkosSparse_spmv_handle.hpp b/sparse/src/KokkosSparse_spmv_handle.hpp index b3e878b5e9..6d23d2bde1 100644 --- a/sparse/src/KokkosSparse_spmv_handle.hpp +++ b/sparse/src/KokkosSparse_spmv_handle.hpp @@ -234,7 +234,8 @@ struct SPMVHandleImpl { "SPMVHandleImpl: Ordinal must not be a const type"); SPMVHandleImpl(SPMVAlgorithm algo_) : algo(algo_) {} ~SPMVHandleImpl() { - if (tpl) delete tpl; + if (tpl_rank1) delete tpl_rank1; + if (tpl_rank2) delete tpl_rank2; } ImplType* get_impl() { return this; } @@ -242,9 +243,9 @@ struct SPMVHandleImpl { /// Get the SPMVAlgorithm used by this handle SPMVAlgorithm get_algorithm() const { return this->algo; } - bool is_set_up = false; - const SPMVAlgorithm algo = SPMV_DEFAULT; - TPL_SpMV_Data* tpl = nullptr; + const SPMVAlgorithm algo = SPMV_DEFAULT; + TPL_SpMV_Data* tpl_rank1 = nullptr; + TPL_SpMV_Data* tpl_rank2 = nullptr; // Expert tuning parameters for native SpMV // TODO: expose a proper Experimental interface to set these. Currently they // can be assigned directly in the SPMVHandle as they are public members. diff --git a/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp index 7eb6307753..3564fa68fd 100644 --- a/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp @@ -43,8 +43,8 @@ inline void spmv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, Subhandle* subhandle; const MKLScalar* x_mkl = reinterpret_cast(x); MKLScalar* y_mkl = reinterpret_cast(y); - if (handle->is_set_up) { - subhandle = dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast(handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for MKL BSR"); @@ -54,7 +54,7 @@ inline void spmv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, // Use the default execution space instance, as classic MKL does not use // a specific instance. subhandle = new Subhandle(ExecSpace()); - handle->tpl = subhandle; + handle->tpl_rank1 = subhandle; subhandle->descr.type = SPARSE_MATRIX_TYPE_GENERAL; subhandle->descr.mode = SPARSE_FILL_MODE_FULL; subhandle->descr.diag = SPARSE_DIAG_NON_UNIT; @@ -87,7 +87,6 @@ inline void spmv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } - handle->is_set_up = true; } MKLScalar alpha_mkl = KokkosSparse::Impl::KokkosToMKLScalar(alpha); MKLScalar beta_mkl = KokkosSparse::Impl::KokkosToMKLScalar(beta); @@ -124,8 +123,8 @@ inline void spmv_mv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, Subhandle* subhandle; const MKLScalar* x_mkl = reinterpret_cast(x); MKLScalar* y_mkl = reinterpret_cast(y); - if (handle->is_set_up) { - subhandle = dynamic_cast(handle->tpl); + if (handle->tpl_rank2) { + subhandle = dynamic_cast(handle->tpl_rank2); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for MKL BSR"); @@ -135,7 +134,7 @@ inline void spmv_mv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, // Use the default execution space instance, as classic MKL does not use // a specific instance. subhandle = new Subhandle(ExecSpace()); - handle->tpl = subhandle; + handle->tpl_rank2 = subhandle; subhandle->descr.type = SPARSE_MATRIX_TYPE_GENERAL; subhandle->descr.mode = SPARSE_FILL_MODE_FULL; subhandle->descr.diag = SPARSE_DIAG_NON_UNIT; @@ -168,7 +167,6 @@ inline void spmv_mv_bsr_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } - handle->is_set_up = true; } MKLScalar alpha_mkl = KokkosSparse::Impl::KokkosToMKLScalar(alpha); MKLScalar beta_mkl = KokkosSparse::Impl::KokkosToMKLScalar(beta); @@ -376,23 +374,22 @@ void spmv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, KokkosSparse::Impl::CuSparse9_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast( + handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { /* create and set the subhandle and matrix descriptor */ - subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); KOKKOS_CUSPARSE_SAFE_CALL( cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); KOKKOS_CUSPARSE_SAFE_CALL( cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); - handle->is_set_up = true; } cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; @@ -504,23 +501,22 @@ void spmv_mv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, KokkosSparse::Impl::CuSparse9_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank2) { + subhandle = dynamic_cast( + handle->tpl_rank2); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { /* create and set the subhandle and matrix descriptor */ - subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); + handle->tpl_rank2 = subhandle; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); KOKKOS_CUSPARSE_SAFE_CALL( cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); KOKKOS_CUSPARSE_SAFE_CALL( cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); - handle->is_set_up = true; } cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; @@ -855,16 +851,16 @@ void spmv_bsr_rocsparse(const Kokkos::HIP& exec, Handle* handle, rocsparse_value_type* y_ = reinterpret_cast(y.data()); KokkosSparse::Impl::RocSparse_BSR_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast( + handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for rocsparse BSR"); subhandle->set_exec_space(exec); } else { - subhandle = new KokkosSparse::Impl::RocSparse_BSR_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::RocSparse_BSR_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; KOKKOS_ROCSPARSE_SAFE_CALL_IMPL( rocsparse_create_mat_descr(&subhandle->mat)); // *_ex* functions deprecated in introduced in 6+ @@ -918,7 +914,6 @@ void spmv_bsr_rocsparse(const Kokkos::HIP& exec, Handle* handle, "unsupported value type for rocsparse_*bsrmv"); } #endif - handle->is_set_up = true; } // *_ex* functions deprecated in introduced in 6+ diff --git a/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp index 500fbddbe7..c52047ab25 100644 --- a/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp @@ -186,16 +186,16 @@ void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle, } KokkosSparse::Impl::CuSparse10_SpMV_Data *subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank2) { + subhandle = dynamic_cast( + handle->tpl_rank2); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { - subhandle = new KokkosSparse::Impl::CuSparse10_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::CuSparse10_SpMV_Data(exec); + handle->tpl_rank2 = subhandle; /* create matrix */ KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( &subhandle->mat, A.numRows(), A.numCols(), A.nnz(), @@ -209,8 +209,6 @@ void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle, KOKKOS_IMPL_CUDA_SAFE_CALL( cudaMalloc(&subhandle->buffer, subhandle->bufferSize)); - - handle->is_set_up = true; } KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMM(cusparseHandle, opA, opB, &alpha, diff --git a/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index 66ea90c746..c8d25c2c58 100644 --- a/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -96,25 +96,38 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec( &vecY, y.extent_int(0), (void*)y.data(), myCudaDataType)); - // use default cusparse algo for best performance + // Prior to CUDA 11.2.1, ALG2 was more performant than default for imbalanced + // matrices. After 11.2.1, the default is performant for imbalanced matrices, + // and ALG2 now means something else. CUDA >= 11.2.1 corresponds to + // CUSPARSE_VERSION >= 11402. +#if CUSPARSE_VERSION >= 11402 + const bool useAlg2 = false; +#else + const bool useAlg2 = handle->get_algorithm() == SPMV_MERGE_PATH; +#endif + + // In CUDA 11.2.0, the algorithm enums were renamed. + // This corresponds to CUSPARSE_VERSION >= 11400. #if CUSPARSE_VERSION >= 11400 - cusparseSpMVAlg_t algo = CUSPARSE_SPMV_ALG_DEFAULT; + cusparseSpMVAlg_t algo = + useAlg2 ? CUSPARSE_SPMV_CSR_ALG2 : CUSPARSE_SPMV_ALG_DEFAULT; #else - cusparseSpMVAlg_t algo = CUSPARSE_MV_ALG_DEFAULT; + cusparseSpMVAlg_t algo = + useAlg2 ? CUSPARSE_CSRMV_ALG2 : CUSPARSE_MV_ALG_DEFAULT; #endif KokkosSparse::Impl::CuSparse10_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast( + handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { - subhandle = new KokkosSparse::Impl::CuSparse10_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::CuSparse10_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; /* create matrix */ KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( @@ -135,7 +148,6 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], KOKKOS_IMPL_CUDA_SAFE_CALL( cudaMalloc(&subhandle->buffer, subhandle->bufferSize)); #endif - handle->is_set_up = true; } /* perform SpMV */ @@ -150,24 +162,23 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], KokkosSparse::Impl::CuSparse9_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast( + handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for cusparse"); subhandle->set_exec_space(exec); } else { /* create and set the subhandle and matrix descriptor */ - subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; cusparseMatDescr_t descrA = 0; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); KOKKOS_CUSPARSE_SAFE_CALL( cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); KOKKOS_CUSPARSE_SAFE_CALL( cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); - handle->is_set_up = true; } /* perform the actual SpMV operation */ @@ -386,16 +397,16 @@ void spmv_rocsparse(const Kokkos::HIP& exec, Handle* handle, const char mode[], rocsparse_spmv_alg alg = rocsparse_spmv_alg_default; KokkosSparse::Impl::RocSparse_CRS_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = - dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast( + handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for rocsparse CRS"); subhandle->set_exec_space(exec); } else { - subhandle = new KokkosSparse::Impl::RocSparse_CRS_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new KokkosSparse::Impl::RocSparse_CRS_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; /* Create the rocsparse csr descr */ // We need to do some casting to void* // Note that row_map is always a const view so const_cast is necessary, @@ -443,7 +454,6 @@ void spmv_rocsparse(const Kokkos::HIP& exec, Handle* handle, const char mode[], KOKKOS_IMPL_HIP_SAFE_CALL( hipMalloc(&subhandle->buffer, subhandle->bufferSize)); #endif - handle->is_set_up = true; } /* Perform the actual computation */ @@ -551,8 +561,8 @@ inline void spmv_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, Subhandle* subhandle; const MKLScalar* x_mkl = reinterpret_cast(x); MKLScalar* y_mkl = reinterpret_cast(y); - if (handle->is_set_up) { - subhandle = dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast(handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for MKL CRS"); @@ -562,7 +572,7 @@ inline void spmv_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, // Use the default execution space instance, as classic MKL does not use // a specific instance. subhandle = new Subhandle(ExecSpace()); - handle->tpl = subhandle; + handle->tpl_rank1 = subhandle; subhandle->descr.type = SPARSE_MATRIX_TYPE_GENERAL; subhandle->descr.mode = SPARSE_FILL_MODE_FULL; subhandle->descr.diag = SPARSE_DIAG_NON_UNIT; @@ -591,7 +601,6 @@ inline void spmv_mkl(Handle* handle, sparse_operation_t op, Scalar alpha, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), Avalues_mkl)); } - handle->is_set_up = true; } MKLScalar alpha_mkl = KokkosToMKLScalar(alpha); MKLScalar beta_mkl = KokkosToMKLScalar(beta); @@ -709,15 +718,15 @@ inline void spmv_onemkl(const execution_space& exec, Handle* handle, mkl_mode = oneapi::mkl::transpose::trans; OneMKL_SpMV_Data* subhandle; - if (handle->is_set_up) { - subhandle = dynamic_cast(handle->tpl); + if (handle->tpl_rank1) { + subhandle = dynamic_cast(handle->tpl_rank1); if (!subhandle) throw std::runtime_error( "KokkosSparse::spmv: subhandle is not set up for OneMKL CRS"); subhandle->set_exec_space(exec); } else { - subhandle = new OneMKL_SpMV_Data(exec); - handle->tpl = subhandle; + subhandle = new OneMKL_SpMV_Data(exec); + handle->tpl_rank1 = subhandle; oneapi::mkl::sparse::init_matrix_handle(&subhandle->mat); // Even for out-of-order SYCL queue, the inputs here do not depend on // kernels being sequenced @@ -732,7 +741,6 @@ inline void spmv_onemkl(const execution_space& exec, Handle* handle, // optimize_gemv has finished oneapi::mkl::sparse::optimize_gemv(exec.sycl_queue(), mkl_mode, subhandle->mat, {ev}); - handle->is_set_up = true; } // Uncommon case: an out-of-order SYCL queue does not promise that previously