From eb1b04ece37563afffac4fc924b48de0781d6934 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Fri, 31 Mar 2023 12:35:47 -0400 Subject: [PATCH] Use rocsparse_*bsrmv for BsrMatrix SpMV when rocSparse enabled --- common/src/KokkosKernels_AlwaysFalse.hpp | 39 +++ .../impl/KokkosSparse_spmv_bsrmatrix_spec.hpp | 22 +- sparse/src/KokkosSparse_Utils_rocsparse.hpp | 16 +- sparse/src/KokkosSparse_spmv.hpp | 8 + ...osSparse_spmv_bsrmatrix_tpl_spec_avail.hpp | 65 ++++- ...kosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp | 240 +++++++++++++++++- .../tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 2 + 7 files changed, 370 insertions(+), 22 deletions(-) create mode 100644 common/src/KokkosKernels_AlwaysFalse.hpp diff --git a/common/src/KokkosKernels_AlwaysFalse.hpp b/common/src/KokkosKernels_AlwaysFalse.hpp new file mode 100644 index 0000000000..ec6acbe11b --- /dev/null +++ b/common/src/KokkosKernels_AlwaysFalse.hpp @@ -0,0 +1,39 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSKERNELS_ALWAYSFALSE_HPP +#define KOKKOSKERNELS_ALWAYSFALSE_HPP + +#include + +/*! 
\file KokkosKernels_AlwaysFalse.hpp + \brief A convenience type to be used in a static_assert that should always + fail +*/ + +namespace KokkosKernels { +namespace Impl { + +template +using always_false = std::false_type; + +template +inline constexpr bool always_false_v = always_false::value; + +} // namespace Impl +} // namespace KokkosKernels + +#endif // KOKKOSKERNELS_ALWAYSFALSE_HPP diff --git a/sparse/impl/KokkosSparse_spmv_bsrmatrix_spec.hpp b/sparse/impl/KokkosSparse_spmv_bsrmatrix_spec.hpp index ef5c2e0684..e403ee6b20 100644 --- a/sparse/impl/KokkosSparse_spmv_bsrmatrix_spec.hpp +++ b/sparse/impl/KokkosSparse_spmv_bsrmatrix_spec.hpp @@ -150,14 +150,13 @@ struct SPMV_BSRMATRIX + #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE #include #include "rocsparse/rocsparse.h" @@ -150,21 +152,29 @@ inline rocsparse_datatype rocsparse_compute_type>() { return rocsparse_datatype_f64_c; } -template -struct kokkos_to_rocsparse_type { - using type = Scalar; +template +struct kokkos_to_rocsparse_type; + +// for floats, rocsparse uses c++ builtin types +template +struct kokkos_to_rocsparse_type>> { + using type = T; }; +// translate complex float template <> struct kokkos_to_rocsparse_type> { using type = rocsparse_float_complex; }; +// translate complex double template <> struct kokkos_to_rocsparse_type> { using type = rocsparse_double_complex; }; +// e.g. 
5.4 -> 50400 #define KOKKOSSPARSE_IMPL_ROCM_VERSION \ ROCM_VERSION_MAJOR * 10000 + ROCM_VERSION_MINOR * 100 + ROCM_VERSION_PATCH diff --git a/sparse/src/KokkosSparse_spmv.hpp b/sparse/src/KokkosSparse_spmv.hpp index 8ec6635ffb..c18c0bfeb4 100644 --- a/sparse/src/KokkosSparse_spmv.hpp +++ b/sparse/src/KokkosSparse_spmv.hpp @@ -349,6 +349,14 @@ void spmv(KokkosKernels::Experimental::Controls controls, const char mode[], } #endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE + // rocsparse_*bsrmv supports only NoTranspose: fall back for (C), (T), (H) + if constexpr (std::is_same_v) { + useFallback = useFallback || (mode[0] != NoTranspose[0]); + } +#endif + if (useFallback) { // Explicitly call the non-TPL SPMV_BSRMATRIX implementation std::string label = diff --git a/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_avail.hpp b/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_avail.hpp index 97a9790385..4eea5df57e 100644 --- a/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_avail.hpp +++ b/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_avail.hpp @@ -151,7 +151,7 @@ KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_MKL(Kokkos::complex, Kokkos::OpenMP) #endif -#endif +#endif // KOKKOSKERNELS_ENABLE_TPL_MKL // Specialization struct which defines whether a specialization exists template , #endif +#if defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) + +#include "KokkosSparse_Utils_rocsparse.hpp" + +#define KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE( \ + SCALAR, ORDINAL, OFFSET, LAYOUT, MEMSPACE) \ + template <> \ + struct spmv_bsrmatrix_tpl_spec_avail< \ + const SCALAR, const ORDINAL, Kokkos::Device, \ + Kokkos::MemoryTraits, const OFFSET, const SCALAR*, \ + LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits, SCALAR*, \ + LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > { \ + enum : bool { value = true }; \ + }; + +// These specializations may also work before ROCm 5.4, but that is untested. 
+#if KOKKOSSPARSE_IMPL_ROCM_VERSION >= 50400 + +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(float, rocsparse_int, + rocsparse_int, + Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(double, rocsparse_int, + rocsparse_int, + Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(float, rocsparse_int, + rocsparse_int, + Kokkos::LayoutRight, + Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(double, rocsparse_int, + rocsparse_int, + Kokkos::LayoutRight, + Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, + rocsparse_int, + rocsparse_int, + Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, + rocsparse_int, + rocsparse_int, + Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, + rocsparse_int, + rocsparse_int, + Kokkos::LayoutRight, + Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE(Kokkos::complex, + rocsparse_int, + rocsparse_int, + Kokkos::LayoutRight, + Kokkos::HIPSpace) + +#endif // KOKKOSSPARSE_IMPL_ROCM_VERSION >= 50400 + +#undef KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_AVAIL_ROCSPARSE + +#endif // defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) + } // namespace Impl } // namespace Experimental } // namespace KokkosSparse diff --git a/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp index 0e0fe463a5..cc3e2a6b1e 100644 --- a/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp @@ -17,6 +17,7 @@ #ifndef KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_DECL_HPP #define KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_DECL_HPP +#include "KokkosKernels_AlwaysFalse.hpp" #include "KokkosKernels_Controls.hpp" #include "KokkosSparse_Utils_mkl.hpp" @@ -421,7 +422,7 @@ 
KOKKOSSPARSE_SPMV_MV_MKL(Kokkos::complex, Kokkos::OpenMP, } // namespace Experimental } // namespace KokkosSparse -#endif +#endif // KOKKOSKERNELS_ENABLE_TPL_MKL // cuSPARSE #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE @@ -525,7 +526,7 @@ void spmv_block_impl_cusparse( } KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(descrA)); -#endif // CUDA_VERSION +#endif // (9000 <= CUDA_VERSION) } // Reference @@ -542,7 +543,7 @@ void spmv_block_impl_cusparse( // -> t(t(C)) = t(A * t(B)) + t(t(C)) // -> C = t(t(B)) * t(A) + C // -> C = B * t(A) + C -// This is impossible in cuSparse without explicitly transposing C, +// This is impossible in cuSparse without explicitly transposing A, // so we just do not support LayoutRight in cuSparse TPL now // template < @@ -647,7 +648,7 @@ void spm_mv_block_impl_cusparse( } KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(descrA)); -#endif // CUDA_VERSION +#endif // (9000 <= CUDA_VERSION) } #define KOKKOSSPARSE_SPMV_CUSPARSE(SCALAR, ORDINAL, OFFSET, LAYOUT, SPACE, \ @@ -735,7 +736,7 @@ KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutLeft, KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int, int, Kokkos::LayoutRight, Kokkos::CudaUVMSpace, KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) -#endif // 9000 <= CUDA_VERSION +#endif // (9000 <= CUDA_VERSION) #undef KOKKOSSPARSE_SPMV_CUSPARSE @@ -803,7 +804,7 @@ KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, Kokkos::CudaUVMSpace, false) -#endif // 9000 <= CUDA_VERSION +#endif // (9000 <= CUDA_VERSION) #undef KOKKOSSPARSE_SPMV_MV_CUSPARSE @@ -813,4 +814,231 @@ KOKKOSSPARSE_SPMV_MV_CUSPARSE(Kokkos::complex, int, int, #endif // KOKKOSKERNELS_ENABLE_TPL_CUSPARSE +// -------------------- +// rocSparse (BsrMatrix SpMV forwarded to rocsparse_*bsrmv_ex) +// -------------------- +#if defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) + +#include + +#include "KokkosSparse_Utils_rocsparse.hpp" + +namespace KokkosSparse { +namespace Experimental { +namespace Impl { + +template +void 
spmv_block_impl_rocsparse( + const KokkosKernels::Experimental::Controls& controls, const char mode[], + typename YVector::non_const_value_type const& alpha, const AMatrix& A, + const XVector& x, typename YVector::non_const_value_type const& beta, + const YVector& y) { + /* + rocm 5.4.0 rocsparse_*bsrmv reference: + https://rocsparse.readthedocs.io/en/rocm-5.4.0/usermanual.html#rocsparse-bsrmv-ex + NOTE(review): confirm rocsparse_bsrsv_clear is the right cleanup after *bsrmv_ex_analysis + only trans = rocsparse_operation_none is supported + only descr = rocsparse_matrix_type_general is supported + any mode other than 'N' makes this function throw std::invalid_argument + */ + + using offset_type = typename AMatrix::non_const_size_type; + using ordinal_type = typename AMatrix::non_const_ordinal_type; + using value_type = typename AMatrix::non_const_value_type; + using rocsparse_value_type = + typename KokkosSparse::Impl::kokkos_to_rocsparse_type::type; + + // assert ordinals and offsets are the expected types + static_assert(std::is_same_v, + "A offset_type must be rocsparse_int"); + static_assert(std::is_same_v, + "A ordinal_type must be rocsparse_int"); + + // assert all operands are the same type + using x_value_type = typename XVector::non_const_value_type; + using y_value_type = typename YVector::non_const_value_type; + static_assert(std::is_same_v, + "A and x must have same value type"); + static_assert(std::is_same_v, + "A and y must have same value type"); + + // assert X and Y are non-stride (pass raw pointers to TPL) + static_assert( + !std::is_same_v, + "x must be contiguous"); + static_assert( + !std::is_same_v, + "y must be contiguous"); + + // assert BSR data is non-stride (pass raw pointers to TPL) + static_assert(!std::is_same_v, + "A values must be contiguous"); + static_assert(!std::is_same_v, + "A row_map must be contiguous"); + static_assert(!std::is_same_v, + "A entries must be contiguous"); + + rocsparse_handle handle = 
controls.getRocsparseHandle(); + + // set the mode ('N' only; see the reference note above) + rocsparse_operation trans; + switch (toupper(mode[0])) { + case 'N': trans = rocsparse_operation_none; break; + default: { + 
std::stringstream ss; + ss << "Mode " << mode << " invalid for rocsparse_[*]bsrmv\n"; + throw std::invalid_argument(ss.str()); + } + } + + /* + Specify the matrix direction. + The rocsparse_direction indicates whether a dense matrix should be parsed by + rows or by columns, assuming column-major storage. Values: enumerator + rocsparse_direction_row Parse the matrix by rows. enumerator + rocsparse_direction_column Parse the matrix by columns. + */ + // KokkosSparse Bsr matrix blocks are layoutright (row-major) + static_assert( + std::is_same_v, + "A blocks must be stored layout-right"); + rocsparse_direction dir = rocsparse_direction_row; + + const rocsparse_int mb = rocsparse_int(A.numRows()); // number of block rows + const rocsparse_int nb = rocsparse_int(A.numCols()); // number of block cols + const rocsparse_int nnzb = + rocsparse_int(A.nnz()); // number of non-zero blocks + const rocsparse_value_type* alpha_ = + reinterpret_cast(&alpha); + + const rocsparse_value_type* bsr_val = + reinterpret_cast(A.values.data()); + const rocsparse_int* bsr_row_ptr = A.graph.row_map.data(); + const rocsparse_int* bsr_col_ind = A.graph.entries.data(); + const rocsparse_int block_dim = rocsparse_int(A.blockDim()); + const rocsparse_value_type* x_ = + reinterpret_cast(x.data()); + const rocsparse_value_type* beta_ = + reinterpret_cast(&beta); + rocsparse_value_type* y_ = reinterpret_cast(y.data()); + + rocsparse_mat_descr descr; + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_mat_descr(&descr)); + rocsparse_mat_info info; + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_mat_info(&info)); + if constexpr (std::is_same_v) { + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_sbsrmv_ex_analysis( + handle, dir, trans, mb, nb, nnzb, descr, bsr_val, bsr_row_ptr, + bsr_col_ind, block_dim, info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_sbsrmv_ex( + handle, dir, trans, mb, nb, nnzb, alpha_, descr, bsr_val, bsr_row_ptr, + bsr_col_ind, block_dim, info, x_, beta_, y_)); + 
KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_bsrsv_clear(handle, info)); + } else if constexpr (std::is_same_v) { + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_dbsrmv_ex_analysis( + handle, dir, trans, mb, nb, nnzb, descr, bsr_val, bsr_row_ptr, + bsr_col_ind, block_dim, info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_dbsrmv_ex( + handle, dir, trans, mb, nb, nnzb, alpha_, descr, bsr_val, bsr_row_ptr, + bsr_col_ind, block_dim, info, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_bsrsv_clear(handle, info)); + } else if constexpr (std::is_same_v>) { + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_cbsrmv_ex_analysis( + handle, dir, trans, mb, nb, nnzb, descr, bsr_val, bsr_row_ptr, + bsr_col_ind, block_dim, info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_cbsrmv_ex( + handle, dir, trans, mb, nb, nnzb, alpha_, descr, bsr_val, bsr_row_ptr, + bsr_col_ind, block_dim, info, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_bsrsv_clear(handle, info)); + } else if constexpr (std::is_same_v>) { + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_zbsrmv_ex_analysis( + handle, dir, trans, mb, nb, nnzb, descr, bsr_val, bsr_row_ptr, + bsr_col_ind, block_dim, info)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_zbsrmv_ex( + handle, dir, trans, mb, nb, nnzb, alpha_, descr, bsr_val, bsr_row_ptr, + bsr_col_ind, block_dim, info, x_, beta_, y_)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_bsrsv_clear(handle, info)); + } else { + static_assert(KokkosKernels::Impl::always_false_v, + "unsupported value type for rocsparse_*bsrmv"); + } + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_destroy_mat_descr(descr)); + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_destroy_mat_info(info)); + +} // spmv_block_impl_rocsparse + +#define KOKKOSSPARSE_SPMV_ROCSPARSE(SCALAR, ORDINAL, OFFSET, LAYOUT, SPACE, \ + COMPILE_LIBRARY) \ + template <> \ + struct SPMV_BSRMATRIX< \ + SCALAR const, ORDINAL const, Kokkos::Device, \ + Kokkos::MemoryTraits, OFFSET const, SCALAR const*, \ + LAYOUT, Kokkos::Device, \ + 
Kokkos::MemoryTraits, SCALAR*, \ + LAYOUT, Kokkos::Device, \ + 
Kokkos::MemoryTraits, true, COMPILE_LIBRARY> { \ + using device_type = Kokkos::Device; \ + using memory_trait_type = Kokkos::MemoryTraits; \ + using AMatrix = BsrMatrix; \ + using XVector = Kokkos::View< \ + SCALAR const*, LAYOUT, device_type, \ + Kokkos::MemoryTraits>; \ + using YVector = \ + Kokkos::View; \ + using Controls = KokkosKernels::Experimental::Controls; \ + \ + using coefficient_type = typename YVector::non_const_value_type; \ + \ + static void spmv_bsrmatrix(const Controls& controls, const char mode[], \ + const coefficient_type& alpha, \ + const AMatrix& A, const XVector& x, \ + const coefficient_type& beta, \ + const YVector& y) { \ + std::string label = "KokkosSparse::spmv[TPL_ROCSPARSE,BSRMATRIX" + \ + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spmv_block_impl_rocsparse(controls, mode, alpha, A, x, beta, y); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSSPARSE_SPMV_ROCSPARSE(float, rocsparse_int, rocsparse_int, + Kokkos::LayoutLeft, Kokkos::HIPSpace, + KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_ROCSPARSE(float, rocsparse_int, rocsparse_int, + Kokkos::LayoutRight, Kokkos::HIPSpace, + KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_ROCSPARSE(double, rocsparse_int, rocsparse_int, + Kokkos::LayoutLeft, Kokkos::HIPSpace, + KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_ROCSPARSE(double, rocsparse_int, rocsparse_int, + Kokkos::LayoutRight, Kokkos::HIPSpace, + KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, + rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace, + KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, + rocsparse_int, Kokkos::LayoutRight, + Kokkos::HIPSpace, + KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, + rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace, + KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) 
+KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, + rocsparse_int, Kokkos::LayoutRight, + Kokkos::HIPSpace, + KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) + +#undef KOKKOSSPARSE_SPMV_ROCSPARSE + +} // namespace Impl +} // namespace Experimental +} // namespace KokkosSparse + +#endif // defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) + #endif // KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_DECL_HPP diff --git a/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index 7d14e304d7..11570c0d29 100644 --- a/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -506,6 +506,8 @@ KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutLeft, KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutRight, KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) +#undef KOKKOSSPARSE_SPMV_ROCSPARSE + } // namespace Impl } // namespace KokkosSparse #endif // KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE