From edf7c672b07149e4b830976a3394990837d23386 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 12 Jun 2024 15:14:11 -0600 Subject: [PATCH 1/3] Update default spmv algorithms for rocsparse - Use stream for common cases (default, fast setup) as it has nearly zero setup cost and performs well for somewhat balanced matrices - Use adaptive (which is rocsparse's default) only if SPMV_MERGE_PATH is the algorithm, as it has a very high setup cost --- sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index be2588483f..9c85be8889 100644 --- a/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -392,7 +392,15 @@ void spmv_rocsparse(const Kokkos::HIP& exec, Handle* handle, const char mode[], &vecY, y.extent_int(0), y_data, rocsparse_compute_type())); - rocsparse_spmv_alg alg = rocsparse_spmv_alg_default; + // Default to using the "stream" algorithm which has almost no setup cost, + // and performs well for reasonably balanced matrices + rocsparse_spmv_alg alg = rocsparse_spmv_alg_csr_stream; + if(handle->get_algorithm() == SPMV_MERGE_PATH) { + // Only use the "adaptive" algorithm if the user has indicated that the + // matrix is very imbalanced, by asking for merge path. This algorithm + // has fairly expensive setup + alg = rocsparse_spmv_alg_csr_adaptive; + } KokkosSparse::Impl::RocSparse_CRS_SpMV_Data* subhandle; if (handle->tpl_rank1) { From 234767e4c86b361704e17605a9b2d95cd7ba6979 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 12 Jun 2024 15:17:11 -0600 Subject: [PATCH 2/3] Re-enable rocsparse spmv for SPMV_FAST_SETUP --- sparse/src/KokkosSparse_spmv.hpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sparse/src/KokkosSparse_spmv.hpp b/sparse/src/KokkosSparse_spmv.hpp index ddbef56504..5fa0be3619 100644 --- a/sparse/src/KokkosSparse_spmv.hpp +++ b/sparse/src/KokkosSparse_spmv.hpp @@ -247,13 +247,6 @@ void spmv(const ExecutionSpace& space, Handle* handle, const char mode[], YVector_Internal y_i(y); bool useNative = is_spmv_algorithm_native(handle->get_algorithm()); - // Also use the native algorithm if SPMV_FAST_SETUP was selected and - // rocSPARSE is the possible TPL to use. Native is faster in this case. -#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE - if (handle->get_algorithm() == SPMV_FAST_SETUP && - std::is_same_v) - useNative = true; -#endif // Now call the proper implementation depending on isBSR and the rank of X/Y if constexpr (!isBSR) { From 3d7c75219e90efe32c386ab0786ca13fcc2a3ace Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 12 Jun 2024 15:19:27 -0600 Subject: [PATCH 3/3] Formatting --- sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index 9c85be8889..33eb052135 100644 --- a/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -395,7 +395,7 @@ void spmv_rocsparse(const Kokkos::HIP& exec, Handle* handle, const char mode[], // Default to using the "stream" algorithm which has almost no setup cost, // and performs well for reasonably balanced matrices rocsparse_spmv_alg alg = rocsparse_spmv_alg_csr_stream; - if(handle->get_algorithm() == SPMV_MERGE_PATH) { + if (handle->get_algorithm() == SPMV_MERGE_PATH) { // Only use the "adaptive" algorithm if the user has indicated that the // matrix is very imbalanced, by asking for merge path. This algorithm // has fairly expensive setup