From 7129f3b4242ddab0820804b5061421aa5f56f235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Fri, 7 Jan 2022 14:19:05 +0100 Subject: [PATCH 01/19] Refactor MKL implementation of SpGEMM --- .../impl/KokkosSparse_spgemm_mkl_impl.hpp | 863 ++++++------------ 1 file changed, 283 insertions(+), 580 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index 8eb0bd3930..1b22906ea3 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -47,634 +47,337 @@ #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #include "mkl_spblas.h" -#include "mkl.h" #endif -#include "KokkosKernels_Utils.hpp" -#include - namespace KokkosSparse { - namespace Impl { -template -void mkl_symbolic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, - typename KernelHandle::nnz_lno_t n, - typename KernelHandle::nnz_lno_t k, - in_row_index_view_type row_mapA, - in_nonzero_index_view_type entriesA, - - bool transposeA, bin_row_index_view_type row_mapB, - bin_nonzero_index_view_type entriesB, bool transposeB, - cin_row_index_view_type row_mapC, bool verbose = false) { #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL - typedef typename KernelHandle::nnz_lno_t idx; - typedef typename KernelHandle::size_type size_type; - - typedef typename KernelHandle::HandleTempMemorySpace HandleTempMemorySpace; - typedef typename Kokkos::View - int_temp_work_view_t; - - typedef typename KernelHandle::nnz_scalar_t value_type; - - typedef typename KernelHandle::HandleExecSpace MyExecSpace; - /* - if (!( - (Kokkos::SpaceAccessibility::accessible) && - (Kokkos::SpaceAccessibility::accessible) && - (Kokkos::SpaceAccessibility::accessible) ) - ){ - throw std::runtime_error ("MEMORY IS NOT ALLOCATED IN HOST DEVICE for - MKL\n"); return; - } - */ - if (std::is_same::value) { - int *a_xadj = NULL; - int *b_xadj = NULL; - int_temp_work_view_t a_xadj_v, b_xadj_v; - - if (std::is_same::value) { - a_xadj = (int *)row_mapA.data(); - b_xadj = (int *)row_mapB.data(); - } else { - // TODO test this case. - - Kokkos::Timer copy_time; - const int max_integer = 2147483647; - if (entriesB.extent(0) > max_integer || - entriesA.extent(0) > max_integer) { - throw std::runtime_error( - "MKL requires integer values for size type for SPGEMM. Copying to " - "integer will cause overflow.\n"); - return; - } - a_xadj_v = int_temp_work_view_t("tmpa", m + 1); - a_xadj = (int *)a_xadj_v.data(); - b_xadj_v = int_temp_work_view_t("tmpb", n + 1); - b_xadj = (int *)b_xadj_v.data(); - - KokkosKernels::Impl::copy_vector( - m + 1, row_mapA, a_xadj_v); - - KokkosKernels::Impl::copy_vector( - m + 1, row_mapB, b_xadj_v); - - if (verbose) - std::cout << "MKL COPY size type to int TIME:" << copy_time.seconds() - << std::endl; - } - - int *a_adj = (int *)entriesA.data(); - int *b_adj = (int *)entriesB.data(); - - std::vector tmp_values( - KOKKOSKERNELS_MACRO_MAX(entriesB.extent(0), entriesA.extent(0))); - value_type *ptmp_values = &(tmp_values[0]); - value_type *a_ew = ptmp_values; - value_type *b_ew = ptmp_values; - - sparse_matrix_t A; - sparse_matrix_t B; - sparse_matrix_t C; - - if (std::is_same::value) { - if (SPARSE_STATUS_SUCCESS != - mkl_sparse_s_create_csr(&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj, - a_xadj + 1, a_adj, (float *)a_ew)) { - throw std::runtime_error( - "CANNOT CREATE mkl_sparse_s_create_csr A matrix\n"); - return; - } - - if (SPARSE_STATUS_SUCCESS != - mkl_sparse_s_create_csr(&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj, - b_xadj + 1, b_adj, (float *)b_ew)) { - throw std::runtime_error( - "CANNOT CREATE mkl_sparse_s_create_csr B matrix\n"); - return; - } - - sparse_operation_t operation; - if (transposeA && transposeB) { - operation = SPARSE_OPERATION_TRANSPOSE; - } else if (!(transposeA || transposeB)) { - operation = SPARSE_OPERATION_NON_TRANSPOSE; - } else { - throw std::runtime_error( - "MKL either transpose both matrices, or none for SPGEMM\n"); - return; - } - - Kokkos::Timer timer1; - bool success = - SPARSE_STATUS_SUCCESS != mkl_sparse_spmm(operation, A, B, &C); - if (verbose) - std::cout << "Actual FLOAT MKL SPMM Time in symbolic:" - << timer1.seconds() << std::endl; - - if (success) { - throw std::runtime_error( - "ERROR at SPGEMM multiplication in mkl_sparse_spmm\n"); - - return; - } else { - sparse_index_base_t c_indexing; - MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns; - float *values; - - if (SPARSE_STATUS_SUCCESS != - mkl_sparse_s_export_csr(C, &c_indexing, &c_rows, &c_cols, - &rows_start, &rows_end, &columns, - &values)) { - throw std::runtime_error( - "ERROR at exporting result matrix in mkl_sparse_spmm\n"); - return; - } - - if (SPARSE_INDEX_BASE_ZERO != c_indexing) { - throw std::runtime_error("C is not zero based indexed\n"); - return; - } - - KokkosKernels::Impl::copy_vector< - MKL_INT *, typename cin_row_index_view_type::non_const_type, - MyExecSpace>(m, rows_start, row_mapC); - idx nnz = row_mapC(m) = rows_end[m - 1]; - handle->set_c_nnz(nnz); - } - - if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(A)) { - throw std::runtime_error("Error at mkl_sparse_destroy A\n"); - return; - } - - if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(B)) { - throw std::runtime_error("Error at mkl_sparse_destroy B\n"); - return; - } - if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(C)) { - throw std::runtime_error("Error at mkl_sparse_destroy C\n"); - return; - } - } else if (std::is_same::value) { - /* - std::cout << "create a" << std::endl; - std::cout << "m:" << m << " n:" << n << std::endl; - std::cout << "a_xadj[0]:" << a_xadj[0] << " a_xadj[m]:" << a_xadj[m] << - std::endl; std::cout << "a_adj[a_xadj[m] - 1]:" << a_adj[a_xadj[m] - 1] << - " a_ew[a_xadj[m] - 1]:" << a_ew[a_xadj[m] - 1] << std::endl; - */ - if (SPARSE_STATUS_SUCCESS != - mkl_sparse_d_create_csr(&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj, - a_xadj + 1, a_adj, (double *)a_ew)) { - throw std::runtime_error( - "CANNOT CREATE mkl_sparse_s_create_csr A matrix\n"); - return; - } - - // std::cout << "create b" << std::endl; - if (SPARSE_STATUS_SUCCESS != - mkl_sparse_d_create_csr(&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj, - b_xadj + 1, b_adj, (double *)b_ew)) { - throw std::runtime_error( - "CANNOT CREATE mkl_sparse_s_create_csr B matrix\n"); - return; - } +KOKKOS_INLINE_FUNCTION +void mkl_call(sparse_status_t result, const char *err_msg) { + if (SPARSE_STATUS_SUCCESS != result) { + throw std::runtime_error(err_msg); + } +} - sparse_operation_t operation; - if (transposeA && transposeB) { - operation = SPARSE_OPERATION_TRANSPOSE; - } else if (!(transposeA || transposeB)) { - operation = SPARSE_OPERATION_NON_TRANSPOSE; - } else { - throw std::runtime_error( - "MKL either transpose both matrices, or none for SPGEMM\n"); - return; - } +template +class MKLSparseMatrix { + sparse_matrix_t mtx; + + public: + KOKKOS_INLINE_FUNCTION + MKLSparseMatrix(const MKL_INT m, const MKL_INT n, MKL_INT *xadj, MKL_INT *adj, + value_type *values); + + KOKKOS_INLINE_FUNCTION + static MKLSparseMatrix spmm( + sparse_operation_t operation, const MKLSparseMatrix &A, + const MKLSparseMatrix &B) { + sparse_matrix_t c; + mkl_call(mkl_sparse_spmm(operation, A.mtx, B.mtx, &c), + "mkl_sparse_spmm() failed!"); + return MKLSparseMatrix(c); + } - Kokkos::Timer timer1; - bool success = - SPARSE_STATUS_SUCCESS != mkl_sparse_spmm(operation, A, B, &C); - if (verbose) - std::cout << "Actual DOUBLE MKL SPMM Time Without Free:" - << timer1.seconds() << std::endl; - mkl_free_buffers(); - if (verbose) - std::cout << "Actual DOUBLE MKL SPMM Time:" << timer1.seconds() - << std::endl; + KOKKOS_INLINE_FUNCTION + void get(MKL_INT &rows, MKL_INT &cols, MKL_INT *&rows_start, + MKL_INT *&columns, value_type *&values); - if (success) { - throw std::runtime_error( - "ERROR at SPGEMM multiplication in mkl_sparse_spmm\n"); - return; - } else { - sparse_index_base_t c_indexing; - MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns; - double *values; - - if (SPARSE_STATUS_SUCCESS != - mkl_sparse_d_export_csr(C, &c_indexing, &c_rows, &c_cols, - &rows_start, &rows_end, &columns, - &values)) { - throw std::runtime_error( - "ERROR at exporting result matrix in mkl_sparse_spmm\n"); - return; - } - - if (SPARSE_INDEX_BASE_ZERO != c_indexing) { - throw std::runtime_error("C is not zero based indexed\n"); - return; - } - if (handle->mkl_keep_output) { - Kokkos::Timer copy_time; - - KokkosKernels::Impl::copy_vector< - MKL_INT *, typename cin_row_index_view_type::non_const_type, - MyExecSpace>(m, rows_start, row_mapC); - idx nnz = row_mapC(m) = rows_end[m - 1]; - handle->set_c_nnz(nnz); - - double copy_time_d = copy_time.seconds(); - if (verbose) std::cout << "MKL COPYTIME:" << copy_time_d << std::endl; - } - } + KOKKOS_INLINE_FUNCTION + void destroy() { + mkl_call(mkl_sparse_destroy(mtx), "mkl_sparse_destroy() failed!"); + } - if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(A)) { - throw std::runtime_error("Error at mkl_sparse_destroy A\n"); - return; - } + private: + KOKKOS_INLINE_FUNCTION + MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {} +}; + +template <> +KOKKOS_INLINE_FUNCTION MKLSparseMatrix::MKLSparseMatrix( + const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj, + float *values) { + mkl_call(mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, + xadj, xadj + 1, adj, values), + "mkl_sparse_s_create_csr() failed!"); +} - if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(B)) { - throw std::runtime_error("Error at mkl_sparse_destroy B\n"); - return; - } - if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(C)) { - throw std::runtime_error("Error at mkl_sparse_destroy C\n"); - return; - } +template <> +KOKKOS_INLINE_FUNCTION MKLSparseMatrix::MKLSparseMatrix( + const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj, + double *values) { + mkl_call(mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, + xadj, xadj + 1, adj, values), + "mkl_sparse_d_create_csr() failed!"); +} - } else { - throw std::runtime_error( - "MKL requires float or double values. Complex values are not " - "implemented yet.\n"); - return; - } - } else { - throw std::runtime_error("MKL requires local ordinals to be integer.\n"); +template <> +KOKKOS_INLINE_FUNCTION void MKLSparseMatrix::get(MKL_INT &rows, + MKL_INT &cols, + MKL_INT *&rows_start, + MKL_INT *&columns, + float *&values) { + sparse_index_base_t indexing; + MKL_INT *rows_end; + mkl_call(mkl_sparse_s_export_csr(mtx, &indexing, &rows, &cols, &rows_start, + &rows_end, &columns, &values), + "Failed to export matrix with mkl_sparse_s_export_csr()!"); + if (SPARSE_INDEX_BASE_ZERO != indexing) { + throw std::runtime_error( + "Expected zero based indexing in exported MKL sparse matrix\n"); return; } -#else - (void)handle; - (void)m; - (void)n; - (void)k; - (void)row_mapA; - (void)row_mapB; - (void)row_mapC; - (void)entriesA; - (void)entriesB; - (void)transposeA; - (void)transposeB; - (void)verbose; - throw std::runtime_error("MKL IS NOT DEFINED\n"); - // return; -#endif } -template < - typename KernelHandle, typename in_row_index_view_type, - typename in_nonzero_index_view_type, typename in_nonzero_value_view_type, - typename bin_row_index_view_type, typename bin_nonzero_index_view_type, - typename bin_nonzero_value_view_type, typename cin_row_index_view_type, - typename cin_nonzero_index_view_type, typename cin_nonzero_value_view_type> -void mkl_apply(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, - typename KernelHandle::nnz_lno_t n, - typename KernelHandle::nnz_lno_t k, - in_row_index_view_type row_mapA, - in_nonzero_index_view_type entriesA, - in_nonzero_value_view_type valuesA, - - bool transposeA, bin_row_index_view_type row_mapB, - bin_nonzero_index_view_type entriesB, - bin_nonzero_value_view_type valuesB, bool transposeB, - cin_row_index_view_type row_mapC, - cin_nonzero_index_view_type entriesC, - cin_nonzero_value_view_type valuesC, bool verbose = false) { -#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL +template <> +KOKKOS_INLINE_FUNCTION void MKLSparseMatrix::get(MKL_INT &rows, + MKL_INT &cols, + MKL_INT *&rows_start, + MKL_INT *&columns, + double *&values) { + sparse_index_base_t indexing; + MKL_INT *rows_end; + mkl_call(mkl_sparse_d_export_csr(mtx, &indexing, &rows, &cols, &rows_start, + &rows_end, &columns, &values), + "Failed to export matrix with mkl_sparse_s_export_csr()!"); + if (SPARSE_INDEX_BASE_ZERO != indexing) { + throw std::runtime_error( + "Expected zero based indexing in exported MKL sparse matrix\n"); + return; + } +} - typedef typename KernelHandle::nnz_lno_t idx; +template +class MKLApply { + public: + typedef typename KernelHandle::nnz_lno_t nnz_lno_t; typedef typename KernelHandle::size_type size_type; - - typedef typename KernelHandle::HandleTempMemorySpace HandleTempMemorySpace; - typedef typename Kokkos::View - int_temp_work_view_t; - typedef typename KernelHandle::nnz_scalar_t value_type; - + typedef typename KernelHandle::HandleTempMemorySpace HandleTempMemorySpace; typedef typename KernelHandle::HandleExecSpace MyExecSpace; - /* - if (!( - (Kokkos::SpaceAccessibility::accessible) && - (Kokkos::SpaceAccessibility::accessible) && - (Kokkos::SpaceAccessibility::accessible) ) - ){ - throw std::runtime_error ("MEMORY IS NOT ALLOCATED IN HOST DEVICE for - MKL\n"); return; - } - */ - if (std::is_same::value) { - int *a_xadj = NULL; - int *b_xadj = NULL; - int_temp_work_view_t a_xadj_v, b_xadj_v; - - if (std::is_same::value) { - a_xadj = (int *)row_mapA.data(); - b_xadj = (int *)row_mapB.data(); - } else { - // TODO test this case. - - Kokkos::Timer copy_time; - const int max_integer = 2147483647; - if (entriesB.extent(0) > max_integer || - entriesA.extent(0) > max_integer) { - throw std::runtime_error( - "MKL requires integer values for size type for SPGEMM. Copying to " - "integer will cause overflow.\n"); - return; - } - a_xadj_v = int_temp_work_view_t("tmpa", m + 1); - a_xadj = (int *)a_xadj_v.data(); - b_xadj_v = int_temp_work_view_t("tmpb", n + 1); - b_xadj = (int *)b_xadj_v.data(); - - KokkosKernels::Impl::copy_vector( - m + 1, row_mapA, a_xadj_v); - - KokkosKernels::Impl::copy_vector( - m + 1, row_mapB, b_xadj_v); - - if (verbose) - std::cout << "MKL COPY size type to int TIME:" << copy_time.seconds() - << std::endl; + typedef typename Kokkos::View int_tmp_view_t; + + public: + static void mkl_symbolic(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, + nnz_lno_t k, a_rowmap_view_type row_mapA, + a_index_view_type entriesA, bool transposeA, + b_rowmap_view_type row_mapB, + b_index_view_type entriesB, bool transposeB, + c_rowmap_view_type row_mapC, bool verbose = false) { + if (m < 1 || n < 1 || k < 1 || row_mapA(m) < 1 || row_mapB(n) < 1) { + // set correct values in non-empty 0-nnz corner case + handle->set_c_nnz(0); + Kokkos::deep_copy(row_mapC, 0); + return; } - int *a_adj = (int *)entriesA.data(); - int *b_adj = (int *)entriesB.data(); + Kokkos::Timer timer; + using scalar_t = typename KernelHandle::nnz_scalar_t; + using tmp_values_type = + Kokkos::View; - const value_type *a_ew = valuesA.data(); - const value_type *b_ew = valuesB.data(); + const auto export_rowmap = [&](MKL_INT m, MKL_INT *rows_start, + MKL_INT *columns, scalar_t *values) { + if (handle->mkl_keep_output) { + Kokkos::Timer copy_time; + const nnz_lno_t nnz = rows_start[m]; + handle->set_c_nnz(nnz); + copy(m + 1, rows_start, row_mapC); + if (verbose) + std::cout << "\tMKL rowmap export time:" << copy_time.seconds() + << std::endl; + } + }; - sparse_matrix_t A; - sparse_matrix_t B; - sparse_matrix_t C; + // use dummy values for A and B inputs + tmp_values_type tmp_values( + Kokkos::ViewAllocateWithoutInitializing("tmp_values"), + KOKKOSKERNELS_MACRO_MAX(entriesA.extent(0), entriesB.extent(0))); - if (std::is_same::value) { - if (SPARSE_STATUS_SUCCESS != - mkl_sparse_s_create_csr(&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj, - a_xadj + 1, a_adj, (float *)a_ew)) { - throw std::runtime_error( - "CANNOT CREATE mkl_sparse_s_create_csr A matrix\n"); - return; - } + apply(handle, m, n, k, row_mapA, entriesA, tmp_values, transposeA, row_mapB, + entriesB, tmp_values, transposeB, verbose, export_rowmap); - if (SPARSE_STATUS_SUCCESS != - mkl_sparse_s_create_csr(&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj, - b_xadj + 1, b_adj, (float *)b_ew)) { - throw std::runtime_error( - "CANNOT CREATE mkl_sparse_s_create_csr B matrix\n"); - return; - } + if (verbose) + std::cout << "MKL symbolic time:" << timer.seconds() << std::endl; + } - sparse_operation_t operation; - if (transposeA && transposeB) { - operation = SPARSE_OPERATION_TRANSPOSE; - } else if (!(transposeA || transposeB)) { - operation = SPARSE_OPERATION_NON_TRANSPOSE; - } else { - throw std::runtime_error( - "MKL either transpose both matrices, or none for SPGEMM\n"); - return; - } + static void mkl_numeric( + KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k, + a_rowmap_view_type row_mapA, a_index_view_type entriesA, + a_values_view_type valuesA, bool transposeA, b_rowmap_view_type row_mapB, + b_index_view_type entriesB, b_values_view_type valuesB, bool transposeB, + c_rowmap_view_type row_mapC, c_index_view_type entriesC, + c_values_view_type valuesC, bool verbose = false) { + Kokkos::Timer timer; + + const auto export_values = + [&](MKL_INT m, MKL_INT *rows_start, MKL_INT *columns, + typename KernelHandle::nnz_scalar_t *values) { + if (handle->mkl_keep_output) { + Kokkos::Timer copy_time; + const nnz_lno_t nnz = rows_start[m]; + copy(nnz, columns, entriesC); + copy(nnz, values, valuesC); + if (verbose) + std::cout << "\tMKL values export time:" << copy_time.seconds() + << std::endl; + } + }; + + apply(handle, m, n, k, row_mapA, entriesA, valuesA, transposeA, row_mapB, + entriesB, valuesB, transposeB, verbose, export_values); + + if (verbose) + std::cout << "MKL numeric time:" << timer.seconds() << std::endl; + } - Kokkos::Timer timer1; - bool success = - SPARSE_STATUS_SUCCESS != mkl_sparse_spmm(operation, A, B, &C); - if (verbose) - std::cout << "Actual FLOAT MKL SPMM Time:" << timer1.seconds() - << std::endl; + private: + static constexpr int max_integer = 2147483647; - if (success) { - throw std::runtime_error( - "ERROR at SPGEMM multiplication in mkl_sparse_spmm\n"); - - return; - } else { - sparse_index_base_t c_indexing; - MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns; - float *values; - - if (SPARSE_STATUS_SUCCESS != - mkl_sparse_s_export_csr(C, &c_indexing, &c_rows, &c_cols, - &rows_start, &rows_end, &columns, - &values)) { - throw std::runtime_error( - "ERROR at exporting result matrix in mkl_sparse_spmm\n"); - return; - } - - if (SPARSE_INDEX_BASE_ZERO != c_indexing) { - throw std::runtime_error("C is not zero based indexed\n"); - return; - } - - // KokkosKernels::Impl::copy_vector (m, rows_start, - // row_mapC); idx nnz = row_mapC(m) = rows_end[m - 1]; - idx nnz = rows_end[m - 1]; - using non_const_size_type = - typename cin_row_index_view_type::non_const_value_type; - auto *tmpPtr = const_cast(row_mapC.data()); - tmpPtr[m] = nnz; - - KokkosKernels::Impl::copy_vector< - MKL_INT *, typename cin_nonzero_index_view_type::non_const_type, - MyExecSpace>(nnz, columns, entriesC); - KokkosKernels::Impl::copy_vector< - float *, typename cin_nonzero_value_view_type::non_const_type, - MyExecSpace>(nnz, values, valuesC); - } + private: + template + static void apply(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k, + a_rowmap_view_type row_mapA, a_index_view_type entriesA, + a_values_view_type valuesA, - if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(A)) { - throw std::runtime_error("Error at mkl_sparse_destroy A\n"); - return; - } + bool transposeA, b_rowmap_view_type row_mapB, + b_index_view_type entriesB, b_values_view_type valuesB, + bool transposeB, bool verbose, const CB &callback) { + if (!std::is_same::value) { + throw std::runtime_error("MKL requires local ordinals to be integer.\n"); + } - if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(B)) { - throw std::runtime_error("Error at mkl_sparse_destroy B\n"); - return; - } - if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(C)) { - throw std::runtime_error("Error at mkl_sparse_destroy C\n"); - return; - } - } else if (std::is_same::value) { - /* - std::cout << "create a" << std::endl; - std::cout << "m:" << m << " n:" << n << std::endl; - std::cout << "a_xadj[0]:" << a_xadj[0] << " a_xadj[m]:" << a_xadj[m] << - std::endl; std::cout << "a_adj[a_xadj[m] - 1]:" << a_adj[a_xadj[m] - 1] << - " a_ew[a_xadj[m] - 1]:" << a_ew[a_xadj[m] - 1] << std::endl; - */ - if (SPARSE_STATUS_SUCCESS != - mkl_sparse_d_create_csr(&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj, - a_xadj + 1, a_adj, (double *)a_ew)) { - throw std::runtime_error( - "CANNOT CREATE mkl_sparse_s_create_csr A matrix\n"); - return; - } + if (m < 1 || n < 1 || k < 1 || row_mapA(m) < 1 || row_mapB(n) < 1) { + return; + } - // std::cout << "create b" << std::endl; - if (SPARSE_STATUS_SUCCESS != - mkl_sparse_d_create_csr(&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj, - b_xadj + 1, b_adj, (double *)b_ew)) { - throw std::runtime_error( - "CANNOT CREATE mkl_sparse_s_create_csr B matrix\n"); - return; - } + int *a_xadj = (int *)row_mapA.data(); + int *b_xadj = (int *)row_mapB.data(); + int_tmp_view_t a_xadj_v, b_xadj_v; - sparse_operation_t operation; - if (transposeA && transposeB) { - operation = SPARSE_OPERATION_TRANSPOSE; - } else if (!(transposeA || transposeB)) { - operation = SPARSE_OPERATION_NON_TRANSPOSE; - } else { + if (!std::is_same::value) { + if (entriesA.extent(0) > max_integer || + entriesB.extent(0) > max_integer) { throw std::runtime_error( - "MKL either transpose both matrices, or none for SPGEMM\n"); - return; + "MKL requires integer values for size type for SPGEMM. Copying " + "to " + "integer will cause overflow.\n"); } + static_assert( + std::is_same::value, + "deep_copy requires non-const destination type"); - Kokkos::Timer timer1; - bool success = - SPARSE_STATUS_SUCCESS != mkl_sparse_spmm(operation, A, B, &C); - if (verbose) - std::cout << "Actual DOUBLE MKL SPMM Time Without Free:" - << timer1.seconds() << std::endl; - - mkl_free_buffers(); + Kokkos::Timer copy_time; + a_xadj_v = int_tmp_view_t("tmpa", m + 1); + b_xadj_v = int_tmp_view_t("tmpb", n + 1); + Kokkos::deep_copy(a_xadj_v, row_mapA); + Kokkos::deep_copy(b_xadj_v, row_mapB); + a_xadj = (int *)a_xadj_v.data(); + b_xadj = (int *)b_xadj_v.data(); if (verbose) - std::cout << "Actual DOUBLE MKL SPMM Time:" << timer1.seconds() - << std::endl; - - if (success) { - throw std::runtime_error( - "ERROR at SPGEMM multiplication in mkl_sparse_spmm\n"); - return; - } else { - sparse_index_base_t c_indexing; - MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns; - double *values; - - if (SPARSE_STATUS_SUCCESS != - mkl_sparse_d_export_csr(C, &c_indexing, &c_rows, &c_cols, - &rows_start, &rows_end, &columns, - &values)) { - throw std::runtime_error( - "ERROR at exporting result matrix in mkl_sparse_spmm\n"); - return; - } - - if (SPARSE_INDEX_BASE_ZERO != c_indexing) { - throw std::runtime_error("C is not zero based indexed\n"); - return; - } - if (handle->mkl_keep_output) { - Kokkos::Timer copy_time; - - // KokkosKernels::Impl::copy_vector (m, - // rows_start, row_mapC); idx nnz = row_mapC(m) = rows_end[m - 1]; - idx nnz = rows_end[m - 1]; - using non_const_size_type = - typename cin_row_index_view_type::non_const_value_type; - auto *tmpPtr = const_cast(row_mapC.data()); - tmpPtr[m] = nnz; - - KokkosKernels::Impl::copy_vector< - MKL_INT *, typename cin_nonzero_index_view_type::non_const_type, - MyExecSpace>(nnz, columns, entriesC); - KokkosKernels::Impl::copy_vector< - double *, typename cin_nonzero_value_view_type::non_const_type, - MyExecSpace>(nnz, values, valuesC); - double copy_time_d = copy_time.seconds(); - if (verbose) std::cout << "MKL COPYTIME:" << copy_time_d << std::endl; - } - } + std::cout << "\tMKL int-type temp rowmap copy time:" + << copy_time.seconds() << std::endl; + } - if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(A)) { - throw std::runtime_error("Error at mkl_sparse_destroy A\n"); - return; - } + value_type *a_ew = (value_type *)valuesA.data(); + value_type *b_ew = (value_type *)valuesB.data(); - if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(B)) { - throw std::runtime_error("Error at mkl_sparse_destroy B\n"); - return; - } - if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(C)) { - throw std::runtime_error("Error at mkl_sparse_destroy C\n"); - return; - } + using Matrix = MKLSparseMatrix; + Matrix A(m, n, a_xadj, (int *)(entriesA.data()), a_ew); + Matrix B(n, k, b_xadj, (int *)entriesB.data(), b_ew); + sparse_operation_t operation; + if (transposeA && transposeB) { + operation = SPARSE_OPERATION_TRANSPOSE; + } else if (!(transposeA || transposeB)) { + operation = SPARSE_OPERATION_NON_TRANSPOSE; } else { throw std::runtime_error( - "MKL requires float or double values. Complex values are not " - "implemented yet.\n"); - return; + "MKL either transpose both matrices, or none for SPGEMM\n"); } - } else { - throw std::runtime_error("MKL requires local ordinals to be integer.\n"); - return; + + Kokkos::Timer timer1; + Matrix C = Matrix::spmm(operation, A, B); + if (verbose) { + std::cout << "\tMKL spmm ("; + if (std::is_same::value) + std::cout << "FLOAT"; + else if (std::is_same::value) + std::cout << "DOUBLE"; + else + std::cout << "?"; + std::cout << ") time:" << timer1.seconds() << std::endl; + } + + MKL_INT c_rows, c_cols, *rows_start, *columns; + value_type *values; + C.get(c_rows, c_cols, rows_start, columns, values); + callback(m, rows_start, columns, values); + + A.destroy(); + B.destroy(); + C.destroy(); + } + + template + KOKKOS_INLINE_FUNCTION static void copy(size_t num_elems, from_type from, + to_type to) { + KokkosKernels::Impl::copy_vector(num_elems, + from, to); } +}; +#endif // KOKKOSKERNELS_ENABLE_TPL_MKL + +template +void mkl_symbolic(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k, + a_rowmap_type row_mapA, a_index_type entriesA, + bool transposeA, b_rowmap_type row_mapB, + b_index_type entriesB, bool transposeB, + c_rowmap_type row_mapC, bool verbose = false) { +#ifndef KOKKOSKERNELS_ENABLE_TPL_MKL + throw std::runtime_error("MKL was not enabled in this build!"); +#else + using values_type = typename KernelHandle::scalar_temp_work_view_t; + using c_index_type = b_index_type; + using mkl = MKLApply; + mkl::mkl_symbolic(handle, m, n, k, row_mapA, entriesA, transposeA, row_mapB, + entriesB, transposeB, row_mapC, verbose); +#endif +} + +template +void mkl_apply(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k, + a_rowmap_type row_mapA, a_index_type entriesA, + a_values_type valuesA, bool transposeA, b_rowmap_type row_mapB, + b_index_type entriesB, b_values_type valuesB, bool transposeB, + c_rowmap_type row_mapC, c_index_type entriesC, + c_values_type valuesC, bool verbose = false) { +#ifndef KOKKOSKERNELS_ENABLE_TPL_MKL + throw std::runtime_error("MKL was not enabled in this build!"); #else - (void)handle; - (void)m; - (void)n; - (void)k; - (void)row_mapA; - (void)row_mapB; - (void)row_mapC; - (void)entriesA; - (void)entriesB; - (void)entriesC; - (void)valuesA; - (void)valuesB; - (void)valuesC; - (void)transposeA; - (void)transposeB; - (void)verbose; - throw std::runtime_error("MKL IS NOT DEFINED\n"); - // return; + using mkl = MKLApply; + mkl::mkl_numeric(handle, m, n, k, row_mapA, entriesA, valuesA, transposeA, + row_mapB, entriesB, valuesB, transposeB, row_mapC, entriesC, + valuesC, verbose); #endif } + } // namespace Impl } // namespace KokkosSparse From 272461125c6ea2afae9c6ea1c79c02ad89c75cc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Fri, 7 Jan 2022 14:19:05 +0100 Subject: [PATCH 02/19] Fix MKL dispatch in SpGEMM unit test --- unit_test/sparse/Test_Sparse_spgemm.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp index b84ef6acc4..e5ab088bdc 100644 --- a/unit_test/sparse/Test_Sparse_spgemm.hpp +++ b/unit_test/sparse/Test_Sparse_spgemm.hpp @@ -280,7 +280,7 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, SPGEMM_KK_SPEED /* alias SPGEMM_KK_DENSE */ }; -#ifdef HAVE_KOKKOSKERNELS_MKL +#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL algorithms.push_back(SPGEMM_MKL); #endif From 5d535fea8744262e775abd3e31b53b4fdea64554 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Fri, 7 Jan 2022 14:19:05 +0100 Subject: [PATCH 03/19] Fixed inlining: don't comile exception throwing MKL wrappers for GPU --- .../impl/KokkosSparse_spgemm_mkl_impl.hpp | 55 ++++++++----------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index 1b22906ea3..44ae49fc34 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -54,8 +54,7 @@ namespace Impl { #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -KOKKOS_INLINE_FUNCTION -void mkl_call(sparse_status_t result, const char *err_msg) { +inline void mkl_call(sparse_status_t result, const char *err_msg) { if (SPARSE_STATUS_SUCCESS != result) { throw std::runtime_error(err_msg); } @@ -66,12 +65,10 @@ class MKLSparseMatrix { sparse_matrix_t mtx; public: - KOKKOS_INLINE_FUNCTION - MKLSparseMatrix(const MKL_INT m, const MKL_INT n, MKL_INT *xadj, MKL_INT *adj, - value_type *values); + inline MKLSparseMatrix(const MKL_INT m, const MKL_INT n, MKL_INT *xadj, + MKL_INT *adj, value_type *values); - KOKKOS_INLINE_FUNCTION - static MKLSparseMatrix spmm( + inline static MKLSparseMatrix spmm( sparse_operation_t operation, const MKLSparseMatrix &A, const MKLSparseMatrix &B) { sparse_matrix_t c; @@ -80,44 +77,41 @@ class MKLSparseMatrix { return MKLSparseMatrix(c); } - KOKKOS_INLINE_FUNCTION - void get(MKL_INT &rows, MKL_INT &cols, MKL_INT *&rows_start, - MKL_INT *&columns, value_type *&values); + inline void get(MKL_INT &rows, MKL_INT &cols, MKL_INT *&rows_start, + MKL_INT *&columns, value_type *&values); - KOKKOS_INLINE_FUNCTION - void destroy() { + inline void destroy() { mkl_call(mkl_sparse_destroy(mtx), "mkl_sparse_destroy() failed!"); } private: - KOKKOS_INLINE_FUNCTION - MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {} + inline MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {} }; template <> -KOKKOS_INLINE_FUNCTION MKLSparseMatrix::MKLSparseMatrix( - const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj, - float *values) { +inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, + const MKL_INT cols, + MKL_INT *xadj, MKL_INT *adj, + float *values) { mkl_call(mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj, values), "mkl_sparse_s_create_csr() failed!"); } template <> -KOKKOS_INLINE_FUNCTION MKLSparseMatrix::MKLSparseMatrix( - const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj, - double *values) { +inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, + const MKL_INT cols, + MKL_INT *xadj, MKL_INT *adj, + double *values) { mkl_call(mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj, values), "mkl_sparse_d_create_csr() failed!"); } template <> -KOKKOS_INLINE_FUNCTION void MKLSparseMatrix::get(MKL_INT &rows, - MKL_INT &cols, - MKL_INT *&rows_start, - MKL_INT *&columns, - float *&values) { +inline void MKLSparseMatrix::get(MKL_INT &rows, MKL_INT &cols, + MKL_INT *&rows_start, MKL_INT *&columns, + float *&values) { sparse_index_base_t indexing; MKL_INT *rows_end; mkl_call(mkl_sparse_s_export_csr(mtx, &indexing, &rows, &cols, &rows_start, @@ -131,11 +125,9 @@ KOKKOS_INLINE_FUNCTION void MKLSparseMatrix::get(MKL_INT &rows, } template <> -KOKKOS_INLINE_FUNCTION void MKLSparseMatrix::get(MKL_INT &rows, - MKL_INT &cols, - MKL_INT *&rows_start, - MKL_INT *&columns, - double *&values) { +inline void MKLSparseMatrix::get(MKL_INT &rows, MKL_INT &cols, + MKL_INT *&rows_start, + MKL_INT *&columns, double *&values) { sparse_index_base_t indexing; MKL_INT *rows_end; mkl_call(mkl_sparse_d_export_csr(mtx, &indexing, &rows, &cols, &rows_start, @@ -326,8 +318,7 @@ class MKLApply { } template - KOKKOS_INLINE_FUNCTION static void copy(size_t num_elems, from_type from, - to_type to) { + inline static void copy(size_t num_elems, from_type from, to_type to) { KokkosKernels::Impl::copy_vector(num_elems, from, to); } From 3556dffffc2cb4088e883bf55e805f227885a8a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Fri, 7 Jan 2022 14:19:05 +0100 Subject: [PATCH 04/19] Support GPU memory space in MKL spgemm --- .../impl/KokkosSparse_spgemm_mkl_impl.hpp | 81 ++++++++++++------- unit_test/sparse/Test_Sparse_spgemm.hpp | 6 -- 2 files changed, 54 insertions(+), 33 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index 44ae49fc34..9bc4a9faac 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -150,9 +150,8 @@ class MKLApply { typedef typename KernelHandle::nnz_lno_t nnz_lno_t; typedef typename KernelHandle::size_type size_type; typedef typename KernelHandle::nnz_scalar_t value_type; - typedef typename KernelHandle::HandleTempMemorySpace HandleTempMemorySpace; typedef typename KernelHandle::HandleExecSpace MyExecSpace; - typedef typename Kokkos::View int_tmp_view_t; + typedef typename Kokkos::View int_tmp_view_t; public: static void mkl_symbolic(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, @@ -161,7 +160,8 @@ class MKLApply { b_rowmap_view_type row_mapB, b_index_view_type entriesB, bool transposeB, c_rowmap_view_type row_mapC, bool verbose = false) { - if (m < 1 || n < 1 || k < 1 || row_mapA(m) < 1 || row_mapB(n) < 1) { + if (m < 1 || n < 1 || k < 1 || entriesA.extent(0) < 1 || + entriesB.extent(0) < 1) { // set correct values in non-empty 0-nnz corner case handle->set_c_nnz(0); Kokkos::deep_copy(row_mapC, 0); @@ -170,8 +170,6 @@ class MKLApply { Kokkos::Timer timer; using scalar_t = typename KernelHandle::nnz_scalar_t; - using tmp_values_type = - Kokkos::View; const auto export_rowmap = [&](MKL_INT m, MKL_INT *rows_start, MKL_INT *columns, scalar_t *values) { @@ -179,7 +177,7 @@ class MKLApply { Kokkos::Timer copy_time; const nnz_lno_t nnz = rows_start[m]; handle->set_c_nnz(nnz); - copy(m + 1, rows_start, row_mapC); + copy(make_host_view(rows_start, m + 1), row_mapC); if (verbose) std::cout << "\tMKL rowmap export time:" << copy_time.seconds() << std::endl; @@ -187,12 +185,15 @@ class MKLApply { }; // use dummy values for A and B inputs - tmp_values_type tmp_values( - Kokkos::ViewAllocateWithoutInitializing("tmp_values"), - KOKKOSKERNELS_MACRO_MAX(entriesA.extent(0), entriesB.extent(0))); + a_values_view_type tmp_valsA( + Kokkos::ViewAllocateWithoutInitializing("tmp_valuesA"), + entriesA.extent(0)); + b_values_view_type tmp_valsB( + Kokkos::ViewAllocateWithoutInitializing("tmp_valuesB"), + entriesB.extent(0)); - apply(handle, m, n, k, row_mapA, entriesA, tmp_values, transposeA, row_mapB, - entriesB, tmp_values, transposeB, verbose, export_rowmap); + apply(handle, m, n, k, row_mapA, entriesA, tmp_valsA, transposeA, row_mapB, + entriesB, tmp_valsB, transposeB, verbose, export_rowmap); if (verbose) std::cout << "MKL symbolic time:" << timer.seconds() << std::endl; @@ -213,8 +214,8 @@ class MKLApply { if (handle->mkl_keep_output) { Kokkos::Timer copy_time; const nnz_lno_t nnz = rows_start[m]; - copy(nnz, columns, entriesC); - copy(nnz, values, valuesC); + copy(make_host_view(columns, nnz), entriesC); + copy(make_host_view(values, nnz), valuesC); if (verbose) std::cout << "\tMKL values export time:" << copy_time.seconds() << std::endl; @@ -244,12 +245,19 @@ class MKLApply { throw std::runtime_error("MKL requires local ordinals to be integer.\n"); } - if (m < 1 || n < 1 || k < 1 || row_mapA(m) < 1 || row_mapB(n) < 1) { + if (m < 1 || n < 1 || k < 1 || entriesA.extent(0) < 1 || + entriesB.extent(0) < 1) { return; } - int *a_xadj = (int *)row_mapA.data(); - int *b_xadj = (int *)row_mapB.data(); + const auto create_mirror = [](auto view) { + return Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), view); + }; + + auto h_rowsA = create_mirror(row_mapA); + auto h_rowsB = create_mirror(row_mapB); + const int *a_xadj = reinterpret_cast(h_rowsA.data()); + const int *b_xadj = reinterpret_cast(h_rowsB.data()); int_tmp_view_t a_xadj_v, b_xadj_v; if (!std::is_same::value) { @@ -268,8 +276,8 @@ class MKLApply { Kokkos::Timer copy_time; a_xadj_v = int_tmp_view_t("tmpa", m + 1); b_xadj_v = int_tmp_view_t("tmpb", n + 1); - Kokkos::deep_copy(a_xadj_v, row_mapA); - Kokkos::deep_copy(b_xadj_v, row_mapB); + Kokkos::deep_copy(a_xadj_v, h_rowsA); + Kokkos::deep_copy(b_xadj_v, h_rowsB); a_xadj = (int *)a_xadj_v.data(); b_xadj = (int *)b_xadj_v.data(); if (verbose) @@ -277,12 +285,20 @@ class MKLApply { << copy_time.seconds() << std::endl; } - value_type *a_ew = (value_type *)valuesA.data(); - value_type *b_ew = (value_type *)valuesB.data(); - + auto h_valsA = create_mirror(valuesA); + auto h_valsB = create_mirror(valuesB); + auto h_entriesA = create_mirror(entriesA); + auto h_entriesB = create_mirror(entriesB); + const int *a_adj = h_entriesA.data(); + const int *b_adj = h_entriesB.data(); + const value_type *a_ew = h_valsA.data(); + const value_type *b_ew = h_valsB.data(); + + // Hack: we discard const with pointer casts here to work around MKL + // requiring mutable input and our symbolic interface not providing it using Matrix = MKLSparseMatrix; - Matrix A(m, n, a_xadj, (int *)(entriesA.data()), a_ew); - Matrix B(n, k, b_xadj, (int *)entriesB.data(), b_ew); + Matrix A(m, n, (int *)a_xadj, (int *)a_adj, (value_type *)a_ew); + Matrix B(n, k, (int *)b_xadj, (int *)b_adj, (value_type *)b_ew); sparse_operation_t operation; if (transposeA && transposeB) { @@ -317,10 +333,21 @@ class MKLApply { C.destroy(); } - template - inline static void copy(size_t num_elems, from_type from, to_type to) { - KokkosKernels::Impl::copy_vector(num_elems, - from, to); + template + inline static void copy(from_view_type from, dst_view_type to) { + auto h_from = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), from); + auto h_to = Kokkos::create_mirror_view(Kokkos::HostSpace(), to); + Kokkos::deep_copy(h_to, h_from); // view copy (for different element types) + Kokkos::deep_copy(to, h_to); + Kokkos::fence(); + } + + template + inline static decltype(auto) make_host_view(const T *data, size_t num_elems) { + using device_type = + Kokkos::Device; + return Kokkos::View(data, num_elems); } }; #endif // KOKKOSKERNELS_ENABLE_TPL_MKL diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp index e5ab088bdc..cb3d04b019 100644 --- a/unit_test/sparse/Test_Sparse_spgemm.hpp +++ b/unit_test/sparse/Test_Sparse_spgemm.hpp @@ -315,12 +315,6 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, if (A.values.extent(0) > max_integer) { is_expected_to_fail = true; } - - if (!(Kokkos::SpaceAccessibility< - typename Kokkos::HostSpace::execution_space, - typename device::memory_space>::accessible)) { - is_expected_to_fail = true; - } break; case SPGEMM_KK: algo = "SPGEMM_KK"; break; From 0ba8b395bdb56f027c86f69c4f8e50521aff63f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Wed, 19 Jan 2022 15:56:50 +0100 Subject: [PATCH 05/19] fix -Wunused-parameter errors --- .../impl/KokkosSparse_spgemm_mkl_impl.hpp | 39 ++++++++++++++++--- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index 9bc4a9faac..13d0c00e1e 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -172,7 +172,8 @@ class MKLApply { using scalar_t = typename KernelHandle::nnz_scalar_t; const auto export_rowmap = [&](MKL_INT m, MKL_INT *rows_start, - MKL_INT *columns, scalar_t *values) { + MKL_INT * /*columns*/, + scalar_t * /*values*/) { if (handle->mkl_keep_output) { Kokkos::Timer copy_time; const nnz_lno_t nnz = rows_start[m]; @@ -204,7 +205,7 @@ class MKLApply { a_rowmap_view_type row_mapA, a_index_view_type entriesA, a_values_view_type valuesA, bool transposeA, b_rowmap_view_type row_mapB, b_index_view_type entriesB, b_values_view_type valuesB, bool transposeB, - c_rowmap_view_type row_mapC, c_index_view_type entriesC, + c_rowmap_view_type /* row_mapC */, c_index_view_type entriesC, c_values_view_type valuesC, bool verbose = false) { Kokkos::Timer timer; @@ -234,9 +235,9 @@ class MKLApply { private: template - static void apply(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k, - a_rowmap_view_type row_mapA, a_index_view_type entriesA, - a_values_view_type valuesA, + static void apply(KernelHandle * /* handle */, nnz_lno_t m, nnz_lno_t n, + nnz_lno_t k, a_rowmap_view_type row_mapA, + a_index_view_type entriesA, a_values_view_type valuesA, bool transposeA, b_rowmap_view_type row_mapB, b_index_view_type entriesB, b_values_view_type valuesB, @@ -362,6 +363,18 @@ void mkl_symbolic(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k, c_rowmap_type row_mapC, bool verbose = false) { #ifndef KOKKOSKERNELS_ENABLE_TPL_MKL throw std::runtime_error("MKL was not enabled in this build!"); + (void)handle; + (void)m; + (void)n; + (void)k; + (void)row_mapA; + (void)entriesA; + (void)transposeA; + (void)row_mapB; + (void)entriesB; + (void)transposeB; + (void)row_mapC; + (void)verbose; #else using values_type = typename KernelHandle::scalar_temp_work_view_t; using c_index_type = b_index_type; @@ -386,6 +399,22 @@ void mkl_apply(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k, c_values_type valuesC, bool verbose = false) { #ifndef KOKKOSKERNELS_ENABLE_TPL_MKL throw std::runtime_error("MKL was not enabled in this build!"); + (void)handle; + (void)m; + (void)n; + (void)k; + (void)row_mapA; + (void)entriesA; + (void)valuesA; + (void)transposeA; + (void)row_mapB; + (void)entriesB; + (void)valuesB; + (void)transposeB; + (void)row_mapC; + (void)entriesC; + (void)valuesC; + (void)verbose; #else using mkl = MKLApply Date: Wed, 2 Feb 2022 21:51:30 +0100 Subject: [PATCH 06/19] Fix name shadowing --- src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index 13d0c00e1e..e6babd1a30 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -171,14 +171,14 @@ class MKLApply { Kokkos::Timer timer; using scalar_t = typename KernelHandle::nnz_scalar_t; - const auto export_rowmap = [&](MKL_INT m, MKL_INT *rows_start, + const auto export_rowmap = [&](MKL_INT num_rows, MKL_INT *rows_start, MKL_INT * /*columns*/, scalar_t * /*values*/) { if (handle->mkl_keep_output) { Kokkos::Timer copy_time; - const nnz_lno_t nnz = rows_start[m]; + const nnz_lno_t nnz = rows_start[num_rows]; handle->set_c_nnz(nnz); - copy(make_host_view(rows_start, m + 1), row_mapC); + copy(make_host_view(rows_start, num_rows + 1), row_mapC); if (verbose) std::cout << "\tMKL rowmap export time:" << copy_time.seconds() << std::endl; @@ -210,11 +210,11 @@ class MKLApply { Kokkos::Timer timer; const auto export_values = - [&](MKL_INT m, MKL_INT *rows_start, MKL_INT *columns, + [&](MKL_INT num_rows, MKL_INT *rows_start, MKL_INT *columns, typename KernelHandle::nnz_scalar_t *values) { if (handle->mkl_keep_output) { Kokkos::Timer copy_time; - const nnz_lno_t nnz = rows_start[m]; + const nnz_lno_t nnz = rows_start[num_rows]; copy(make_host_view(columns, nnz), entriesC); copy(make_host_view(values, nnz), valuesC); if (verbose) From 850db252d3e5be106e3c9acfcae44f978284c87a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Wed, 2 Feb 2022 21:51:58 +0100 Subject: [PATCH 07/19] Remove unnecessary fence --- src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index e6babd1a30..4f73703065 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -341,7 +341,6 @@ class MKLApply { auto h_to = Kokkos::create_mirror_view(Kokkos::HostSpace(), to); Kokkos::deep_copy(h_to, h_from); // view copy (for different element types) Kokkos::deep_copy(to, h_to); - Kokkos::fence(); } template From 62f0549de7aab3e7e7d1924c2dbfe276c24373a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Wed, 2 Feb 2022 21:52:30 +0100 Subject: [PATCH 08/19] Clean up make_host_view() --- src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index 4f73703065..9770465eb3 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -343,11 +343,10 @@ class MKLApply { Kokkos::deep_copy(to, h_to); } - template - inline static decltype(auto) make_host_view(const T *data, size_t num_elems) { - using device_type = - Kokkos::Device; - return Kokkos::View(data, num_elems); + template > + inline static view_type make_host_view(const T *data, size_t num_elems) { + return view_type(data, num_elems); } }; #endif // KOKKOSKERNELS_ENABLE_TPL_MKL From 146fcfe649228fdad5950a573bf1002e6bfaf6d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Wed, 2 Feb 2022 22:01:18 +0100 Subject: [PATCH 09/19] Rename get() to export_data() --- .../impl/KokkosSparse_spgemm_mkl_impl.hpp | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index 9770465eb3..d0b36c2a50 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -77,8 +77,9 @@ class MKLSparseMatrix { return MKLSparseMatrix(c); } - inline void get(MKL_INT &rows, MKL_INT &cols, MKL_INT *&rows_start, - MKL_INT *&columns, value_type *&values); + inline void export_data(MKL_INT &num_rows, MKL_INT &num_cols, + MKL_INT *&rows_start, MKL_INT *&columns, + value_type *&values); inline void destroy() { mkl_call(mkl_sparse_destroy(mtx), "mkl_sparse_destroy() failed!"); @@ -109,13 +110,15 @@ inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, } template <> -inline void MKLSparseMatrix::get(MKL_INT &rows, MKL_INT &cols, - MKL_INT *&rows_start, MKL_INT *&columns, - float *&values) { +inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, + MKL_INT &num_cols, + MKL_INT *&rows_start, + MKL_INT *&columns, + float *&values) { sparse_index_base_t indexing; MKL_INT *rows_end; - mkl_call(mkl_sparse_s_export_csr(mtx, &indexing, &rows, &cols, &rows_start, - &rows_end, &columns, &values), + mkl_call(mkl_sparse_s_export_csr(mtx, &indexing, &num_rows, &num_cols, + &rows_start, &rows_end, &columns, &values), "Failed to export matrix with mkl_sparse_s_export_csr()!"); if (SPARSE_INDEX_BASE_ZERO != indexing) { throw std::runtime_error( @@ -125,13 +128,15 @@ inline void MKLSparseMatrix::get(MKL_INT &rows, MKL_INT &cols, } template <> -inline void MKLSparseMatrix::get(MKL_INT &rows, MKL_INT &cols, - MKL_INT *&rows_start, - MKL_INT *&columns, double *&values) { +inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, + MKL_INT &num_cols, + MKL_INT *&rows_start, + MKL_INT *&columns, + double *&values) { sparse_index_base_t indexing; MKL_INT *rows_end; - mkl_call(mkl_sparse_d_export_csr(mtx, &indexing, &rows, &cols, &rows_start, - &rows_end, &columns, &values), + mkl_call(mkl_sparse_d_export_csr(mtx, &indexing, &num_rows, &num_cols, + &rows_start, &rows_end, &columns, &values), "Failed to export matrix with mkl_sparse_s_export_csr()!"); if (SPARSE_INDEX_BASE_ZERO != indexing) { throw std::runtime_error( @@ -324,9 +329,9 @@ class MKLApply { std::cout << ") time:" << timer1.seconds() << std::endl; } - MKL_INT c_rows, c_cols, *rows_start, *columns; + MKL_INT num_rows, num_cols, *rows_start, *columns; value_type *values; - C.get(c_rows, c_cols, rows_start, columns, values); + C.export_data(num_rows, num_cols, rows_start, columns, values); callback(m, rows_start, columns, values); A.destroy(); From 102eb6f44865510fbd3d831fd4316c68538e4a55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Thu, 17 Feb 2022 13:25:24 +0100 Subject: [PATCH 10/19] Fix -Wunused-parameter errors --- .../impl/KokkosSparse_spgemm_mkl2phase_impl.hpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp index 5715c7f098..90c35dbaf8 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp @@ -302,6 +302,11 @@ void mkl2phase_symbolic( (void)transposeA; (void)transposeB; (void)verbose; + (void)a_xadj; + (void)b_xadj; + (void)c_xadj; + (void)a_adj; + (void)b_adj; #endif } else { @@ -351,9 +356,7 @@ void mkl2phase_apply( typename KernelHandle::HandlePersistentMemorySpace; using int_persistent_work_view_t = typename Kokkos::View; - using MyExecSpace = typename KernelHandle::HandleExecSpace; - using value_type = typename KernelHandle::nnz_scalar_t; - using idx = typename KernelHandle::nnz_lno_t; + using idx = typename KernelHandle::nnz_lno_t; if (std::is_same::value) { int *a_xadj = (int *)row_mapA.data(); @@ -639,6 +642,11 @@ void mkl2phase_apply( (void)transposeA; (void)transposeB; (void)verbose; + (void)a_xadj; + (void)b_xadj; + (void)c_xadj; + (void)a_adj; + (void)b_adj; #endif // __INTEL_MKL__ == 2018 && __INTEL_MKL_UPDATE__ >= 2 } else { (void)m; From 67a603d0b5808e63070b3568bb7ee67bbf85b06a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Thu, 17 Feb 2022 13:48:53 +0100 Subject: [PATCH 11/19] Gather MKL utilities within dedicated header --- perf_test/sparse/KokkosSparse_spadd.cpp | 30 ++----- src/common/KokkosKernels_SparseUtils_mkl.hpp | 87 +++++++++++++++++++ ...kosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp | 54 ++++-------- .../tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 38 ++------ .../impl/KokkosSparse_spgemm_mkl_impl.hpp | 38 ++++---- 5 files changed, 137 insertions(+), 110 deletions(-) create mode 100644 src/common/KokkosKernels_SparseUtils_mkl.hpp diff --git a/perf_test/sparse/KokkosSparse_spadd.cpp b/perf_test/sparse/KokkosSparse_spadd.cpp index 7b0bd42d2a..49034930e6 100644 --- a/perf_test/sparse/KokkosSparse_spadd.cpp +++ b/perf_test/sparse/KokkosSparse_spadd.cpp @@ -47,6 +47,7 @@ #include "KokkosKernels_Handle.hpp" #include "KokkosKernels_IOUtils.hpp" #include "KokkosKernels_SparseUtils_cusparse.hpp" +#include "KokkosKernels_SparseUtils_mkl.hpp" #include "KokkosSparse_spadd.hpp" #include "KokkosKernels_TestUtils.hpp" @@ -57,21 +58,6 @@ #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #include #include - -inline void spadd_mkl_internal_safe_call(sparse_status_t mklStatus, - const char* name, - const char* file = nullptr, - const int line = 0) { - if (SPARSE_STATUS_SUCCESS != mklStatus) { - std::ostringstream oss; - oss << "MKL call \"" << name << "\" encountered error at " << file << ":" - << line << '\n'; - Kokkos::abort(oss.str().c_str()); - } -} - -#define SPADD_MKL_SAFE_CALL(call) \ - spadd_mkl_internal_safe_call(call, #call, __FILE__, __LINE__) #endif #if defined(KOKKOSKERNELS_INST_DOUBLE) && \ @@ -259,11 +245,11 @@ void run_experiment(const Params& params) { #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL sparse_matrix_t Amkl, Bmkl, Cmkl; if (params.use_mkl) { - SPADD_MKL_SAFE_CALL(mkl_sparse_d_create_csr( + MKL_SAFE_CALL(mkl_sparse_d_create_csr( &Amkl, SPARSE_INDEX_BASE_ZERO, m, n, (int*)A.graph.row_map.data(), (int*)A.graph.row_map.data() + 1, A.graph.entries.data(), A.values.data())); - SPADD_MKL_SAFE_CALL(mkl_sparse_d_create_csr( + MKL_SAFE_CALL(mkl_sparse_d_create_csr( &Bmkl, SPARSE_INDEX_BASE_ZERO, m, n, (int*)B.graph.row_map.data(), (int*)B.graph.row_map.data() + 1, B.graph.entries.data(), B.values.data())); @@ -326,9 +312,9 @@ void run_experiment(const Params& params) { #endif } else if (params.use_mkl) { #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL - SPADD_MKL_SAFE_CALL(mkl_sparse_d_add(SPARSE_OPERATION_NON_TRANSPOSE, - Amkl, 1.0, Bmkl, &Cmkl)); - SPADD_MKL_SAFE_CALL(mkl_sparse_destroy(Cmkl)); + MKL_SAFE_CALL(mkl_sparse_d_add(SPARSE_OPERATION_NON_TRANSPOSE, Amkl, + 1.0, Bmkl, &Cmkl)); + MKL_SAFE_CALL(mkl_sparse_destroy(Cmkl)); #endif } else { spadd_numeric( @@ -351,8 +337,8 @@ void run_experiment(const Params& params) { #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL if (params.use_mkl) { - SPADD_MKL_SAFE_CALL(mkl_sparse_destroy(Amkl)); - SPADD_MKL_SAFE_CALL(mkl_sparse_destroy(Bmkl)); + MKL_SAFE_CALL(mkl_sparse_destroy(Amkl)); + MKL_SAFE_CALL(mkl_sparse_destroy(Bmkl)); } #endif diff --git a/src/common/KokkosKernels_SparseUtils_mkl.hpp b/src/common/KokkosKernels_SparseUtils_mkl.hpp new file mode 100644 index 0000000000..7085851092 --- /dev/null +++ b/src/common/KokkosKernels_SparseUtils_mkl.hpp @@ -0,0 +1,87 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef _KOKKOSKERNELS_SPARSEUTILS_MKL_HPP +#define _KOKKOSKERNELS_SPARSEUTILS_MKL_HPP + +#include "KokkosKernels_config.h" + +#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL + +#include + +namespace KokkosSparse { +namespace Impl { + +inline void mkl_internal_safe_call(sparse_status_t mkl_status, const char *name, + const char *file = nullptr, + const int line = 0) { + if (SPARSE_STATUS_SUCCESS != mkl_status) { + std::ostringstream oss; + oss << "MKL call \"" << name << "\" encountered error at " << file << ":" + << line << '\n'; + Kokkos::abort(oss.str().c_str()); + } +} + +#define MKL_SAFE_CALL(call) \ + KokkosSparse::Impl::mkl_internal_safe_call(call, #call, __FILE__, __LINE__) + +inline sparse_operation_t mode_kk_to_mkl(char mode_kk) { + switch (toupper(mode_kk)) { + case 'N': return SPARSE_OPERATION_NON_TRANSPOSE; + case 'T': return SPARSE_OPERATION_TRANSPOSE; + case 'H': return SPARSE_OPERATION_CONJUGATE_TRANSPOSE; + default:; + } + throw std::invalid_argument( + "Invalid mode for MKL (should be one of N, T, H)"); +} + +} // namespace Impl +} // namespace KokkosSparse + +#endif // KOKKOSKERNELS_ENABLE_TPL_MKL + +#endif // _KOKKOSKERNELS_SPARSEUTILS_MKL_HPP \ No newline at end of file diff --git a/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp b/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp index a6eec44449..d3c15e0267 100644 --- a/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp @@ -46,6 +46,7 @@ #define KOKKOSKERNELS_KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_DECL_HPP #include "KokkosKernels_Controls.hpp" +#include "KokkosKernels_SparseUtils_mkl.hpp" #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #include @@ -57,26 +58,7 @@ namespace Impl { #if (__INTEL_MKL__ > 2017) // MKL 2018 and above: use new interface: sparse_matrix_t and mkl_sparse_?_mv() -namespace BSR { -inline void mkl_safe_call(int errcode) { - if (errcode != SPARSE_STATUS_SUCCESS) - throw std::runtime_error("MKL returned non-success error code"); -} - -inline sparse_operation_t mode_kk_to_mkl(char mode_kk) { - switch (toupper(mode_kk)) { - case 'N': return SPARSE_OPERATION_NON_TRANSPOSE; - case 'T': return SPARSE_OPERATION_TRANSPOSE; - case 'H': return SPARSE_OPERATION_CONJUGATE_TRANSPOSE; - default:; - } - throw std::invalid_argument( - "Invalid mode for MKL (should be one of N, T, H)"); -} -} // namespace BSR - -using BSR::mkl_safe_call; -using BSR::mode_kk_to_mkl; +using KokkosSparse::Impl::mode_kk_to_mkl; inline matrix_descr getDescription() { matrix_descr A_descr; @@ -91,13 +73,13 @@ inline void spmv_block_impl_mkl(sparse_operation_t op, float alpha, float beta, const int* Aentries, const float* Avalues, const float* x, float* y) { sparse_matrix_t A_mkl; - mkl_safe_call(mkl_sparse_s_create_bsr( + MKL_SAFE_CALL(mkl_sparse_s_create_bsr( &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), const_cast(Avalues))); matrix_descr A_descr = getDescription(); - mkl_safe_call(mkl_sparse_s_mv(op, alpha, A_mkl, A_descr, x, beta, y)); + MKL_SAFE_CALL(mkl_sparse_s_mv(op, alpha, A_mkl, A_descr, x, beta, y)); } inline void spmv_block_impl_mkl(sparse_operation_t op, double alpha, @@ -106,13 +88,13 @@ inline void spmv_block_impl_mkl(sparse_operation_t op, double alpha, const double* Avalues, const double* x, double* y) { sparse_matrix_t A_mkl; - mkl_safe_call(mkl_sparse_d_create_bsr( + MKL_SAFE_CALL(mkl_sparse_d_create_bsr( &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), const_cast(Avalues))); matrix_descr A_descr = getDescription(); - mkl_safe_call(mkl_sparse_d_mv(op, alpha, A_mkl, A_descr, x, beta, y)); + MKL_SAFE_CALL(mkl_sparse_d_mv(op, alpha, A_mkl, A_descr, x, beta, y)); } inline void spmv_block_impl_mkl(sparse_operation_t op, @@ -123,7 +105,7 @@ inline void spmv_block_impl_mkl(sparse_operation_t op, const Kokkos::complex* x, Kokkos::complex* y) { sparse_matrix_t A_mkl; - mkl_safe_call(mkl_sparse_c_create_bsr( + MKL_SAFE_CALL(mkl_sparse_c_create_bsr( &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), (MKL_Complex8*)Avalues)); @@ -131,7 +113,7 @@ inline void spmv_block_impl_mkl(sparse_operation_t op, MKL_Complex8& alpha_mkl = reinterpret_cast(alpha); MKL_Complex8& beta_mkl = reinterpret_cast(beta); matrix_descr A_descr = getDescription(); - mkl_safe_call(mkl_sparse_c_mv(op, alpha_mkl, A_mkl, A_descr, + MKL_SAFE_CALL(mkl_sparse_c_mv(op, alpha_mkl, A_mkl, A_descr, reinterpret_cast(x), beta_mkl, reinterpret_cast(y))); } @@ -144,7 +126,7 @@ inline void spmv_block_impl_mkl(sparse_operation_t op, const Kokkos::complex* x, Kokkos::complex* y) { sparse_matrix_t A_mkl; - mkl_safe_call(mkl_sparse_z_create_bsr( + MKL_SAFE_CALL(mkl_sparse_z_create_bsr( &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), (MKL_Complex16*)Avalues)); @@ -152,7 +134,7 @@ inline void spmv_block_impl_mkl(sparse_operation_t op, matrix_descr A_descr = getDescription(); MKL_Complex16& alpha_mkl = reinterpret_cast(alpha); MKL_Complex16& beta_mkl = reinterpret_cast(beta); - mkl_safe_call(mkl_sparse_z_mv(op, alpha_mkl, A_mkl, A_descr, + MKL_SAFE_CALL(mkl_sparse_z_mv(op, alpha_mkl, A_mkl, A_descr, reinterpret_cast(x), beta_mkl, reinterpret_cast(y))); } @@ -163,13 +145,13 @@ inline void spm_mv_block_impl_mkl(sparse_operation_t op, float alpha, const float* Avalues, const float* x, int colx, int ldx, float* y, int ldy) { sparse_matrix_t A_mkl; - mkl_safe_call(mkl_sparse_s_create_bsr( + MKL_SAFE_CALL(mkl_sparse_s_create_bsr( &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), const_cast(Avalues))); matrix_descr A_descr = getDescription(); - mkl_safe_call(mkl_sparse_s_mm(op, alpha, A_mkl, A_descr, + MKL_SAFE_CALL(mkl_sparse_s_mm(op, alpha, A_mkl, A_descr, SPARSE_LAYOUT_ROW_MAJOR, x, colx, ldx, beta, y, ldy)); } @@ -180,13 +162,13 @@ inline void spm_mv_block_impl_mkl(sparse_operation_t op, double alpha, const double* Avalues, const double* x, int colx, int ldx, double* y, int ldy) { sparse_matrix_t A_mkl; - mkl_safe_call(mkl_sparse_d_create_bsr( + MKL_SAFE_CALL(mkl_sparse_d_create_bsr( &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), const_cast(Avalues))); matrix_descr A_descr = getDescription(); - mkl_safe_call(mkl_sparse_d_mm(op, alpha, A_mkl, A_descr, + MKL_SAFE_CALL(mkl_sparse_d_mm(op, alpha, A_mkl, A_descr, SPARSE_LAYOUT_ROW_MAJOR, x, colx, ldx, beta, y, ldy)); } @@ -200,7 +182,7 @@ inline void spm_mv_block_impl_mkl(sparse_operation_t op, const Kokkos::complex* x, int colx, int ldx, Kokkos::complex* y, int ldy) { sparse_matrix_t A_mkl; - mkl_safe_call(mkl_sparse_c_create_bsr( + MKL_SAFE_CALL(mkl_sparse_c_create_bsr( &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), (MKL_Complex8*)Avalues)); @@ -208,7 +190,7 @@ inline void spm_mv_block_impl_mkl(sparse_operation_t op, MKL_Complex8& alpha_mkl = reinterpret_cast(alpha); MKL_Complex8& beta_mkl = reinterpret_cast(beta); matrix_descr A_descr = getDescription(); - mkl_safe_call( + MKL_SAFE_CALL( mkl_sparse_c_mm(op, alpha_mkl, A_mkl, A_descr, SPARSE_LAYOUT_ROW_MAJOR, reinterpret_cast(x), colx, ldx, beta_mkl, reinterpret_cast(y), ldy)); @@ -221,7 +203,7 @@ inline void spm_mv_block_impl_mkl( const Kokkos::complex* x, int colx, int ldx, Kokkos::complex* y, int ldy) { sparse_matrix_t A_mkl; - mkl_safe_call(mkl_sparse_z_create_bsr( + MKL_SAFE_CALL(mkl_sparse_z_create_bsr( &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), (MKL_Complex16*)Avalues)); @@ -229,7 +211,7 @@ inline void spm_mv_block_impl_mkl( matrix_descr A_descr = getDescription(); MKL_Complex16& alpha_mkl = reinterpret_cast(alpha); MKL_Complex16& beta_mkl = reinterpret_cast(beta); - mkl_safe_call( + MKL_SAFE_CALL( mkl_sparse_z_mm(op, alpha_mkl, A_mkl, A_descr, SPARSE_LAYOUT_ROW_MAJOR, reinterpret_cast(x), colx, ldx, beta_mkl, reinterpret_cast(y), ldy)); diff --git a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index 17a72b2ad3..bacc749840 100644 --- a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -530,6 +530,7 @@ KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutRight, #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #include +#include "KokkosKernels_SparseUtils_mkl.hpp" namespace KokkosSparse { namespace Impl { @@ -537,27 +538,6 @@ namespace Impl { #if (__INTEL_MKL__ > 2017) // MKL 2018 and above: use new interface: sparse_matrix_t and mkl_sparse_?_mv() -// Note 12/03/21 - lbv: -// mkl_safe_call and mode_kk_to_mkl should -// be moved to some sparse or mkl utility -// header. It is likely that these will be -// reused for other kernels. -inline void mkl_safe_call(int errcode) { - if (errcode != SPARSE_STATUS_SUCCESS) - throw std::runtime_error("MKL returned non-success error code"); -} - -inline sparse_operation_t mode_kk_to_mkl(char mode_kk) { - switch (toupper(mode_kk)) { - case 'N': return SPARSE_OPERATION_NON_TRANSPOSE; - case 'T': return SPARSE_OPERATION_TRANSPOSE; - case 'H': return SPARSE_OPERATION_CONJUGATE_TRANSPOSE; - default:; - } - throw std::invalid_argument( - "Invalid mode for MKL (should be one of N, T, H)"); -} - inline void spmv_mkl(sparse_operation_t op, float alpha, float beta, int m, int n, const int* Arowptrs, const int* Aentries, const float* Avalues, const float* x, float* y) { @@ -566,11 +546,11 @@ inline void spmv_mkl(sparse_operation_t op, float alpha, float beta, int m, A_descr.type = SPARSE_MATRIX_TYPE_GENERAL; A_descr.mode = SPARSE_FILL_MODE_FULL; A_descr.diag = SPARSE_DIAG_NON_UNIT; - mkl_safe_call(mkl_sparse_s_create_csr( + MKL_SAFE_CALL(mkl_sparse_s_create_csr( &A_mkl, SPARSE_INDEX_BASE_ZERO, m, n, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), const_cast(Avalues))); - mkl_safe_call(mkl_sparse_s_mv(op, alpha, A_mkl, A_descr, x, beta, y)); + MKL_SAFE_CALL(mkl_sparse_s_mv(op, alpha, A_mkl, A_descr, x, beta, y)); } inline void spmv_mkl(sparse_operation_t op, double alpha, double beta, int m, @@ -581,11 +561,11 @@ inline void spmv_mkl(sparse_operation_t op, double alpha, double beta, int m, A_descr.type = SPARSE_MATRIX_TYPE_GENERAL; A_descr.mode = SPARSE_FILL_MODE_FULL; A_descr.diag = SPARSE_DIAG_NON_UNIT; - mkl_safe_call(mkl_sparse_d_create_csr( + MKL_SAFE_CALL(mkl_sparse_d_create_csr( &A_mkl, SPARSE_INDEX_BASE_ZERO, m, n, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), const_cast(Avalues))); - mkl_safe_call(mkl_sparse_d_mv(op, alpha, A_mkl, A_descr, x, beta, y)); + MKL_SAFE_CALL(mkl_sparse_d_mv(op, alpha, A_mkl, A_descr, x, beta, y)); } inline void spmv_mkl(sparse_operation_t op, Kokkos::complex alpha, @@ -599,13 +579,13 @@ inline void spmv_mkl(sparse_operation_t op, Kokkos::complex alpha, A_descr.type = SPARSE_MATRIX_TYPE_GENERAL; A_descr.mode = SPARSE_FILL_MODE_FULL; A_descr.diag = SPARSE_DIAG_NON_UNIT; - mkl_safe_call(mkl_sparse_c_create_csr( + MKL_SAFE_CALL(mkl_sparse_c_create_csr( &A_mkl, SPARSE_INDEX_BASE_ZERO, m, n, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), (MKL_Complex8*)Avalues)); MKL_Complex8& alpha_mkl = reinterpret_cast(alpha); MKL_Complex8& beta_mkl = reinterpret_cast(beta); - mkl_safe_call(mkl_sparse_c_mv(op, alpha_mkl, A_mkl, A_descr, + MKL_SAFE_CALL(mkl_sparse_c_mv(op, alpha_mkl, A_mkl, A_descr, reinterpret_cast(x), beta_mkl, reinterpret_cast(y))); } @@ -621,13 +601,13 @@ inline void spmv_mkl(sparse_operation_t op, Kokkos::complex alpha, A_descr.type = SPARSE_MATRIX_TYPE_GENERAL; A_descr.mode = SPARSE_FILL_MODE_FULL; A_descr.diag = SPARSE_DIAG_NON_UNIT; - mkl_safe_call(mkl_sparse_z_create_csr( + MKL_SAFE_CALL(mkl_sparse_z_create_csr( &A_mkl, SPARSE_INDEX_BASE_ZERO, m, n, const_cast(Arowptrs), const_cast(Arowptrs + 1), const_cast(Aentries), (MKL_Complex16*)Avalues)); MKL_Complex16& alpha_mkl = reinterpret_cast(alpha); MKL_Complex16& beta_mkl = reinterpret_cast(beta); - mkl_safe_call(mkl_sparse_z_mv(op, alpha_mkl, A_mkl, A_descr, + MKL_SAFE_CALL(mkl_sparse_z_mv(op, alpha_mkl, A_mkl, A_descr, reinterpret_cast(x), beta_mkl, reinterpret_cast(y))); } diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index d0b36c2a50..50bf840e58 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -45,6 +45,9 @@ #ifndef _KOKKOSSPGEMMMKL_HPP #define _KOKKOSSPGEMMMKL_HPP +#include "KokkosKernels_config.h" +#include "KokkosKernels_SparseUtils_mkl.hpp" + #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #include "mkl_spblas.h" #endif @@ -54,12 +57,6 @@ namespace Impl { #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL -inline void mkl_call(sparse_status_t result, const char *err_msg) { - if (SPARSE_STATUS_SUCCESS != result) { - throw std::runtime_error(err_msg); - } -} - template class MKLSparseMatrix { sparse_matrix_t mtx; @@ -72,8 +69,7 @@ class MKLSparseMatrix { sparse_operation_t operation, const MKLSparseMatrix &A, const MKLSparseMatrix &B) { sparse_matrix_t c; - mkl_call(mkl_sparse_spmm(operation, A.mtx, B.mtx, &c), - "mkl_sparse_spmm() failed!"); + MKL_SAFE_CALL(mkl_sparse_spmm(operation, A.mtx, B.mtx, &c)); return MKLSparseMatrix(c); } @@ -81,9 +77,7 @@ class MKLSparseMatrix { MKL_INT *&rows_start, MKL_INT *&columns, value_type *&values); - inline void destroy() { - mkl_call(mkl_sparse_destroy(mtx), "mkl_sparse_destroy() failed!"); - } + inline void destroy() { MKL_SAFE_CALL(mkl_sparse_destroy(mtx)); } private: inline MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {} @@ -94,9 +88,8 @@ inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj, float *values) { - mkl_call(mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, - xadj, xadj + 1, adj, values), - "mkl_sparse_s_create_csr() failed!"); + MKL_SAFE_CALL(mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, + cols, xadj, xadj + 1, adj, values)); } template <> @@ -104,9 +97,8 @@ inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj, double *values) { - mkl_call(mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, - xadj, xadj + 1, adj, values), - "mkl_sparse_d_create_csr() failed!"); + MKL_SAFE_CALL(mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, + cols, xadj, xadj + 1, adj, values)); } template <> @@ -117,9 +109,9 @@ inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, float *&values) { sparse_index_base_t indexing; MKL_INT *rows_end; - mkl_call(mkl_sparse_s_export_csr(mtx, &indexing, &num_rows, &num_cols, - &rows_start, &rows_end, &columns, &values), - "Failed to export matrix with mkl_sparse_s_export_csr()!"); + MKL_SAFE_CALL(mkl_sparse_s_export_csr(mtx, &indexing, &num_rows, &num_cols, + &rows_start, &rows_end, &columns, + &values)); if (SPARSE_INDEX_BASE_ZERO != indexing) { throw std::runtime_error( "Expected zero based indexing in exported MKL sparse matrix\n"); @@ -135,9 +127,9 @@ inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, double *&values) { sparse_index_base_t indexing; MKL_INT *rows_end; - mkl_call(mkl_sparse_d_export_csr(mtx, &indexing, &num_rows, &num_cols, - &rows_start, &rows_end, &columns, &values), - "Failed to export matrix with mkl_sparse_s_export_csr()!"); + MKL_SAFE_CALL(mkl_sparse_d_export_csr(mtx, &indexing, &num_rows, &num_cols, + &rows_start, &rows_end, &columns, + &values)); if (SPARSE_INDEX_BASE_ZERO != indexing) { throw std::runtime_error( "Expected zero based indexing in exported MKL sparse matrix\n"); From 05293435613e65e0a865e595b8b5c373424368eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Thu, 17 Feb 2022 14:51:27 +0100 Subject: [PATCH 12/19] Move MKLSparseMatrix to MKL utils header --- src/common/KokkosKernels_SparseUtils_mkl.hpp | 79 +++++++++++++++++ .../impl/KokkosSparse_spgemm_mkl_impl.hpp | 86 ++----------------- 2 files changed, 87 insertions(+), 78 deletions(-) diff --git a/src/common/KokkosKernels_SparseUtils_mkl.hpp b/src/common/KokkosKernels_SparseUtils_mkl.hpp index 7085851092..a2ab16fba9 100644 --- a/src/common/KokkosKernels_SparseUtils_mkl.hpp +++ b/src/common/KokkosKernels_SparseUtils_mkl.hpp @@ -79,6 +79,85 @@ inline sparse_operation_t mode_kk_to_mkl(char mode_kk) { "Invalid mode for MKL (should be one of N, T, H)"); } +// MKLSparseMatrix provides thin wrapper around MKL matrix handle +// (sparse_matrix_t) and encapsulates MKL call dispatches related to details +// like value_type, allowing simple client code in kernels. +template +class MKLSparseMatrix { + sparse_matrix_t mtx; + + public: + inline MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {} + + // Constructs MKL sparse matrix from KK sparse views (m rows x n cols) + inline MKLSparseMatrix(const MKL_INT num_rows, const MKL_INT num_cols, + MKL_INT *xadj, MKL_INT *adj, value_type *values); + + // Allows using MKLSparseMatrix directly in MKL calls + inline operator sparse_matrix_t() const { return mtx; } + + // Exports MKL sparse matrix contents into KK views + inline void export_data(MKL_INT &num_rows, MKL_INT &num_cols, + MKL_INT *&rows_start, MKL_INT *&columns, + value_type *&values); + + inline void destroy() { MKL_SAFE_CALL(mkl_sparse_destroy(mtx)); } +}; + +template <> +inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, + const MKL_INT cols, + MKL_INT *xadj, MKL_INT *adj, + float *values) { + MKL_SAFE_CALL(mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, + cols, xadj, xadj + 1, adj, values)); +} + +template <> +inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, + const MKL_INT cols, + MKL_INT *xadj, MKL_INT *adj, + double *values) { + MKL_SAFE_CALL(mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, + cols, xadj, xadj + 1, adj, values)); +} + +template <> +inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, + MKL_INT &num_cols, + MKL_INT *&rows_start, + MKL_INT *&columns, + float *&values) { + sparse_index_base_t indexing; + MKL_INT *rows_end; + MKL_SAFE_CALL(mkl_sparse_s_export_csr(mtx, &indexing, &num_rows, &num_cols, + &rows_start, &rows_end, &columns, + &values)); + if (SPARSE_INDEX_BASE_ZERO != indexing) { + throw std::runtime_error( + "Expected zero based indexing in exported MKL sparse matrix\n"); + return; + } +} + +template <> +inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, + MKL_INT &num_cols, + MKL_INT *&rows_start, + MKL_INT *&columns, + double *&values) { + sparse_index_base_t indexing; + MKL_INT *rows_end; + MKL_SAFE_CALL(mkl_sparse_d_export_csr(mtx, &indexing, &num_rows, &num_cols, + &rows_start, &rows_end, &columns, + &values)); + if (SPARSE_INDEX_BASE_ZERO != indexing) { + throw std::runtime_error( + "Expected zero based indexing in exported MKL sparse matrix\n"); + return; + } +} + } // namespace Impl } // namespace KokkosSparse diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index 50bf840e58..3044b2c576 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -57,84 +57,14 @@ namespace Impl { #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL +// multiplies two sparse MKL matrices and returns sparse MKL matrix template -class MKLSparseMatrix { - sparse_matrix_t mtx; - - public: - inline MKLSparseMatrix(const MKL_INT m, const MKL_INT n, MKL_INT *xadj, - MKL_INT *adj, value_type *values); - - inline static MKLSparseMatrix spmm( - sparse_operation_t operation, const MKLSparseMatrix &A, - const MKLSparseMatrix &B) { - sparse_matrix_t c; - MKL_SAFE_CALL(mkl_sparse_spmm(operation, A.mtx, B.mtx, &c)); - return MKLSparseMatrix(c); - } - - inline void export_data(MKL_INT &num_rows, MKL_INT &num_cols, - MKL_INT *&rows_start, MKL_INT *&columns, - value_type *&values); - - inline void destroy() { MKL_SAFE_CALL(mkl_sparse_destroy(mtx)); } - - private: - inline MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {} -}; - -template <> -inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, - const MKL_INT cols, - MKL_INT *xadj, MKL_INT *adj, - float *values) { - MKL_SAFE_CALL(mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, - cols, xadj, xadj + 1, adj, values)); -} - -template <> -inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, - const MKL_INT cols, - MKL_INT *xadj, MKL_INT *adj, - double *values) { - MKL_SAFE_CALL(mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, - cols, xadj, xadj + 1, adj, values)); -} - -template <> -inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, - MKL_INT &num_cols, - MKL_INT *&rows_start, - MKL_INT *&columns, - float *&values) { - sparse_index_base_t indexing; - MKL_INT *rows_end; - MKL_SAFE_CALL(mkl_sparse_s_export_csr(mtx, &indexing, &num_rows, &num_cols, - &rows_start, &rows_end, &columns, - &values)); - if (SPARSE_INDEX_BASE_ZERO != indexing) { - throw std::runtime_error( - "Expected zero based indexing in exported MKL sparse matrix\n"); - return; - } -} - -template <> -inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, - MKL_INT &num_cols, - MKL_INT *&rows_start, - MKL_INT *&columns, - double *&values) { - sparse_index_base_t indexing; - MKL_INT *rows_end; - MKL_SAFE_CALL(mkl_sparse_d_export_csr(mtx, &indexing, &num_rows, &num_cols, - &rows_start, &rows_end, &columns, - &values)); - if (SPARSE_INDEX_BASE_ZERO != indexing) { - throw std::runtime_error( - "Expected zero based indexing in exported MKL sparse matrix\n"); - return; - } +inline static MKLSparseMatrix mkl_spmm( + sparse_operation_t operation, const MKLSparseMatrix &A, + const MKLSparseMatrix &B) { + sparse_matrix_t C; + MKL_SAFE_CALL(mkl_sparse_spmm(operation, A, B, &C)); + return MKLSparseMatrix(C); } template ::value) From 3339c8deae2f350c4a71ef831508d93e72cbf23c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Thu, 17 Feb 2022 14:56:49 +0100 Subject: [PATCH 13/19] Rename "apply" into "spmm" --- .../impl/KokkosSparse_spgemm_mkl_impl.hpp | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index 3044b2c576..43b2b5081b 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -72,7 +72,7 @@ template -class MKLApply { +class MKL_SPMM { public: typedef typename KernelHandle::nnz_lno_t nnz_lno_t; typedef typename KernelHandle::size_type size_type; @@ -120,8 +120,8 @@ class MKLApply { Kokkos::ViewAllocateWithoutInitializing("tmp_valuesB"), entriesB.extent(0)); - apply(handle, m, n, k, row_mapA, entriesA, tmp_valsA, transposeA, row_mapB, - entriesB, tmp_valsB, transposeB, verbose, export_rowmap); + spmm(handle, m, n, k, row_mapA, entriesA, tmp_valsA, transposeA, row_mapB, + entriesB, tmp_valsB, transposeB, verbose, export_rowmap); if (verbose) std::cout << "MKL symbolic time:" << timer.seconds() << std::endl; @@ -150,8 +150,8 @@ class MKLApply { } }; - apply(handle, m, n, k, row_mapA, entriesA, valuesA, transposeA, row_mapB, - entriesB, valuesB, transposeB, verbose, export_values); + spmm(handle, m, n, k, row_mapA, entriesA, valuesA, transposeA, row_mapB, + entriesB, valuesB, transposeB, verbose, export_values); if (verbose) std::cout << "MKL numeric time:" << timer.seconds() << std::endl; @@ -162,13 +162,13 @@ class MKLApply { private: template - static void apply(KernelHandle * /* handle */, nnz_lno_t m, nnz_lno_t n, - nnz_lno_t k, a_rowmap_view_type row_mapA, - a_index_view_type entriesA, a_values_view_type valuesA, + static void spmm(KernelHandle * /* handle */, nnz_lno_t m, nnz_lno_t n, + nnz_lno_t k, a_rowmap_view_type row_mapA, + a_index_view_type entriesA, a_values_view_type valuesA, - bool transposeA, b_rowmap_view_type row_mapB, - b_index_view_type entriesB, b_values_view_type valuesB, - bool transposeB, bool verbose, const CB &callback) { + bool transposeA, b_rowmap_view_type row_mapB, + b_index_view_type entriesB, b_values_view_type valuesB, + bool transposeB, bool verbose, const CB &callback) { if (!std::is_same::value) { throw std::runtime_error("MKL requires local ordinals to be integer.\n"); } @@ -303,7 +303,7 @@ void mkl_symbolic(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k, #else using values_type = typename KernelHandle::scalar_temp_work_view_t; using c_index_type = b_index_type; - using mkl = MKLApply; mkl::mkl_symbolic(handle, m, n, k, row_mapA, entriesA, transposeA, row_mapB, @@ -341,7 +341,7 @@ void mkl_apply(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k, (void)valuesC; (void)verbose; #else - using mkl = MKLApply; mkl::mkl_numeric(handle, m, n, k, row_mapA, entriesA, valuesA, transposeA, From 8c8cbdf8b7cf6e508b7cd5f3587ff61f01e847de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Thu, 17 Feb 2022 14:59:14 +0100 Subject: [PATCH 14/19] Guard whole file with ENABLE_TPL_MKL --- .../impl/KokkosSparse_spgemm_mkl_impl.hpp | 43 +------------------ .../impl/KokkosSparse_spgemm_numeric_spec.hpp | 4 ++ .../KokkosSparse_spgemm_symbolic_spec.hpp | 4 ++ 3 files changed, 10 insertions(+), 41 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index 43b2b5081b..6c95e648e9 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -50,13 +50,10 @@ #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #include "mkl_spblas.h" -#endif namespace KokkosSparse { namespace Impl { -#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL - // multiplies two sparse MKL matrices and returns sparse MKL matrix template inline static MKLSparseMatrix mkl_spmm( @@ -276,7 +273,6 @@ class MKL_SPMM { return view_type(data, num_elems); } }; -#endif // KOKKOSKERNELS_ENABLE_TPL_MKL template ; mkl::mkl_symbolic(handle, m, n, k, row_mapA, entriesA, transposeA, row_mapB, entriesB, transposeB, row_mapC, verbose); -#endif } template ; mkl::mkl_numeric(handle, m, n, k, row_mapA, entriesA, valuesA, transposeA, row_mapB, entriesB, valuesB, transposeB, row_mapC, entriesC, valuesC, verbose); -#endif } } // namespace Impl } // namespace KokkosSparse -#endif +#endif // KOKKOSKERNELS_ENABLE_TPL_MKL +#endif // _KOKKOSSPGEMMMKL_HPP diff --git a/src/sparse/impl/KokkosSparse_spgemm_numeric_spec.hpp b/src/sparse/impl/KokkosSparse_spgemm_numeric_spec.hpp index beb969fc77..68e5e82bdb 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_numeric_spec.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_numeric_spec.hpp @@ -245,9 +245,13 @@ struct SPGEMM_NUMERIC< transposeB, row_mapC, entriesC, valuesC); break; case SPGEMM_MKL: +#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL mkl_apply(sh, m, n, k, row_mapA, entriesA, valuesA, transposeA, row_mapB, entriesB, valuesB, transposeB, row_mapC, entriesC, valuesC, handle->get_verbose()); +#else + throw std::runtime_error("MKL was not enabled in this build!"); +#endif break; case SPGEMM_MKL2PHASE: mkl2phase_apply(sh, m, n, k, row_mapA, entriesA, valuesA, transposeA, diff --git a/src/sparse/impl/KokkosSparse_spgemm_symbolic_spec.hpp b/src/sparse/impl/KokkosSparse_spgemm_symbolic_spec.hpp index 181984ebe9..d83ae6767c 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_symbolic_spec.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_symbolic_spec.hpp @@ -179,9 +179,13 @@ struct SPGEMM_SYMBOLICget_verbose()); break; +#else + throw std::runtime_error("MKL was not enabled in this build!"); +#endif } sh->set_call_symbolic(); } From 70bb051a5a42e3bf5395c60363bfba2cddc2f64f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Fri, 18 Feb 2022 13:39:17 +0100 Subject: [PATCH 15/19] Add explicit compilation error about scalar types not supported by MKL --- src/common/KokkosKernels_SparseUtils_mkl.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/common/KokkosKernels_SparseUtils_mkl.hpp b/src/common/KokkosKernels_SparseUtils_mkl.hpp index a2ab16fba9..780c75ea51 100644 --- a/src/common/KokkosKernels_SparseUtils_mkl.hpp +++ b/src/common/KokkosKernels_SparseUtils_mkl.hpp @@ -79,6 +79,14 @@ inline sparse_operation_t mode_kk_to_mkl(char mode_kk) { "Invalid mode for MKL (should be one of N, T, H)"); } +template +struct mkl_is_supported_value_type : std::false_type {}; + +template <> +struct mkl_is_supported_value_type : std::true_type {}; +template <> +struct mkl_is_supported_value_type : std::true_type {}; + // MKLSparseMatrix provides thin wrapper around MKL matrix handle // (sparse_matrix_t) and encapsulates MKL call dispatches related to details // like value_type, allowing simple client code in kernels. @@ -86,6 +94,10 @@ template class MKLSparseMatrix { sparse_matrix_t mtx; + static_assert(mkl_is_supported_value_type::value, + "Scalar type used in MKLSparseMatrix is NOT " + "supported by MKL"); + public: inline MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {} From 650cd176926ab306b586d5169114a398be65e1d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Fri, 18 Feb 2022 13:53:50 +0100 Subject: [PATCH 16/19] Add Kokkos::complex support to MKL sparse matrix --- src/common/KokkosKernels_SparseUtils_mkl.hpp | 54 ++++++++++++++++++++ unit_test/sparse/Test_Sparse_spgemm.hpp | 9 ++-- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/src/common/KokkosKernels_SparseUtils_mkl.hpp b/src/common/KokkosKernels_SparseUtils_mkl.hpp index 780c75ea51..3bd1deb96a 100644 --- a/src/common/KokkosKernels_SparseUtils_mkl.hpp +++ b/src/common/KokkosKernels_SparseUtils_mkl.hpp @@ -86,6 +86,10 @@ template <> struct mkl_is_supported_value_type : std::true_type {}; template <> struct mkl_is_supported_value_type : std::true_type {}; +template <> +struct mkl_is_supported_value_type> : std::true_type {}; +template <> +struct mkl_is_supported_value_type> : std::true_type {}; // MKLSparseMatrix provides thin wrapper around MKL matrix handle // (sparse_matrix_t) and encapsulates MKL call dispatches related to details @@ -134,6 +138,24 @@ inline MKLSparseMatrix::MKLSparseMatrix(const MKL_INT rows, cols, xadj, xadj + 1, adj, values)); } +template <> +inline MKLSparseMatrix>::MKLSparseMatrix( + const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj, + Kokkos::complex *values) { + MKL_SAFE_CALL(mkl_sparse_c_create_csr( + &mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj, + reinterpret_cast(values))); +} + +template <> +inline MKLSparseMatrix>::MKLSparseMatrix( + const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj, + Kokkos::complex *values) { + MKL_SAFE_CALL(mkl_sparse_z_create_csr( + &mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj, + reinterpret_cast(values))); +} + template <> inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, MKL_INT &num_cols, @@ -170,6 +192,38 @@ inline void MKLSparseMatrix::export_data(MKL_INT &num_rows, } } +template <> +inline void MKLSparseMatrix>::export_data( + MKL_INT &num_rows, MKL_INT &num_cols, MKL_INT *&rows_start, + MKL_INT *&columns, Kokkos::complex *&values) { + sparse_index_base_t indexing; + MKL_INT *rows_end; + MKL_SAFE_CALL(mkl_sparse_c_export_csr( + mtx, &indexing, &num_rows, &num_cols, &rows_start, &rows_end, &columns, + reinterpret_cast(&values))); + if (SPARSE_INDEX_BASE_ZERO != indexing) { + throw std::runtime_error( + "Expected zero based indexing in exported MKL sparse matrix\n"); + return; + } +} + +template <> +inline void MKLSparseMatrix>::export_data( + MKL_INT &num_rows, MKL_INT &num_cols, MKL_INT *&rows_start, + MKL_INT *&columns, Kokkos::complex *&values) { + sparse_index_base_t indexing; + MKL_INT *rows_end; + MKL_SAFE_CALL(mkl_sparse_z_export_csr( + mtx, &indexing, &num_rows, &num_cols, &rows_start, &rows_end, &columns, + reinterpret_cast(&values))); + if (SPARSE_INDEX_BASE_ZERO != indexing) { + throw std::runtime_error( + "Expected zero based indexing in exported MKL sparse matrix\n"); + return; + } +} + } // namespace Impl } // namespace KokkosSparse diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp index cb3d04b019..53158f85ed 100644 --- a/unit_test/sparse/Test_Sparse_spgemm.hpp +++ b/unit_test/sparse/Test_Sparse_spgemm.hpp @@ -299,13 +299,12 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, #endif break; - case SPGEMM_MKL: - algo = "SPGEMM_MKL"; - // MKL requires scalar to be either float or double - if (!(std::is_same::value || - std::is_same::value)) { + case SPGEMM_MKL: algo = "SPGEMM_MKL"; +#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL + if (!KokkosSparse::Impl::mkl_is_supported_value_type::value) { is_expected_to_fail = true; } +#endif // mkl requires local ordinals to be int. if (!(std::is_same::value)) { is_expected_to_fail = true; From 35a4621faf80cf5534cd66a96ed505860fa44d5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Fri, 18 Feb 2022 16:04:07 +0100 Subject: [PATCH 17/19] Adjust unit test tolerance for MKL float --- unit_test/sparse/Test_Sparse_spgemm.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp index 53158f85ed..ab84b7b0a5 100644 --- a/unit_test/sparse/Test_Sparse_spgemm.hpp +++ b/unit_test/sparse/Test_Sparse_spgemm.hpp @@ -229,7 +229,7 @@ bool is_same_matrix(crsMat_t output_mat_actual, crsMat_t output_mat_reference) { typedef typename Kokkos::Details::ArithTraits< typename scalar_view_t::non_const_value_type>::mag_type eps_type; - eps_type eps = std::is_same::value ? 2 * 1e-3 : 1e-7; + eps_type eps = std::is_same::value ? 3.7e-3 : 1e-7; is_identical = KokkosKernels::Impl::kk_is_relatively_identical_view< scalar_view_t, scalar_view_t, eps_type, typename device::execution_space>( From a972c7523998cf1d59d204361a8ea1bbfd7713d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Fri, 18 Feb 2022 16:06:09 +0100 Subject: [PATCH 18/19] Fix conversion compiler errors --- src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index 6c95e648e9..36784731d0 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -214,8 +214,8 @@ class MKL_SPMM { auto h_valsB = create_mirror(valuesB); auto h_entriesA = create_mirror(entriesA); auto h_entriesB = create_mirror(entriesB); - const int *a_adj = h_entriesA.data(); - const int *b_adj = h_entriesB.data(); + const int *a_adj = reinterpret_cast(h_entriesA.data()); + const int *b_adj = reinterpret_cast(h_entriesB.data()); const value_type *a_ew = h_valsA.data(); const value_type *b_ew = h_valsB.data(); From 9d4de666b81b6721142397f7b27ca9aead795dd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Fri, 18 Feb 2022 17:51:05 +0100 Subject: [PATCH 19/19] Fix expected crashes for ordinal_type!=int in unit test --- src/sparse/KokkosSparse_spgemm_numeric.hpp | 4 +++- unit_test/sparse/Test_Sparse_spgemm.hpp | 9 ++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/sparse/KokkosSparse_spgemm_numeric.hpp b/src/sparse/KokkosSparse_spgemm_numeric.hpp index 60a54f5b8b..5bc791397c 100644 --- a/src/sparse/KokkosSparse_spgemm_numeric.hpp +++ b/src/sparse/KokkosSparse_spgemm_numeric.hpp @@ -139,7 +139,9 @@ void spgemm_numeric(KernelHandle *handle, "If you need this case please let kokkos-kernels developers know.\n"); } - if (m < 1 || n < 1 || k < 1) return; + if (m < 1 || n < 1 || k < 1 || entriesA.extent(0) < 1 || + entriesB.extent(0) < 1) + return; typedef typename KernelHandle::const_size_type c_size_t; typedef typename KernelHandle::const_nnz_lno_t c_lno_t; diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp index ab84b7b0a5..47b06b716a 100644 --- a/unit_test/sparse/Test_Sparse_spgemm.hpp +++ b/unit_test/sparse/Test_Sparse_spgemm.hpp @@ -269,6 +269,8 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, crsMat_t B = KokkosKernels::Impl::kk_generate_sparse_matrix( k, n, nnz, row_size_variance, bandwidth); + const bool is_empy_case = m < 1 || n < 1 || k < 1 || nnz < 1; + crsMat_t output_mat2; if (oldInterface) run_spgemm_old_interface(A, B, SPGEMM_DEBUG, output_mat2); @@ -305,8 +307,9 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, is_expected_to_fail = true; } #endif - // mkl requires local ordinals to be int. - if (!(std::is_same::value)) { + // MKL requires local ordinals to be int. + // Note: empty-array special case will NOT fail on this. + if (!std::is_same::value && !is_empy_case) { is_expected_to_fail = true; } // if size_type is larger than int, mkl casts it to int. @@ -345,7 +348,7 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, EXPECT_TRUE(is_expected_to_fail) << algo << ": " << e.what(); failed = true; } - EXPECT_TRUE((failed == is_expected_to_fail)); + EXPECT_EQ(is_expected_to_fail, failed); // double spgemm_time = timer1.seconds();