From 7129f3b4242ddab0820804b5061421aa5f56f235 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Fri, 7 Jan 2022 14:19:05 +0100
Subject: [PATCH 01/19] Refactor MKL implementation of SpGEMM

---
 .../impl/KokkosSparse_spgemm_mkl_impl.hpp     | 863 ++++++------------
 1 file changed, 283 insertions(+), 580 deletions(-)
diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index 8eb0bd3930..1b22906ea3 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -47,634 +47,337 @@
 
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
 #include "mkl_spblas.h"
-#include "mkl.h"
 #endif
 
-#include "KokkosKernels_Utils.hpp"
-#include <Kokkos_Concepts.hpp>
-
 namespace KokkosSparse {
-
 namespace Impl {
 
-template <typename KernelHandle, typename in_row_index_view_type,
-          typename in_nonzero_index_view_type, typename bin_row_index_view_type,
-          typename bin_nonzero_index_view_type,
-          typename cin_row_index_view_type>
-void mkl_symbolic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m,
-                  typename KernelHandle::nnz_lno_t n,
-                  typename KernelHandle::nnz_lno_t k,
-                  in_row_index_view_type row_mapA,
-                  in_nonzero_index_view_type entriesA,
-
-                  bool transposeA, bin_row_index_view_type row_mapB,
-                  bin_nonzero_index_view_type entriesB, bool transposeB,
-                  cin_row_index_view_type row_mapC, bool verbose = false) {
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
 
-  typedef typename KernelHandle::nnz_lno_t idx;
-  typedef typename KernelHandle::size_type size_type;
-
-  typedef typename KernelHandle::HandleTempMemorySpace HandleTempMemorySpace;
-  typedef typename Kokkos::View<int *, HandleTempMemorySpace>
-      int_temp_work_view_t;
-
-  typedef typename KernelHandle::nnz_scalar_t value_type;
-
-  typedef typename KernelHandle::HandleExecSpace MyExecSpace;
-  /*
-    if (!(
-        (Kokkos::SpaceAccessibility<typename
-    Kokkos::HostSpace::execution_space, typename
-    device1::memory_space>::accessible) &&
-        (Kokkos::SpaceAccessibility<typename
-    Kokkos::HostSpace::execution_space, typename
-    device2::memory_space>::accessible) &&
-        (Kokkos::SpaceAccessibility<typename
-    Kokkos::HostSpace::execution_space, typename
-    device3::memory_space>::accessible) )
-        ){
-      throw std::runtime_error ("MEMORY IS NOT ALLOCATED IN HOST DEVICE for
-    MKL\n"); return;
-    }
-  */
-  if (std::is_same<idx, int>::value) {
-    int *a_xadj = NULL;
-    int *b_xadj = NULL;
-    int_temp_work_view_t a_xadj_v, b_xadj_v;
-
-    if (std::is_same<size_type, int>::value) {
-      a_xadj = (int *)row_mapA.data();
-      b_xadj = (int *)row_mapB.data();
-    } else {
-      // TODO test this case.
-
-      Kokkos::Timer copy_time;
-      const int max_integer = 2147483647;
-      if (entriesB.extent(0) > max_integer ||
-          entriesA.extent(0) > max_integer) {
-        throw std::runtime_error(
-            "MKL requires integer values for size type for SPGEMM. Copying to "
-            "integer will cause overflow.\n");
-        return;
-      }
-      a_xadj_v = int_temp_work_view_t("tmpa", m + 1);
-      a_xadj   = (int *)a_xadj_v.data();
-      b_xadj_v = int_temp_work_view_t("tmpb", n + 1);
-      b_xadj   = (int *)b_xadj_v.data();
-
-      KokkosKernels::Impl::copy_vector<in_row_index_view_type,
-                                       int_temp_work_view_t, MyExecSpace>(
-          m + 1, row_mapA, a_xadj_v);
-
-      KokkosKernels::Impl::copy_vector<bin_row_index_view_type,
-                                       int_temp_work_view_t, MyExecSpace>(
-          m + 1, row_mapB, b_xadj_v);
-
-      if (verbose)
-        std::cout << "MKL COPY size type to int TIME:" << copy_time.seconds()
-                  << std::endl;
-    }
-
-    int *a_adj = (int *)entriesA.data();
-    int *b_adj = (int *)entriesB.data();
-
-    std::vector<value_type> tmp_values(
-        KOKKOSKERNELS_MACRO_MAX(entriesB.extent(0), entriesA.extent(0)));
-    value_type *ptmp_values = &(tmp_values[0]);
-    value_type *a_ew        = ptmp_values;
-    value_type *b_ew        = ptmp_values;
-
-    sparse_matrix_t A;
-    sparse_matrix_t B;
-    sparse_matrix_t C;
-
-    if (std::is_same<value_type, float>::value) {
-      if (SPARSE_STATUS_SUCCESS !=
-          mkl_sparse_s_create_csr(&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj,
-                                  a_xadj + 1, a_adj, (float *)a_ew)) {
-        throw std::runtime_error(
-            "CANNOT CREATE mkl_sparse_s_create_csr A matrix\n");
-        return;
-      }
-
-      if (SPARSE_STATUS_SUCCESS !=
-          mkl_sparse_s_create_csr(&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj,
-                                  b_xadj + 1, b_adj, (float *)b_ew)) {
-        throw std::runtime_error(
-            "CANNOT CREATE mkl_sparse_s_create_csr B matrix\n");
-        return;
-      }
-
-      sparse_operation_t operation;
-      if (transposeA && transposeB) {
-        operation = SPARSE_OPERATION_TRANSPOSE;
-      } else if (!(transposeA || transposeB)) {
-        operation = SPARSE_OPERATION_NON_TRANSPOSE;
-      } else {
-        throw std::runtime_error(
-            "MKL either transpose both matrices, or none for SPGEMM\n");
-        return;
-      }
-
-      Kokkos::Timer timer1;
-      bool success =
-          SPARSE_STATUS_SUCCESS != mkl_sparse_spmm(operation, A, B, &C);
-      if (verbose)
-        std::cout << "Actual FLOAT MKL SPMM Time in symbolic:"
-                  << timer1.seconds() << std::endl;
-
-      if (success) {
-        throw std::runtime_error(
-            "ERROR at SPGEMM multiplication in mkl_sparse_spmm\n");
-
-        return;
-      } else {
-        sparse_index_base_t c_indexing;
-        MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns;
-        float *values;
-
-        if (SPARSE_STATUS_SUCCESS !=
-            mkl_sparse_s_export_csr(C, &c_indexing, &c_rows, &c_cols,
-                                    &rows_start, &rows_end, &columns,
-                                    &values)) {
-          throw std::runtime_error(
-              "ERROR at exporting result matrix in mkl_sparse_spmm\n");
-          return;
-        }
-
-        if (SPARSE_INDEX_BASE_ZERO != c_indexing) {
-          throw std::runtime_error("C is not zero based indexed\n");
-          return;
-        }
-
-        KokkosKernels::Impl::copy_vector<
-            MKL_INT *, typename cin_row_index_view_type::non_const_type,
-            MyExecSpace>(m, rows_start, row_mapC);
-        idx nnz = row_mapC(m) = rows_end[m - 1];
-        handle->set_c_nnz(nnz);
-      }
-
-      if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(A)) {
-        throw std::runtime_error("Error at mkl_sparse_destroy A\n");
-        return;
-      }
-
-      if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(B)) {
-        throw std::runtime_error("Error at mkl_sparse_destroy B\n");
-        return;
-      }
-      if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(C)) {
-        throw std::runtime_error("Error at mkl_sparse_destroy C\n");
-        return;
-      }
-    } else if (std::is_same<value_type, double>::value) {
-      /*
-      std::cout << "create a" << std::endl;
-      std::cout << "m:" << m << " n:" << n << std::endl;
-      std::cout << "a_xadj[0]:" << a_xadj[0] << " a_xadj[m]:" << a_xadj[m] <<
-      std::endl; std::cout << "a_adj[a_xadj[m] - 1]:" << a_adj[a_xadj[m] - 1] <<
-      " a_ew[a_xadj[m] - 1]:" << a_ew[a_xadj[m] - 1] << std::endl;
-      */
-      if (SPARSE_STATUS_SUCCESS !=
-          mkl_sparse_d_create_csr(&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj,
-                                  a_xadj + 1, a_adj, (double *)a_ew)) {
-        throw std::runtime_error(
-            "CANNOT CREATE mkl_sparse_s_create_csr A matrix\n");
-        return;
-      }
-
-      // std::cout << "create b" << std::endl;
-      if (SPARSE_STATUS_SUCCESS !=
-          mkl_sparse_d_create_csr(&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj,
-                                  b_xadj + 1, b_adj, (double *)b_ew)) {
-        throw std::runtime_error(
-            "CANNOT CREATE mkl_sparse_s_create_csr B matrix\n");
-        return;
-      }
+// Turns any non-success MKL sparse status into std::runtime_error(err_msg).
+KOKKOS_INLINE_FUNCTION
+void mkl_call(sparse_status_t result, const char *err_msg) {
+  if (result == SPARSE_STATUS_SUCCESS) return;
+  throw std::runtime_error(err_msg);
+}
 
-      sparse_operation_t operation;
-      if (transposeA && transposeB) {
-        operation = SPARSE_OPERATION_TRANSPOSE;
-      } else if (!(transposeA || transposeB)) {
-        operation = SPARSE_OPERATION_NON_TRANSPOSE;
-      } else {
-        throw std::runtime_error(
-            "MKL either transpose both matrices, or none for SPGEMM\n");
-        return;
-      }
+template <typename value_type>
+class MKLSparseMatrix {
+  sparse_matrix_t mtx;
+
+ public:
+  KOKKOS_INLINE_FUNCTION
+  MKLSparseMatrix(const MKL_INT m, const MKL_INT n, MKL_INT *xadj, MKL_INT *adj,
+                  value_type *values);
+
+  KOKKOS_INLINE_FUNCTION  // forwards to mkl_sparse_spmm and wraps the result
+  static MKLSparseMatrix<value_type> spmm(
+      sparse_operation_t operation, const MKLSparseMatrix<value_type> &A,
+      const MKLSparseMatrix<value_type> &B) {
+    sparse_matrix_t c;  // output handle filled in by mkl_sparse_spmm
+    mkl_call(mkl_sparse_spmm(operation, A.mtx, B.mtx, &c),
+             "mkl_sparse_spmm() failed!");  // throws std::runtime_error
+    return MKLSparseMatrix<value_type>(c);  // released only via destroy()
+  }
 
-      Kokkos::Timer timer1;
-      bool success =
-          SPARSE_STATUS_SUCCESS != mkl_sparse_spmm(operation, A, B, &C);
-      if (verbose)
-        std::cout << "Actual DOUBLE MKL SPMM Time Without Free:"
-                  << timer1.seconds() << std::endl;
-      mkl_free_buffers();
-      if (verbose)
-        std::cout << "Actual DOUBLE MKL SPMM Time:" << timer1.seconds()
-                  << std::endl;
+  KOKKOS_INLINE_FUNCTION
+  void get(MKL_INT &rows, MKL_INT &cols, MKL_INT *&rows_start,
+           MKL_INT *&columns, value_type *&values);
 
-      if (success) {
-        throw std::runtime_error(
-            "ERROR at SPGEMM multiplication in mkl_sparse_spmm\n");
-        return;
-      } else {
-        sparse_index_base_t c_indexing;
-        MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns;
-        double *values;
-
-        if (SPARSE_STATUS_SUCCESS !=
-            mkl_sparse_d_export_csr(C, &c_indexing, &c_rows, &c_cols,
-                                    &rows_start, &rows_end, &columns,
-                                    &values)) {
-          throw std::runtime_error(
-              "ERROR at exporting result matrix in mkl_sparse_spmm\n");
-          return;
-        }
-
-        if (SPARSE_INDEX_BASE_ZERO != c_indexing) {
-          throw std::runtime_error("C is not zero based indexed\n");
-          return;
-        }
-        if (handle->mkl_keep_output) {
-          Kokkos::Timer copy_time;
-
-          KokkosKernels::Impl::copy_vector<
-              MKL_INT *, typename cin_row_index_view_type::non_const_type,
-              MyExecSpace>(m, rows_start, row_mapC);
-          idx nnz = row_mapC(m) = rows_end[m - 1];
-          handle->set_c_nnz(nnz);
-
-          double copy_time_d = copy_time.seconds();
-          if (verbose) std::cout << "MKL COPYTIME:" << copy_time_d << std::endl;
-        }
-      }
+  KOKKOS_INLINE_FUNCTION  // explicit release of the wrapped MKL handle
+  void destroy() {
+    mkl_call(mkl_sparse_destroy(mtx), "mkl_sparse_destroy() failed!");
+  }
 
-      if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(A)) {
-        throw std::runtime_error("Error at mkl_sparse_destroy A\n");
-        return;
-      }
+ private:
+  KOKKOS_INLINE_FUNCTION
+  MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {}
+};
+
+template <>  // float specialization: uses the single-precision MKL routine
+KOKKOS_INLINE_FUNCTION MKLSparseMatrix<float>::MKLSparseMatrix(
+    const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj,
+    float *values) {
+  mkl_call(mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols,
+                                   xadj, xadj + 1, adj, values),
+           "mkl_sparse_s_create_csr() failed!");  // throws on MKL failure
+}
 
-      if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(B)) {
-        throw std::runtime_error("Error at mkl_sparse_destroy B\n");
-        return;
-      }
-      if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(C)) {
-        throw std::runtime_error("Error at mkl_sparse_destroy C\n");
-        return;
-      }
+template <>  // double specialization: uses the double-precision MKL routine
+KOKKOS_INLINE_FUNCTION MKLSparseMatrix<double>::MKLSparseMatrix(
+    const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj,
+    double *values) {
+  mkl_call(mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols,
+                                   xadj, xadj + 1, adj, values),
+           "mkl_sparse_d_create_csr() failed!");  // throws on MKL failure
+}
 
-    } else {
-      throw std::runtime_error(
-          "MKL requires float or double values. Complex values are not "
-          "implemented yet.\n");
-      return;
-    }
-  } else {
-    throw std::runtime_error("MKL requires local ordinals to be integer.\n");
+template <>
+KOKKOS_INLINE_FUNCTION void MKLSparseMatrix<float>::get(MKL_INT &rows,
+                                                        MKL_INT &cols,
+                                                        MKL_INT *&rows_start,
+                                                        MKL_INT *&columns,
+                                                        float *&values) {
+  sparse_index_base_t indexing;
+  MKL_INT *rows_end;
+  mkl_call(mkl_sparse_s_export_csr(mtx, &indexing, &rows, &cols, &rows_start,
+                                   &rows_end, &columns, &values),
+           "Failed to export matrix with mkl_sparse_s_export_csr()!");
+  if (SPARSE_INDEX_BASE_ZERO != indexing) {
+    throw std::runtime_error(
+        "Expected zero based indexing in exported MKL sparse matrix\n");
     return;
   }
-#else
-  (void)handle;
-  (void)m;
-  (void)n;
-  (void)k;
-  (void)row_mapA;
-  (void)row_mapB;
-  (void)row_mapC;
-  (void)entriesA;
-  (void)entriesB;
-  (void)transposeA;
-  (void)transposeB;
-  (void)verbose;
-  throw std::runtime_error("MKL IS NOT DEFINED\n");
-  // return;
-#endif
 }
 
-template <
-    typename KernelHandle, typename in_row_index_view_type,
-    typename in_nonzero_index_view_type, typename in_nonzero_value_view_type,
-    typename bin_row_index_view_type, typename bin_nonzero_index_view_type,
-    typename bin_nonzero_value_view_type, typename cin_row_index_view_type,
-    typename cin_nonzero_index_view_type, typename cin_nonzero_value_view_type>
-void mkl_apply(KernelHandle *handle, typename KernelHandle::nnz_lno_t m,
-               typename KernelHandle::nnz_lno_t n,
-               typename KernelHandle::nnz_lno_t k,
-               in_row_index_view_type row_mapA,
-               in_nonzero_index_view_type entriesA,
-               in_nonzero_value_view_type valuesA,
-
-               bool transposeA, bin_row_index_view_type row_mapB,
-               bin_nonzero_index_view_type entriesB,
-               bin_nonzero_value_view_type valuesB, bool transposeB,
-               cin_row_index_view_type row_mapC,
-               cin_nonzero_index_view_type entriesC,
-               cin_nonzero_value_view_type valuesC, bool verbose = false) {
-#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
+// double specialization: exports the CSR arrays of the wrapped MKL matrix.
+template <>
+KOKKOS_INLINE_FUNCTION void MKLSparseMatrix<double>::get(MKL_INT &rows,
+                                                         MKL_INT &cols,
+                                                         MKL_INT *&rows_start,
+                                                         MKL_INT *&columns,
+                                                         double *&values) {
+  sparse_index_base_t indexing;
+  MKL_INT *rows_end;  // filled by MKL but not handed back to the caller
+  mkl_call(mkl_sparse_d_export_csr(mtx, &indexing, &rows, &cols, &rows_start,
+                                   &rows_end, &columns, &values),
+           "Failed to export matrix with mkl_sparse_d_export_csr()!");
+  if (SPARSE_INDEX_BASE_ZERO != indexing) {  // reject one-based results
+    throw std::runtime_error(
+        "Expected zero based indexing in exported MKL sparse matrix\n");
+  }
+}
 
-  typedef typename KernelHandle::nnz_lno_t idx;
+template <typename KernelHandle, typename a_rowmap_view_type,
+          typename a_index_view_type, typename a_values_view_type,
+          typename b_rowmap_view_type, typename b_index_view_type,
+          typename b_values_view_type, typename c_rowmap_view_type,
+          typename c_index_view_type, typename c_values_view_type>
+class MKLApply {
+ public:
+  typedef typename KernelHandle::nnz_lno_t nnz_lno_t;
   typedef typename KernelHandle::size_type size_type;
-
-  typedef typename KernelHandle::HandleTempMemorySpace HandleTempMemorySpace;
-  typedef typename Kokkos::View<int *, HandleTempMemorySpace>
-      int_temp_work_view_t;
-
   typedef typename KernelHandle::nnz_scalar_t value_type;
-
+  typedef typename KernelHandle::HandleTempMemorySpace HandleTempMemorySpace;
   typedef typename KernelHandle::HandleExecSpace MyExecSpace;
-  /*
-      if (!(
-          (Kokkos::SpaceAccessibility<typename
-     Kokkos::HostSpace::execution_space, typename
-     device1::memory_space>::accessible) &&
-          (Kokkos::SpaceAccessibility<typename
-     Kokkos::HostSpace::execution_space, typename
-     device2::memory_space>::accessible) &&
-          (Kokkos::SpaceAccessibility<typename
-     Kokkos::HostSpace::execution_space, typename
-     device3::memory_space>::accessible) )
-          ){
-        throw std::runtime_error ("MEMORY IS NOT ALLOCATED IN HOST DEVICE for
-     MKL\n"); return;
-      }
-  */
-  if (std::is_same<idx, int>::value) {
-    int *a_xadj = NULL;
-    int *b_xadj = NULL;
-    int_temp_work_view_t a_xadj_v, b_xadj_v;
-
-    if (std::is_same<size_type, int>::value) {
-      a_xadj = (int *)row_mapA.data();
-      b_xadj = (int *)row_mapB.data();
-    } else {
-      // TODO test this case.
-
-      Kokkos::Timer copy_time;
-      const int max_integer = 2147483647;
-      if (entriesB.extent(0) > max_integer ||
-          entriesA.extent(0) > max_integer) {
-        throw std::runtime_error(
-            "MKL requires integer values for size type for SPGEMM. Copying to "
-            "integer will cause overflow.\n");
-        return;
-      }
-      a_xadj_v = int_temp_work_view_t("tmpa", m + 1);
-      a_xadj   = (int *)a_xadj_v.data();
-      b_xadj_v = int_temp_work_view_t("tmpb", n + 1);
-      b_xadj   = (int *)b_xadj_v.data();
-
-      KokkosKernels::Impl::copy_vector<in_row_index_view_type,
-                                       int_temp_work_view_t, MyExecSpace>(
-          m + 1, row_mapA, a_xadj_v);
-
-      KokkosKernels::Impl::copy_vector<bin_row_index_view_type,
-                                       int_temp_work_view_t, MyExecSpace>(
-          m + 1, row_mapB, b_xadj_v);
-
-      if (verbose)
-        std::cout << "MKL COPY size type to int TIME:" << copy_time.seconds()
-                  << std::endl;
+  typedef typename Kokkos::View<int *, HandleTempMemorySpace> int_tmp_view_t;
+
+ public:
+  static void mkl_symbolic(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n,
+                           nnz_lno_t k, a_rowmap_view_type row_mapA,
+                           a_index_view_type entriesA, bool transposeA,
+                           b_rowmap_view_type row_mapB,
+                           b_index_view_type entriesB, bool transposeB,
+                           c_rowmap_view_type row_mapC, bool verbose = false) {
+    if (m < 1 || n < 1 || k < 1 || row_mapA(m) < 1 || row_mapB(n) < 1) {
+      // set correct values in non-empty 0-nnz corner case
+      handle->set_c_nnz(0);
+      Kokkos::deep_copy(row_mapC, 0);
+      return;
     }
 
-    int *a_adj = (int *)entriesA.data();
-    int *b_adj = (int *)entriesB.data();
+    Kokkos::Timer timer;
+    using scalar_t = typename KernelHandle::nnz_scalar_t;
+    using tmp_values_type =
+        Kokkos::View<scalar_t *, typename KernelHandle::HandleTempMemorySpace>;
 
-    const value_type *a_ew = valuesA.data();
-    const value_type *b_ew = valuesB.data();
+    const auto export_rowmap = [&](MKL_INT m, MKL_INT *rows_start,
+                                   MKL_INT *columns, scalar_t *values) {
+      if (handle->mkl_keep_output) {
+        Kokkos::Timer copy_time;
+        const nnz_lno_t nnz = rows_start[m];
+        handle->set_c_nnz(nnz);
+        copy(m + 1, rows_start, row_mapC);
+        if (verbose)
+          std::cout << "\tMKL rowmap export time:" << copy_time.seconds()
+                    << std::endl;
+      }
+    };
 
-    sparse_matrix_t A;
-    sparse_matrix_t B;
-    sparse_matrix_t C;
+    // use dummy values for A and B inputs
+    tmp_values_type tmp_values(
+        Kokkos::ViewAllocateWithoutInitializing("tmp_values"),
+        KOKKOSKERNELS_MACRO_MAX(entriesA.extent(0), entriesB.extent(0)));
 
-    if (std::is_same<value_type, float>::value) {
-      if (SPARSE_STATUS_SUCCESS !=
-          mkl_sparse_s_create_csr(&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj,
-                                  a_xadj + 1, a_adj, (float *)a_ew)) {
-        throw std::runtime_error(
-            "CANNOT CREATE mkl_sparse_s_create_csr A matrix\n");
-        return;
-      }
+    apply(handle, m, n, k, row_mapA, entriesA, tmp_values, transposeA, row_mapB,
+          entriesB, tmp_values, transposeB, verbose, export_rowmap);
 
-      if (SPARSE_STATUS_SUCCESS !=
-          mkl_sparse_s_create_csr(&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj,
-                                  b_xadj + 1, b_adj, (float *)b_ew)) {
-        throw std::runtime_error(
-            "CANNOT CREATE mkl_sparse_s_create_csr B matrix\n");
-        return;
-      }
+    if (verbose)
+      std::cout << "MKL symbolic time:" << timer.seconds() << std::endl;
+  }
 
-      sparse_operation_t operation;
-      if (transposeA && transposeB) {
-        operation = SPARSE_OPERATION_TRANSPOSE;
-      } else if (!(transposeA || transposeB)) {
-        operation = SPARSE_OPERATION_NON_TRANSPOSE;
-      } else {
-        throw std::runtime_error(
-            "MKL either transpose both matrices, or none for SPGEMM\n");
-        return;
-      }
+  // Numeric phase: runs apply() and copies C's column indices and values out.
+  static void mkl_numeric(
+      KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k,
+      a_rowmap_view_type row_mapA, a_index_view_type entriesA,
+      a_values_view_type valuesA, bool transposeA, b_rowmap_view_type row_mapB,
+      b_index_view_type entriesB, b_values_view_type valuesB, bool transposeB,
+      c_rowmap_view_type row_mapC, c_index_view_type entriesC,
+      c_values_view_type valuesC, bool verbose = false) {
+    Kokkos::Timer timer;
+    // apply() hands this callback the CSR arrays exported from the product.
+    const auto export_values =
+        [&](MKL_INT num_rows, MKL_INT *rows_start, MKL_INT *columns,
+            value_type *values) {
+          if (!handle->mkl_keep_output) return;
+          Kokkos::Timer copy_time;
+          const nnz_lno_t nnz = rows_start[num_rows];
+          copy(nnz, columns, entriesC);
+          copy(nnz, values, valuesC);
+          if (verbose)
+            std::cout << "\tMKL values export time:" << copy_time.seconds()
+                      << std::endl;
+        };
+
+    apply(handle, m, n, k, row_mapA, entriesA, valuesA, transposeA, row_mapB,
+          entriesB, valuesB, transposeB, verbose, export_values);
+
+    if (verbose)
+      std::cout << "MKL numeric time:" << timer.seconds() << std::endl;
+  }
 
-      Kokkos::Timer timer1;
-      bool success =
-          SPARSE_STATUS_SUCCESS != mkl_sparse_spmm(operation, A, B, &C);
-      if (verbose)
-        std::cout << "Actual FLOAT MKL SPMM Time:" << timer1.seconds()
-                  << std::endl;
+ private:
+  static constexpr int max_integer = 2147483647;
 
-      if (success) {
-        throw std::runtime_error(
-            "ERROR at SPGEMM multiplication in mkl_sparse_spmm\n");
-
-        return;
-      } else {
-        sparse_index_base_t c_indexing;
-        MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns;
-        float *values;
-
-        if (SPARSE_STATUS_SUCCESS !=
-            mkl_sparse_s_export_csr(C, &c_indexing, &c_rows, &c_cols,
-                                    &rows_start, &rows_end, &columns,
-                                    &values)) {
-          throw std::runtime_error(
-              "ERROR at exporting result matrix in mkl_sparse_spmm\n");
-          return;
-        }
-
-        if (SPARSE_INDEX_BASE_ZERO != c_indexing) {
-          throw std::runtime_error("C is not zero based indexed\n");
-          return;
-        }
-
-        // KokkosKernels::Impl::copy_vector<MKL_INT *, typename
-        // cin_row_index_view_type::non_const_type, MyExecSpace> (m, rows_start,
-        // row_mapC); idx nnz = row_mapC(m) = rows_end[m - 1];
-        idx nnz = rows_end[m - 1];
-        using non_const_size_type =
-            typename cin_row_index_view_type::non_const_value_type;
-        auto *tmpPtr = const_cast<non_const_size_type *>(row_mapC.data());
-        tmpPtr[m]    = nnz;
-
-        KokkosKernels::Impl::copy_vector<
-            MKL_INT *, typename cin_nonzero_index_view_type::non_const_type,
-            MyExecSpace>(nnz, columns, entriesC);
-        KokkosKernels::Impl::copy_vector<
-            float *, typename cin_nonzero_value_view_type::non_const_type,
-            MyExecSpace>(nnz, values, valuesC);
-      }
+ private:
+  template <typename CB>
+  static void apply(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k,
+                    a_rowmap_view_type row_mapA, a_index_view_type entriesA,
+                    a_values_view_type valuesA,
 
-      if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(A)) {
-        throw std::runtime_error("Error at mkl_sparse_destroy A\n");
-        return;
-      }
+                    bool transposeA, b_rowmap_view_type row_mapB,
+                    b_index_view_type entriesB, b_values_view_type valuesB,
+                    bool transposeB, bool verbose, const CB &callback) {
+    if (!std::is_same<nnz_lno_t, int>::value) {
+      throw std::runtime_error("MKL requires local ordinals to be integer.\n");
+    }
 
-      if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(B)) {
-        throw std::runtime_error("Error at mkl_sparse_destroy B\n");
-        return;
-      }
-      if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(C)) {
-        throw std::runtime_error("Error at mkl_sparse_destroy C\n");
-        return;
-      }
-    } else if (std::is_same<value_type, double>::value) {
-      /*
-      std::cout << "create a" << std::endl;
-      std::cout << "m:" << m << " n:" << n << std::endl;
-      std::cout << "a_xadj[0]:" << a_xadj[0] << " a_xadj[m]:" << a_xadj[m] <<
-      std::endl; std::cout << "a_adj[a_xadj[m] - 1]:" << a_adj[a_xadj[m] - 1] <<
-      " a_ew[a_xadj[m] - 1]:" << a_ew[a_xadj[m] - 1] << std::endl;
-      */
-      if (SPARSE_STATUS_SUCCESS !=
-          mkl_sparse_d_create_csr(&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj,
-                                  a_xadj + 1, a_adj, (double *)a_ew)) {
-        throw std::runtime_error(
-            "CANNOT CREATE mkl_sparse_s_create_csr A matrix\n");
-        return;
-      }
+    if (m < 1 || n < 1 || k < 1 || row_mapA(m) < 1 || row_mapB(n) < 1) {
+      return;
+    }
 
-      // std::cout << "create b" << std::endl;
-      if (SPARSE_STATUS_SUCCESS !=
-          mkl_sparse_d_create_csr(&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj,
-                                  b_xadj + 1, b_adj, (double *)b_ew)) {
-        throw std::runtime_error(
-            "CANNOT CREATE mkl_sparse_s_create_csr B matrix\n");
-        return;
-      }
+    int *a_xadj = (int *)row_mapA.data();
+    int *b_xadj = (int *)row_mapB.data();
+    int_tmp_view_t a_xadj_v, b_xadj_v;
 
-      sparse_operation_t operation;
-      if (transposeA && transposeB) {
-        operation = SPARSE_OPERATION_TRANSPOSE;
-      } else if (!(transposeA || transposeB)) {
-        operation = SPARSE_OPERATION_NON_TRANSPOSE;
-      } else {
+    if (!std::is_same<size_type, int>::value) {
+      if (entriesA.extent(0) > max_integer ||
+          entriesB.extent(0) > max_integer) {
         throw std::runtime_error(
-            "MKL either transpose both matrices, or none for SPGEMM\n");
-        return;
+            "MKL requires integer values for size type for SPGEMM. Copying "
+            "to "
+            "integer will cause overflow.\n");
       }
+      static_assert(
+          std::is_same<typename int_tmp_view_t::value_type,
+                       typename int_tmp_view_t::non_const_value_type>::value,
+          "deep_copy requires non-const destination type");
 
-      Kokkos::Timer timer1;
-      bool success =
-          SPARSE_STATUS_SUCCESS != mkl_sparse_spmm(operation, A, B, &C);
-      if (verbose)
-        std::cout << "Actual DOUBLE MKL SPMM Time Without Free:"
-                  << timer1.seconds() << std::endl;
-
-      mkl_free_buffers();
+      Kokkos::Timer copy_time;
+      a_xadj_v = int_tmp_view_t("tmpa", m + 1);
+      b_xadj_v = int_tmp_view_t("tmpb", n + 1);
+      Kokkos::deep_copy(a_xadj_v, row_mapA);
+      Kokkos::deep_copy(b_xadj_v, row_mapB);
+      a_xadj = (int *)a_xadj_v.data();
+      b_xadj = (int *)b_xadj_v.data();
       if (verbose)
-        std::cout << "Actual DOUBLE MKL SPMM Time:" << timer1.seconds()
-                  << std::endl;
-
-      if (success) {
-        throw std::runtime_error(
-            "ERROR at SPGEMM multiplication in mkl_sparse_spmm\n");
-        return;
-      } else {
-        sparse_index_base_t c_indexing;
-        MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns;
-        double *values;
-
-        if (SPARSE_STATUS_SUCCESS !=
-            mkl_sparse_d_export_csr(C, &c_indexing, &c_rows, &c_cols,
-                                    &rows_start, &rows_end, &columns,
-                                    &values)) {
-          throw std::runtime_error(
-              "ERROR at exporting result matrix in mkl_sparse_spmm\n");
-          return;
-        }
-
-        if (SPARSE_INDEX_BASE_ZERO != c_indexing) {
-          throw std::runtime_error("C is not zero based indexed\n");
-          return;
-        }
-        if (handle->mkl_keep_output) {
-          Kokkos::Timer copy_time;
-
-          // KokkosKernels::Impl::copy_vector<MKL_INT *, typename
-          // cin_row_index_view_type::non_const_type, MyExecSpace> (m,
-          // rows_start, row_mapC); idx nnz = row_mapC(m) = rows_end[m - 1];
-          idx nnz = rows_end[m - 1];
-          using non_const_size_type =
-              typename cin_row_index_view_type::non_const_value_type;
-          auto *tmpPtr = const_cast<non_const_size_type *>(row_mapC.data());
-          tmpPtr[m]    = nnz;
-
-          KokkosKernels::Impl::copy_vector<
-              MKL_INT *, typename cin_nonzero_index_view_type::non_const_type,
-              MyExecSpace>(nnz, columns, entriesC);
-          KokkosKernels::Impl::copy_vector<
-              double *, typename cin_nonzero_value_view_type::non_const_type,
-              MyExecSpace>(nnz, values, valuesC);
-          double copy_time_d = copy_time.seconds();
-          if (verbose) std::cout << "MKL COPYTIME:" << copy_time_d << std::endl;
-        }
-      }
+        std::cout << "\tMKL int-type temp rowmap copy time:"
+                  << copy_time.seconds() << std::endl;
+    }
 
-      if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(A)) {
-        throw std::runtime_error("Error at mkl_sparse_destroy A\n");
-        return;
-      }
+    value_type *a_ew = (value_type *)valuesA.data();
+    value_type *b_ew = (value_type *)valuesB.data();
 
-      if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(B)) {
-        throw std::runtime_error("Error at mkl_sparse_destroy B\n");
-        return;
-      }
-      if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy(C)) {
-        throw std::runtime_error("Error at mkl_sparse_destroy C\n");
-        return;
-      }
+    using Matrix = MKLSparseMatrix<value_type>;
+    Matrix A(m, n, a_xadj, (int *)(entriesA.data()), a_ew);
+    Matrix B(n, k, b_xadj, (int *)entriesB.data(), b_ew);
 
+    sparse_operation_t operation;
+    if (transposeA && transposeB) {
+      operation = SPARSE_OPERATION_TRANSPOSE;
+    } else if (!(transposeA || transposeB)) {
+      operation = SPARSE_OPERATION_NON_TRANSPOSE;
     } else {
       throw std::runtime_error(
-          "MKL requires float or double values. Complex values are not "
-          "implemented yet.\n");
-      return;
+          "MKL either transpose both matrices, or none for SPGEMM\n");
     }
-  } else {
-    throw std::runtime_error("MKL requires local ordinals to be integer.\n");
-    return;
+
+    Kokkos::Timer timer1;
+    Matrix C = Matrix::spmm(operation, A, B);
+    if (verbose) {
+      std::cout << "\tMKL spmm (";
+      if (std::is_same<float, value_type>::value)
+        std::cout << "FLOAT";
+      else if (std::is_same<double, value_type>::value)
+        std::cout << "DOUBLE";
+      else
+        std::cout << "?";
+      std::cout << ") time:" << timer1.seconds() << std::endl;
+    }
+
+    MKL_INT c_rows, c_cols, *rows_start, *columns;
+    value_type *values;
+    C.get(c_rows, c_cols, rows_start, columns, values);
+    callback(m, rows_start, columns, values);
+
+    A.destroy();
+    B.destroy();
+    C.destroy();
+  }
+
+  template <typename from_type, typename to_type>
+  KOKKOS_INLINE_FUNCTION static void copy(size_t num_elems, from_type from,
+                                          to_type to) {
+    KokkosKernels::Impl::copy_vector<from_type, to_type, MyExecSpace>(num_elems,
+                                                                      from, to);
   }
+};
+#endif  // KOKKOSKERNELS_ENABLE_TPL_MKL
+
+template <typename KernelHandle, typename a_rowmap_type, typename a_index_type,
+          typename b_rowmap_type, typename b_index_type, typename c_rowmap_type,
+          typename nnz_lno_t = typename KernelHandle::nnz_lno_t>
+void mkl_symbolic(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k,
+                  a_rowmap_type row_mapA, a_index_type entriesA,
+                  bool transposeA, b_rowmap_type row_mapB,
+                  b_index_type entriesB, bool transposeB,
+                  c_rowmap_type row_mapC, bool verbose = false) {
+#ifndef KOKKOSKERNELS_ENABLE_TPL_MKL
+  throw std::runtime_error("MKL was not enabled in this build!");
+#else
+  using values_type  = typename KernelHandle::scalar_temp_work_view_t;
+  using c_index_type = b_index_type;
+  using mkl = MKLApply<KernelHandle, a_rowmap_type, a_index_type, values_type,
+                       b_rowmap_type, b_index_type, values_type, c_rowmap_type,
+                       c_index_type, values_type>;
+  mkl::mkl_symbolic(handle, m, n, k, row_mapA, entriesA, transposeA, row_mapB,
+                    entriesB, transposeB, row_mapC, verbose);
+#endif
+}
+
+template <typename KernelHandle, typename a_rowmap_type, typename a_index_type,
+          typename a_values_type, typename b_rowmap_type, typename b_index_type,
+          typename b_values_type, typename c_rowmap_type, typename c_index_type,
+          typename c_values_type,
+          typename nnz_lno_t = typename KernelHandle::nnz_lno_t>
+void mkl_apply(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k,
+               a_rowmap_type row_mapA, a_index_type entriesA,
+               a_values_type valuesA, bool transposeA, b_rowmap_type row_mapB,
+               b_index_type entriesB, b_values_type valuesB, bool transposeB,
+               c_rowmap_type row_mapC, c_index_type entriesC,
+               c_values_type valuesC, bool verbose = false) {
+#ifndef KOKKOSKERNELS_ENABLE_TPL_MKL
+  throw std::runtime_error("MKL was not enabled in this build!");
 #else
-  (void)handle;
-  (void)m;
-  (void)n;
-  (void)k;
-  (void)row_mapA;
-  (void)row_mapB;
-  (void)row_mapC;
-  (void)entriesA;
-  (void)entriesB;
-  (void)entriesC;
-  (void)valuesA;
-  (void)valuesB;
-  (void)valuesC;
-  (void)transposeA;
-  (void)transposeB;
-  (void)verbose;
-  throw std::runtime_error("MKL IS NOT DEFINED\n");
-  // return;
+  using mkl = MKLApply<KernelHandle, a_rowmap_type, a_index_type, a_values_type,
+                       b_rowmap_type, b_index_type, b_values_type,
+                       c_rowmap_type, c_index_type, c_values_type>;
+  mkl::mkl_numeric(handle, m, n, k, row_mapA, entriesA, valuesA, transposeA,
+                   row_mapB, entriesB, valuesB, transposeB, row_mapC, entriesC,
+                   valuesC, verbose);
 #endif
 }
+
 }  // namespace Impl
 }  // namespace KokkosSparse
 

From 272461125c6ea2afae9c6ea1c79c02ad89c75cc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Fri, 7 Jan 2022 14:19:05 +0100
Subject: [PATCH 02/19] Fix MKL dispatch in SpGEMM unit test

---
 unit_test/sparse/Test_Sparse_spgemm.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp
index b84ef6acc4..e5ab088bdc 100644
--- a/unit_test/sparse/Test_Sparse_spgemm.hpp
+++ b/unit_test/sparse/Test_Sparse_spgemm.hpp
@@ -280,7 +280,7 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth,
       SPGEMM_KK_SPEED /* alias SPGEMM_KK_DENSE */
   };
 
-#ifdef HAVE_KOKKOSKERNELS_MKL
+#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
   algorithms.push_back(SPGEMM_MKL);
 #endif
 

From 5d535fea8744262e775abd3e31b53b4fdea64554 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Fri, 7 Jan 2022 14:19:05 +0100
Subject: [PATCH 03/19] Fixed inlining: don't compile exception throwing MKL
 wrappers for GPU

---
 .../impl/KokkosSparse_spgemm_mkl_impl.hpp     | 55 ++++++++-----------
 1 file changed, 23 insertions(+), 32 deletions(-)

diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index 1b22906ea3..44ae49fc34 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -54,8 +54,7 @@ namespace Impl {
 
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
 
-KOKKOS_INLINE_FUNCTION
-void mkl_call(sparse_status_t result, const char *err_msg) {
+inline void mkl_call(sparse_status_t result, const char *err_msg) {
   if (SPARSE_STATUS_SUCCESS != result) {
     throw std::runtime_error(err_msg);
   }
@@ -66,12 +65,10 @@ class MKLSparseMatrix {
   sparse_matrix_t mtx;
 
  public:
-  KOKKOS_INLINE_FUNCTION
-  MKLSparseMatrix(const MKL_INT m, const MKL_INT n, MKL_INT *xadj, MKL_INT *adj,
-                  value_type *values);
+  inline MKLSparseMatrix(const MKL_INT m, const MKL_INT n, MKL_INT *xadj,
+                         MKL_INT *adj, value_type *values);
 
-  KOKKOS_INLINE_FUNCTION
-  static MKLSparseMatrix<value_type> spmm(
+  inline static MKLSparseMatrix<value_type> spmm(
       sparse_operation_t operation, const MKLSparseMatrix<value_type> &A,
       const MKLSparseMatrix<value_type> &B) {
     sparse_matrix_t c;
@@ -80,44 +77,41 @@ class MKLSparseMatrix {
     return MKLSparseMatrix<value_type>(c);
   }
 
-  KOKKOS_INLINE_FUNCTION
-  void get(MKL_INT &rows, MKL_INT &cols, MKL_INT *&rows_start,
-           MKL_INT *&columns, value_type *&values);
+  inline void get(MKL_INT &rows, MKL_INT &cols, MKL_INT *&rows_start,
+                  MKL_INT *&columns, value_type *&values);
 
-  KOKKOS_INLINE_FUNCTION
-  void destroy() {
+  inline void destroy() {
     mkl_call(mkl_sparse_destroy(mtx), "mkl_sparse_destroy() failed!");
   }
 
  private:
-  KOKKOS_INLINE_FUNCTION
-  MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {}
+  inline MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {}
 };
 
 template <>
-KOKKOS_INLINE_FUNCTION MKLSparseMatrix<float>::MKLSparseMatrix(
-    const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj,
-    float *values) {
+inline MKLSparseMatrix<float>::MKLSparseMatrix(const MKL_INT rows,
+                                               const MKL_INT cols,
+                                               MKL_INT *xadj, MKL_INT *adj,
+                                               float *values) {
   mkl_call(mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols,
                                    xadj, xadj + 1, adj, values),
            "mkl_sparse_s_create_csr() failed!");
 }
 
 template <>
-KOKKOS_INLINE_FUNCTION MKLSparseMatrix<double>::MKLSparseMatrix(
-    const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj,
-    double *values) {
+inline MKLSparseMatrix<double>::MKLSparseMatrix(const MKL_INT rows,
+                                                const MKL_INT cols,
+                                                MKL_INT *xadj, MKL_INT *adj,
+                                                double *values) {
   mkl_call(mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols,
                                    xadj, xadj + 1, adj, values),
            "mkl_sparse_d_create_csr() failed!");
 }
 
 template <>
-KOKKOS_INLINE_FUNCTION void MKLSparseMatrix<float>::get(MKL_INT &rows,
-                                                        MKL_INT &cols,
-                                                        MKL_INT *&rows_start,
-                                                        MKL_INT *&columns,
-                                                        float *&values) {
+inline void MKLSparseMatrix<float>::get(MKL_INT &rows, MKL_INT &cols,
+                                        MKL_INT *&rows_start, MKL_INT *&columns,
+                                        float *&values) {
   sparse_index_base_t indexing;
   MKL_INT *rows_end;
   mkl_call(mkl_sparse_s_export_csr(mtx, &indexing, &rows, &cols, &rows_start,
@@ -131,11 +125,9 @@ KOKKOS_INLINE_FUNCTION void MKLSparseMatrix<float>::get(MKL_INT &rows,
 }
 
 template <>
-KOKKOS_INLINE_FUNCTION void MKLSparseMatrix<double>::get(MKL_INT &rows,
-                                                         MKL_INT &cols,
-                                                         MKL_INT *&rows_start,
-                                                         MKL_INT *&columns,
-                                                         double *&values) {
+inline void MKLSparseMatrix<double>::get(MKL_INT &rows, MKL_INT &cols,
+                                         MKL_INT *&rows_start,
+                                         MKL_INT *&columns, double *&values) {
   sparse_index_base_t indexing;
   MKL_INT *rows_end;
   mkl_call(mkl_sparse_d_export_csr(mtx, &indexing, &rows, &cols, &rows_start,
@@ -326,8 +318,7 @@ class MKLApply {
   }
 
   template <typename from_type, typename to_type>
-  KOKKOS_INLINE_FUNCTION static void copy(size_t num_elems, from_type from,
-                                          to_type to) {
+  inline static void copy(size_t num_elems, from_type from, to_type to) {
     KokkosKernels::Impl::copy_vector<from_type, to_type, MyExecSpace>(num_elems,
                                                                       from, to);
   }

From 3556dffffc2cb4088e883bf55e805f227885a8a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Fri, 7 Jan 2022 14:19:05 +0100
Subject: [PATCH 04/19] Support GPU memory space in MKL spgemm

---
 .../impl/KokkosSparse_spgemm_mkl_impl.hpp     | 81 ++++++++++++-------
 unit_test/sparse/Test_Sparse_spgemm.hpp       |  6 --
 2 files changed, 54 insertions(+), 33 deletions(-)

diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index 44ae49fc34..9bc4a9faac 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -150,9 +150,8 @@ class MKLApply {
   typedef typename KernelHandle::nnz_lno_t nnz_lno_t;
   typedef typename KernelHandle::size_type size_type;
   typedef typename KernelHandle::nnz_scalar_t value_type;
-  typedef typename KernelHandle::HandleTempMemorySpace HandleTempMemorySpace;
   typedef typename KernelHandle::HandleExecSpace MyExecSpace;
-  typedef typename Kokkos::View<int *, HandleTempMemorySpace> int_tmp_view_t;
+  typedef typename Kokkos::View<int *, Kokkos::HostSpace> int_tmp_view_t;
 
  public:
   static void mkl_symbolic(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n,
@@ -161,7 +160,8 @@ class MKLApply {
                            b_rowmap_view_type row_mapB,
                            b_index_view_type entriesB, bool transposeB,
                            c_rowmap_view_type row_mapC, bool verbose = false) {
-    if (m < 1 || n < 1 || k < 1 || row_mapA(m) < 1 || row_mapB(n) < 1) {
+    if (m < 1 || n < 1 || k < 1 || entriesA.extent(0) < 1 ||
+        entriesB.extent(0) < 1) {
       // set correct values in non-empty 0-nnz corner case
       handle->set_c_nnz(0);
       Kokkos::deep_copy(row_mapC, 0);
@@ -170,8 +170,6 @@ class MKLApply {
 
     Kokkos::Timer timer;
     using scalar_t = typename KernelHandle::nnz_scalar_t;
-    using tmp_values_type =
-        Kokkos::View<scalar_t *, typename KernelHandle::HandleTempMemorySpace>;
 
     const auto export_rowmap = [&](MKL_INT m, MKL_INT *rows_start,
                                    MKL_INT *columns, scalar_t *values) {
@@ -179,7 +177,7 @@ class MKLApply {
         Kokkos::Timer copy_time;
         const nnz_lno_t nnz = rows_start[m];
         handle->set_c_nnz(nnz);
-        copy(m + 1, rows_start, row_mapC);
+        copy(make_host_view(rows_start, m + 1), row_mapC);
         if (verbose)
           std::cout << "\tMKL rowmap export time:" << copy_time.seconds()
                     << std::endl;
@@ -187,12 +185,15 @@ class MKLApply {
     };
 
     // use dummy values for A and B inputs
-    tmp_values_type tmp_values(
-        Kokkos::ViewAllocateWithoutInitializing("tmp_values"),
-        KOKKOSKERNELS_MACRO_MAX(entriesA.extent(0), entriesB.extent(0)));
+    a_values_view_type tmp_valsA(
+        Kokkos::ViewAllocateWithoutInitializing("tmp_valuesA"),
+        entriesA.extent(0));
+    b_values_view_type tmp_valsB(
+        Kokkos::ViewAllocateWithoutInitializing("tmp_valuesB"),
+        entriesB.extent(0));
 
-    apply(handle, m, n, k, row_mapA, entriesA, tmp_values, transposeA, row_mapB,
-          entriesB, tmp_values, transposeB, verbose, export_rowmap);
+    apply(handle, m, n, k, row_mapA, entriesA, tmp_valsA, transposeA, row_mapB,
+          entriesB, tmp_valsB, transposeB, verbose, export_rowmap);
 
     if (verbose)
       std::cout << "MKL symbolic time:" << timer.seconds() << std::endl;
@@ -213,8 +214,8 @@ class MKLApply {
           if (handle->mkl_keep_output) {
             Kokkos::Timer copy_time;
             const nnz_lno_t nnz = rows_start[m];
-            copy(nnz, columns, entriesC);
-            copy(nnz, values, valuesC);
+            copy(make_host_view(columns, nnz), entriesC);
+            copy(make_host_view(values, nnz), valuesC);
             if (verbose)
               std::cout << "\tMKL values export time:" << copy_time.seconds()
                         << std::endl;
@@ -244,12 +245,19 @@ class MKLApply {
       throw std::runtime_error("MKL requires local ordinals to be integer.\n");
     }
 
-    if (m < 1 || n < 1 || k < 1 || row_mapA(m) < 1 || row_mapB(n) < 1) {
+    if (m < 1 || n < 1 || k < 1 || entriesA.extent(0) < 1 ||
+        entriesB.extent(0) < 1) {
       return;
     }
 
-    int *a_xadj = (int *)row_mapA.data();
-    int *b_xadj = (int *)row_mapB.data();
+    const auto create_mirror = [](auto view) {
+      return Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), view);
+    };
+
+    auto h_rowsA      = create_mirror(row_mapA);
+    auto h_rowsB      = create_mirror(row_mapB);
+    const int *a_xadj = reinterpret_cast<const int *>(h_rowsA.data());
+    const int *b_xadj = reinterpret_cast<const int *>(h_rowsB.data());
     int_tmp_view_t a_xadj_v, b_xadj_v;
 
     if (!std::is_same<size_type, int>::value) {
@@ -268,8 +276,8 @@ class MKLApply {
       Kokkos::Timer copy_time;
       a_xadj_v = int_tmp_view_t("tmpa", m + 1);
       b_xadj_v = int_tmp_view_t("tmpb", n + 1);
-      Kokkos::deep_copy(a_xadj_v, row_mapA);
-      Kokkos::deep_copy(b_xadj_v, row_mapB);
+      Kokkos::deep_copy(a_xadj_v, h_rowsA);
+      Kokkos::deep_copy(b_xadj_v, h_rowsB);
       a_xadj = (int *)a_xadj_v.data();
       b_xadj = (int *)b_xadj_v.data();
       if (verbose)
@@ -277,12 +285,20 @@ class MKLApply {
                   << copy_time.seconds() << std::endl;
     }
 
-    value_type *a_ew = (value_type *)valuesA.data();
-    value_type *b_ew = (value_type *)valuesB.data();
-
+    auto h_valsA           = create_mirror(valuesA);
+    auto h_valsB           = create_mirror(valuesB);
+    auto h_entriesA        = create_mirror(entriesA);
+    auto h_entriesB        = create_mirror(entriesB);
+    const int *a_adj       = h_entriesA.data();
+    const int *b_adj       = h_entriesB.data();
+    const value_type *a_ew = h_valsA.data();
+    const value_type *b_ew = h_valsB.data();
+
+    // Hack: we discard const with pointer casts here to work around MKL
+    // requiring mutable input and our symbolic interface not providing it
     using Matrix = MKLSparseMatrix<value_type>;
-    Matrix A(m, n, a_xadj, (int *)(entriesA.data()), a_ew);
-    Matrix B(n, k, b_xadj, (int *)entriesB.data(), b_ew);
+    Matrix A(m, n, (int *)a_xadj, (int *)a_adj, (value_type *)a_ew);
+    Matrix B(n, k, (int *)b_xadj, (int *)b_adj, (value_type *)b_ew);
 
     sparse_operation_t operation;
     if (transposeA && transposeB) {
@@ -317,10 +333,21 @@ class MKLApply {
     C.destroy();
   }
 
-  template <typename from_type, typename to_type>
-  inline static void copy(size_t num_elems, from_type from, to_type to) {
-    KokkosKernels::Impl::copy_vector<from_type, to_type, MyExecSpace>(num_elems,
-                                                                      from, to);
+  template <typename from_view_type, typename dst_view_type>
+  inline static void copy(from_view_type from, dst_view_type to) {
+    auto h_from =
+        Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), from);
+    auto h_to = Kokkos::create_mirror_view(Kokkos::HostSpace(), to);
+    Kokkos::deep_copy(h_to, h_from);  // view copy (for different element types)
+    Kokkos::deep_copy(to, h_to);
+    Kokkos::fence();
+  }
+
+  template <typename T>
+  inline static decltype(auto) make_host_view(const T *data, size_t num_elems) {
+    using device_type =
+        Kokkos::Device<Kokkos::DefaultHostExecutionSpace, Kokkos::HostSpace>;
+    return Kokkos::View<const T *, Kokkos::HostSpace>(data, num_elems);
   }
 };
 #endif  // KOKKOSKERNELS_ENABLE_TPL_MKL
diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp
index e5ab088bdc..cb3d04b019 100644
--- a/unit_test/sparse/Test_Sparse_spgemm.hpp
+++ b/unit_test/sparse/Test_Sparse_spgemm.hpp
@@ -315,12 +315,6 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth,
         if (A.values.extent(0) > max_integer) {
           is_expected_to_fail = true;
         }
-
-        if (!(Kokkos::SpaceAccessibility<
-                typename Kokkos::HostSpace::execution_space,
-                typename device::memory_space>::accessible)) {
-          is_expected_to_fail = true;
-        }
         break;
 
       case SPGEMM_KK: algo = "SPGEMM_KK"; break;

From 0ba8b395bdb56f027c86f69c4f8e50521aff63f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Wed, 19 Jan 2022 15:56:50 +0100
Subject: [PATCH 05/19] fix -Wunused-parameter errors

---
 .../impl/KokkosSparse_spgemm_mkl_impl.hpp     | 39 ++++++++++++++++---
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index 9bc4a9faac..13d0c00e1e 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -172,7 +172,8 @@ class MKLApply {
     using scalar_t = typename KernelHandle::nnz_scalar_t;
 
     const auto export_rowmap = [&](MKL_INT m, MKL_INT *rows_start,
-                                   MKL_INT *columns, scalar_t *values) {
+                                   MKL_INT * /*columns*/,
+                                   scalar_t * /*values*/) {
       if (handle->mkl_keep_output) {
         Kokkos::Timer copy_time;
         const nnz_lno_t nnz = rows_start[m];
@@ -204,7 +205,7 @@ class MKLApply {
       a_rowmap_view_type row_mapA, a_index_view_type entriesA,
       a_values_view_type valuesA, bool transposeA, b_rowmap_view_type row_mapB,
       b_index_view_type entriesB, b_values_view_type valuesB, bool transposeB,
-      c_rowmap_view_type row_mapC, c_index_view_type entriesC,
+      c_rowmap_view_type /* row_mapC */, c_index_view_type entriesC,
       c_values_view_type valuesC, bool verbose = false) {
     Kokkos::Timer timer;
 
@@ -234,9 +235,9 @@ class MKLApply {
 
  private:
   template <typename CB>
-  static void apply(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k,
-                    a_rowmap_view_type row_mapA, a_index_view_type entriesA,
-                    a_values_view_type valuesA,
+  static void apply(KernelHandle * /* handle */, nnz_lno_t m, nnz_lno_t n,
+                    nnz_lno_t k, a_rowmap_view_type row_mapA,
+                    a_index_view_type entriesA, a_values_view_type valuesA,
 
                     bool transposeA, b_rowmap_view_type row_mapB,
                     b_index_view_type entriesB, b_values_view_type valuesB,
@@ -362,6 +363,18 @@ void mkl_symbolic(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k,
                   c_rowmap_type row_mapC, bool verbose = false) {
 #ifndef KOKKOSKERNELS_ENABLE_TPL_MKL
   throw std::runtime_error("MKL was not enabled in this build!");
+  (void)handle;
+  (void)m;
+  (void)n;
+  (void)k;
+  (void)row_mapA;
+  (void)entriesA;
+  (void)transposeA;
+  (void)row_mapB;
+  (void)entriesB;
+  (void)transposeB;
+  (void)row_mapC;
+  (void)verbose;
 #else
   using values_type  = typename KernelHandle::scalar_temp_work_view_t;
   using c_index_type = b_index_type;
@@ -386,6 +399,22 @@ void mkl_apply(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k,
                c_values_type valuesC, bool verbose = false) {
 #ifndef KOKKOSKERNELS_ENABLE_TPL_MKL
   throw std::runtime_error("MKL was not enabled in this build!");
+  (void)handle;
+  (void)m;
+  (void)n;
+  (void)k;
+  (void)row_mapA;
+  (void)entriesA;
+  (void)valuesA;
+  (void)transposeA;
+  (void)row_mapB;
+  (void)entriesB;
+  (void)valuesB;
+  (void)transposeB;
+  (void)row_mapC;
+  (void)entriesC;
+  (void)valuesC;
+  (void)verbose;
 #else
   using mkl = MKLApply<KernelHandle, a_rowmap_type, a_index_type, a_values_type,
                        b_rowmap_type, b_index_type, b_values_type,

From 047267c0a2394be089154ea2453ba8b467cdaba8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Wed, 2 Feb 2022 21:51:30 +0100
Subject: [PATCH 06/19] Fix name shadowing

---
 src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index 13d0c00e1e..e6babd1a30 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -171,14 +171,14 @@ class MKLApply {
     Kokkos::Timer timer;
     using scalar_t = typename KernelHandle::nnz_scalar_t;
 
-    const auto export_rowmap = [&](MKL_INT m, MKL_INT *rows_start,
+    const auto export_rowmap = [&](MKL_INT num_rows, MKL_INT *rows_start,
                                    MKL_INT * /*columns*/,
                                    scalar_t * /*values*/) {
       if (handle->mkl_keep_output) {
         Kokkos::Timer copy_time;
-        const nnz_lno_t nnz = rows_start[m];
+        const nnz_lno_t nnz = rows_start[num_rows];
         handle->set_c_nnz(nnz);
-        copy(make_host_view(rows_start, m + 1), row_mapC);
+        copy(make_host_view(rows_start, num_rows + 1), row_mapC);
         if (verbose)
           std::cout << "\tMKL rowmap export time:" << copy_time.seconds()
                     << std::endl;
@@ -210,11 +210,11 @@ class MKLApply {
     Kokkos::Timer timer;
 
     const auto export_values =
-        [&](MKL_INT m, MKL_INT *rows_start, MKL_INT *columns,
+        [&](MKL_INT num_rows, MKL_INT *rows_start, MKL_INT *columns,
             typename KernelHandle::nnz_scalar_t *values) {
           if (handle->mkl_keep_output) {
             Kokkos::Timer copy_time;
-            const nnz_lno_t nnz = rows_start[m];
+            const nnz_lno_t nnz = rows_start[num_rows];
             copy(make_host_view(columns, nnz), entriesC);
             copy(make_host_view(values, nnz), valuesC);
             if (verbose)

From 850db252d3e5be106e3c9acfcae44f978284c87a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Wed, 2 Feb 2022 21:51:58 +0100
Subject: [PATCH 07/19] Remove unnecessary fence

---
 src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index e6babd1a30..4f73703065 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -341,7 +341,6 @@ class MKLApply {
     auto h_to = Kokkos::create_mirror_view(Kokkos::HostSpace(), to);
     Kokkos::deep_copy(h_to, h_from);  // view copy (for different element types)
     Kokkos::deep_copy(to, h_to);
-    Kokkos::fence();
   }
 
   template <typename T>

From 62f0549de7aab3e7e7d1924c2dbfe276c24373a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Wed, 2 Feb 2022 21:52:30 +0100
Subject: [PATCH 08/19] Clean up make_host_view()

---
 src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index 4f73703065..9770465eb3 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -343,11 +343,10 @@ class MKLApply {
     Kokkos::deep_copy(to, h_to);
   }
 
-  template <typename T>
-  inline static decltype(auto) make_host_view(const T *data, size_t num_elems) {
-    using device_type =
-        Kokkos::Device<Kokkos::DefaultHostExecutionSpace, Kokkos::HostSpace>;
-    return Kokkos::View<const T *, Kokkos::HostSpace>(data, num_elems);
+  template <typename T,
+            typename view_type = Kokkos::View<const T *, Kokkos::HostSpace>>
+  inline static view_type make_host_view(const T *data, size_t num_elems) {
+    return view_type(data, num_elems);
   }
 };
 #endif  // KOKKOSKERNELS_ENABLE_TPL_MKL

From 146fcfe649228fdad5950a573bf1002e6bfaf6d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Wed, 2 Feb 2022 22:01:18 +0100
Subject: [PATCH 09/19] Rename get() to export_data()

---
 .../impl/KokkosSparse_spgemm_mkl_impl.hpp     | 33 +++++++++++--------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index 9770465eb3..d0b36c2a50 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -77,8 +77,9 @@ class MKLSparseMatrix {
     return MKLSparseMatrix<value_type>(c);
   }
 
-  inline void get(MKL_INT &rows, MKL_INT &cols, MKL_INT *&rows_start,
-                  MKL_INT *&columns, value_type *&values);
+  inline void export_data(MKL_INT &num_rows, MKL_INT &num_cols,
+                          MKL_INT *&rows_start, MKL_INT *&columns,
+                          value_type *&values);
 
   inline void destroy() {
     mkl_call(mkl_sparse_destroy(mtx), "mkl_sparse_destroy() failed!");
@@ -109,13 +110,15 @@ inline MKLSparseMatrix<double>::MKLSparseMatrix(const MKL_INT rows,
 }
 
 template <>
-inline void MKLSparseMatrix<float>::get(MKL_INT &rows, MKL_INT &cols,
-                                        MKL_INT *&rows_start, MKL_INT *&columns,
-                                        float *&values) {
+inline void MKLSparseMatrix<float>::export_data(MKL_INT &num_rows,
+                                                MKL_INT &num_cols,
+                                                MKL_INT *&rows_start,
+                                                MKL_INT *&columns,
+                                                float *&values) {
   sparse_index_base_t indexing;
   MKL_INT *rows_end;
-  mkl_call(mkl_sparse_s_export_csr(mtx, &indexing, &rows, &cols, &rows_start,
-                                   &rows_end, &columns, &values),
+  mkl_call(mkl_sparse_s_export_csr(mtx, &indexing, &num_rows, &num_cols,
+                                   &rows_start, &rows_end, &columns, &values),
            "Failed to export matrix with mkl_sparse_s_export_csr()!");
   if (SPARSE_INDEX_BASE_ZERO != indexing) {
     throw std::runtime_error(
@@ -125,13 +128,15 @@ inline void MKLSparseMatrix<float>::get(MKL_INT &rows, MKL_INT &cols,
 }
 
 template <>
-inline void MKLSparseMatrix<double>::get(MKL_INT &rows, MKL_INT &cols,
-                                         MKL_INT *&rows_start,
-                                         MKL_INT *&columns, double *&values) {
+inline void MKLSparseMatrix<double>::export_data(MKL_INT &num_rows,
+                                                 MKL_INT &num_cols,
+                                                 MKL_INT *&rows_start,
+                                                 MKL_INT *&columns,
+                                                 double *&values) {
   sparse_index_base_t indexing;
   MKL_INT *rows_end;
-  mkl_call(mkl_sparse_d_export_csr(mtx, &indexing, &rows, &cols, &rows_start,
-                                   &rows_end, &columns, &values),
+  mkl_call(mkl_sparse_d_export_csr(mtx, &indexing, &num_rows, &num_cols,
+                                   &rows_start, &rows_end, &columns, &values),
            "Failed to export matrix with mkl_sparse_s_export_csr()!");
   if (SPARSE_INDEX_BASE_ZERO != indexing) {
     throw std::runtime_error(
@@ -324,9 +329,9 @@ class MKLApply {
       std::cout << ") time:" << timer1.seconds() << std::endl;
     }
 
-    MKL_INT c_rows, c_cols, *rows_start, *columns;
+    MKL_INT num_rows, num_cols, *rows_start, *columns;
     value_type *values;
-    C.get(c_rows, c_cols, rows_start, columns, values);
+    C.export_data(num_rows, num_cols, rows_start, columns, values);
     callback(m, rows_start, columns, values);
 
     A.destroy();

From 102eb6f44865510fbd3d831fd4316c68538e4a55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Thu, 17 Feb 2022 13:25:24 +0100
Subject: [PATCH 10/19] Fix -Wunused-parameter errors

---
 .../impl/KokkosSparse_spgemm_mkl2phase_impl.hpp    | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp
index 5715c7f098..90c35dbaf8 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp
@@ -302,6 +302,11 @@ void mkl2phase_symbolic(
     (void)transposeA;
     (void)transposeB;
     (void)verbose;
+    (void)a_xadj;
+    (void)b_xadj;
+    (void)c_xadj;
+    (void)a_adj;
+    (void)b_adj;
 #endif
 
   } else {
@@ -351,9 +356,7 @@ void mkl2phase_apply(
       typename KernelHandle::HandlePersistentMemorySpace;
   using int_persistent_work_view_t =
       typename Kokkos::View<int *, HandlePersistentMemorySpace>;
-  using MyExecSpace = typename KernelHandle::HandleExecSpace;
-  using value_type  = typename KernelHandle::nnz_scalar_t;
-  using idx         = typename KernelHandle::nnz_lno_t;
+  using idx = typename KernelHandle::nnz_lno_t;
 
   if (std::is_same<idx, int>::value) {
     int *a_xadj = (int *)row_mapA.data();
@@ -639,6 +642,11 @@ void mkl2phase_apply(
     (void)transposeA;
     (void)transposeB;
     (void)verbose;
+    (void)a_xadj;
+    (void)b_xadj;
+    (void)c_xadj;
+    (void)a_adj;
+    (void)b_adj;
 #endif  // __INTEL_MKL__ == 2018 && __INTEL_MKL_UPDATE__ >= 2
   } else {
     (void)m;

From 67a603d0b5808e63070b3568bb7ee67bbf85b06a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Thu, 17 Feb 2022 13:48:53 +0100
Subject: [PATCH 11/19] Gather MKL utilities within dedicated header

---
 perf_test/sparse/KokkosSparse_spadd.cpp       | 30 ++-----
 src/common/KokkosKernels_SparseUtils_mkl.hpp  | 87 +++++++++++++++++++
 ...kosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp | 54 ++++--------
 .../tpls/KokkosSparse_spmv_tpl_spec_decl.hpp  | 38 ++------
 .../impl/KokkosSparse_spgemm_mkl_impl.hpp     | 38 ++++----
 5 files changed, 137 insertions(+), 110 deletions(-)
 create mode 100644 src/common/KokkosKernels_SparseUtils_mkl.hpp

diff --git a/perf_test/sparse/KokkosSparse_spadd.cpp b/perf_test/sparse/KokkosSparse_spadd.cpp
index 7b0bd42d2a..49034930e6 100644
--- a/perf_test/sparse/KokkosSparse_spadd.cpp
+++ b/perf_test/sparse/KokkosSparse_spadd.cpp
@@ -47,6 +47,7 @@
 #include "KokkosKernels_Handle.hpp"
 #include "KokkosKernels_IOUtils.hpp"
 #include "KokkosKernels_SparseUtils_cusparse.hpp"
+#include "KokkosKernels_SparseUtils_mkl.hpp"
 #include "KokkosSparse_spadd.hpp"
 #include "KokkosKernels_TestUtils.hpp"
 
@@ -57,21 +58,6 @@
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
 #include <mkl.h>
 #include <mkl_spblas.h>
-
-inline void spadd_mkl_internal_safe_call(sparse_status_t mklStatus,
-                                         const char* name,
-                                         const char* file = nullptr,
-                                         const int line   = 0) {
-  if (SPARSE_STATUS_SUCCESS != mklStatus) {
-    std::ostringstream oss;
-    oss << "MKL call \"" << name << "\" encountered error at " << file << ":"
-        << line << '\n';
-    Kokkos::abort(oss.str().c_str());
-  }
-}
-
-#define SPADD_MKL_SAFE_CALL(call) \
-  spadd_mkl_internal_safe_call(call, #call, __FILE__, __LINE__)
 #endif
 
 #if defined(KOKKOSKERNELS_INST_DOUBLE) &&     \
@@ -259,11 +245,11 @@ void run_experiment(const Params& params) {
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
   sparse_matrix_t Amkl, Bmkl, Cmkl;
   if (params.use_mkl) {
-    SPADD_MKL_SAFE_CALL(mkl_sparse_d_create_csr(
+    MKL_SAFE_CALL(mkl_sparse_d_create_csr(
         &Amkl, SPARSE_INDEX_BASE_ZERO, m, n, (int*)A.graph.row_map.data(),
         (int*)A.graph.row_map.data() + 1, A.graph.entries.data(),
         A.values.data()));
-    SPADD_MKL_SAFE_CALL(mkl_sparse_d_create_csr(
+    MKL_SAFE_CALL(mkl_sparse_d_create_csr(
         &Bmkl, SPARSE_INDEX_BASE_ZERO, m, n, (int*)B.graph.row_map.data(),
         (int*)B.graph.row_map.data() + 1, B.graph.entries.data(),
         B.values.data()));
@@ -326,9 +312,9 @@ void run_experiment(const Params& params) {
 #endif
       } else if (params.use_mkl) {
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
-        SPADD_MKL_SAFE_CALL(mkl_sparse_d_add(SPARSE_OPERATION_NON_TRANSPOSE,
-                                             Amkl, 1.0, Bmkl, &Cmkl));
-        SPADD_MKL_SAFE_CALL(mkl_sparse_destroy(Cmkl));
+        MKL_SAFE_CALL(mkl_sparse_d_add(SPARSE_OPERATION_NON_TRANSPOSE, Amkl,
+                                       1.0, Bmkl, &Cmkl));
+        MKL_SAFE_CALL(mkl_sparse_destroy(Cmkl));
 #endif
       } else {
         spadd_numeric(
@@ -351,8 +337,8 @@ void run_experiment(const Params& params) {
 
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
   if (params.use_mkl) {
-    SPADD_MKL_SAFE_CALL(mkl_sparse_destroy(Amkl));
-    SPADD_MKL_SAFE_CALL(mkl_sparse_destroy(Bmkl));
+    MKL_SAFE_CALL(mkl_sparse_destroy(Amkl));
+    MKL_SAFE_CALL(mkl_sparse_destroy(Bmkl));
   }
 #endif
 
diff --git a/src/common/KokkosKernels_SparseUtils_mkl.hpp b/src/common/KokkosKernels_SparseUtils_mkl.hpp
new file mode 100644
index 0000000000..7085851092
--- /dev/null
+++ b/src/common/KokkosKernels_SparseUtils_mkl.hpp
@@ -0,0 +1,87 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Siva Rajamanickam (srajama@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef _KOKKOSKERNELS_SPARSEUTILS_MKL_HPP
+#define _KOKKOSKERNELS_SPARSEUTILS_MKL_HPP
+
+#include "KokkosKernels_config.h"
+
+#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
+
+#include <mkl.h>
+
+namespace KokkosSparse {
+namespace Impl {
+
+inline void mkl_internal_safe_call(sparse_status_t mkl_status, const char *name,
+                                   const char *file = nullptr,
+                                   const int line   = 0) {
+  if (SPARSE_STATUS_SUCCESS != mkl_status) {
+    std::ostringstream oss;
+    oss << "MKL call \"" << name << "\" encountered error at " << file << ":"
+        << line << '\n';
+    Kokkos::abort(oss.str().c_str());
+  }
+}
+
+#define MKL_SAFE_CALL(call) \
+  KokkosSparse::Impl::mkl_internal_safe_call(call, #call, __FILE__, __LINE__)
+
+inline sparse_operation_t mode_kk_to_mkl(char mode_kk) {
+  switch (toupper(mode_kk)) {
+    case 'N': return SPARSE_OPERATION_NON_TRANSPOSE;
+    case 'T': return SPARSE_OPERATION_TRANSPOSE;
+    case 'H': return SPARSE_OPERATION_CONJUGATE_TRANSPOSE;
+    default:;
+  }
+  throw std::invalid_argument(
+      "Invalid mode for MKL (should be one of N, T, H)");
+}
+
+}  // namespace Impl
+}  // namespace KokkosSparse
+
+#endif  // KOKKOSKERNELS_ENABLE_TPL_MKL
+
+#endif  // _KOKKOSKERNELS_SPARSEUTILS_MKL_HPP
\ No newline at end of file
diff --git a/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp b/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp
index a6eec44449..d3c15e0267 100644
--- a/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp
+++ b/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp
@@ -46,6 +46,7 @@
 #define KOKKOSKERNELS_KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_DECL_HPP
 
 #include "KokkosKernels_Controls.hpp"
+#include "KokkosKernels_SparseUtils_mkl.hpp"
 
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
 #include <mkl.h>
@@ -57,26 +58,7 @@ namespace Impl {
 #if (__INTEL_MKL__ > 2017)
 // MKL 2018 and above: use new interface: sparse_matrix_t and mkl_sparse_?_mv()
 
-namespace BSR {
-inline void mkl_safe_call(int errcode) {
-  if (errcode != SPARSE_STATUS_SUCCESS)
-    throw std::runtime_error("MKL returned non-success error code");
-}
-
-inline sparse_operation_t mode_kk_to_mkl(char mode_kk) {
-  switch (toupper(mode_kk)) {
-    case 'N': return SPARSE_OPERATION_NON_TRANSPOSE;
-    case 'T': return SPARSE_OPERATION_TRANSPOSE;
-    case 'H': return SPARSE_OPERATION_CONJUGATE_TRANSPOSE;
-    default:;
-  }
-  throw std::invalid_argument(
-      "Invalid mode for MKL (should be one of N, T, H)");
-}
-}  // namespace BSR
-
-using BSR::mkl_safe_call;
-using BSR::mode_kk_to_mkl;
+using KokkosSparse::Impl::mode_kk_to_mkl;
 
 inline matrix_descr getDescription() {
   matrix_descr A_descr;
@@ -91,13 +73,13 @@ inline void spmv_block_impl_mkl(sparse_operation_t op, float alpha, float beta,
                                 const int* Aentries, const float* Avalues,
                                 const float* x, float* y) {
   sparse_matrix_t A_mkl;
-  mkl_safe_call(mkl_sparse_s_create_bsr(
+  MKL_SAFE_CALL(mkl_sparse_s_create_bsr(
       &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b,
       const_cast<int*>(Arowptrs), const_cast<int*>(Arowptrs + 1),
       const_cast<int*>(Aentries), const_cast<float*>(Avalues)));
 
   matrix_descr A_descr = getDescription();
-  mkl_safe_call(mkl_sparse_s_mv(op, alpha, A_mkl, A_descr, x, beta, y));
+  MKL_SAFE_CALL(mkl_sparse_s_mv(op, alpha, A_mkl, A_descr, x, beta, y));
 }
 
 inline void spmv_block_impl_mkl(sparse_operation_t op, double alpha,
@@ -106,13 +88,13 @@ inline void spmv_block_impl_mkl(sparse_operation_t op, double alpha,
                                 const double* Avalues, const double* x,
                                 double* y) {
   sparse_matrix_t A_mkl;
-  mkl_safe_call(mkl_sparse_d_create_bsr(
+  MKL_SAFE_CALL(mkl_sparse_d_create_bsr(
       &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b,
       const_cast<int*>(Arowptrs), const_cast<int*>(Arowptrs + 1),
       const_cast<int*>(Aentries), const_cast<double*>(Avalues)));
 
   matrix_descr A_descr = getDescription();
-  mkl_safe_call(mkl_sparse_d_mv(op, alpha, A_mkl, A_descr, x, beta, y));
+  MKL_SAFE_CALL(mkl_sparse_d_mv(op, alpha, A_mkl, A_descr, x, beta, y));
 }
 
 inline void spmv_block_impl_mkl(sparse_operation_t op,
@@ -123,7 +105,7 @@ inline void spmv_block_impl_mkl(sparse_operation_t op,
                                 const Kokkos::complex<float>* x,
                                 Kokkos::complex<float>* y) {
   sparse_matrix_t A_mkl;
-  mkl_safe_call(mkl_sparse_c_create_bsr(
+  MKL_SAFE_CALL(mkl_sparse_c_create_bsr(
       &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b,
       const_cast<int*>(Arowptrs), const_cast<int*>(Arowptrs + 1),
       const_cast<int*>(Aentries), (MKL_Complex8*)Avalues));
@@ -131,7 +113,7 @@ inline void spmv_block_impl_mkl(sparse_operation_t op,
   MKL_Complex8& alpha_mkl = reinterpret_cast<MKL_Complex8&>(alpha);
   MKL_Complex8& beta_mkl  = reinterpret_cast<MKL_Complex8&>(beta);
   matrix_descr A_descr    = getDescription();
-  mkl_safe_call(mkl_sparse_c_mv(op, alpha_mkl, A_mkl, A_descr,
+  MKL_SAFE_CALL(mkl_sparse_c_mv(op, alpha_mkl, A_mkl, A_descr,
                                 reinterpret_cast<const MKL_Complex8*>(x),
                                 beta_mkl, reinterpret_cast<MKL_Complex8*>(y)));
 }
@@ -144,7 +126,7 @@ inline void spmv_block_impl_mkl(sparse_operation_t op,
                                 const Kokkos::complex<double>* x,
                                 Kokkos::complex<double>* y) {
   sparse_matrix_t A_mkl;
-  mkl_safe_call(mkl_sparse_z_create_bsr(
+  MKL_SAFE_CALL(mkl_sparse_z_create_bsr(
       &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b,
       const_cast<int*>(Arowptrs), const_cast<int*>(Arowptrs + 1),
       const_cast<int*>(Aentries), (MKL_Complex16*)Avalues));
@@ -152,7 +134,7 @@ inline void spmv_block_impl_mkl(sparse_operation_t op,
   matrix_descr A_descr     = getDescription();
   MKL_Complex16& alpha_mkl = reinterpret_cast<MKL_Complex16&>(alpha);
   MKL_Complex16& beta_mkl  = reinterpret_cast<MKL_Complex16&>(beta);
-  mkl_safe_call(mkl_sparse_z_mv(op, alpha_mkl, A_mkl, A_descr,
+  MKL_SAFE_CALL(mkl_sparse_z_mv(op, alpha_mkl, A_mkl, A_descr,
                                 reinterpret_cast<const MKL_Complex16*>(x),
                                 beta_mkl, reinterpret_cast<MKL_Complex16*>(y)));
 }
@@ -163,13 +145,13 @@ inline void spm_mv_block_impl_mkl(sparse_operation_t op, float alpha,
                                   const float* Avalues, const float* x,
                                   int colx, int ldx, float* y, int ldy) {
   sparse_matrix_t A_mkl;
-  mkl_safe_call(mkl_sparse_s_create_bsr(
+  MKL_SAFE_CALL(mkl_sparse_s_create_bsr(
       &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b,
       const_cast<int*>(Arowptrs), const_cast<int*>(Arowptrs + 1),
       const_cast<int*>(Aentries), const_cast<float*>(Avalues)));
 
   matrix_descr A_descr = getDescription();
-  mkl_safe_call(mkl_sparse_s_mm(op, alpha, A_mkl, A_descr,
+  MKL_SAFE_CALL(mkl_sparse_s_mm(op, alpha, A_mkl, A_descr,
                                 SPARSE_LAYOUT_ROW_MAJOR, x, colx, ldx, beta, y,
                                 ldy));
 }
@@ -180,13 +162,13 @@ inline void spm_mv_block_impl_mkl(sparse_operation_t op, double alpha,
                                   const double* Avalues, const double* x,
                                   int colx, int ldx, double* y, int ldy) {
   sparse_matrix_t A_mkl;
-  mkl_safe_call(mkl_sparse_d_create_bsr(
+  MKL_SAFE_CALL(mkl_sparse_d_create_bsr(
       &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b,
       const_cast<int*>(Arowptrs), const_cast<int*>(Arowptrs + 1),
       const_cast<int*>(Aentries), const_cast<double*>(Avalues)));
 
   matrix_descr A_descr = getDescription();
-  mkl_safe_call(mkl_sparse_d_mm(op, alpha, A_mkl, A_descr,
+  MKL_SAFE_CALL(mkl_sparse_d_mm(op, alpha, A_mkl, A_descr,
                                 SPARSE_LAYOUT_ROW_MAJOR, x, colx, ldx, beta, y,
                                 ldy));
 }
@@ -200,7 +182,7 @@ inline void spm_mv_block_impl_mkl(sparse_operation_t op,
                                   const Kokkos::complex<float>* x, int colx,
                                   int ldx, Kokkos::complex<float>* y, int ldy) {
   sparse_matrix_t A_mkl;
-  mkl_safe_call(mkl_sparse_c_create_bsr(
+  MKL_SAFE_CALL(mkl_sparse_c_create_bsr(
       &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b,
       const_cast<int*>(Arowptrs), const_cast<int*>(Arowptrs + 1),
       const_cast<int*>(Aentries), (MKL_Complex8*)Avalues));
@@ -208,7 +190,7 @@ inline void spm_mv_block_impl_mkl(sparse_operation_t op,
   MKL_Complex8& alpha_mkl = reinterpret_cast<MKL_Complex8&>(alpha);
   MKL_Complex8& beta_mkl  = reinterpret_cast<MKL_Complex8&>(beta);
   matrix_descr A_descr    = getDescription();
-  mkl_safe_call(
+  MKL_SAFE_CALL(
       mkl_sparse_c_mm(op, alpha_mkl, A_mkl, A_descr, SPARSE_LAYOUT_ROW_MAJOR,
                       reinterpret_cast<const MKL_Complex8*>(x), colx, ldx,
                       beta_mkl, reinterpret_cast<MKL_Complex8*>(y), ldy));
@@ -221,7 +203,7 @@ inline void spm_mv_block_impl_mkl(
     const Kokkos::complex<double>* x, int colx, int ldx,
     Kokkos::complex<double>* y, int ldy) {
   sparse_matrix_t A_mkl;
-  mkl_safe_call(mkl_sparse_z_create_bsr(
+  MKL_SAFE_CALL(mkl_sparse_z_create_bsr(
       &A_mkl, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_ROW_MAJOR, m, n, b,
       const_cast<int*>(Arowptrs), const_cast<int*>(Arowptrs + 1),
       const_cast<int*>(Aentries), (MKL_Complex16*)Avalues));
@@ -229,7 +211,7 @@ inline void spm_mv_block_impl_mkl(
   matrix_descr A_descr     = getDescription();
   MKL_Complex16& alpha_mkl = reinterpret_cast<MKL_Complex16&>(alpha);
   MKL_Complex16& beta_mkl  = reinterpret_cast<MKL_Complex16&>(beta);
-  mkl_safe_call(
+  MKL_SAFE_CALL(
       mkl_sparse_z_mm(op, alpha_mkl, A_mkl, A_descr, SPARSE_LAYOUT_ROW_MAJOR,
                       reinterpret_cast<const MKL_Complex16*>(x), colx, ldx,
                       beta_mkl, reinterpret_cast<MKL_Complex16*>(y), ldy));
diff --git a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp
index 17a72b2ad3..bacc749840 100644
--- a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp
+++ b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp
@@ -530,6 +530,7 @@ KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex<float>, Kokkos::LayoutRight,
 
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
 #include <mkl.h>
+#include "KokkosKernels_SparseUtils_mkl.hpp"
 
 namespace KokkosSparse {
 namespace Impl {
@@ -537,27 +538,6 @@ namespace Impl {
 #if (__INTEL_MKL__ > 2017)
 // MKL 2018 and above: use new interface: sparse_matrix_t and mkl_sparse_?_mv()
 
-// Note 12/03/21 - lbv:
-// mkl_safe_call and mode_kk_to_mkl should
-// be moved to some sparse or mkl utility
-// header. It is likely that these will be
-// reused for other kernels.
-inline void mkl_safe_call(int errcode) {
-  if (errcode != SPARSE_STATUS_SUCCESS)
-    throw std::runtime_error("MKL returned non-success error code");
-}
-
-inline sparse_operation_t mode_kk_to_mkl(char mode_kk) {
-  switch (toupper(mode_kk)) {
-    case 'N': return SPARSE_OPERATION_NON_TRANSPOSE;
-    case 'T': return SPARSE_OPERATION_TRANSPOSE;
-    case 'H': return SPARSE_OPERATION_CONJUGATE_TRANSPOSE;
-    default:;
-  }
-  throw std::invalid_argument(
-      "Invalid mode for MKL (should be one of N, T, H)");
-}
-
 inline void spmv_mkl(sparse_operation_t op, float alpha, float beta, int m,
                      int n, const int* Arowptrs, const int* Aentries,
                      const float* Avalues, const float* x, float* y) {
@@ -566,11 +546,11 @@ inline void spmv_mkl(sparse_operation_t op, float alpha, float beta, int m,
   A_descr.type = SPARSE_MATRIX_TYPE_GENERAL;
   A_descr.mode = SPARSE_FILL_MODE_FULL;
   A_descr.diag = SPARSE_DIAG_NON_UNIT;
-  mkl_safe_call(mkl_sparse_s_create_csr(
+  MKL_SAFE_CALL(mkl_sparse_s_create_csr(
       &A_mkl, SPARSE_INDEX_BASE_ZERO, m, n, const_cast<int*>(Arowptrs),
       const_cast<int*>(Arowptrs + 1), const_cast<int*>(Aentries),
       const_cast<float*>(Avalues)));
-  mkl_safe_call(mkl_sparse_s_mv(op, alpha, A_mkl, A_descr, x, beta, y));
+  MKL_SAFE_CALL(mkl_sparse_s_mv(op, alpha, A_mkl, A_descr, x, beta, y));
 }
 
 inline void spmv_mkl(sparse_operation_t op, double alpha, double beta, int m,
@@ -581,11 +561,11 @@ inline void spmv_mkl(sparse_operation_t op, double alpha, double beta, int m,
   A_descr.type = SPARSE_MATRIX_TYPE_GENERAL;
   A_descr.mode = SPARSE_FILL_MODE_FULL;
   A_descr.diag = SPARSE_DIAG_NON_UNIT;
-  mkl_safe_call(mkl_sparse_d_create_csr(
+  MKL_SAFE_CALL(mkl_sparse_d_create_csr(
       &A_mkl, SPARSE_INDEX_BASE_ZERO, m, n, const_cast<int*>(Arowptrs),
       const_cast<int*>(Arowptrs + 1), const_cast<int*>(Aentries),
       const_cast<double*>(Avalues)));
-  mkl_safe_call(mkl_sparse_d_mv(op, alpha, A_mkl, A_descr, x, beta, y));
+  MKL_SAFE_CALL(mkl_sparse_d_mv(op, alpha, A_mkl, A_descr, x, beta, y));
 }
 
 inline void spmv_mkl(sparse_operation_t op, Kokkos::complex<float> alpha,
@@ -599,13 +579,13 @@ inline void spmv_mkl(sparse_operation_t op, Kokkos::complex<float> alpha,
   A_descr.type = SPARSE_MATRIX_TYPE_GENERAL;
   A_descr.mode = SPARSE_FILL_MODE_FULL;
   A_descr.diag = SPARSE_DIAG_NON_UNIT;
-  mkl_safe_call(mkl_sparse_c_create_csr(
+  MKL_SAFE_CALL(mkl_sparse_c_create_csr(
       &A_mkl, SPARSE_INDEX_BASE_ZERO, m, n, const_cast<int*>(Arowptrs),
       const_cast<int*>(Arowptrs + 1), const_cast<int*>(Aentries),
       (MKL_Complex8*)Avalues));
   MKL_Complex8& alpha_mkl = reinterpret_cast<MKL_Complex8&>(alpha);
   MKL_Complex8& beta_mkl  = reinterpret_cast<MKL_Complex8&>(beta);
-  mkl_safe_call(mkl_sparse_c_mv(op, alpha_mkl, A_mkl, A_descr,
+  MKL_SAFE_CALL(mkl_sparse_c_mv(op, alpha_mkl, A_mkl, A_descr,
                                 reinterpret_cast<const MKL_Complex8*>(x),
                                 beta_mkl, reinterpret_cast<MKL_Complex8*>(y)));
 }
@@ -621,13 +601,13 @@ inline void spmv_mkl(sparse_operation_t op, Kokkos::complex<double> alpha,
   A_descr.type = SPARSE_MATRIX_TYPE_GENERAL;
   A_descr.mode = SPARSE_FILL_MODE_FULL;
   A_descr.diag = SPARSE_DIAG_NON_UNIT;
-  mkl_safe_call(mkl_sparse_z_create_csr(
+  MKL_SAFE_CALL(mkl_sparse_z_create_csr(
       &A_mkl, SPARSE_INDEX_BASE_ZERO, m, n, const_cast<int*>(Arowptrs),
       const_cast<int*>(Arowptrs + 1), const_cast<int*>(Aentries),
       (MKL_Complex16*)Avalues));
   MKL_Complex16& alpha_mkl = reinterpret_cast<MKL_Complex16&>(alpha);
   MKL_Complex16& beta_mkl  = reinterpret_cast<MKL_Complex16&>(beta);
-  mkl_safe_call(mkl_sparse_z_mv(op, alpha_mkl, A_mkl, A_descr,
+  MKL_SAFE_CALL(mkl_sparse_z_mv(op, alpha_mkl, A_mkl, A_descr,
                                 reinterpret_cast<const MKL_Complex16*>(x),
                                 beta_mkl, reinterpret_cast<MKL_Complex16*>(y)));
 }
diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index d0b36c2a50..50bf840e58 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -45,6 +45,9 @@
 #ifndef _KOKKOSSPGEMMMKL_HPP
 #define _KOKKOSSPGEMMMKL_HPP
 
+#include "KokkosKernels_config.h"
+#include "KokkosKernels_SparseUtils_mkl.hpp"
+
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
 #include "mkl_spblas.h"
 #endif
@@ -54,12 +57,6 @@ namespace Impl {
 
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
 
-inline void mkl_call(sparse_status_t result, const char *err_msg) {
-  if (SPARSE_STATUS_SUCCESS != result) {
-    throw std::runtime_error(err_msg);
-  }
-}
-
 template <typename value_type>
 class MKLSparseMatrix {
   sparse_matrix_t mtx;
@@ -72,8 +69,7 @@ class MKLSparseMatrix {
       sparse_operation_t operation, const MKLSparseMatrix<value_type> &A,
       const MKLSparseMatrix<value_type> &B) {
     sparse_matrix_t c;
-    mkl_call(mkl_sparse_spmm(operation, A.mtx, B.mtx, &c),
-             "mkl_sparse_spmm() failed!");
+    MKL_SAFE_CALL(mkl_sparse_spmm(operation, A.mtx, B.mtx, &c));
     return MKLSparseMatrix<value_type>(c);
   }
 
@@ -81,9 +77,7 @@ class MKLSparseMatrix {
                           MKL_INT *&rows_start, MKL_INT *&columns,
                           value_type *&values);
 
-  inline void destroy() {
-    mkl_call(mkl_sparse_destroy(mtx), "mkl_sparse_destroy() failed!");
-  }
+  inline void destroy() { MKL_SAFE_CALL(mkl_sparse_destroy(mtx)); }
 
  private:
   inline MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {}
@@ -94,9 +88,8 @@ inline MKLSparseMatrix<float>::MKLSparseMatrix(const MKL_INT rows,
                                                const MKL_INT cols,
                                                MKL_INT *xadj, MKL_INT *adj,
                                                float *values) {
-  mkl_call(mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols,
-                                   xadj, xadj + 1, adj, values),
-           "mkl_sparse_s_create_csr() failed!");
+  MKL_SAFE_CALL(mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows,
+                                        cols, xadj, xadj + 1, adj, values));
 }
 
 template <>
@@ -104,9 +97,8 @@ inline MKLSparseMatrix<double>::MKLSparseMatrix(const MKL_INT rows,
                                                 const MKL_INT cols,
                                                 MKL_INT *xadj, MKL_INT *adj,
                                                 double *values) {
-  mkl_call(mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows, cols,
-                                   xadj, xadj + 1, adj, values),
-           "mkl_sparse_d_create_csr() failed!");
+  MKL_SAFE_CALL(mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows,
+                                        cols, xadj, xadj + 1, adj, values));
 }
 
 template <>
@@ -117,9 +109,9 @@ inline void MKLSparseMatrix<float>::export_data(MKL_INT &num_rows,
                                                 float *&values) {
   sparse_index_base_t indexing;
   MKL_INT *rows_end;
-  mkl_call(mkl_sparse_s_export_csr(mtx, &indexing, &num_rows, &num_cols,
-                                   &rows_start, &rows_end, &columns, &values),
-           "Failed to export matrix with mkl_sparse_s_export_csr()!");
+  MKL_SAFE_CALL(mkl_sparse_s_export_csr(mtx, &indexing, &num_rows, &num_cols,
+                                        &rows_start, &rows_end, &columns,
+                                        &values));
   if (SPARSE_INDEX_BASE_ZERO != indexing) {
     throw std::runtime_error(
         "Expected zero based indexing in exported MKL sparse matrix\n");
@@ -135,9 +127,9 @@ inline void MKLSparseMatrix<double>::export_data(MKL_INT &num_rows,
                                                  double *&values) {
   sparse_index_base_t indexing;
   MKL_INT *rows_end;
-  mkl_call(mkl_sparse_d_export_csr(mtx, &indexing, &num_rows, &num_cols,
-                                   &rows_start, &rows_end, &columns, &values),
-           "Failed to export matrix with mkl_sparse_s_export_csr()!");
+  MKL_SAFE_CALL(mkl_sparse_d_export_csr(mtx, &indexing, &num_rows, &num_cols,
+                                        &rows_start, &rows_end, &columns,
+                                        &values));
   if (SPARSE_INDEX_BASE_ZERO != indexing) {
     throw std::runtime_error(
         "Expected zero based indexing in exported MKL sparse matrix\n");

From 05293435613e65e0a865e595b8b5c373424368eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Thu, 17 Feb 2022 14:51:27 +0100
Subject: [PATCH 12/19] Move MKLSparseMatrix to MKL utils header

---
 src/common/KokkosKernels_SparseUtils_mkl.hpp  | 79 +++++++++++++++++
 .../impl/KokkosSparse_spgemm_mkl_impl.hpp     | 86 ++-----------------
 2 files changed, 87 insertions(+), 78 deletions(-)

diff --git a/src/common/KokkosKernels_SparseUtils_mkl.hpp b/src/common/KokkosKernels_SparseUtils_mkl.hpp
index 7085851092..a2ab16fba9 100644
--- a/src/common/KokkosKernels_SparseUtils_mkl.hpp
+++ b/src/common/KokkosKernels_SparseUtils_mkl.hpp
@@ -79,6 +79,85 @@ inline sparse_operation_t mode_kk_to_mkl(char mode_kk) {
       "Invalid mode for MKL (should be one of N, T, H)");
 }
 
+// MKLSparseMatrix provides a thin wrapper around an MKL matrix handle
+// (sparse_matrix_t) and encapsulates MKL call dispatch that depends on details
+// like value_type, keeping client code in kernels simple.
+template <typename value_type>
+class MKLSparseMatrix {
+  sparse_matrix_t mtx;
+
+ public:
+  inline MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {}
+
+  // Constructs MKL sparse matrix from KK sparse views (num_rows x num_cols)
+  inline MKLSparseMatrix(const MKL_INT num_rows, const MKL_INT num_cols,
+                         MKL_INT *xadj, MKL_INT *adj, value_type *values);
+
+  // Allows using MKLSparseMatrix directly in MKL calls
+  inline operator sparse_matrix_t() const { return mtx; }
+
+  // Exports MKL sparse matrix contents into KK views
+  inline void export_data(MKL_INT &num_rows, MKL_INT &num_cols,
+                          MKL_INT *&rows_start, MKL_INT *&columns,
+                          value_type *&values);
+
+  inline void destroy() { MKL_SAFE_CALL(mkl_sparse_destroy(mtx)); }
+};
+
+template <>
+inline MKLSparseMatrix<float>::MKLSparseMatrix(const MKL_INT rows,
+                                               const MKL_INT cols,
+                                               MKL_INT *xadj, MKL_INT *adj,
+                                               float *values) {
+  MKL_SAFE_CALL(mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows,
+                                        cols, xadj, xadj + 1, adj, values));
+}
+
+template <>
+inline MKLSparseMatrix<double>::MKLSparseMatrix(const MKL_INT rows,
+                                                const MKL_INT cols,
+                                                MKL_INT *xadj, MKL_INT *adj,
+                                                double *values) {
+  MKL_SAFE_CALL(mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows,
+                                        cols, xadj, xadj + 1, adj, values));
+}
+
+template <>
+inline void MKLSparseMatrix<float>::export_data(MKL_INT &num_rows,
+                                                MKL_INT &num_cols,
+                                                MKL_INT *&rows_start,
+                                                MKL_INT *&columns,
+                                                float *&values) {
+  sparse_index_base_t indexing;
+  MKL_INT *rows_end;
+  MKL_SAFE_CALL(mkl_sparse_s_export_csr(mtx, &indexing, &num_rows, &num_cols,
+                                        &rows_start, &rows_end, &columns,
+                                        &values));
+  if (SPARSE_INDEX_BASE_ZERO != indexing) {
+    throw std::runtime_error(
+        "Expected zero based indexing in exported MKL sparse matrix\n");
+    return;
+  }
+}
+
+template <>
+inline void MKLSparseMatrix<double>::export_data(MKL_INT &num_rows,
+                                                 MKL_INT &num_cols,
+                                                 MKL_INT *&rows_start,
+                                                 MKL_INT *&columns,
+                                                 double *&values) {
+  sparse_index_base_t indexing;
+  MKL_INT *rows_end;
+  MKL_SAFE_CALL(mkl_sparse_d_export_csr(mtx, &indexing, &num_rows, &num_cols,
+                                        &rows_start, &rows_end, &columns,
+                                        &values));
+  if (SPARSE_INDEX_BASE_ZERO != indexing) {
+    throw std::runtime_error(
+        "Expected zero based indexing in exported MKL sparse matrix\n");
+    return;
+  }
+}
+
 }  // namespace Impl
 }  // namespace KokkosSparse
 
diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index 50bf840e58..3044b2c576 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -57,84 +57,14 @@ namespace Impl {
 
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
 
+// multiplies two sparse MKL matrices and returns sparse MKL matrix
 template <typename value_type>
-class MKLSparseMatrix {
-  sparse_matrix_t mtx;
-
- public:
-  inline MKLSparseMatrix(const MKL_INT m, const MKL_INT n, MKL_INT *xadj,
-                         MKL_INT *adj, value_type *values);
-
-  inline static MKLSparseMatrix<value_type> spmm(
-      sparse_operation_t operation, const MKLSparseMatrix<value_type> &A,
-      const MKLSparseMatrix<value_type> &B) {
-    sparse_matrix_t c;
-    MKL_SAFE_CALL(mkl_sparse_spmm(operation, A.mtx, B.mtx, &c));
-    return MKLSparseMatrix<value_type>(c);
-  }
-
-  inline void export_data(MKL_INT &num_rows, MKL_INT &num_cols,
-                          MKL_INT *&rows_start, MKL_INT *&columns,
-                          value_type *&values);
-
-  inline void destroy() { MKL_SAFE_CALL(mkl_sparse_destroy(mtx)); }
-
- private:
-  inline MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {}
-};
-
-template <>
-inline MKLSparseMatrix<float>::MKLSparseMatrix(const MKL_INT rows,
-                                               const MKL_INT cols,
-                                               MKL_INT *xadj, MKL_INT *adj,
-                                               float *values) {
-  MKL_SAFE_CALL(mkl_sparse_s_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows,
-                                        cols, xadj, xadj + 1, adj, values));
-}
-
-template <>
-inline MKLSparseMatrix<double>::MKLSparseMatrix(const MKL_INT rows,
-                                                const MKL_INT cols,
-                                                MKL_INT *xadj, MKL_INT *adj,
-                                                double *values) {
-  MKL_SAFE_CALL(mkl_sparse_d_create_csr(&mtx, SPARSE_INDEX_BASE_ZERO, rows,
-                                        cols, xadj, xadj + 1, adj, values));
-}
-
-template <>
-inline void MKLSparseMatrix<float>::export_data(MKL_INT &num_rows,
-                                                MKL_INT &num_cols,
-                                                MKL_INT *&rows_start,
-                                                MKL_INT *&columns,
-                                                float *&values) {
-  sparse_index_base_t indexing;
-  MKL_INT *rows_end;
-  MKL_SAFE_CALL(mkl_sparse_s_export_csr(mtx, &indexing, &num_rows, &num_cols,
-                                        &rows_start, &rows_end, &columns,
-                                        &values));
-  if (SPARSE_INDEX_BASE_ZERO != indexing) {
-    throw std::runtime_error(
-        "Expected zero based indexing in exported MKL sparse matrix\n");
-    return;
-  }
-}
-
-template <>
-inline void MKLSparseMatrix<double>::export_data(MKL_INT &num_rows,
-                                                 MKL_INT &num_cols,
-                                                 MKL_INT *&rows_start,
-                                                 MKL_INT *&columns,
-                                                 double *&values) {
-  sparse_index_base_t indexing;
-  MKL_INT *rows_end;
-  MKL_SAFE_CALL(mkl_sparse_d_export_csr(mtx, &indexing, &num_rows, &num_cols,
-                                        &rows_start, &rows_end, &columns,
-                                        &values));
-  if (SPARSE_INDEX_BASE_ZERO != indexing) {
-    throw std::runtime_error(
-        "Expected zero based indexing in exported MKL sparse matrix\n");
-    return;
-  }
+inline static MKLSparseMatrix<value_type> mkl_spmm(
+    sparse_operation_t operation, const MKLSparseMatrix<value_type> &A,
+    const MKLSparseMatrix<value_type> &B) {
+  sparse_matrix_t C;
+  MKL_SAFE_CALL(mkl_sparse_spmm(operation, A, B, &C));
+  return MKLSparseMatrix<value_type>(C);
 }
 
 template <typename KernelHandle, typename a_rowmap_view_type,
@@ -309,7 +239,7 @@ class MKLApply {
     }
 
     Kokkos::Timer timer1;
-    Matrix C = Matrix::spmm(operation, A, B);
+    Matrix C = mkl_spmm(operation, A, B);
     if (verbose) {
       std::cout << "\tMKL spmm (";
       if (std::is_same<float, value_type>::value)

From 3339c8deae2f350c4a71ef831508d93e72cbf23c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Thu, 17 Feb 2022 14:56:49 +0100
Subject: [PATCH 13/19] Rename "apply" into "spmm"

---
 .../impl/KokkosSparse_spgemm_mkl_impl.hpp     | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index 3044b2c576..43b2b5081b 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -72,7 +72,7 @@ template <typename KernelHandle, typename a_rowmap_view_type,
           typename b_rowmap_view_type, typename b_index_view_type,
           typename b_values_view_type, typename c_rowmap_view_type,
           typename c_index_view_type, typename c_values_view_type>
-class MKLApply {
+class MKL_SPMM {
  public:
   typedef typename KernelHandle::nnz_lno_t nnz_lno_t;
   typedef typename KernelHandle::size_type size_type;
@@ -120,8 +120,8 @@ class MKLApply {
         Kokkos::ViewAllocateWithoutInitializing("tmp_valuesB"),
         entriesB.extent(0));
 
-    apply(handle, m, n, k, row_mapA, entriesA, tmp_valsA, transposeA, row_mapB,
-          entriesB, tmp_valsB, transposeB, verbose, export_rowmap);
+    spmm(handle, m, n, k, row_mapA, entriesA, tmp_valsA, transposeA, row_mapB,
+         entriesB, tmp_valsB, transposeB, verbose, export_rowmap);
 
     if (verbose)
       std::cout << "MKL symbolic time:" << timer.seconds() << std::endl;
@@ -150,8 +150,8 @@ class MKLApply {
           }
         };
 
-    apply(handle, m, n, k, row_mapA, entriesA, valuesA, transposeA, row_mapB,
-          entriesB, valuesB, transposeB, verbose, export_values);
+    spmm(handle, m, n, k, row_mapA, entriesA, valuesA, transposeA, row_mapB,
+         entriesB, valuesB, transposeB, verbose, export_values);
 
     if (verbose)
       std::cout << "MKL numeric time:" << timer.seconds() << std::endl;
@@ -162,13 +162,13 @@ class MKLApply {
 
  private:
   template <typename CB>
-  static void apply(KernelHandle * /* handle */, nnz_lno_t m, nnz_lno_t n,
-                    nnz_lno_t k, a_rowmap_view_type row_mapA,
-                    a_index_view_type entriesA, a_values_view_type valuesA,
+  static void spmm(KernelHandle * /* handle */, nnz_lno_t m, nnz_lno_t n,
+                   nnz_lno_t k, a_rowmap_view_type row_mapA,
+                   a_index_view_type entriesA, a_values_view_type valuesA,
 
-                    bool transposeA, b_rowmap_view_type row_mapB,
-                    b_index_view_type entriesB, b_values_view_type valuesB,
-                    bool transposeB, bool verbose, const CB &callback) {
+                   bool transposeA, b_rowmap_view_type row_mapB,
+                   b_index_view_type entriesB, b_values_view_type valuesB,
+                   bool transposeB, bool verbose, const CB &callback) {
     if (!std::is_same<nnz_lno_t, int>::value) {
       throw std::runtime_error("MKL requires local ordinals to be integer.\n");
     }
@@ -303,7 +303,7 @@ void mkl_symbolic(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k,
 #else
   using values_type  = typename KernelHandle::scalar_temp_work_view_t;
   using c_index_type = b_index_type;
-  using mkl = MKLApply<KernelHandle, a_rowmap_type, a_index_type, values_type,
+  using mkl = MKL_SPMM<KernelHandle, a_rowmap_type, a_index_type, values_type,
                        b_rowmap_type, b_index_type, values_type, c_rowmap_type,
                        c_index_type, values_type>;
   mkl::mkl_symbolic(handle, m, n, k, row_mapA, entriesA, transposeA, row_mapB,
@@ -341,7 +341,7 @@ void mkl_apply(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k,
   (void)valuesC;
   (void)verbose;
 #else
-  using mkl = MKLApply<KernelHandle, a_rowmap_type, a_index_type, a_values_type,
+  using mkl = MKL_SPMM<KernelHandle, a_rowmap_type, a_index_type, a_values_type,
                        b_rowmap_type, b_index_type, b_values_type,
                        c_rowmap_type, c_index_type, c_values_type>;
   mkl::mkl_numeric(handle, m, n, k, row_mapA, entriesA, valuesA, transposeA,

From 8c8cbdf8b7cf6e508b7cd5f3587ff61f01e847de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Thu, 17 Feb 2022 14:59:14 +0100
Subject: [PATCH 14/19] Guard whole file with ENABLE_TPL_MKL

---
 .../impl/KokkosSparse_spgemm_mkl_impl.hpp     | 43 +------------------
 .../impl/KokkosSparse_spgemm_numeric_spec.hpp |  4 ++
 .../KokkosSparse_spgemm_symbolic_spec.hpp     |  4 ++
 3 files changed, 10 insertions(+), 41 deletions(-)

diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index 43b2b5081b..6c95e648e9 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -50,13 +50,10 @@
 
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
 #include "mkl_spblas.h"
-#endif
 
 namespace KokkosSparse {
 namespace Impl {
 
-#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
-
 // multiplies two sparse MKL matrices and returns sparse MKL matrix
 template <typename value_type>
 inline static MKLSparseMatrix<value_type> mkl_spmm(
@@ -276,7 +273,6 @@ class MKL_SPMM {
     return view_type(data, num_elems);
   }
 };
-#endif  // KOKKOSKERNELS_ENABLE_TPL_MKL
 
 template <typename KernelHandle, typename a_rowmap_type, typename a_index_type,
           typename b_rowmap_type, typename b_index_type, typename c_rowmap_type,
@@ -286,21 +282,6 @@ void mkl_symbolic(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k,
                   bool transposeA, b_rowmap_type row_mapB,
                   b_index_type entriesB, bool transposeB,
                   c_rowmap_type row_mapC, bool verbose = false) {
-#ifndef KOKKOSKERNELS_ENABLE_TPL_MKL
-  throw std::runtime_error("MKL was not enabled in this build!");
-  (void)handle;
-  (void)m;
-  (void)n;
-  (void)k;
-  (void)row_mapA;
-  (void)entriesA;
-  (void)transposeA;
-  (void)row_mapB;
-  (void)entriesB;
-  (void)transposeB;
-  (void)row_mapC;
-  (void)verbose;
-#else
   using values_type  = typename KernelHandle::scalar_temp_work_view_t;
   using c_index_type = b_index_type;
   using mkl = MKL_SPMM<KernelHandle, a_rowmap_type, a_index_type, values_type,
@@ -308,7 +289,6 @@ void mkl_symbolic(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k,
                        c_index_type, values_type>;
   mkl::mkl_symbolic(handle, m, n, k, row_mapA, entriesA, transposeA, row_mapB,
                     entriesB, transposeB, row_mapC, verbose);
-#endif
 }
 
 template <typename KernelHandle, typename a_rowmap_type, typename a_index_type,
@@ -322,35 +302,16 @@ void mkl_apply(KernelHandle *handle, nnz_lno_t m, nnz_lno_t n, nnz_lno_t k,
                b_index_type entriesB, b_values_type valuesB, bool transposeB,
                c_rowmap_type row_mapC, c_index_type entriesC,
                c_values_type valuesC, bool verbose = false) {
-#ifndef KOKKOSKERNELS_ENABLE_TPL_MKL
-  throw std::runtime_error("MKL was not enabled in this build!");
-  (void)handle;
-  (void)m;
-  (void)n;
-  (void)k;
-  (void)row_mapA;
-  (void)entriesA;
-  (void)valuesA;
-  (void)transposeA;
-  (void)row_mapB;
-  (void)entriesB;
-  (void)valuesB;
-  (void)transposeB;
-  (void)row_mapC;
-  (void)entriesC;
-  (void)valuesC;
-  (void)verbose;
-#else
   using mkl = MKL_SPMM<KernelHandle, a_rowmap_type, a_index_type, a_values_type,
                        b_rowmap_type, b_index_type, b_values_type,
                        c_rowmap_type, c_index_type, c_values_type>;
   mkl::mkl_numeric(handle, m, n, k, row_mapA, entriesA, valuesA, transposeA,
                    row_mapB, entriesB, valuesB, transposeB, row_mapC, entriesC,
                    valuesC, verbose);
-#endif
 }
 
 }  // namespace Impl
 }  // namespace KokkosSparse
 
-#endif
+#endif  // KOKKOSKERNELS_ENABLE_TPL_MKL
+#endif  // _KOKKOSSPGEMMMKL_HPP
diff --git a/src/sparse/impl/KokkosSparse_spgemm_numeric_spec.hpp b/src/sparse/impl/KokkosSparse_spgemm_numeric_spec.hpp
index beb969fc77..68e5e82bdb 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_numeric_spec.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_numeric_spec.hpp
@@ -245,9 +245,13 @@ struct SPGEMM_NUMERIC<
                                     transposeB, row_mapC, entriesC, valuesC);
         break;
       case SPGEMM_MKL:
+#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
         mkl_apply(sh, m, n, k, row_mapA, entriesA, valuesA, transposeA,
                   row_mapB, entriesB, valuesB, transposeB, row_mapC, entriesC,
                   valuesC, handle->get_verbose());
+#else
+        throw std::runtime_error("MKL was not enabled in this build!");
+#endif
         break;
       case SPGEMM_MKL2PHASE:
         mkl2phase_apply(sh, m, n, k, row_mapA, entriesA, valuesA, transposeA,
diff --git a/src/sparse/impl/KokkosSparse_spgemm_symbolic_spec.hpp b/src/sparse/impl/KokkosSparse_spgemm_symbolic_spec.hpp
index 181984ebe9..d83ae6767c 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_symbolic_spec.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_symbolic_spec.hpp
@@ -179,9 +179,13 @@ struct SPGEMM_SYMBOLIC<KernelHandle, a_size_view_t_, a_lno_view_t,
                               row_mapC);
         break;
       case SPGEMM_MKL:
+#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
         mkl_symbolic(sh, m, n, k, row_mapA, entriesA, transposeA, row_mapB,
                      entriesB, transposeB, row_mapC, handle->get_verbose());
         break;
+#else
+        throw std::runtime_error("MKL was not enabled in this build!");
+#endif
     }
     sh->set_call_symbolic();
   }

From 70bb051a5a42e3bf5395c60363bfba2cddc2f64f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Fri, 18 Feb 2022 13:39:17 +0100
Subject: [PATCH 15/19] Add explicit compilation error about scalar types not
 supported by MKL

---
 src/common/KokkosKernels_SparseUtils_mkl.hpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/common/KokkosKernels_SparseUtils_mkl.hpp b/src/common/KokkosKernels_SparseUtils_mkl.hpp
index a2ab16fba9..780c75ea51 100644
--- a/src/common/KokkosKernels_SparseUtils_mkl.hpp
+++ b/src/common/KokkosKernels_SparseUtils_mkl.hpp
@@ -79,6 +79,14 @@ inline sparse_operation_t mode_kk_to_mkl(char mode_kk) {
       "Invalid mode for MKL (should be one of N, T, H)");
 }
 
+template <typename value_type>
+struct mkl_is_supported_value_type : std::false_type {};
+
+template <>
+struct mkl_is_supported_value_type<float> : std::true_type {};
+template <>
+struct mkl_is_supported_value_type<double> : std::true_type {};
+
 // MKLSparseMatrix provides thin wrapper around MKL matrix handle
 // (sparse_matrix_t) and encapsulates MKL call dispatches related to details
 // like value_type, allowing simple client code in kernels.
@@ -86,6 +94,10 @@ template <typename value_type>
 class MKLSparseMatrix {
   sparse_matrix_t mtx;
 
+  static_assert(mkl_is_supported_value_type<value_type>::value,
+                "Scalar type used in MKLSparseMatrix<value_type> is NOT "
+                "supported by MKL");
+
  public:
   inline MKLSparseMatrix(sparse_matrix_t mtx_) : mtx(mtx_) {}
 

From 650cd176926ab306b586d5169114a398be65e1d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Fri, 18 Feb 2022 13:53:50 +0100
Subject: [PATCH 16/19] Add Kokkos::complex<float|double> support to MKL sparse
 matrix

---
 src/common/KokkosKernels_SparseUtils_mkl.hpp | 54 ++++++++++++++++++++
 unit_test/sparse/Test_Sparse_spgemm.hpp      |  9 ++--
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/src/common/KokkosKernels_SparseUtils_mkl.hpp b/src/common/KokkosKernels_SparseUtils_mkl.hpp
index 780c75ea51..3bd1deb96a 100644
--- a/src/common/KokkosKernels_SparseUtils_mkl.hpp
+++ b/src/common/KokkosKernels_SparseUtils_mkl.hpp
@@ -86,6 +86,10 @@ template <>
 struct mkl_is_supported_value_type<float> : std::true_type {};
 template <>
 struct mkl_is_supported_value_type<double> : std::true_type {};
+template <>
+struct mkl_is_supported_value_type<Kokkos::complex<float>> : std::true_type {};
+template <>
+struct mkl_is_supported_value_type<Kokkos::complex<double>> : std::true_type {};
 
 // MKLSparseMatrix provides thin wrapper around MKL matrix handle
 // (sparse_matrix_t) and encapsulates MKL call dispatches related to details
@@ -134,6 +138,24 @@ inline MKLSparseMatrix<double>::MKLSparseMatrix(const MKL_INT rows,
                                         cols, xadj, xadj + 1, adj, values));
 }
 
+template <>
+inline MKLSparseMatrix<Kokkos::complex<float>>::MKLSparseMatrix(
+    const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj,
+    Kokkos::complex<float> *values) {
+  MKL_SAFE_CALL(mkl_sparse_c_create_csr(
+      &mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj,
+      reinterpret_cast<MKL_Complex8 *>(values)));
+}
+
+template <>
+inline MKLSparseMatrix<Kokkos::complex<double>>::MKLSparseMatrix(
+    const MKL_INT rows, const MKL_INT cols, MKL_INT *xadj, MKL_INT *adj,
+    Kokkos::complex<double> *values) {
+  MKL_SAFE_CALL(mkl_sparse_z_create_csr(
+      &mtx, SPARSE_INDEX_BASE_ZERO, rows, cols, xadj, xadj + 1, adj,
+      reinterpret_cast<MKL_Complex16 *>(values)));
+}
+
 template <>
 inline void MKLSparseMatrix<float>::export_data(MKL_INT &num_rows,
                                                 MKL_INT &num_cols,
@@ -170,6 +192,38 @@ inline void MKLSparseMatrix<double>::export_data(MKL_INT &num_rows,
   }
 }
 
+template <>
+inline void MKLSparseMatrix<Kokkos::complex<float>>::export_data(
+    MKL_INT &num_rows, MKL_INT &num_cols, MKL_INT *&rows_start,
+    MKL_INT *&columns, Kokkos::complex<float> *&values) {
+  sparse_index_base_t indexing;
+  MKL_INT *rows_end;
+  MKL_SAFE_CALL(mkl_sparse_c_export_csr(
+      mtx, &indexing, &num_rows, &num_cols, &rows_start, &rows_end, &columns,
+      reinterpret_cast<MKL_Complex8 **>(&values)));
+  if (SPARSE_INDEX_BASE_ZERO != indexing) {
+    throw std::runtime_error(
+        "Expected zero based indexing in exported MKL sparse matrix\n");
+    return;
+  }
+}
+
+template <>
+inline void MKLSparseMatrix<Kokkos::complex<double>>::export_data(
+    MKL_INT &num_rows, MKL_INT &num_cols, MKL_INT *&rows_start,
+    MKL_INT *&columns, Kokkos::complex<double> *&values) {
+  sparse_index_base_t indexing;
+  MKL_INT *rows_end;
+  MKL_SAFE_CALL(mkl_sparse_z_export_csr(
+      mtx, &indexing, &num_rows, &num_cols, &rows_start, &rows_end, &columns,
+      reinterpret_cast<MKL_Complex16 **>(&values)));
+  if (SPARSE_INDEX_BASE_ZERO != indexing) {
+    throw std::runtime_error(
+        "Expected zero based indexing in exported MKL sparse matrix\n");
+    return;
+  }
+}
+
 }  // namespace Impl
 }  // namespace KokkosSparse
 
diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp
index cb3d04b019..53158f85ed 100644
--- a/unit_test/sparse/Test_Sparse_spgemm.hpp
+++ b/unit_test/sparse/Test_Sparse_spgemm.hpp
@@ -299,13 +299,12 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth,
 #endif
         break;
 
-      case SPGEMM_MKL:
-        algo = "SPGEMM_MKL";
-        // MKL requires scalar to be either float or double
-        if (!(std::is_same<float, scalar_t>::value ||
-              std::is_same<double, scalar_t>::value)) {
+      case SPGEMM_MKL: algo = "SPGEMM_MKL";
+#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
+        if (!KokkosSparse::Impl::mkl_is_supported_value_type<scalar_t>::value) {
           is_expected_to_fail = true;
         }
+#endif
         // mkl requires local ordinals to be int.
         if (!(std::is_same<int, lno_t>::value)) {
           is_expected_to_fail = true;

From 35a4621faf80cf5534cd66a96ed505860fa44d5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Fri, 18 Feb 2022 16:04:07 +0100
Subject: [PATCH 17/19] Adjust unit test tolerance for MKL float

---
 unit_test/sparse/Test_Sparse_spgemm.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp
index 53158f85ed..ab84b7b0a5 100644
--- a/unit_test/sparse/Test_Sparse_spgemm.hpp
+++ b/unit_test/sparse/Test_Sparse_spgemm.hpp
@@ -229,7 +229,7 @@ bool is_same_matrix(crsMat_t output_mat_actual, crsMat_t output_mat_reference) {
 
   typedef typename Kokkos::Details::ArithTraits<
       typename scalar_view_t::non_const_value_type>::mag_type eps_type;
-  eps_type eps = std::is_same<eps_type, float>::value ? 2 * 1e-3 : 1e-7;
+  eps_type eps = std::is_same<eps_type, float>::value ? 3.7e-3 : 1e-7;
 
   is_identical = KokkosKernels::Impl::kk_is_relatively_identical_view<
       scalar_view_t, scalar_view_t, eps_type, typename device::execution_space>(

From a972c7523998cf1d59d204361a8ea1bbfd7713d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Fri, 18 Feb 2022 16:06:09 +0100
Subject: [PATCH 18/19] Fix conversion compiler errors

---
 src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
index 6c95e648e9..36784731d0 100644
--- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
+++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp
@@ -214,8 +214,8 @@ class MKL_SPMM {
     auto h_valsB           = create_mirror(valuesB);
     auto h_entriesA        = create_mirror(entriesA);
     auto h_entriesB        = create_mirror(entriesB);
-    const int *a_adj       = h_entriesA.data();
-    const int *b_adj       = h_entriesB.data();
+    const int *a_adj       = reinterpret_cast<const int *>(h_entriesA.data());
+    const int *b_adj       = reinterpret_cast<const int *>(h_entriesB.data());
     const value_type *a_ew = h_valsA.data();
     const value_type *b_ew = h_valsB.data();
 

From 9d4de666b81b6721142397f7b27ca9aead795dd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= <mikolaj.zuzek@ng-analytics.com>
Date: Fri, 18 Feb 2022 17:51:05 +0100
Subject: [PATCH 19/19] Fix expected crashes for ordinal_type!=int in unit test

---
 src/sparse/KokkosSparse_spgemm_numeric.hpp | 4 +++-
 unit_test/sparse/Test_Sparse_spgemm.hpp    | 9 ++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/sparse/KokkosSparse_spgemm_numeric.hpp b/src/sparse/KokkosSparse_spgemm_numeric.hpp
index 60a54f5b8b..5bc791397c 100644
--- a/src/sparse/KokkosSparse_spgemm_numeric.hpp
+++ b/src/sparse/KokkosSparse_spgemm_numeric.hpp
@@ -139,7 +139,9 @@ void spgemm_numeric(KernelHandle *handle,
         "If you need this case please let kokkos-kernels developers know.\n");
   }
 
-  if (m < 1 || n < 1 || k < 1) return;
+  if (m < 1 || n < 1 || k < 1 || entriesA.extent(0) < 1 ||
+      entriesB.extent(0) < 1)
+    return;
 
   typedef typename KernelHandle::const_size_type c_size_t;
   typedef typename KernelHandle::const_nnz_lno_t c_lno_t;
diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp
index ab84b7b0a5..47b06b716a 100644
--- a/unit_test/sparse/Test_Sparse_spgemm.hpp
+++ b/unit_test/sparse/Test_Sparse_spgemm.hpp
@@ -269,6 +269,8 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth,
   crsMat_t B = KokkosKernels::Impl::kk_generate_sparse_matrix<crsMat_t>(
       k, n, nnz, row_size_variance, bandwidth);
 
+  const bool is_empty_case = m < 1 || n < 1 || k < 1 || nnz < 1;
+
   crsMat_t output_mat2;
   if (oldInterface)
     run_spgemm_old_interface<crsMat_t, device>(A, B, SPGEMM_DEBUG, output_mat2);
@@ -305,8 +307,9 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth,
           is_expected_to_fail = true;
         }
 #endif
-        // mkl requires local ordinals to be int.
-        if (!(std::is_same<int, lno_t>::value)) {
+        // MKL requires local ordinals to be int.
+        // Note: empty-array special case will NOT fail on this.
+        if (!std::is_same<int, lno_t>::value && !is_empty_case) {
           is_expected_to_fail = true;
         }
         // if size_type is larger than int, mkl casts it to int.
@@ -345,7 +348,7 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth,
       EXPECT_TRUE(is_expected_to_fail) << algo << ": " << e.what();
       failed = true;
     }
-    EXPECT_TRUE((failed == is_expected_to_fail));
+    EXPECT_EQ(is_expected_to_fail, failed);
 
     // double spgemm_time = timer1.seconds();