From 39a74af6ced58f5ea3fc41a96f21c861ccb57d94 Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Mon, 17 Jan 2022 11:18:40 -0500 Subject: [PATCH 1/3] Remove unnecessary #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST guard around InnerProductSpaceTraits specialization --- src/Kokkos_InnerProductSpaceTraits.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Kokkos_InnerProductSpaceTraits.hpp b/src/Kokkos_InnerProductSpaceTraits.hpp index 2174b4975d..b43d34c5f3 100644 --- a/src/Kokkos_InnerProductSpaceTraits.hpp +++ b/src/Kokkos_InnerProductSpaceTraits.hpp @@ -171,7 +171,6 @@ class InnerProductSpaceTraits { /// \brief Partial specialization for long double. /// /// \warning CUDA does not support long double in device functions. -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST template <> struct InnerProductSpaceTraits<long double> { typedef long double val_type; @@ -183,7 +182,6 @@ struct InnerProductSpaceTraits<long double> { } static dot_type dot(const val_type& x, const val_type& y) { return x * y; } }; -#endif //! Partial specialization for Kokkos::complex. 
template From ba86e5da19cbcc850fa6ff18374ced77993334d9 Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Mon, 17 Jan 2022 11:49:26 -0500 Subject: [PATCH 2/3] Prefer KOKKOS_IF_{HOST,DEVICE} when available --- ...hed_Eigendecomposition_Serial_Internal.hpp | 10 +++++++- .../impl/KokkosSparse_spmv_struct_impl.hpp | 25 ++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp b/src/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp index f759a1e521..8fc937b783 100644 --- a/src/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp +++ b/src/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp @@ -357,7 +357,15 @@ struct SerialEigendecompositionInternal { const int ers, RealType* ei, const int eis, RealType* UL, const int uls0, const int uls1, RealType* UR, const int urs0, const int urs1, RealType* w, const int wlen) { -#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) +#if defined(KOKKOS_IF_HOST) + KOKKOS_IF_HOST((host_invoke(m, A, as0, as1, er, ers, ei, eis, UL, uls0, + uls1, UR, urs0, urs1, w, wlen);)) + KOKKOS_IF_DEVICE((device_invoke(m, A, as0, as1, er, ers, ei, eis, UL, uls0, + uls1, UR, urs0, urs1, w, wlen);)) +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) // FIXME remove when + // requiring minimum + // version of + // Kokkos 3.6 // if (as0 == 1 || as1 == 1) { /// column major or row major and it runs on host /// potentially it can run tpls internally diff --git a/src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp b/src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp index 53c6da72df..fbfd28ef0e 100644 --- a/src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp @@ -548,7 +548,30 @@ struct SPMV_Struct_Functor { const size_type rowOffset = m_A.graph.row_map(rowIdx); y_value_type sum(0.0); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST +#if 
defined(KOKKOS_IF_HOST) + // clang-format off + KOKKOS_IF_HOST(( + for (ordinal_type idx = 0; idx < 27; ++idx) { + sum += + m_A.values(rowOffset + idx) * m_x(rowIdx + columnOffsets(idx)); + } + )) + + KOKKOS_IF_DEVICE(( + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(dev, 27), + [&](const ordinal_type& idx, y_value_type& lclSum) { + lclSum += (conjugate ? ATV::conj(m_A.values(rowOffset + idx)) + : m_A.values(rowOffset + idx)) * + m_x(rowIdx + columnOffsets(idx)); + }, + sum); + )) + // clang-format on +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) // FIXME remove when + // requiring minimum + // version of + // Kokkos 3.6 for (ordinal_type idx = 0; idx < 27; ++idx) { sum += m_A.values(rowOffset + idx) * m_x(rowIdx + columnOffsets(idx)); From c1c71d1f329f0e0709699a05014ba2a746a8a18b Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Mon, 17 Jan 2022 11:50:09 -0500 Subject: [PATCH 3/3] Add note about not using #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST next to commented code --- ...atched_Eigendecomposition_Serial_Internal.hpp | 16 ++++++++++++++++ ...ed_Eigendecomposition_TeamVector_Internal.hpp | 13 +++++++++++++ 2 files changed, 29 insertions(+) diff --git a/src/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp b/src/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp index 8fc937b783..41dbe5d9fd 100644 --- a/src/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp +++ b/src/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp @@ -82,6 +82,22 @@ struct SerialEigendecompositionInternal { // /// step 1: Hessenberg reduction A = Q H Q^H // /// Q is stored in QZ + // + //////////////////////////////////////////////////////////////////////////// + // DO NOT USE + // + // #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + // + // #else + // + // #endif + // + // DO THIS INSTEAD + // + // KOKKOS_IF_HOST(()) + // KOKKOS_IF_DEVICE(()) + // + 
//////////////////////////////////////////////////////////////////////////// // #if (defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && (__INTEL_MKL__ >= 2018)) && // defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) // { diff --git a/src/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp b/src/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp index 88c44dbc04..f3be2ff3d8 100644 --- a/src/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp +++ b/src/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp @@ -67,6 +67,19 @@ struct TeamVectorEigendecompositionInternal { static_assert(false, "TeamVector eigendecomposition is not implemented yet."); /* + // DO NOT USE + // + // #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + // + // #else + // + // #endif + // + // DO THIS INSTEAD + // + // KOKKOS_IF_HOST(()) + // KOKKOS_IF_DEVICE(()) + // #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) if (as0 == 1 || as1 == 1) { /// column major or row major and it runs on host