// Patch summary: replaces preprocessor checks of
// KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST with the function-like macros
// KOKKOS_IF_ON_HOST((...)) / KOKKOS_IF_ON_DEVICE((...)), keeping the old check
// as an #elif fallback until Kokkos 3.6 is the minimum required version.
//  - Kokkos_InnerProductSpaceTraits.hpp: removes the host-only guard around
//    the long double specialization.
//  - KokkosBatched_Eigendecomposition_Serial_Internal.hpp: dispatches to
//    host_invoke / device_invoke through the macros and adds a usage note.
//  - KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp: adds the same
//    usage note inside the commented-out body.
//  - KokkosSparse_spmv_struct_impl.hpp: wraps the 27-entry host loop and the
//    device ThreadVectorRange reduction in the macros.
// The guard macro must be spelled KOKKOS_IF_ON_HOST: that is the name Kokkos
// defines, so a test on any other spelling silently selects the #elif branch.
// NOTE(review): the line breaks of this patch appear to have been collapsed;
// re-wrap the hunks against the target files before applying.
// NOTE(review): template argument lists look stripped (e.g. `template <>
// struct InnerProductSpaceTraits {`) - presumably extraction damage; confirm.
// NOTE(review): in the spmv hunk the host loop does not apply `conjugate`
// while the device reduction does; this mirrors the pre-existing host path -
// confirm it is intended.
diff --git a/src/Kokkos_InnerProductSpaceTraits.hpp b/src/Kokkos_InnerProductSpaceTraits.hpp index 2174b4975d..b43d34c5f3 100644 --- a/src/Kokkos_InnerProductSpaceTraits.hpp +++ b/src/Kokkos_InnerProductSpaceTraits.hpp @@ -171,7 +171,6 @@ class InnerProductSpaceTraits { /// \brief Partial specialization for long double. /// /// \warning CUDA does not support long double in device functions. -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST template <> struct InnerProductSpaceTraits { typedef long double val_type; @@ -183,7 +182,6 @@ struct InnerProductSpaceTraits { } static dot_type dot(const val_type& x, const val_type& y) { return x * y; } }; -#endif //! Partial specialization for Kokkos::complex. template diff --git a/src/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp b/src/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp index f759a1e521..41dbe5d9fd 100644 --- a/src/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp +++ b/src/batched/dense/impl/KokkosBatched_Eigendecomposition_Serial_Internal.hpp @@ -82,6 +82,22 @@ struct SerialEigendecompositionInternal { // /// step 1: Hessenberg reduction A = Q H Q^H // /// Q is stored in QZ + // + //////////////////////////////////////////////////////////////////////////// + // DO NOT USE + // + // #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + // + // #else + // + // #endif + // + // DO THIS INSTEAD + // + // KOKKOS_IF_ON_HOST(()) + // KOKKOS_IF_ON_DEVICE(()) + // + //////////////////////////////////////////////////////////////////////////// // #if (defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && (__INTEL_MKL__ >= 2018)) && // defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) // { @@ -357,7 +373,15 @@ struct SerialEigendecompositionInternal { const int ers, RealType* ei, const int eis, RealType* UL, const int uls0, const int uls1, RealType* UR, const int urs0, const int urs1, RealType* w, const int wlen) { -#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) 
+#if defined(KOKKOS_IF_ON_HOST) + KOKKOS_IF_ON_HOST((host_invoke(m, A, as0, as1, er, ers, ei, eis, UL, uls0, + uls1, UR, urs0, urs1, w, wlen);)) + KOKKOS_IF_ON_DEVICE((device_invoke(m, A, as0, as1, er, ers, ei, eis, UL, uls0, + uls1, UR, urs0, urs1, w, wlen);)) +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) // FIXME remove when + // requiring minimum + // version of + // Kokkos 3.6 // if (as0 == 1 || as1 == 1) { /// column major or row major and it runs on host /// potentially it can run tpls internally diff --git a/src/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp b/src/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp index 88c44dbc04..f3be2ff3d8 100644 --- a/src/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp +++ b/src/batched/dense/impl/KokkosBatched_Eigendecomposition_TeamVector_Internal.hpp @@ -67,6 +67,19 @@ struct TeamVectorEigendecompositionInternal { static_assert(false, "TeamVector eigendecomposition is not implemented yet."); /* + // DO NOT USE + // + // #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + // + // #else + // + // #endif + // + // DO THIS INSTEAD + // + // KOKKOS_IF_ON_HOST(()) + // KOKKOS_IF_ON_DEVICE(()) + // #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) if (as0 == 1 || as1 == 1) { /// column major or row major and it runs on host diff --git a/src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp b/src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp index 53c6da72df..fbfd28ef0e 100644 --- a/src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp @@ -548,7 +548,30 @@ struct SPMV_Struct_Functor { const size_type rowOffset = m_A.graph.row_map(rowIdx); y_value_type sum(0.0); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST +#if defined(KOKKOS_IF_ON_HOST) + // clang-format off + KOKKOS_IF_ON_HOST(( + for (ordinal_type idx = 0; idx < 27; ++idx) { + sum += + m_A.values(rowOffset + idx) * m_x(rowIdx + 
columnOffsets(idx)); + } + )) + + KOKKOS_IF_ON_DEVICE(( + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(dev, 27), + [&](const ordinal_type& idx, y_value_type& lclSum) { + lclSum += (conjugate ? ATV::conj(m_A.values(rowOffset + idx)) + : m_A.values(rowOffset + idx)) * + m_x(rowIdx + columnOffsets(idx)); + }, + sum); + )) + // clang-format on +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) // FIXME remove when + // requiring minimum + // version of + // Kokkos 3.6 for (ordinal_type idx = 0; idx < 27; ++idx) { sum += m_A.values(rowOffset + idx) * m_x(rowIdx + columnOffsets(idx));