Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid using #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_* macro guards #1266

Merged
merged 3 commits into from
Jan 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/Kokkos_InnerProductSpaceTraits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,6 @@ class InnerProductSpaceTraits {
/// \brief Explicit (full) specialization for long double.
///
/// \warning CUDA does not support long double in device functions.
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
template <>
struct InnerProductSpaceTraits<long double> {
typedef long double val_type;
Expand All @@ -183,7 +182,6 @@ struct InnerProductSpaceTraits<long double> {
}
static dot_type dot(const val_type& x, const val_type& y) { return x * y; }
};
#endif

//! Partial specialization for Kokkos::complex<T>.
template <class T>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,22 @@ struct SerialEigendecompositionInternal {

// /// step 1: Hessenberg reduction A = Q H Q^H
// /// Q is stored in QZ
//
////////////////////////////////////////////////////////////////////////////
// DO NOT USE
//
// #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
// <host code>
// #else
// <device code>
// #endif
//
// DO THIS INSTEAD
//
// KOKKOS_IF_HOST((<host code>))
// KOKKOS_IF_DEVICE((<device code>))
//
////////////////////////////////////////////////////////////////////////////
// #if (defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && (__INTEL_MKL__ >= 2018)) &&
// defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
// {
Expand Down Expand Up @@ -357,7 +373,15 @@ struct SerialEigendecompositionInternal {
const int ers, RealType* ei, const int eis, RealType* UL, const int uls0,
const int uls1, RealType* UR, const int urs0, const int urs1, RealType* w,
const int wlen) {
#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
#if defined(KOKKOS_IF_HOST)
KOKKOS_IF_HOST((host_invoke(m, A, as0, as1, er, ers, ei, eis, UL, uls0,
uls1, UR, urs0, urs1, w, wlen);))
KOKKOS_IF_DEVICE((device_invoke(m, A, as0, as1, er, ers, ei, eis, UL, uls0,
uls1, UR, urs0, urs1, w, wlen);))
#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) // FIXME remove when
// requiring minimum
// version of
// Kokkos 3.6
// if (as0 == 1 || as1 == 1) {
/// column major or row major and it runs on host
/// potentially it can run tpls internally
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,19 @@ struct TeamVectorEigendecompositionInternal {
static_assert(false,
"TeamVector eigendecomposition is not implemented yet.");
/*
// DO NOT USE
//
// #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
// <host code>
// #else
// <device code>
// #endif
//
// DO THIS INSTEAD
//
// KOKKOS_IF_HOST((<host code>))
// KOKKOS_IF_DEVICE((<device code>))
//
#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
if (as0 == 1 || as1 == 1) {
/// column major or row major and it runs on host
Expand Down
25 changes: 24 additions & 1 deletion src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,30 @@ struct SPMV_Struct_Functor {
const size_type rowOffset = m_A.graph.row_map(rowIdx);

y_value_type sum(0.0);
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
#if defined(KOKKOS_IF_HOST)
// clang-format off
KOKKOS_IF_HOST((
for (ordinal_type idx = 0; idx < 27; ++idx) {
sum +=
m_A.values(rowOffset + idx) * m_x(rowIdx + columnOffsets(idx));
}
))

KOKKOS_IF_DEVICE((
Kokkos::parallel_reduce(
Kokkos::ThreadVectorRange(dev, 27),
[&](const ordinal_type& idx, y_value_type& lclSum) {
lclSum += (conjugate ? ATV::conj(m_A.values(rowOffset + idx))
: m_A.values(rowOffset + idx)) *
m_x(rowIdx + columnOffsets(idx));
},
sum);
))
// clang-format on
#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) // FIXME remove when
// requiring minimum
// version of
// Kokkos 3.6
for (ordinal_type idx = 0; idx < 27; ++idx) {
sum +=
m_A.values(rowOffset + idx) * m_x(rowIdx + columnOffsets(idx));
Expand Down