Skip to content

Commit

Permalink
progress...
Browse files Browse the repository at this point in the history
  • Loading branch information
cwpearson committed Jul 10, 2023
1 parent 56ae59c commit 3b2dea3
Show file tree
Hide file tree
Showing 9 changed files with 402 additions and 203 deletions.
96 changes: 32 additions & 64 deletions blas/impl/KokkosBlas1_scal_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ struct V_Scal_Functor {
XV m_x;
AV m_a;

V_Scal_Functor(const RV& r, const XV& x, const AV& a, const SizeType startingColumn)
V_Scal_Functor(const RV& r, const XV& x, const AV& a)
: m_r(r), m_x(x), m_a(a) {
static_assert(Kokkos::is_view<RV>::value,
"V_Scal_Functor: RV is not a Kokkos::View.");
Expand All @@ -68,15 +68,6 @@ struct V_Scal_Functor {
"V_Scal_Functor: XV is not a Kokkos::View.");
static_assert(RV::rank == 1, "V_Scal_Functor: RV is not rank 1.");
static_assert(XV::rank == 1, "V_Scal_Functor: XV is not rank 1.");


if constexpr (Kokkos::is_view_v<AV>) {
if (startingColumn != 0) {
m_a = Kokkos::subview(
a,
std::make_pair(startingColumn, static_cast<SizeType>(a.extent(0))));
}
}
}

KOKKOS_INLINE_FUNCTION
Expand Down Expand Up @@ -105,54 +96,31 @@ struct V_Scal_Functor {
}
};

#if 0
// Partial specialization of V_Scal_Functor that lets a be a scalar
// (rather than a 1-D View, as in the most general version above).
// This functor computes any of the following:
//
// 1. Y(i) = alpha*X(i) for alpha in -1,0,1
// 2. Y(i) = a*X(i)
template <class RV, class XV, int scalar_x, class SizeType>
struct V_Scal_Functor<RV, typename XV::non_const_value_type, XV, scalar_x,
SizeType> {
typedef SizeType size_type;
typedef Kokkos::ArithTraits<typename RV::non_const_value_type> ATS;
/*! \brief
RV m_r;
XV m_x;
const typename XV::non_const_value_type m_a;
r(i) = av * x(i)
r(i) = av() * x(i)
V_Scal_Functor(const RV& r, const XV& x,
const typename XV::non_const_value_type& a,
const SizeType /* startingColumn */)
: m_r(r), m_x(x), m_a(a) {}
\param space
\param r
\param av
\param x
\param alphaHint A KokkosKernels::Impl::ScalarHint corresponding to the value of av. If not KokkosKernels::Impl::ScalarHint::none, may be used to optimize the implementation
\tparam SizeType
\tparam ExecutionSpace
\tparam RV
\tparam AV
\tparam XV
*/
template <typename SizeType, typename ExecutionSpace, typename RV, typename AV, typename XV>
void V_Scal_Generic(const ExecutionSpace& space, const RV& r, const AV& av,
const XV& x,
const KokkosKernels::Impl::ScalarHint &alphaHint = KokkosKernels::Impl::ScalarHint::none) {

// TODO: assert some things about AV

KOKKOS_INLINE_FUNCTION
void operator()(const size_type& i) const {
if (scalar_x == 0) {
m_r(i) = ATS::zero();
}
if (scalar_x == -1) {
m_r(i) = -m_x(i);
}
if (scalar_x == 1) {
m_r(i) = m_x(i);
}
if (scalar_x == 2) {
m_r(i) = m_a * m_x(i);
}
}
};
#endif

// Variant of MV_Scal_Generic for single vectors (1-D Views) r and x.
// As above, av is either a 1-D View (and only its first entry will be
// read), or a scalar.
template <class execution_space, class RV, class AV, class XV, class SizeType>
void V_Scal_Generic(const execution_space& space, const RV& r, const AV& av,
const XV& x,
const SizeType startingColumn,
const KokkosKernels::Impl::ScalarHint &alphaHint) {
static_assert(Kokkos::is_view<RV>::value,
"V_Scal_Generic: RV is not a Kokkos::View.");
static_assert(Kokkos::is_view<XV>::value,
Expand All @@ -161,26 +129,26 @@ void V_Scal_Generic(const execution_space& space, const RV& r, const AV& av,
static_assert(XV::rank == 1, "V_Scal_Generic: XV is not rank 1.");

const SizeType numRows = x.extent(0);
Kokkos::RangePolicy<execution_space, SizeType> policy(space, 0, numRows);
Kokkos::RangePolicy<ExecutionSpace, SizeType> policy(space, 0, numRows);

if (alphaHint == KokkosKernels::Impl::ScalarHint::zero) {
V_Scal_Functor<RV, AV, XV, KokkosKernels::Impl::ScalarHint::zero, SizeType> op(r, x, av, startingColumn);
Kokkos::parallel_for("KokkosBlas::Scal::S0", policy, op);
V_Scal_Functor<RV, AV, XV, KokkosKernels::Impl::ScalarHint::zero, SizeType> op(r, x, av);
Kokkos::parallel_for("KokkosBlas::Scal::0", policy, op);
return;
}
else if (alphaHint == KokkosKernels::Impl::ScalarHint::neg_one) {
V_Scal_Functor<RV, AV, XV, KokkosKernels::Impl::ScalarHint::neg_one, SizeType> op(r, x, av, startingColumn);
Kokkos::parallel_for("KokkosBlas::Scal::S1", policy, op);
V_Scal_Functor<RV, AV, XV, KokkosKernels::Impl::ScalarHint::neg_one, SizeType> op(r, x, av);
Kokkos::parallel_for("KokkosBlas::Scal::-1", policy, op);
return;
}
else if (alphaHint == KokkosKernels::Impl::ScalarHint::pos_one) {
V_Scal_Functor<RV, AV, XV, KokkosKernels::Impl::ScalarHint::pos_one, SizeType> op(r, x, av, startingColumn);
Kokkos::parallel_for("KokkosBlas::Scal::S2", policy, op);
V_Scal_Functor<RV, AV, XV, KokkosKernels::Impl::ScalarHint::pos_one, SizeType> op(r, x, av);
Kokkos::parallel_for("KokkosBlas::Scal::1", policy, op);
return;
}

V_Scal_Functor<RV, AV, XV, KokkosKernels::Impl::ScalarHint::none, SizeType> op(r, x, av, startingColumn);
Kokkos::parallel_for("KokkosBlas::Scal::S3", policy, op);
V_Scal_Functor<RV, AV, XV, KokkosKernels::Impl::ScalarHint::none, SizeType> op(r, x, av);
Kokkos::parallel_for("KokkosBlas::Scal::none", policy, op);
}

} // namespace Impl
Expand Down
33 changes: 22 additions & 11 deletions blas/impl/KokkosBlas1_scal_mv_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ void MV_Scal_Generic(const execution_space& space, const RVector& r,
template <class execution_space, class RMV, class AV, class XMV, class SizeType>
void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r,
const AV& av, const XMV& x,
const KokkosKernels::Impl::ScalarHint &a = KokkosKernels::Impl::ScalarHint::none) {
const KokkosKernels::Impl::ScalarHint &aHint = KokkosKernels::Impl::ScalarHint::none) {
const SizeType numCols = x.extent(1);

#if KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL <= 2
Expand All @@ -440,7 +440,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r,
typedef decltype(R_cur) RMV2D;

MV_Scal_Unrolled<execution_space, RMV2D, AV, XMV2D, 8, SizeType>(
space, R_cur, av, X_cur, j, a);
space, R_cur, av, X_cur, j, aHint);
}
for (; j + 4 <= numCols; j += 4) {
const std::pair<SizeType, SizeType> rng(j, j + 4);
Expand All @@ -450,7 +450,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r,
typedef decltype(R_cur) RMV2D;

MV_Scal_Unrolled<execution_space, RMV2D, AV, XMV2D, 4, SizeType>(
space, R_cur, av, X_cur, j, a);
space, R_cur, av, X_cur, j, aHint);
}
for (; j < numCols; ++j) {
// RMV and XMV need to turn 1-D.
Expand All @@ -459,8 +459,21 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r,
typedef decltype(r_cur) RV;
typedef decltype(x_cur) XV;

V_Scal_Generic<execution_space, RV, AV, XV, SizeType>(space, r_cur, av,
x_cur, j, a);
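// If AV is a rank-1 View, take a rank-0 subview for column j.
// Otherwise, just pass av along as-is.
// The checks are nested because `if constexpr` conditions cannot
// short-circuit: AV::rank is only inspected once AV is known to be a View.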
if constexpr (Kokkos::is_view_v<AV>) {
if constexpr (AV::rank == 1) {
auto a_cur = Kokkos::subview(av, j);
V_Scal_Generic<SizeType>(space, r_cur, a_cur, x_cur, aHint);
} else {
V_Scal_Generic<SizeType>(space, r_cur, av, x_cur, aHint);
}
} else {
V_Scal_Generic<SizeType>(space, r_cur, av, x_cur, aHint);
}


}

#else // KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL > 2
Expand All @@ -472,7 +485,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r,
typedef decltype(r_0) RV;
typedef decltype(x_0) XV;

V_Scal_Generic<execution_space, RV, AV, XV, SizeType>(space, r_0, av, x_0,
V_Scal_Generic<SizeType>(space, r_0, av, x_0,
0, a);
break;
}
Expand Down Expand Up @@ -537,7 +550,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r,
space, r, av, x, 0, a);
break;
default:
MV_Scal_Generic<execution_space, RMV, AV, XMV, SizeType>(space, r, av, x,
MV_Scal_Generic<SizeType>(space, r, av, x,
0, a);
}

Expand Down Expand Up @@ -574,11 +587,9 @@ void MV_Scal_Invoke_Right(const execution_space& space, const RMV& r,

RV r_0 = Kokkos::subview(r, Kokkos::ALL(), 0);
XV x_0 = Kokkos::subview(x, Kokkos::ALL(), 0);
V_Scal_Generic<execution_space, RMV, aVector, XMV, 1, SizeType>(space, r_0,
av, x_0, a);
V_Scal_Generic<SizeType>(space, r_0, av, x_0, a);
} else {
MV_Scal_Generic<execution_space, RMV, aVector, XMV, SizeType>(space, r, av,
x, a);
MV_Scal_Generic<SizeType>(space, r, av, x, a);
}
}

Expand Down
84 changes: 66 additions & 18 deletions blas/impl/KokkosBlas1_scal_spec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ struct scal_eti_spec_avail {

//
// Macro for declaration of full specialization availability
// KokkosBlas::Impl::Scal for rank == 1. This is NOT for users!!! All
// KokkosBlas::Impl::Scal for rank == 1 R and X. This is NOT for users!!! All
// the declarations of full specializations go in this header file.
// We may spread out definitions (see _INST macro below) across one or
// more .cpp files.
//
// Alpha can either be scalar or rank 0
#define KOKKOSBLAS1_SCAL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \
template <> \
struct scal_eti_spec_avail< \
Expand All @@ -56,15 +57,28 @@ struct scal_eti_spec_avail {
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1> { \
enum : bool { value = true }; \
}; \
template <> \
struct scal_eti_spec_avail< \
EXEC_SPACE, \
Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<SCALAR, LAYOUT, Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const SCALAR*, LAYOUT, \
Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1> { \
enum : bool { value = true }; \
};

//
// Macro for declaration of full specialization availability
// KokkosBlas::Impl::Scal for rank == 2. This is NOT for users!!! All
// KokkosBlas::Impl::Scal for rank == 2 R and X. This is NOT for users!!! All
// the declarations of full specializations go in this header file.
// We may spread out definitions (see _DEF macro below) across one or
// more .cpp files.
//
// Alpha can either be rank 1, rank 0, or scalar
#define KOKKOSBLAS1_SCAL_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \
MEM_SPACE) \
template <> \
Expand All @@ -82,6 +96,20 @@ struct scal_eti_spec_avail {
enum : bool { value = true }; \
}; \
template <> \
struct scal_eti_spec_avail< \
EXEC_SPACE, \
Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const SCALAR, LAYOUT, \
Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const SCALAR**, LAYOUT, \
Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
2> { \
enum : bool { value = true }; \
}; \
template <> \
struct scal_eti_spec_avail< \
EXEC_SPACE, \
Kokkos::View<SCALAR**, LAYOUT, Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Expand Down Expand Up @@ -151,7 +179,7 @@ struct Scal<execution_space, RV, typename XV::non_const_value_type, XV, 1,
typeid(RV).name(), typeid(AV).name(), typeid(XV).name());
#endif

const size_type numRows = X.extent(0);

KokkosKernels::Impl::ScalarHint alphaHint = KokkosKernels::Impl::ScalarHint::none;
if (alpha == ATA::zero()) {
alphaHint = KokkosKernels::Impl::ScalarHint::zero;
Expand All @@ -161,25 +189,22 @@ struct Scal<execution_space, RV, typename XV::non_const_value_type, XV, 1,
alphaHint = KokkosKernels::Impl::ScalarHint::pos_one;
}

const size_type numRows = X.extent(0);
if (numRows < static_cast<size_type>(INT_MAX)) {
typedef int index_type;
V_Scal_Generic<execution_space, RV, AV, XV, index_type>(space, R, alpha,
X, 0, alphaHint);
V_Scal_Generic<int>(space, R, alpha, X, alphaHint);
} else {
typedef typename XV::size_type index_type;
V_Scal_Generic<execution_space, RV, AV, XV, index_type>(space, R, alpha,
X, 0, alphaHint);
V_Scal_Generic<typename XV::size_type>(space, R, alpha, X, alphaHint);
}
Kokkos::Profiling::popRegion();
}
};

/// \brief Partial specialization of Scal for 2-D Views and 1-D View AV.
/// \brief Partial specialization of Scal for 2-D Views and 1-D, 0-D, or scalar AV.
///
/// Compute any of the following:
///
/// 1. R(i,j) = a*X(i,j) for a in -1,0,1
/// 2. R(i,j) = alpha(j)*X(i,j)
/// 1. R(i,j) = av * X(i,j)
/// 2. R(i,j) = av() * X(i,j)
/// 3. R(i,j) = av(j) * X(i,j)
template <class execution_space, class RMV, class AV, class XMV>
struct Scal<execution_space, RMV, AV, XMV, 2, false,
KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> {
Expand All @@ -189,6 +214,9 @@ struct Scal<execution_space, RMV, AV, XMV, 2, false,

static void scal(const execution_space& space, const RMV& R, const AV& av,
const XMV& X) {

// TODO: assert some things about AV

static_assert(Kokkos::is_view<RMV>::value,
"KokkosBlas::Impl::"
"Scal<2-D>: RMV is not a Kokkos::View.");
Expand All @@ -201,9 +229,6 @@ struct Scal<execution_space, RMV, AV, XMV, 2, false,
static_assert(RMV::rank == 2,
"KokkosBlas::Impl::Scal<2-D>: "
"RMV is not rank 2.");
static_assert(AV::rank == 1,
"KokkosBlas::Impl::Scal<2-D>: "
"AV is not rank 1.");
static_assert(XMV::rank == 2,
"KokkosBlas::Impl::Scal<2-D>: "
"XMV is not rank 2.");
Expand Down Expand Up @@ -312,17 +337,29 @@ struct Scal<execution_space, RMV, typename XMV::non_const_value_type, XMV, 2,

//
// Macro for declaration of full specialization of
// KokkosBlas::Impl::Scal for rank == 2. This is NOT for users!!! All
// KokkosBlas::Impl::Scal for rank == 1. This is NOT for users!!! All
// the declarations of full specializations go in this header file.
// We may spread out definitions (see _DEF macro below) across one or
// more .cpp files.
//
// alpha can be either scalar or rank 0
#define KOKKOSBLAS1_SCAL_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \
extern template struct Scal< \
EXEC_SPACE, \
Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
SCALAR, \
Kokkos::View<const SCALAR*, LAYOUT, \
Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1, false, true>; \
extern template struct Scal< \
EXEC_SPACE, \
Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const SCALAR, LAYOUT, \
Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const SCALAR*, LAYOUT, \
Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Expand All @@ -334,6 +371,17 @@ struct Scal<execution_space, RMV, typename XMV::non_const_value_type, XMV, 2,
Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
SCALAR, \
Kokkos::View<const SCALAR*, LAYOUT, \
Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1, false, true>; \
template struct Scal< \
EXEC_SPACE, \
Kokkos::View<SCALAR*, LAYOUT, Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const SCALAR, LAYOUT, \
Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const SCALAR*, LAYOUT, \
Kokkos::Device<EXEC_SPACE, MEM_SPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Expand Down
Loading

0 comments on commit 3b2dea3

Please sign in to comment.