diff --git a/blas/impl/KokkosBlas1_scal_impl.hpp b/blas/impl/KokkosBlas1_scal_impl.hpp index de216a156e..72606b6bb2 100644 --- a/blas/impl/KokkosBlas1_scal_impl.hpp +++ b/blas/impl/KokkosBlas1_scal_impl.hpp @@ -57,7 +57,7 @@ struct V_Scal_Functor { XV m_x; AV m_a; - V_Scal_Functor(const RV& r, const XV& x, const AV& a, const SizeType startingColumn) + V_Scal_Functor(const RV& r, const XV& x, const AV& a) : m_r(r), m_x(x), m_a(a) { static_assert(Kokkos::is_view::value, "V_Scal_Functor: RV is not a Kokkos::View."); @@ -68,15 +68,6 @@ struct V_Scal_Functor { "V_Scal_Functor: XV is not a Kokkos::View."); static_assert(RV::rank == 1, "V_Scal_Functor: RV is not rank 1."); static_assert(XV::rank == 1, "V_Scal_Functor: XV is not rank 1."); - - - if constexpr (Kokkos::is_view_v) { - if (startingColumn != 0) { - m_a = Kokkos::subview( - a, - std::make_pair(startingColumn, static_cast(a.extent(0)))); - } - } } KOKKOS_INLINE_FUNCTION @@ -105,54 +96,31 @@ struct V_Scal_Functor { } }; -#if 0 -// Partial specialization of V_Scal_Functor that lets a be a scalar -// (rather than a 1-D View, as in the most general version above). -// This functor computes any of the following: -// -// 1. Y(i) = alpha*X(i) for alpha in -1,0,1 -// 2. Y(i) = a*X(i) -template -struct V_Scal_Functor { - typedef SizeType size_type; - typedef Kokkos::ArithTraits ATS; +/*! \brief - RV m_r; - XV m_x; - const typename XV::non_const_value_type m_a; + r(i) = av * x(i) + r(i) = av() * x(i) - V_Scal_Functor(const RV& r, const XV& x, - const typename XV::non_const_value_type& a, - const SizeType /* startingColumn */) - : m_r(r), m_x(x), m_a(a) {} + \param space + \param r + \param av + \param x + \param alphaHint A KokkosKernels::Impl::ScalarHint corresponding to the value of av. 
If not KokkosKernels::Impl::ScalarHint::none, may be used to optimize the implementation + + \tparam SizeType + \tparam ExecutionSpace + \tparam RV + \tparam AV + \tparam XV + +*/ +template +void V_Scal_Generic(const ExecutionSpace& space, const RV& r, const AV& av, + const XV& x, + const KokkosKernels::Impl::ScalarHint &alphaHint = KokkosKernels::Impl::ScalarHint::none) { + + // TODO: assert some things about AV - KOKKOS_INLINE_FUNCTION - void operator()(const size_type& i) const { - if (scalar_x == 0) { - m_r(i) = ATS::zero(); - } - if (scalar_x == -1) { - m_r(i) = -m_x(i); - } - if (scalar_x == 1) { - m_r(i) = m_x(i); - } - if (scalar_x == 2) { - m_r(i) = m_a * m_x(i); - } - } -}; -#endif - -// Variant of MV_Scal_Generic for single vectors (1-D Views) r and x. -// As above, av is either a 1-D View (and only its first entry will be -// read), or a scalar. -template -void V_Scal_Generic(const execution_space& space, const RV& r, const AV& av, - const XV& x, - const SizeType startingColumn, - const KokkosKernels::Impl::ScalarHint &alphaHint) { static_assert(Kokkos::is_view::value, "V_Scal_Generic: RV is not a Kokkos::View."); static_assert(Kokkos::is_view::value, @@ -161,26 +129,26 @@ void V_Scal_Generic(const execution_space& space, const RV& r, const AV& av, static_assert(XV::rank == 1, "V_Scal_Generic: XV is not rank 1."); const SizeType numRows = x.extent(0); - Kokkos::RangePolicy policy(space, 0, numRows); + Kokkos::RangePolicy policy(space, 0, numRows); if (alphaHint == KokkosKernels::Impl::ScalarHint::zero) { - V_Scal_Functor op(r, x, av, startingColumn); - Kokkos::parallel_for("KokkosBlas::Scal::S0", policy, op); + V_Scal_Functor op(r, x, av); + Kokkos::parallel_for("KokkosBlas::Scal::0", policy, op); return; } else if (alphaHint == KokkosKernels::Impl::ScalarHint::neg_one) { - V_Scal_Functor op(r, x, av, startingColumn); - Kokkos::parallel_for("KokkosBlas::Scal::S1", policy, op); + V_Scal_Functor op(r, x, av); + 
Kokkos::parallel_for("KokkosBlas::Scal::-1", policy, op); return; } else if (alphaHint == KokkosKernels::Impl::ScalarHint::pos_one) { - V_Scal_Functor op(r, x, av, startingColumn); - Kokkos::parallel_for("KokkosBlas::Scal::S2", policy, op); + V_Scal_Functor op(r, x, av); + Kokkos::parallel_for("KokkosBlas::Scal::1", policy, op); return; } - V_Scal_Functor op(r, x, av, startingColumn); - Kokkos::parallel_for("KokkosBlas::Scal::S3", policy, op); + V_Scal_Functor op(r, x, av); + Kokkos::parallel_for("KokkosBlas::Scal::none", policy, op); } } // namespace Impl diff --git a/blas/impl/KokkosBlas1_scal_mv_impl.hpp b/blas/impl/KokkosBlas1_scal_mv_impl.hpp index 045f3e194b..242c077cc9 100644 --- a/blas/impl/KokkosBlas1_scal_mv_impl.hpp +++ b/blas/impl/KokkosBlas1_scal_mv_impl.hpp @@ -422,7 +422,7 @@ void MV_Scal_Generic(const execution_space& space, const RVector& r, template void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, const AV& av, const XMV& x, - const KokkosKernels::Impl::ScalarHint &a = KokkosKernels::Impl::ScalarHint::none) { + const KokkosKernels::Impl::ScalarHint &aHint = KokkosKernels::Impl::ScalarHint::none) { const SizeType numCols = x.extent(1); #if KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL <= 2 @@ -440,7 +440,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, typedef decltype(R_cur) RMV2D; MV_Scal_Unrolled( - space, R_cur, av, X_cur, j, a); + space, R_cur, av, X_cur, j, aHint); } for (; j + 4 <= numCols; j += 4) { const std::pair rng(j, j + 4); @@ -450,7 +450,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, typedef decltype(R_cur) RMV2D; MV_Scal_Unrolled( - space, R_cur, av, X_cur, j, a); + space, R_cur, av, X_cur, j, aHint); } for (; j < numCols; ++j) { // RMV and XMV need to turn 1-D. 
@@ -459,8 +459,21 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, typedef decltype(r_cur) RV; typedef decltype(x_cur) XV; - V_Scal_Generic(space, r_cur, av, - x_cur, j, a); + // If AV is a rank-one vector, get a rank-0 subview + // Otherwise, just pass along AV as-is + // can't short-circuit if constexpr :( + if constexpr (Kokkos::is_view_v) { + if constexpr (AV::rank == 1) { + auto a_cur = Kokkos::subview(av, j); + V_Scal_Generic(space, r_cur, a_cur, x_cur, aHint); + } else { + V_Scal_Generic(space, r_cur, av, x_cur, aHint); + } + } else { + V_Scal_Generic(space, r_cur, av, x_cur, aHint); + } + + } #else // KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL > 2 @@ -472,7 +485,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, typedef decltype(r_0) RV; typedef decltype(x_0) XV; - V_Scal_Generic(space, r_0, av, x_0, + V_Scal_Generic(space, r_0, av, x_0, 0, a); break; } @@ -537,7 +550,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r, space, r, av, x, 0, a); break; default: - MV_Scal_Generic(space, r, av, x, + MV_Scal_Generic(space, r, av, x, 0, a); } @@ -574,11 +587,9 @@ void MV_Scal_Invoke_Right(const execution_space& space, const RMV& r, RV r_0 = Kokkos::subview(r, Kokkos::ALL(), 0); XV x_0 = Kokkos::subview(x, Kokkos::ALL(), 0); - V_Scal_Generic(space, r_0, - av, x_0, a); + V_Scal_Generic(space, r_0, av, x_0, a); } else { - MV_Scal_Generic(space, r, av, - x, a); + MV_Scal_Generic(space, r, av, x, a); } } diff --git a/blas/impl/KokkosBlas1_scal_spec.hpp b/blas/impl/KokkosBlas1_scal_spec.hpp index c117a8f8c4..57f120941c 100644 --- a/blas/impl/KokkosBlas1_scal_spec.hpp +++ b/blas/impl/KokkosBlas1_scal_spec.hpp @@ -39,11 +39,12 @@ struct scal_eti_spec_avail { // // Macro for declaration of full specialization availability -// KokkosBlas::Impl::Scal for rank == 1. This is NOT for users!!! All +// KokkosBlas::Impl::Scal for rank == 1 R and X. This is NOT for users!!! 
All // the declarations of full specializations go in this header file. // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // +// Alpha can either be scalar or rank 0 #define KOKKOSBLAS1_SCAL_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ template <> \ struct scal_eti_spec_avail< \ @@ -56,15 +57,28 @@ struct scal_eti_spec_avail { Kokkos::MemoryTraits >, \ 1> { \ enum : bool { value = true }; \ + }; \ + template <> \ + struct scal_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1> { \ + enum : bool { value = true }; \ }; - // // Macro for declaration of full specialization availability -// KokkosBlas::Impl::Scal for rank == 2. This is NOT for users!!! All +// KokkosBlas::Impl::Scal for rank == 2 R and X. This is NOT for users!!! All // the declarations of full specializations go in this header file. // We may spread out definitions (see _DEF macro below) across one or // more .cpp files. 
// +// Alpha can either be rank 1, rank 0, or scalar #define KOKKOSBLAS1_SCAL_MV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, \ MEM_SPACE) \ template <> \ @@ -82,6 +96,20 @@ struct scal_eti_spec_avail { enum : bool { value = true }; \ }; \ template <> \ + struct scal_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 2> { \ + enum : bool { value = true }; \ + }; \ + template <> \ struct scal_eti_spec_avail< \ EXEC_SPACE, \ Kokkos::View, \ @@ -151,7 +179,7 @@ struct Scal(INT_MAX)) { - typedef int index_type; - V_Scal_Generic(space, R, alpha, - X, 0, alphaHint); + V_Scal_Generic(space, R, alpha, X, alphaHint); } else { - typedef typename XV::size_type index_type; - V_Scal_Generic(space, R, alpha, - X, 0, alphaHint); + V_Scal_Generic(space, R, alpha, X, alphaHint); } Kokkos::Profiling::popRegion(); } }; -/// \brief Partial specialization of Scal for 2-D Views and 1-D View AV. +/// \brief Partial specialization of Scal for 2-D Views and 1-D, 0-D, or scalar AV. /// /// Compute any of the following: -/// -/// 1. R(i,j) = a*X(i,j) for a in -1,0,1 -/// 2. R(i,j) = alpha(j)*X(i,j) +/// 1. R(i,j) = av * X(i,j) +/// 2. R(i,j) = av() * X(i,j) +/// 3. 
R(i,j) = av(j) * X(i,j) template struct Scal { @@ -189,6 +214,9 @@ struct Scal::value, "KokkosBlas::Impl::" "Scal<2-D>: RMV is not a Kokkos::View."); @@ -201,9 +229,6 @@ struct Scal: " "RMV is not rank 2."); - static_assert(AV::rank == 1, - "KokkosBlas::Impl::Scal<2-D>: " - "AV is not rank 1."); static_assert(XMV::rank == 2, "KokkosBlas::Impl::Scal<2-D>: " "XMV is not rank 2."); @@ -312,17 +337,29 @@ struct Scal, \ Kokkos::MemoryTraits >, \ SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; \ + extern template struct Scal< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -334,6 +371,17 @@ struct Scal, \ Kokkos::MemoryTraits >, \ SCALAR, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, false, true>; \ + template struct Scal< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ diff --git a/blas/impl/KokkosBlas1_scal_unified_scalar_view_impl.hpp b/blas/impl/KokkosBlas1_scal_unified_scalar_view_impl.hpp index 0e7959408c..24faba32d9 100644 --- a/blas/impl/KokkosBlas1_scal_unified_scalar_view_impl.hpp +++ b/blas/impl/KokkosBlas1_scal_unified_scalar_view_impl.hpp @@ -19,31 +19,38 @@ #include #include +#include + /*! 
\brief -Canonicalizes a variety of different "scalar" values of AVa -into one of four alpha_types depending on whether AV comes -from host-accesible memory or not - -Implements the following table: - -"host" meaning Kokkos::Impl::MemorySpaceAccess::accessible from AV - -Row | RMV | AV | XMV | alpha_type - 1 | Rank-1 | S | Rank-1 | const S - 2 | Rank-2 | S | Rank-2 | const S - 3 | Rank-1 | View | Rank-1 | const S - 4 | Rank-2 | View | Rank-2 | const S - 5 | Rank-1 | View | Rank-1 | View - 6 | Rank-2 | View | Rank-2 | View - 7 | Rank-1 | View | Rank-1 | const S - 8 | Rank-2 | View | Rank-2 | const S - 9 | Rank-1 | View | Rank-1 | const S -10 | Rank-2 | View | Rank-2 | View -11 | Rank-1 | View | Rank-1 | View -12 | Rank-1 | View | Rank-1 | View -13 | Rank-2 | View | Rank-2 | View -14 | Rank-2 | View | Rank-2 | View +Canonicalizes a variety of different "scalar" values of AV to the most +restrictive version still consistent with the interface. + +This may reduce the number of instantiations. + +Transformations made: +* rank-1 AV, RMV, and XMV means AV is actually a scalar. On the host, we can go +further and convert to a true scalar since we can access the value. On device, +take a subview to convert to rank-0 scalar. 
+* S[1] -> S +* apply const to view data types +* TODO: simplify this by just applying these in turn when possible + +Row | RMV / XMV | AV | alpha_type + 1 | Rank-1 | S | S + 2 | Rank-2 | S | S + 3 | Rank-1 | View | S + 4 | Rank-2 | View | S + 5 | Rank-1 | View | View + 6 | Rank-2 | View | View + 7 | Rank-1 | View | S + 8 | Rank-2 | View | S + 9 | Rank-1 | View | S +10 | Rank-2 | View | View +11 | Rank-1 | View | View +12 | Rank-1 | View | View +13 | Rank-2 | View | View +14 | Rank-2 | View | View See comments on the implementation below for each rows @@ -57,33 +64,29 @@ namespace KokkosBlas::Impl { template struct is_host : std::false_type {}; template -struct is_host< - T, - std::enable_if_t && - !Kokkos::Impl::MemorySpaceAccess< - Kokkos::HostSpace, typename T::memory_space>::accessible>> +struct is_host && + Kokkos::Impl::MemorySpaceAccess< + Kokkos::HostSpace, + typename T::memory_space>::accessible>> : std::true_type {}; template constexpr inline bool is_host_v = is_host::value; -template -constexpr inline bool is_dev_v = !is_host_v; - template -struct is_rank_0 : std::false_type {}; +struct is_dev : std::false_type {}; template -struct is_rank_0 && T::rank == 0>> +struct is_dev && + !Kokkos::Impl::MemorySpaceAccess< + Kokkos::HostSpace, + typename T::memory_space>::accessible>> : std::true_type {}; template -constexpr inline bool is_rank_0_v = is_rank_0::value; - +constexpr inline bool is_dev_v = is_dev::value; template struct is_rank_0_host : std::false_type {}; template -struct is_rank_0_host && T::rank == 0>> +struct is_rank_0_host && T::rank == 0>> : std::true_type {}; template constexpr inline bool is_rank_0_host_v = is_rank_0_host::value; @@ -91,8 +94,7 @@ constexpr inline bool is_rank_0_host_v = is_rank_0_host::value; template struct is_rank_1_host : std::false_type {}; template -struct is_rank_1_host && T::rank == 1>> +struct is_rank_1_host && T::rank == 1>> : std::true_type {}; template constexpr inline bool is_rank_1_host_v = is_rank_1_host::value; 
@@ -100,19 +102,25 @@ constexpr inline bool is_rank_1_host_v = is_rank_1_host::value; template struct is_rank_1_host_static : std::false_type {}; template -struct is_rank_1_host_static && - T::static_extent(0) == 1>> +struct is_rank_1_host_static< + T, std::enable_if_t && T::static_extent(0) == 1>> + : std::true_type {}; +template +constexpr inline bool is_rank_1_host_static_v = is_rank_1_host_static::value; + +template +struct is_rank_1_host_dynamic : std::false_type {}; +template +struct is_rank_1_host_dynamic< + T, std::enable_if_t && T::rank_dynamic == 1>> : std::true_type {}; template -constexpr inline bool is_rank_1_host_static_v = - is_rank_1_host_static::value; +constexpr inline bool is_rank_1_host_dynamic_v = is_rank_1_host_dynamic::value; template struct is_rank_0_dev : std::false_type {}; template -struct is_rank_0_dev && T::rank == 0>> +struct is_rank_0_dev && T::rank == 0>> : std::true_type {}; template constexpr inline bool is_rank_0_dev_v = is_rank_0_dev::value; @@ -120,8 +128,7 @@ constexpr inline bool is_rank_0_dev_v = is_rank_0_dev::value; template struct is_rank_1_dev : std::false_type {}; template -struct is_rank_1_dev && T::rank == 1>> +struct is_rank_1_dev && T::rank == 1>> : std::true_type {}; template constexpr inline bool is_rank_1_dev_v = is_rank_1_dev::value; @@ -130,54 +137,57 @@ template struct is_rank_1_dev_static : std::false_type {}; template struct is_rank_1_dev_static< - T, - std::enable_if_t && T::static_extent(0) == 1>> + T, std::enable_if_t && T::static_extent(0) == 1>> + : std::true_type {}; +template +constexpr inline bool is_rank_1_dev_static_v = is_rank_1_dev_static::value; + +template +struct is_rank_1_dev_dynamic : std::false_type {}; +template +struct is_rank_1_dev_dynamic< + T, std::enable_if_t && T::rank_dynamic == 1>> : std::true_type {}; template -constexpr inline bool is_rank_1_dev_static_v = - is_rank_1_dev_static::value; +constexpr inline bool is_rank_1_dev_dynamic_v = is_rank_1_dev_dynamic::value; -template 
+template struct scal_unified_scalar_view; -// Rows 1,2: scalar -> const S +// Rows 1,2: S -> S template struct scal_unified_scalar_view>> { - using alpha_type = const AV; + using alpha_type = AV; - static alpha_type from(const AV &av) { - return av; } + static alpha_type from(const AV &av) { return av; } }; // Rows 3,4: AV is a rank 0 host view template -struct scal_unified_scalar_view< - RMV, AV, XMV, - std::enable_if_t>> { - using alpha_type = const typename AV::data_type; +struct scal_unified_scalar_view>> { + using alpha_type = typename AV::non_const_data_type; static alpha_type from(const AV &av) { return av(); } }; // Rows 5,6: AV is a rank 0 device view template -struct scal_unified_scalar_view< - RMV, AV, XMV, std::enable_if_t>> { - using alpha_type = Kokkos::View; +struct scal_unified_scalar_view>> { + using alpha_type = + Kokkos::View; static alpha_type from(const AV &av) { return alpha_type(av); } }; // Rows 7,8: AV is a rank 1 host view with known extent template -struct scal_unified_scalar_view< - RMV, AV, XMV, - std::enable_if_t>> { - - using alpha_type = const typename AV::value_type; +struct scal_unified_scalar_view>> { + using alpha_type = typename AV::non_const_value_type; static alpha_type from(const AV &av) { return av(0); } }; @@ -187,14 +197,13 @@ struct scal_unified_scalar_view< template struct scal_unified_scalar_view< RMV, AV, XMV, - std::enable_if_t && XMV::rank == 1 && - RMV::rank == 1>> { - - using alpha_type = const typename AV::value_type; + std::enable_if_t && RMV::rank == 1 && XMV::rank == 1>> { + using alpha_type = typename AV::non_const_value_type; static alpha_type from(const AV &av) { return av(0); } }; + // Row 10: AV is a rank 1 host view of unknown size, and we assume // each element is to scale a vector in RMV and XMV template @@ -202,9 +211,9 @@ struct scal_unified_scalar_view< RMV, AV, XMV, std::enable_if_t && XMV::rank == 2 && RMV::rank == 2>> { - - using alpha_type = Kokkos::View; + using alpha_type = + Kokkos::View; 
static alpha_type from(const AV &av) { return av; } }; @@ -212,11 +221,8 @@ struct scal_unified_scalar_view< // Row 11, 12: AV is a rank 1 dev view, but we assume its // a single scalar since XMV and YMV are rank 1 template -struct scal_unified_scalar_view< - RMV, AV, XMV, - std::enable_if_t && XMV::rank == 1 && - RMV::rank == 1>> { - +struct scal_unified_scalar_view>> { using alpha_type = Kokkos::View; @@ -227,10 +233,8 @@ struct scal_unified_scalar_view< // Row 13: AV is a rank 1 dev view of static size, // so its a single scalar template -struct scal_unified_scalar_view< - RMV, AV, XMV, - std::enable_if_t>> { - +struct scal_unified_scalar_view>> { using alpha_type = Kokkos::View; @@ -244,37 +248,57 @@ struct scal_unified_scalar_view< template struct scal_unified_scalar_view< RMV, AV, XMV, - std::enable_if_t && XMV::rank == 2 && - RMV::rank == 2>> { - - using alpha_type = Kokkos::View; + std::enable_if_t && XMV::rank == 2 && RMV::rank == 2>> { + using alpha_type = + Kokkos::View; static alpha_type from(const AV &av) { return av; } }; -/*! +/*! \brief return av + Get a POD, Kokkos::complex, or 0D view as a scalar */ -template , bool> = true> -KOKKOS_INLINE_FUNCTION -auto as_scalar(const AV &av) { - return av; +template , bool> = true> +KOKKOS_INLINE_FUNCTION auto as_scalar(const AV &av) { + return av; } -/*! +/*! \brief return av() + Get a POD, Kokkos::complex, or 0D view as a scalar */ template , bool> = true> -KOKKOS_INLINE_FUNCTION -auto as_scalar(const AV &av) { - return av(); + std::enable_if_t, bool> = true> +KOKKOS_INLINE_FUNCTION auto as_scalar(const AV &av) { + return av(); } +/*! \brief return av + */ +template , bool> = true> +KOKKOS_INLINE_FUNCTION auto as_scalar(const AV &av, const IndexType & /*i*/) { + return av; +} +/*! \brief return av() + */ +template , bool> = true> +KOKKOS_INLINE_FUNCTION auto as_scalar(const AV &av, const IndexType &i) { + return av(); +} + +/*! 
\brief return av(i) + */ +template , bool> = true> +KOKKOS_INLINE_FUNCTION auto as_scalar(const AV &av, const IndexType &i) { + return av(i); +} } // namespace KokkosBlas::Impl diff --git a/blas/unit_test/Test_Blas1_scal.hpp b/blas/unit_test/Test_Blas1_scal.hpp index 6c4f7b7f2a..1eb4df004e 100644 --- a/blas/unit_test/Test_Blas1_scal.hpp +++ b/blas/unit_test/Test_Blas1_scal.hpp @@ -21,6 +21,84 @@ #include namespace Test { + +/*! \brief Test scal with AV being a view or a scalar + +*/ +template +void impl_test_scal_a_is_3(int N, const AV &a) { + + typedef typename XView::value_type ScalarX; + typedef typename YView::value_type ScalarY; + typedef Kokkos::ArithTraits AT; + + typename AT::mag_type eps = AT::epsilon() * 1000; + + view_stride_adapter x("X", N); + view_stride_adapter y("Y", N); + + Kokkos::Random_XorShift64_Pool rand_pool( + 13718); + + { + ScalarX randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(x.d_view, rand_pool, randStart, randEnd); + } + + Kokkos::deep_copy(x.h_base, x.d_base); + + KokkosBlas::scal(y.d_view, a, x.d_view); + Kokkos::deep_copy(y.h_base, y.d_base); + for (int i = 0; i < N; i++) { + // scaling factor is 3 + EXPECT_NEAR_KK(static_cast(3 * x.h_view(i)), y.h_view(i), eps); + } + + // Zero out y again and run with const input + Kokkos::deep_copy(y.d_view, Kokkos::ArithTraits::zero()); + KokkosBlas::scal(y.d_view, a, x.d_view_const); + Kokkos::deep_copy(y.h_base, y.d_base); + for (int i = 0; i < N; i++) { + // scaling factor is three + EXPECT_NEAR_KK(static_cast(3 * x.h_view(i)), y.h_view(i), eps); + } + +} + +/*! \brief test scal with alpha is a rank-1 scalar + + \tparam VIEW is alpha a view? + \tparam RANK if VIEW what rank is alpha? + \tparam STATIC if VIEW and RANK=1, is the extent static? 
+*/ +template +void impl_test_scal_alphas(int N) { + + using XView = Kokkos::View; + using YView = Kokkos::View; + + if constexpr(VIEW) { + if constexpr (1 == RANK && STATIC) { + Kokkos::View a("View"); + Kokkos::deep_copy(a, 3); + impl_test_scal_a_is_3(N, a); + } else if constexpr(1 == RANK) { + Kokkos::View a("View", 1); + Kokkos::deep_copy(a, 3); + impl_test_scal_a_is_3(N, a); + } else if constexpr(0 == RANK) { + Kokkos::View a("View"); + Kokkos::deep_copy(a, 3); + impl_test_scal_a_is_3(N, a); + } + } else { + ScalarA a{3}; + impl_test_scal_a_is_3(N, a); + } + +} + template void impl_test_scal(int N) { typedef typename ViewTypeA::value_type ScalarA; @@ -145,10 +223,26 @@ int test_scal() { !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_ll; typedef Kokkos::View view_type_b_ll; +#if 0 Test::impl_test_scal(0); Test::impl_test_scal(13); Test::impl_test_scal(1024); - // Test::impl_test_scal(132231); +#else + Test::impl_test_scal_alphas(0); + Test::impl_test_scal_alphas(0); + Test::impl_test_scal_alphas(0); + Test::impl_test_scal_alphas(0); + + Test::impl_test_scal_alphas(13); + Test::impl_test_scal_alphas(13); + Test::impl_test_scal_alphas(13); + Test::impl_test_scal_alphas(13); + + Test::impl_test_scal_alphas(1024); + Test::impl_test_scal_alphas(1024); + Test::impl_test_scal_alphas(1024); + Test::impl_test_scal_alphas(1024); +#endif #endif #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ @@ -159,7 +253,6 @@ int test_scal() { Test::impl_test_scal(0); Test::impl_test_scal(13); Test::impl_test_scal(1024); - // Test::impl_test_scal(132231); #endif #if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ @@ -169,7 +262,6 @@ int test_scal() { Test::impl_test_scal(0); Test::impl_test_scal(13); Test::impl_test_scal(1024); - // Test::impl_test_scal(132231); #endif #if !defined(KOKKOSKERNELS_ETI_ONLY) && \ diff --git a/common/src/KokkosKernels_UnifiedScalarView.hpp b/common/src/KokkosKernels_UnifiedScalarView.hpp index 9e50bd0851..bb27a83abd 100644 --- 
a/common/src/KokkosKernels_UnifiedScalarView.hpp +++ b/common/src/KokkosKernels_UnifiedScalarView.hpp @@ -17,6 +17,8 @@ #ifndef KOKKOSKERNELS_UNIFIEDSCALARVIEW_HPP #define KOKKOSKERNELS_UNIFIEDSCALARVIEW_HPP +#if 0 + #include #include @@ -102,4 +104,6 @@ constexpr unified_scalar_t get_scalar(const Value &v) { } // namespace Impl } // namespace KokkosKernels +#endif + #endif // KOKKOSKERNELS_UNIFIEDSCALARVIEW_HPP \ No newline at end of file diff --git a/common/src/KokkosKernels_ViewUtils.hpp b/common/src/KokkosKernels_ViewUtils.hpp new file mode 100644 index 0000000000..bae4b6fc1d --- /dev/null +++ b/common/src/KokkosKernels_ViewUtils.hpp @@ -0,0 +1,47 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#include + +#include "Kokkos_Core.hpp" + +#ifndef KOKKOSKERNELS_VIEWUTILS_HPP +#define KOKKOSKERNELS_VIEWUTILS_HPP + +namespace KokkosKernels::Impl { + +template +struct is_rank_0 : std::false_type {}; +template +struct is_rank_0 && T::rank == 0>> + : std::true_type {}; +template +constexpr inline bool is_rank_0_v = is_rank_0::value; + +template +struct is_rank_1 : std::false_type {}; +template +struct is_rank_1 && T::rank == 1>> + : std::true_type {}; +template +constexpr inline bool is_rank_1_v = is_rank_1::value; + + +} // namespace KokkosKernels::Impl + +#endif // KOKKOSKERNELS_VIEWUTILS_HPP + diff --git a/common/src/KokkosKernels_helpers.hpp b/common/src/KokkosKernels_helpers.hpp index b36360b991..2e8c29ce20 100644 --- a/common/src/KokkosKernels_helpers.hpp +++ b/common/src/KokkosKernels_helpers.hpp @@ -43,15 +43,19 @@ struct GetUnifiedLayout { default_layout>::array_layout; }; +/* If T is not a view, type is TX::non_const_value_type +*/ template ::value> struct GetUnifiedScalarViewType { typedef typename TX::non_const_value_type type; }; +/* If T is a view, type is T with unified layout & unmanaged +*/ template struct GetUnifiedScalarViewType { - typedef Kokkos::View::array_layout, typename T::device_type, @@ -61,7 +65,7 @@ struct GetUnifiedScalarViewType { template struct GetUnifiedScalarViewType { - typedef Kokkos::View::array_layout, typename T::device_type, @@ -69,6 +73,7 @@ struct GetUnifiedScalarViewType { type; }; + template struct are_integral : std::bool_constant<((std::is_integral_v || std::is_enum_v)&&...)> {}; diff --git a/common/unit_test/Test_Common.hpp b/common/unit_test/Test_Common.hpp index b734522c47..38d4d9c2c8 100644 --- a/common/unit_test/Test_Common.hpp +++ b/common/unit_test/Test_Common.hpp @@ -26,7 +26,7 @@ #include #include #include -#include +// #include #include #endif // TEST_COMMON_HPP