diff --git a/src/batched/dense/KokkosBatched_Axpy.hpp b/src/batched/dense/KokkosBatched_Axpy.hpp index 9b2eef755a..d9c33bf889 100644 --- a/src/batched/dense/KokkosBatched_Axpy.hpp +++ b/src/batched/dense/KokkosBatched_Axpy.hpp @@ -57,7 +57,8 @@ namespace KokkosBatched { /// * y_1, ..., y_N are the N output vectors, /// * alpha_1, ..., alpha_N are N scaling factors for x_1, ..., x_N. /// -/// \tparam ViewType: Input type for X and Y, needs to be a 2D view +/// \tparam XViewType: Input type for X, needs to be a 2D view +/// \tparam YViewType: Input type for Y, needs to be a 2D view /// \tparam alphaViewType: Input type for alpha, needs to be a 1D view /// /// \param alpha [in]: input coefficient for X, a rank 1 view @@ -68,10 +69,10 @@ namespace KokkosBatched { /// struct SerialAxpy { - template + template KOKKOS_INLINE_FUNCTION static int invoke(const alphaViewType &alpha, - const ViewType &X, - const ViewType &Y); + const XViewType &X, + const YViewType &Y); }; /// \brief Team Batched AXPY: @@ -82,7 +83,8 @@ struct SerialAxpy { /// * y_1, ..., y_N are the N output vectors, /// * alpha_1, ..., alpha_N are N scaling factors for x_1, ..., x_N. /// -/// \tparam ViewType: Input type for X and Y, needs to be a 2D view +/// \tparam XViewType: Input type for X, needs to be a 2D view +/// \tparam YViewType: Input type for Y, needs to be a 2D view /// \tparam alphaViewType: Input type for alpha, needs to be a 1D view /// /// \param member [in]: TeamPolicy member @@ -95,11 +97,11 @@ struct SerialAxpy { template struct TeamAxpy { - template + template KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const alphaViewType &alpha, - const ViewType &X, - const ViewType &Y); + const XViewType &X, + const YViewType &Y); }; /// \brief TeamVector Batched AXPY: @@ -110,7 +112,8 @@ struct TeamAxpy { /// * y_1, ..., y_N are the N output vectors, /// * alpha_1, ..., alpha_N are N scaling factors for x_1, ..., x_N. 
/// -/// \tparam ViewType: Input type for X and Y, needs to be a 2D view +/// \tparam XViewType: Input type for X, needs to be a 2D view +/// \tparam YViewType: Input type for Y, needs to be a 2D view /// \tparam alphaViewType: Input type for alpha, needs to be a 1D view /// /// \param member [in]: TeamPolicy member @@ -124,11 +127,11 @@ struct TeamAxpy { template struct TeamVectorAxpy { - template + template KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const alphaViewType &alpha, - const ViewType &X, - const ViewType &Y); + const XViewType &X, + const YViewType &Y); }; } // namespace KokkosBatched diff --git a/src/batched/dense/KokkosBatched_Copy_Decl.hpp b/src/batched/dense/KokkosBatched_Copy_Decl.hpp index c1b289e5e0..c12c8d7209 100644 --- a/src/batched/dense/KokkosBatched_Copy_Decl.hpp +++ b/src/batched/dense/KokkosBatched_Copy_Decl.hpp @@ -11,7 +11,7 @@ namespace KokkosBatched { /// Serial Copy /// -template +template struct SerialCopy { template KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, @@ -22,7 +22,8 @@ struct SerialCopy { /// Team Copy /// -template +template struct TeamCopy { template KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, @@ -34,7 +35,8 @@ struct TeamCopy { /// TeamVector Copy /// -template +template struct TeamVectorCopy { template KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, @@ -45,7 +47,8 @@ struct TeamVectorCopy { /// /// Selective Interface /// -template +template struct Copy { template KOKKOS_FORCEINLINE_FUNCTION static int invoke(const MemberType &member, @@ -53,11 +56,11 @@ struct Copy { const BViewType &B) { int r_val = 0; if (std::is_same::value) { - r_val = SerialCopy::invoke(A, B); + r_val = SerialCopy::invoke(A, B); } else if (std::is_same::value) { - r_val = TeamCopy::invoke(member, A, B); + r_val = TeamCopy::invoke(member, A, B); } else if (std::is_same::value) { - r_val = TeamVectorCopy::invoke(member, A, B); + r_val = TeamVectorCopy::invoke(member, 
A, B); } return r_val; } diff --git a/src/batched/dense/KokkosBatched_Dot.hpp b/src/batched/dense/KokkosBatched_Dot.hpp new file mode 100644 index 0000000000..43d8c5ee5b --- /dev/null +++ b/src/batched/dense/KokkosBatched_Dot.hpp @@ -0,0 +1,161 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.4 +// Copyright (2021) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +#ifndef __KOKKOSBATCHED_DOT_HPP__ +#define __KOKKOSBATCHED_DOT_HPP__ + +/// \author Kim Liegeois (knliege@sandia.gov) + +#include "KokkosBatched_Util.hpp" +#include "KokkosBatched_Vector.hpp" + +namespace KokkosBatched { + +/// \brief Serial Batched DOT: +/// +/// Depending on the ArgTrans template, the dot product is +/// row-based (ArgTrans == Trans::NoTranspose): +/// +/// dot_l <- (x_l:, y_l:) for all l = 1, ..., N +/// where: +/// * N is the second dimension of X. +/// +/// Or column-based: +/// dot_l <- (x_:l, y_:l) for all l = 1, ..., n +/// where: +/// * n is the second dimension of X. +/// +/// \tparam ArgTrans: type of dot product (Trans::NoTranspose by default) +/// \tparam XViewType: Input type for X, needs to be a 2D view +/// \tparam YViewType: Input type for Y, needs to be a 2D view +/// \tparam NormViewType: Output type for dot, needs to be a 1D view +/// +/// \param X [in]: Input vector X, a rank 2 view +/// \param Y [in]: Input vector Y, a rank 2 view +/// \param dot [out]: Computed dot product, a rank 1 view +/// +/// No nested parallel_for is used inside of the function.
+/// + +template +struct SerialDot { + template + KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &X, + const YViewType &Y, + const NormViewType &dot); +}; + +/// \brief Team Batched DOT: +/// +/// Depending on the ArgTrans template, the dot product is +/// row-based (ArgTrans == Trans::NoTranspose): +/// +/// dot_l <- (x_l:, y_l:) for all l = 1, ..., N +/// where: +/// * N is the second dimension of X. +/// +/// Or column-based: +/// dot_l <- (x_:l, y_:l) for all l = 1, ..., n +/// where: +/// * n is the second dimension of X. +/// +/// \tparam ArgTrans: type of dot product (Trans::NoTranspose by default) +/// \tparam XViewType: Input type for X, needs to be a 2D view +/// \tparam YViewType: Input type for Y, needs to be a 2D view +/// \tparam NormViewType: Output type for dot, needs to be a 1D view +/// +/// \param X [in]: Input vector X, a rank 2 view +/// \param Y [in]: Input vector Y, a rank 2 view +/// \param dot [out]: Computed dot product, a rank 1 view +/// +/// A nested parallel_for with TeamThreadRange is used. +/// + +template +struct TeamDot { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, + const XViewType &X, + const YViewType &Y, + const NormViewType &dot); +}; + +/// \brief TeamVector Batched DOT: +/// +/// Depending on the ArgTrans template, the dot product is +/// row-based (ArgTrans == Trans::NoTranspose): +/// +/// dot_l <- (x_l:, y_l:) for all l = 1, ..., N +/// where: +/// * N is the second dimension of X. +/// +/// Or column-based: +/// dot_l <- (x_:l, y_:l) for all l = 1, ..., n +/// where: +/// * n is the second dimension of X.
+/// +/// \tparam ArgTrans: type of dot product (Trans::NoTranspose by default) +/// \tparam XViewType: Input type for X, needs to be a 2D view +/// \tparam YViewType: Input type for Y, needs to be a 2D view +/// \tparam NormViewType: Output type for dot, needs to be a 1D view +/// +/// \param X [in]: Input vector X, a rank 2 view +/// \param Y [in]: Input vector Y, a rank 2 view +/// \param dot [out]: Computed dot product, a rank 1 view +/// +/// Two nested parallel_for with both TeamThreadRange and ThreadVectorRange +/// (or one with TeamVectorRange) are used inside. +/// + +template +struct TeamVectorDot { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, + const XViewType &X, + const YViewType &Y, + const NormViewType &dot); +}; + +} // namespace KokkosBatched + +#include "KokkosBatched_Dot_Internal.hpp" + +#endif diff --git a/src/batched/dense/KokkosBatched_Xpay.hpp b/src/batched/dense/KokkosBatched_Xpay.hpp new file mode 100644 index 0000000000..26cb96835e --- /dev/null +++ b/src/batched/dense/KokkosBatched_Xpay.hpp @@ -0,0 +1,141 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.4 +// Copyright (2021) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +#ifndef __KOKKOSBATCHED_XPAY_HPP__ +#define __KOKKOSBATCHED_XPAY_HPP__ + +/// \author Kim Liegeois (knliege@sandia.gov) + +#include "KokkosBatched_Util.hpp" +#include "KokkosBatched_Vector.hpp" + +namespace KokkosBatched { + +/// \brief Serial Batched XPAY: +/// y_l <- x_l + alpha_l * y_l for all l = 1, ..., N +/// where: +/// * N is the number of vectors, +/// * x_1, ..., x_N are the N input vectors, +/// * y_1, ..., y_N are the N output vectors, +/// * alpha_1, ..., alpha_N are N scaling factors for y_1, ..., y_N. 
+/// +/// \tparam XViewType: Input type for X, needs to be a 2D view +/// \tparam YViewType: Input type for Y, needs to be a 2D view +/// \tparam alphaViewType: Input type for alpha, needs to be a 1D view +/// +/// \param alpha [in]: input coefficient for Y, a rank 1 view +/// \param X [in]: Input vector X, a rank 2 view +/// \param Y [in/out]: Output vector Y, a rank 2 view +/// +/// No nested parallel_for is used inside of the function. +/// + +struct SerialXpay { + template + KOKKOS_INLINE_FUNCTION static int invoke(const alphaViewType &alpha, + const ViewType &X, + const ViewType &Y); +}; + +/// \brief Team Batched XPAY: +/// y_l <- x_l + alpha_l * y_l for all l = 1, ..., N +/// where: +/// * N is the number of vectors, +/// * x_1, ..., x_N are the N input vectors, +/// * y_1, ..., y_N are the N output vectors, +/// * alpha_1, ..., alpha_N are N scaling factors for y_1, ..., y_N. +/// +/// \tparam XViewType: Input type for X, needs to be a 2D view +/// \tparam YViewType: Input type for Y, needs to be a 2D view +/// \tparam alphaViewType: Input type for alpha, needs to be a 1D view +/// +/// \param member [in]: TeamPolicy member +/// \param alpha [in]: input coefficient for Y, a rank 1 view +/// \param X [in]: Input vector X, a rank 2 view +/// \param Y [in/out]: Output vector Y, a rank 2 view +/// +/// A nested parallel_for with TeamThreadRange is used. +/// + +template +struct TeamXpay { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, + const alphaViewType &alpha, + const ViewType &X, + const ViewType &Y); +}; + +/// \brief TeamVector Batched XPAY: +/// y_l <- x_l + alpha_l * y_l for all l = 1, ..., N +/// where: +/// * N is the number of vectors, +/// * x_1, ..., x_N are the N input vectors, +/// * y_1, ..., y_N are the N output vectors, +/// * alpha_1, ..., alpha_N are N scaling factors for y_1, ..., y_N. 
+/// +/// \tparam XViewType: Input type for X, needs to be a 2D view +/// \tparam YViewType: Input type for Y, needs to be a 2D view +/// \tparam alphaViewType: Input type for alpha, needs to be a 1D view +/// +/// \param member [in]: TeamPolicy member +/// \param alpha [in]: input coefficient for Y, a rank 1 view +/// \param X [in]: Input vector X, a rank 2 view +/// \param Y [in/out]: Output vector Y, a rank 2 view +/// +/// Two nested parallel_for with both TeamThreadRange and ThreadVectorRange +/// (or one with TeamVectorRange) are used inside. +/// + +template +struct TeamVectorXpay { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, + const alphaViewType &alpha, + const ViewType &X, + const ViewType &Y); +}; + +} // namespace KokkosBatched + +#include "KokkosBatched_Xpay_Impl.hpp" + +#endif diff --git a/src/batched/dense/impl/KokkosBatched_Axpy_Impl.hpp b/src/batched/dense/impl/KokkosBatched_Axpy_Impl.hpp index ed4455485e..10fc9da254 100644 --- a/src/batched/dense/impl/KokkosBatched_Axpy_Impl.hpp +++ b/src/batched/dense/impl/KokkosBatched_Axpy_Impl.hpp @@ -203,17 +203,21 @@ struct TeamVectorAxpyInternal { /// /// Serial Impl /// =========== -template +template KOKKOS_INLINE_FUNCTION int SerialAxpy::invoke(const alphaViewType& alpha, - const ViewType& X, - const ViewType& Y) { + const XViewType& X, + const YViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::Impl::is_view::value, - "KokkosBatched::axpy: ViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::axpy: XViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::axpy: YViewType is not a Kokkos::View."); static_assert(Kokkos::Impl::is_view::value, "KokkosBatched::axpy: alphaViewType is not a Kokkos::View."); - static_assert(ViewType::Rank == 2, - "KokkosBatched::axpy: ViewType must have rank 2."); + static_assert(XViewType::Rank == 2, + "KokkosBatched::axpy: 
XViewType must have rank 2."); + static_assert(YViewType::Rank == 2, + "KokkosBatched::axpy: YViewType must have rank 2."); static_assert(alphaViewType::Rank == 1, "KokkosBatched::axpy: alphaViewType must have rank 1."); @@ -236,7 +240,7 @@ KOKKOS_INLINE_FUNCTION int SerialAxpy::invoke(const alphaViewType& alpha, return SerialAxpyInternal::template invoke< typename alphaViewType::non_const_value_type, - typename ViewType::non_const_value_type>( + typename XViewType::non_const_value_type>( X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), Y.stride_1()); } @@ -246,17 +250,21 @@ KOKKOS_INLINE_FUNCTION int SerialAxpy::invoke(const alphaViewType& alpha, /// ========= template -template +template KOKKOS_INLINE_FUNCTION int TeamAxpy::invoke( - const MemberType& member, const alphaViewType& alpha, const ViewType& X, - const ViewType& Y) { + const MemberType& member, const alphaViewType& alpha, const XViewType& X, + const YViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::Impl::is_view::value, - "KokkosBatched::axpy: ViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::axpy: XViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::axpy: YViewType is not a Kokkos::View."); static_assert(Kokkos::Impl::is_view::value, "KokkosBatched::axpy: alphaViewType is not a Kokkos::View."); - static_assert(ViewType::Rank == 2, - "KokkosBatched::axpy: ViewType must have rank 2."); + static_assert(XViewType::Rank == 2, + "KokkosBatched::axpy: XViewType must have rank 2."); + static_assert(YViewType::Rank == 2, + "KokkosBatched::axpy: YViewType must have rank 2."); static_assert(alphaViewType::Rank == 1, "KokkosBatched::axpy: alphaViewType must have rank 1."); @@ -279,7 +287,7 @@ KOKKOS_INLINE_FUNCTION int TeamAxpy::invoke( return TeamAxpyInternal::template invoke< MemberType, typename 
alphaViewType::non_const_value_type, - typename ViewType::non_const_value_type>( + typename XViewType::non_const_value_type>( member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), Y.stride_1()); @@ -290,17 +298,21 @@ KOKKOS_INLINE_FUNCTION int TeamAxpy::invoke( /// =============== template -template +template KOKKOS_INLINE_FUNCTION int TeamVectorAxpy::invoke( - const MemberType& member, const alphaViewType& alpha, const ViewType& X, - const ViewType& Y) { + const MemberType& member, const alphaViewType& alpha, const XViewType& X, + const YViewType& Y) { #if (KOKKOSKERNELS_DEBUG_LEVEL > 0) - static_assert(Kokkos::Impl::is_view::value, - "KokkosBatched::axpy: ViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::axpy: XViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::axpy: YViewType is not a Kokkos::View."); static_assert(Kokkos::Impl::is_view::value, "KokkosBatched::axpy: alphaViewType is not a Kokkos::View."); - static_assert(ViewType::Rank == 2, - "KokkosBatched::axpy: ViewType must have rank 2."); + static_assert(XViewType::Rank == 2, + "KokkosBatched::axpy: XViewType must have rank 2."); + static_assert(YViewType::Rank == 2, + "KokkosBatched::axpy: YViewType must have rank 2."); static_assert(alphaViewType::Rank == 1, "KokkosBatched::axpy: alphaViewType must have rank 1."); @@ -323,10 +335,11 @@ KOKKOS_INLINE_FUNCTION int TeamVectorAxpy::invoke( return TeamVectorAxpyInternal::invoke< MemberType, typename alphaViewType::non_const_value_type, - typename ViewType::non_const_value_type, typename ViewType::array_layout>( - member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), - X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), - Y.stride_1()); + typename XViewType::non_const_value_type, + typename XViewType::array_layout>(member, X.extent(0), X.extent(1), + alpha.data(), 
alpha.stride_0(), + X.data(), X.stride_0(), X.stride_1(), + Y.data(), Y.stride_0(), Y.stride_1()); } } // namespace KokkosBatched diff --git a/src/batched/dense/impl/KokkosBatched_Copy_Impl.hpp b/src/batched/dense/impl/KokkosBatched_Copy_Impl.hpp index 896ec7cb86..f54e7d7f44 100644 --- a/src/batched/dense/impl/KokkosBatched_Copy_Impl.hpp +++ b/src/batched/dense/impl/KokkosBatched_Copy_Impl.hpp @@ -14,7 +14,23 @@ namespace KokkosBatched { template <> template -KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( +KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( + const AViewType &A, const BViewType &B) { + return SerialCopyInternal::invoke(A.extent(0), A.data(), A.stride_0(), + B.data(), B.stride_0()); +} + +template <> +template +KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( + const AViewType &A, const BViewType &B) { + return SerialCopyInternal::invoke(A.extent(0), A.data(), A.stride_0(), + B.data(), B.stride_0()); +} + +template <> +template +KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( const AViewType &A, const BViewType &B) { return SerialCopyInternal::invoke(A.extent(0), A.extent(1), A.data(), A.stride_0(), A.stride_1(), B.data(), @@ -23,7 +39,7 @@ KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( template <> template -KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( +KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( const AViewType &A, const BViewType &B) { return SerialCopyInternal::invoke(A.extent(1), A.extent(0), A.data(), A.stride_1(), A.stride_0(), B.data(), @@ -35,7 +51,29 @@ KOKKOS_INLINE_FUNCTION int SerialCopy::invoke( /// ========= template -struct TeamCopy { +struct TeamCopy { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, + const AViewType &A, + const BViewType &B) { + return TeamCopyInternal::invoke(member, A.extent(0), A.data(), A.stride_0(), + B.data(), B.stride_0()); + } +}; + +template +struct TeamCopy { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, + const AViewType &A, + const BViewType 
&B) { + return TeamCopyInternal::invoke(member, A.extent(0), A.data(), A.stride_0(), + B.data(), B.stride_0()); + } +}; + +template +struct TeamCopy { template KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, @@ -47,7 +85,7 @@ struct TeamCopy { }; template -struct TeamCopy { +struct TeamCopy { template KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, @@ -63,7 +101,29 @@ struct TeamCopy { /// ========= template -struct TeamVectorCopy { +struct TeamVectorCopy { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, + const AViewType &A, + const BViewType &B) { + return TeamVectorCopyInternal::invoke(member, A.extent(0), A.data(), + A.stride_0(), B.data(), B.stride_0()); + } +}; + +template +struct TeamVectorCopy { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, + const AViewType &A, + const BViewType &B) { + return TeamVectorCopyInternal::invoke(member, A.extent(0), A.data(), + A.stride_0(), B.data(), B.stride_0()); + } +}; + +template +struct TeamVectorCopy { template KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, @@ -75,7 +135,7 @@ struct TeamVectorCopy { }; template -struct TeamVectorCopy { +struct TeamVectorCopy { template KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, diff --git a/src/batched/dense/impl/KokkosBatched_Dot_Internal.hpp b/src/batched/dense/impl/KokkosBatched_Dot_Internal.hpp index 3492e3b9ab..2f8662d00f 100644 --- a/src/batched/dense/impl/KokkosBatched_Dot_Internal.hpp +++ b/src/batched/dense/impl/KokkosBatched_Dot_Internal.hpp @@ -14,11 +14,11 @@ namespace KokkosBatched { struct SerialDotInternal { // i \in [0,m) // C = conj(A(:))*B(:) - template + template KOKKOS_FORCEINLINE_FUNCTION static int invoke( const int m, const ValueType *__restrict__ A, const int as0, const ValueType *__restrict__ B, const int bs0, - /* */ ValueType 
*__restrict__ C) { + /* */ MagnitudeType *__restrict__ C) { using ats = Kokkos::ArithTraits; C[0] = ValueType(0); #if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) @@ -33,20 +33,68 @@ struct SerialDotInternal { // j \in [0,n), i \in [0,m) // C(j) = conj(A(:,j))*B(:,j) - template - KOKKOS_FORCEINLINE_FUNCTION static int invoke(const int m, const int n, - const ValueType *__restrict__ A, - const int as0, const int as1, - const ValueType *__restrict__ B, - const int bs0, const int bs1, - /* */ ValueType *__restrict__ C, - const int cs) { + template + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const int n, + const ValueType *__restrict__ A, + const int as0, const int as1, + const ValueType *__restrict__ B, + const int bs0, const int bs1, + /* */ MagnitudeType *__restrict__ C, + const int cs) { for (int j = 0; j < n; ++j) invoke(m, A + j * as1, as0, B + j * bs1, bs0, C + j * cs); return 0; } }; +/// +/// Team Internal Impl +/// ======================== + +// i \in [0,m) +// C = conj(A(:))*B(:) +struct TeamDotInternal { + template + KOKKOS_FORCEINLINE_FUNCTION static int invoke( + const MemberType &member, const int m, const ValueType *__restrict__ A, + const int as0, const ValueType *__restrict__ B, const int bs0, + /* */ MagnitudeType *__restrict__ C) { + using ats = Kokkos::ArithTraits; + ValueType t(0); + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(member, m), + [&](const int &i, ValueType &update) { + const int idx_a = i * as0, idx_b = i * bs0; + update += ats::conj(A[idx_a]) * B[idx_b]; + }, + t); + Kokkos::single(Kokkos::PerThread(member), [&]() { C[0] = t; }); + return 0; + } + + // j \in [0,n), i \in [0,m) + // C(j) = conj(A(:,j))*B(:,j) + template + KOKKOS_FORCEINLINE_FUNCTION static int invoke( + const MemberType &member, const int m, const int n, + const ValueType *__restrict__ A, const int as0, const int as1, + const ValueType *__restrict__ B, const int bs0, const int bs1, + /* */ MagnitudeType *__restrict__ C, const int cs) { + using ats = 
Kokkos::ArithTraits; + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { + ValueType t(0); + const ValueType *__restrict__ A_at_j = A + j * as1; + const ValueType *__restrict__ B_at_j = B + j * bs1; + for (int i = 0; i < m; ++i) { + const int idx_a = i * as0, idx_b = i * bs0; + t += ats::conj(A_at_j[idx_a]) * B_at_j[idx_b]; + } + Kokkos::single(Kokkos::PerThread(member), [&]() { C[j * cs] = t; }); + }); + return 0; + } +}; + /// /// TeamVector Internal Impl /// ======================== @@ -54,11 +102,11 @@ struct SerialDotInternal { // i \in [0,m) // C = conj(A(:))*B(:) struct TeamVectorDotInternal { - template + template KOKKOS_FORCEINLINE_FUNCTION static int invoke( const MemberType &member, const int m, const ValueType *__restrict__ A, const int as0, const ValueType *__restrict__ B, const int bs0, - /* */ ValueType *__restrict__ C) { + /* */ MagnitudeType *__restrict__ C) { using ats = Kokkos::ArithTraits; ValueType t(0); Kokkos::parallel_reduce( @@ -74,12 +122,12 @@ struct TeamVectorDotInternal { // j \in [0,n), i \in [0,m) // C(j) = conj(A(:,j))*B(:,j) - template + template KOKKOS_FORCEINLINE_FUNCTION static int invoke( const MemberType &member, const int m, const int n, const ValueType *__restrict__ A, const int as0, const int as1, const ValueType *__restrict__ B, const int bs0, const int bs1, - /* */ ValueType *__restrict__ C, const int cs) { + /* */ MagnitudeType *__restrict__ C, const int cs) { using ats = Kokkos::ArithTraits; Kokkos::parallel_for(Kokkos::TeamThreadRange(member, n), [&](const int &j) { ValueType t(0); @@ -98,6 +146,292 @@ struct TeamVectorDotInternal { } }; +/// +/// Serial Impl +/// =========== +template <> +struct SerialDot { + template + KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &X, + const YViewType &Y, + const NormViewType &dot) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: XViewType is not a Kokkos::View."); + 
static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: YViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: NormViewType is not a Kokkos::View."); + static_assert(XViewType::Rank == 2, + "KokkosBatched::dot: XViewType must have rank 2."); + static_assert(YViewType::Rank == 2, + "KokkosBatched::dot: YViewType must have rank 2."); + static_assert(NormViewType::Rank == 1, + "KokkosBatched::dot: NormViewType must have rank 1."); + + // Check compatibility of dimensions at run time. + if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { + printf( + "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " + "Y: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), + (int)Y.extent(1)); + return 1; + } + if (X.extent(1) != dot.extent(0)) { + printf( + "KokkosBatched::dot: Second dimension of X and alpha do not match: " + "X: " + "%d x %d, dot: %d\n", + (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); + return 1; + } +#endif + return SerialDotInternal::template invoke< + typename XViewType::non_const_value_type, + typename NormViewType::non_const_value_type>( + X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), + Y.data(), Y.stride_0(), Y.stride_1(), dot.data(), dot.stride_0()); + } +}; + +template <> +struct SerialDot { + template + KOKKOS_INLINE_FUNCTION static int invoke(const XViewType &X, + const YViewType &Y, + const NormViewType &dot) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: XViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: YViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: NormViewType is not a Kokkos::View."); + static_assert(XViewType::Rank == 2, + "KokkosBatched::dot: XViewType must have rank 2."); + static_assert(YViewType::Rank == 2, + "KokkosBatched::dot: YViewType 
must have rank 2."); + static_assert(NormViewType::Rank == 1, + "KokkosBatched::dot: NormViewType must have rank 1."); + + // Check compatibility of dimensions at run time. + if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { + printf( + "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " + "Y: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), + (int)Y.extent(1)); + return 1; + } + if (X.extent(0) != dot.extent(0)) { + printf( + "KokkosBatched::dot: First dimension of X and alpha do not match: X: " + "%d x %d, dot: %d\n", + (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); + return 1; + } +#endif + return SerialDotInternal::template invoke< + typename XViewType::non_const_value_type, + typename NormViewType::non_const_value_type>( + X.extent(1), X.extent(0), X.data(), X.stride_1(), X.stride_0(), + Y.data(), Y.stride_1(), Y.stride_0(), dot.data(), dot.stride_0()); + } +}; + +/// +/// Team Impl +/// =============== +template +struct TeamDot { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, + const XViewType &X, + const YViewType &Y, + const NormViewType &dot) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: XViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: YViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: NormViewType is not a Kokkos::View."); + static_assert(XViewType::Rank == 2, + "KokkosBatched::dot: XViewType must have rank 2."); + static_assert(YViewType::Rank == 2, + "KokkosBatched::dot: YViewType must have rank 2."); + static_assert(NormViewType::Rank == 1, + "KokkosBatched::dot: NormViewType must have rank 1."); + + // Check compatibility of dimensions at run time. 
+ if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { + printf( + "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " + "Y: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), + (int)Y.extent(1)); + return 1; + } + if (X.extent(1) != dot.extent(0)) { + printf( + "KokkosBatched::dot: Second dimension of X and alpha do not match: " + "X: " + "%d x %d, dot: %d\n", + (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); + return 1; + } +#endif + return TeamDotInternal::template invoke< + MemberType, typename XViewType::non_const_value_type, + typename NormViewType::non_const_value_type>( + member, X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), + Y.data(), Y.stride_0(), Y.stride_1(), dot.data(), dot.stride_0()); + } +}; + +template +struct TeamDot { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, + const XViewType &X, + const YViewType &Y, + const NormViewType &dot) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: XViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: YViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: NormViewType is not a Kokkos::View."); + static_assert(XViewType::Rank == 2, + "KokkosBatched::dot: XViewType must have rank 2."); + static_assert(YViewType::Rank == 2, + "KokkosBatched::dot: YViewType must have rank 2."); + static_assert(NormViewType::Rank == 1, + "KokkosBatched::dot: NormViewType must have rank 1."); + + // Check compatibility of dimensions at run time. 
+ if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { + printf( + "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " + "Y: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), + (int)Y.extent(1)); + return 1; + } + if (X.extent(0) != dot.extent(0)) { + printf( + "KokkosBatched::dot: First dimension of X and alpha do not match: X: " + "%d x %d, dot: %d\n", + (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); + return 1; + } +#endif + return TeamDotInternal::template invoke< + MemberType, typename XViewType::non_const_value_type, + typename NormViewType::non_const_value_type>( + member, X.extent(1), X.extent(0), X.data(), X.stride_1(), X.stride_0(), + Y.data(), Y.stride_1(), Y.stride_0(), dot.data(), dot.stride_0()); + } +}; + +/// +/// TeamVector Impl +/// =============== +template +struct TeamVectorDot { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, + const XViewType &X, + const YViewType &Y, + const NormViewType &dot) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: XViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: YViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: NormViewType is not a Kokkos::View."); + static_assert(XViewType::Rank == 2, + "KokkosBatched::dot: XViewType must have rank 2."); + static_assert(YViewType::Rank == 2, + "KokkosBatched::dot: YViewType must have rank 2."); + static_assert(NormViewType::Rank == 1, + "KokkosBatched::dot: NormViewType must have rank 1."); + + // Check compatibility of dimensions at run time. 
+ if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { + printf( + "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " + "Y: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), + (int)Y.extent(1)); + return 1; + } + if (X.extent(1) != dot.extent(0)) { + printf( + "KokkosBatched::dot: Second dimension of X and alpha do not match: " + "X: " + "%d x %d, dot: %d\n", + (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); + return 1; + } +#endif + return TeamVectorDotInternal::template invoke< + MemberType, typename XViewType::non_const_value_type, + typename NormViewType::non_const_value_type>( + member, X.extent(0), X.extent(1), X.data(), X.stride_0(), X.stride_1(), + Y.data(), Y.stride_0(), Y.stride_1(), dot.data(), dot.stride_0()); + } +}; + +template +struct TeamVectorDot { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, + const XViewType &X, + const YViewType &Y, + const NormViewType &dot) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: XViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: YViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::dot: NormViewType is not a Kokkos::View."); + static_assert(XViewType::Rank == 2, + "KokkosBatched::dot: XViewType must have rank 2."); + static_assert(YViewType::Rank == 2, + "KokkosBatched::dot: YViewType must have rank 2."); + static_assert(NormViewType::Rank == 1, + "KokkosBatched::dot: NormViewType must have rank 1."); + + // Check compatibility of dimensions at run time. 
+ if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { + printf( + "KokkosBatched::dot: Dimensions of X and Y do not match: X: %d x %d, " + "Y: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), + (int)Y.extent(1)); + return 1; + } + if (X.extent(0) != dot.extent(0)) { + printf( + "KokkosBatched::dot: First dimension of X and alpha do not match: X: " + "%d x %d, dot: %d\n", + (int)X.extent(0), (int)X.extent(1), (int)dot.extent(0)); + return 1; + } +#endif + return TeamVectorDotInternal::template invoke< + MemberType, typename XViewType::non_const_value_type, + typename NormViewType::non_const_value_type>( + member, X.extent(1), X.extent(0), X.data(), X.stride_1(), X.stride_0(), + Y.data(), Y.stride_1(), Y.stride_0(), dot.data(), dot.stride_0()); + } +}; + } // end namespace KokkosBatched #endif diff --git a/src/batched/dense/impl/KokkosBatched_QR_WithColumnPivoting_TeamVector_Internal.hpp b/src/batched/dense/impl/KokkosBatched_QR_WithColumnPivoting_TeamVector_Internal.hpp index afa505ea0c..e46cd87fd6 100644 --- a/src/batched/dense/impl/KokkosBatched_QR_WithColumnPivoting_TeamVector_Internal.hpp +++ b/src/batched/dense/impl/KokkosBatched_QR_WithColumnPivoting_TeamVector_Internal.hpp @@ -6,7 +6,7 @@ #include "KokkosBatched_Util.hpp" #include "KokkosBatched_FindAmax_Internal.hpp" -#include "KokkosBatched_Dot_Internal.hpp" +#include "KokkosBatched_Dot.hpp" #include "KokkosBatched_ApplyPivot_Internal.hpp" #include "KokkosBatched_Householder_TeamVector_Internal.hpp" diff --git a/src/batched/dense/impl/KokkosBatched_Xpay_Impl.hpp b/src/batched/dense/impl/KokkosBatched_Xpay_Impl.hpp new file mode 100644 index 0000000000..1dc69c8f69 --- /dev/null +++ b/src/batched/dense/impl/KokkosBatched_Xpay_Impl.hpp @@ -0,0 +1,345 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.4 +// Copyright (2021) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
+// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +#ifndef __KOKKOSBATCHED_XPAY_IMPL_HPP__ +#define __KOKKOSBATCHED_XPAY_IMPL_HPP__ + +/// \author Kim Liegeois (knliege@sandia.gov) + +#include "KokkosBatched_Util.hpp" + +namespace KokkosBatched { + +/// +/// Serial Internal Impl: Y <- alpha * Y + X on raw pointers with explicit strides +/// ==================== +struct SerialXpayInternal { + template + KOKKOS_INLINE_FUNCTION static int invoke(const int m, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT X, + const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, + const int ys0) { +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int i = 0; i < m; ++i) { + Y[i * ys0] *= alpha; + Y[i * ys0] += X[i * xs0]; + } + + return 0; + } + + template + KOKKOS_INLINE_FUNCTION static int invoke( + const int m, const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int i = 0; i < m; ++i) { + Y[i * ys0] *= alpha[i * alphas0]; + Y[i * ys0] += X[i * xs0]; + } + + return 0; + } + + template + KOKKOS_INLINE_FUNCTION static int invoke( + const int m, const int n, const ScalarType* KOKKOS_RESTRICT alpha, + const int alphas0, const ValueType* KOKKOS_RESTRICT X, const int xs0, + const int xs1, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { + if (xs0 > xs1) + for (int i = 0; i < m; ++i) + invoke(n, alpha[i * alphas0], X + i * xs0, xs1, Y + i * ys0, ys1); + else + for (int j = 0; j < n; ++j) + invoke(m, alpha, alphas0, X + j * xs1, xs0, Y + j * ys1, ys0); + + return 0; + } +}; + +/// +/// Team Internal Impl: same update, iterations distributed with Kokkos::TeamThreadRange +/// ==================== +struct TeamXpayInternal { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, + const int m, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT X, + const int 
xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, + const int ys0) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int& i) { + Y[i * ys0] *= alpha; + Y[i * ys0] += X[i * xs0]; + }); + // member.team_barrier(); NOTE(review): barrier is disabled, so no team synchronization happens here - confirm callers synchronize + return 0; + } + + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const int m, + const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int& i) { + Y[i * ys0] *= alpha[i * alphas0]; + Y[i * ys0] += X[i * xs0]; + }); + // member.team_barrier(); + return 0; + } + + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const int m, const int n, + const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { + if (m > n) { + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, m), [&](const int& i) { + SerialXpayInternal::invoke(n, alpha[i * alphas0], X + i * xs0, xs1, + Y + i * ys0, ys1); + }); + } else { + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, n), [&](const int& j) { + SerialXpayInternal::invoke(m, alpha, alphas0, X + j * xs1, xs0, + Y + j * ys1, ys0); + }); + } + // member.team_barrier(); + return 0; + } +}; + +/// +/// TeamVector Internal Impl: same update, iterations distributed with Kokkos::TeamVectorRange +/// ======================== +struct TeamVectorXpayInternal { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& member, + const int m, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT X, + const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, + const int ys0) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int& i) { + Y[i * ys0] *= alpha; + Y[i * ys0] += X[i * xs0]; + }); + // member.team_barrier(); NOTE(review): barrier is disabled, so no team synchronization happens here - confirm callers synchronize + return 0; + } + + template + KOKKOS_INLINE_FUNCTION 
static int invoke( + const MemberType& member, const int m, + const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, m), [&](const int& i) { + Y[i * ys0] *= alpha[i * alphas0]; + Y[i * ys0] += X[i * xs0]; + }); + // member.team_barrier(); + return 0; + } + + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const int m, const int n, + const ScalarType* KOKKOS_RESTRICT alpha, const int alphas0, + const ValueType* KOKKOS_RESTRICT X, const int xs0, const int xs1, + /* */ ValueType* KOKKOS_RESTRICT Y, const int ys0, const int ys1) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, m * n), + [&](const int& iTemp) { + int i, j; + getIndices(iTemp, n, m, j, i); + Y[i * ys0 + j * ys1] *= alpha[i * alphas0]; + Y[i * ys0 + j * ys1] += X[i * xs0 + j * xs1]; + }); + // member.team_barrier(); + return 0; + } +}; + +/// +/// Serial Impl: debug-only rank/extent checks, then forwards extents, data pointers and strides to SerialXpayInternal +/// =========== +template +KOKKOS_INLINE_FUNCTION int SerialXpay::invoke(const alphaViewType& alpha, + const ViewType& X, + const ViewType& Y) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::xpay: ViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::xpay: alphaViewType is not a Kokkos::View."); + static_assert(ViewType::Rank == 2, + "KokkosBatched::xpay: ViewType must have rank 2."); + static_assert(alphaViewType::Rank == 1, + "KokkosBatched::xpay: alphaViewType must have rank 1."); + + // Check compatibility of dimensions at run time. 
+ if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { + printf( + "KokkosBatched::xpay: Dimensions of X and Y do not match: X: %d x %d, " + "Y: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); + return 1; + } + if (X.extent(0) != alpha.extent(0)) { + printf( + "KokkosBatched::xpay: First dimension of X and alpha do not match: X: " + "%d x %d, alpha: %d\n", + (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); + return 1; + } +#endif + + return SerialXpayInternal::template invoke< + typename alphaViewType::non_const_value_type, + typename ViewType::non_const_value_type>( + X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), X.data(), + X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), Y.stride_1()); +} + +/// +/// Team Impl: debug-only rank/extent checks, then forwards member, extents, data pointers and strides to TeamXpayInternal +/// ========= + +template +template +KOKKOS_INLINE_FUNCTION int TeamXpay::invoke( + const MemberType& member, const alphaViewType& alpha, const ViewType& X, + const ViewType& Y) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::xpay: ViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::xpay: alphaViewType is not a Kokkos::View."); + static_assert(ViewType::Rank == 2, + "KokkosBatched::xpay: ViewType must have rank 2."); + static_assert(alphaViewType::Rank == 1, + "KokkosBatched::xpay: alphaViewType must have rank 1."); + + // Check compatibility of dimensions at run time. 
+ if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { + printf( + "KokkosBatched::xpay: Dimensions of X and Y do not match: X: %d x %d, " + "Y: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); + return 1; + } + if (X.extent(0) != alpha.extent(0)) { + printf( + "KokkosBatched::xpay: First dimension of X and alpha do not match: X: " + "%d x %d, alpha: %d\n", + (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); + return 1; + } +#endif + + return TeamXpayInternal::template invoke< + MemberType, typename alphaViewType::non_const_value_type, + typename ViewType::non_const_value_type>( + member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), + X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), + Y.stride_1()); +} + +/// +/// TeamVector Impl: debug-only rank/extent checks, then forwards member, extents, data pointers and strides to TeamVectorXpayInternal +/// =============== + +template +template +KOKKOS_INLINE_FUNCTION int TeamVectorXpay::invoke( + const MemberType& member, const alphaViewType& alpha, const ViewType& X, + const ViewType& Y) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::xpay: ViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::xpay: alphaViewType is not a Kokkos::View."); + static_assert(ViewType::Rank == 2, + "KokkosBatched::xpay: ViewType must have rank 2."); + static_assert(alphaViewType::Rank == 1, + "KokkosBatched::xpay: alphaViewType must have rank 1."); + + // Check compatibility of dimensions at run time. 
+ if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { + printf( + "KokkosBatched::xpay: Dimensions of X and Y do not match: X: %d x %d, " + "Y: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), (int)Y.extent(1)); + return 1; + } + if (X.extent(0) != alpha.extent(0)) { + printf( + "KokkosBatched::xpay: First dimension of X and alpha do not match: X: " + "%d x %d, alpha: %d\n", + (int)X.extent(0), (int)X.extent(1), (int)alpha.extent(0)); + return 1; + } +#endif + + return TeamVectorXpayInternal::invoke< + MemberType, typename alphaViewType::non_const_value_type, + typename ViewType::non_const_value_type, typename ViewType::array_layout>( + member, X.extent(0), X.extent(1), alpha.data(), alpha.stride_0(), + X.data(), X.stride_0(), X.stride_1(), Y.data(), Y.stride_0(), + Y.stride_1()); +} + +} // namespace KokkosBatched + +#endif diff --git a/src/batched/sparse/KokkosBatched_CG.hpp b/src/batched/sparse/KokkosBatched_CG.hpp new file mode 100644 index 0000000000..6333ad690f --- /dev/null +++ b/src/batched/sparse/KokkosBatched_CG.hpp @@ -0,0 +1,92 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.4 +// Copyright (2021) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +#ifndef __KOKKOSBATCHED_CG_HPP__ +#define __KOKKOSBATCHED_CG_HPP__ + +/// \author Kim Liegeois (knliege@sandia.gov) + +#include "KokkosBatched_Util.hpp" +#include "KokkosBatched_Vector.hpp" + +/// \brief Batched CG: Selective Interface +/// +/// \tparam OperatorType: The type of the operator of the system +/// \tparam VectorViewType: Input type for the right-hand side and the solution, +/// needs to be a 2D view +/// +/// \param member [in]: TeamPolicy member +/// \param A [in]: batched operator (can be a batched matrix or a (left or right +/// or both) preconditioned batched matrix) +/// \param B [in]: right-hand side, a rank 2 view +/// \param X [in/out]: initial guess and solution, a rank 2 view +/// \param handle [in]: a handle which provides different information such as the tolerance or the maximal number of iterations of the solver. 
+ +#include "KokkosBatched_Krylov_Handle.hpp" +#include "KokkosBatched_CG_Team_Impl.hpp" +#include "KokkosBatched_CG_TeamVector_Impl.hpp" + +namespace KokkosBatched { + +template +struct CG { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType &member, const OperatorType &A, const VectorViewType &B, + const VectorViewType &X, + const KrylovHandle + handle) { + int status = 0; + if (std::is_same::value) { + status = + TeamCG::template invoke( + member, A, B, X, handle); + } else if (std::is_same::value) { + status = TeamVectorCG::template invoke( + member, A, B, X, handle); + } + return status; + } +}; + +} // namespace KokkosBatched +#endif diff --git a/src/batched/sparse/KokkosBatched_CrsMatrix.hpp b/src/batched/sparse/KokkosBatched_CrsMatrix.hpp new file mode 100644 index 0000000000..5448c4684c --- /dev/null +++ b/src/batched/sparse/KokkosBatched_CrsMatrix.hpp @@ -0,0 +1,195 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.4 +// Copyright (2021) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +#ifndef __KOKKOSBATCHED_CRSMATRIX_HPP__ +#define __KOKKOSBATCHED_CRSMATRIX_HPP__ + +/// \author Kim Liegeois (knliege@sandia.gov) + +namespace KokkosBatched { + +/// \brief Batched CrsMatrix: +/// +/// \tparam ValuesViewType: Input type for the values of the batched crs matrix, +/// needs to be a 2D view +/// \tparam IntViewType: Input type for row offset array and column-index array, needs to be a 1D view + +template +class CrsMatrix { + public: + using ScalarType = typename ValuesViewType::non_const_value_type; + using MagnitudeType = + typename Kokkos::Details::ArithTraits::mag_type; + + private: + ValuesViewType values; + IntViewType row_ptr; + IntViewType colIndices; + int n_operators; + int n_rows; + int n_columns; + + public: + KOKKOS_INLINE_FUNCTION + CrsMatrix(const ValuesViewType &_values, const IntViewType &_row_ptr, + const IntViewType &_colIndices) + : values(_values), row_ptr(_row_ptr), colIndices(_colIndices) { + n_operators = _values.extent(0); + n_rows = _row_ptr.extent(0) - 1; + n_columns = n_rows; + } + + KOKKOS_INLINE_FUNCTION + ~CrsMatrix() {} + + 
/// \brief apply version that uses constant coefficients alpha and beta + /// + /// y_l <- alpha * A_l * x_l + beta * y_l for all l = 1, ..., N + /// where: + /// * N is the number of matrices, + /// * A_1, ..., A_N are N sparse matrices which share the same sparsity + /// pattern, + /// * x_1, ..., x_N are the N input vectors, + /// * y_1, ..., y_N are the N output vectors, + /// * alpha is a scaling factor for x_1, ..., x_N, + /// * beta is a scaling factor for y_1, ..., y_N. + /// + /// \tparam MemberType: Input type for the TeamPolicy member + /// \tparam XViewType: Input type for X, needs to be a 2D view + /// \tparam YViewType: Input type for Y, needs to be a 2D view + /// \tparam ArgTrans: Argument for transpose or notranspose + /// \tparam ArgMode: Argument for the parallelism used in the apply + /// + /// \param member [in]: TeamPolicy member + /// \param X [in]: Input vector X, a rank 2 view + /// \param Y [in/out]: Output vector Y, a rank 2 view + /// \param alpha [in]: input coefficient for X (default value 1.) 
+ /// \param beta [in]: input coefficient for Y (default value 0.) + + template + KOKKOS_INLINE_FUNCTION void apply( + const MemberType &member, const XViewType &X, const YViewType &Y, + MagnitudeType alpha = Kokkos::Details::ArithTraits::one(), + MagnitudeType beta = + Kokkos::Details::ArithTraits::zero()) const { + if (beta == 0) + KokkosBatched::Spmv::template invoke< + ValuesViewType, IntViewType, XViewType, YViewType, 0>( + member, alpha, values, row_ptr, colIndices, X, beta, Y); + else + KokkosBatched::Spmv::template invoke< + ValuesViewType, IntViewType, XViewType, YViewType, 1>( + member, alpha, values, row_ptr, colIndices, X, beta, Y); + } + + /// \brief apply version that uses variable coefficient alpha and no beta + /// y_l <- alpha_l * A_l * x_l for all l = 1, ..., N + /// where: + /// * N is the number of matrices, + /// * A_1, ..., A_N are N sparse matrices which share the same sparsity + /// pattern, + /// * x_1, ..., x_N are the N input vectors, + /// * y_1, ..., y_N are the N output vectors, + /// * alpha_1, ..., alpha_N are N scaling factors for x_1, ..., x_N. 
+ /// + /// \tparam MemberType: Input type for the TeamPolicy member + /// \tparam XViewType: Input type for X, needs to be a 2D view + /// \tparam YViewType: Input type for Y, needs to be a 2D view + /// \tparam ArgTrans: Argument for transpose or notranspose + /// \tparam ArgMode: Argument for the parallelism used in the apply + /// + /// \param member [in]: TeamPolicy member + /// \param X [in]: Input vector X, a rank 2 view + /// \param Y [out]: Output vector Y, a rank 2 view + /// \param alpha [in]: input coefficient for X, a rank 1 view + + template + KOKKOS_INLINE_FUNCTION void apply(const MemberType &member, + const XViewType &X, const YViewType &Y, + const NormViewType &alpha) const { + KokkosBatched::Spmv::template invoke< + ValuesViewType, IntViewType, XViewType, YViewType, NormViewType, + NormViewType, 0>(member, alpha, values, row_ptr, colIndices, X, alpha, + Y);  // NOTE(review): alpha also fills the beta slot; presumably ignored when the last template argument is 0 - confirm in Spmv + } + + /// \brief apply version that uses variable coefficients alpha and beta + /// y_l <- alpha_l * A_l * x_l + beta_l * y_l for all l = 1, ..., N + /// where: + /// * N is the number of matrices, + /// * A_1, ..., A_N are N sparse matrices which share the same sparsity + /// pattern, + /// * x_1, ..., x_N are the N input vectors, + /// * y_1, ..., y_N are the N output vectors, + /// * alpha_1, ..., alpha_N are N scaling factors for x_1, ..., x_N, + /// * beta_1, ..., beta_N are N scaling factors for y_1, ..., y_N. 
+ /// + /// \tparam MemberType: Input type for the TeamPolicy member + /// \tparam XViewType: Input type for X, needs to be a 2D view + /// \tparam YViewType: Input type for Y, needs to be a 2D view + /// \tparam NormViewType: Input type for alpha and beta, needs to be a 1D view + /// \tparam ArgTrans: Argument for transpose or notranspose + /// \tparam ArgMode: Argument for the parallelism used in the apply + /// + /// \param member [in]: TeamPolicy member + /// \param X [in]: Input vector X, a rank 2 view + /// \param Y [in/out]: Output vector Y, a rank 2 view + /// \param alpha [in]: input coefficient for X, a rank 1 view + /// \param beta [in]: input coefficient for Y, a rank 1 view + + template + KOKKOS_INLINE_FUNCTION void apply(const MemberType &member, + const XViewType &X, const YViewType &Y, + const NormViewType &alpha, + const NormViewType &beta) const { + KokkosBatched::Spmv::template invoke< + ValuesViewType, IntViewType, XViewType, YViewType, NormViewType, + NormViewType, 1>(member, alpha, values, row_ptr, colIndices, X, beta, + Y); + } +}; + +} // namespace KokkosBatched + +#endif \ No newline at end of file diff --git a/src/batched/sparse/KokkosBatched_GMRES.hpp b/src/batched/sparse/KokkosBatched_GMRES.hpp new file mode 100644 index 0000000000..6231c7676a --- /dev/null +++ b/src/batched/sparse/KokkosBatched_GMRES.hpp @@ -0,0 +1,92 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.4 +// Copyright (2021) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +#ifndef __KOKKOSBATCHED_GMRES_HPP__ +#define __KOKKOSBATCHED_GMRES_HPP__ + +/// \author Kim Liegeois (knliege@sandia.gov) + +#include "KokkosBatched_Util.hpp" +#include "KokkosBatched_Vector.hpp" + +/// \brief Batched GMRES: Selective Interface +/// +/// \tparam OperatorType: The type of the operator of the system +/// \tparam VectorViewType: Input type for the right-hand side and the solution, +/// needs to be a 2D view +/// +/// \param member [in]: TeamPolicy member +/// \param A [in]: batched operator (can be a batched matrix or a (left or right +/// or both) preconditioned batched matrix) +/// \param B [in]: right-hand side, a rank 2 view +/// \param X [in/out]: initial guess and solution, a rank 2 view +/// \param handle [in]: a handle which provides different information such as the tolerance or the maximal number of iterations of the solver. + +#include "KokkosBatched_Krylov_Handle.hpp" +#include "KokkosBatched_GMRES_Team_Impl.hpp" +#include "KokkosBatched_GMRES_TeamVector_Impl.hpp" + +namespace KokkosBatched { + +template +struct GMRES { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType &member, const OperatorType &A, const VectorViewType &B, + const VectorViewType &X, + const KrylovHandle + handle) { + int status = 0; + if (std::is_same::value) { + status = + TeamGMRES::template invoke( + member, A, B, X, handle); + } else if (std::is_same::value) { + status = TeamVectorGMRES::template invoke( + member, A, B, X, handle); + } + return status; + } +}; + +} // namespace KokkosBatched +#endif diff --git a/src/batched/sparse/KokkosBatched_Krylov_Handle.hpp b/src/batched/sparse/KokkosBatched_Krylov_Handle.hpp new file mode 100644 index 0000000000..b4e275cfdb --- /dev/null +++ b/src/batched/sparse/KokkosBatched_Krylov_Handle.hpp @@ -0,0 +1,110 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include + +#ifndef __KOKKOSBATCHED_KRYLOV_HANDLE_HPP__ +#define __KOKKOSBATCHED_KRYLOV_HANDLE_HPP__ +//#define VERBOSE + +namespace KokkosBatched { + +/// \brief KrylovHandle +/// +/// \tparam scalar_type: Scalar type of the linear solver + +template +class KrylovHandle { + public: + using norm_type = + typename Kokkos::Details::ArithTraits::mag_type; + + private: + norm_type tolerance; + int max_iteration; + + public: + KOKKOS_INLINE_FUNCTION + KrylovHandle() { + tolerance = Kokkos::Details::ArithTraits::epsilon(); + max_iteration = 200; + } + + /// \brief set_tolerance + /// Set the tolerance of the batched Krylov solver + /// + /// \param _tolerance [in]: New tolerance + + KOKKOS_INLINE_FUNCTION + void set_tolerance(norm_type _tolerance) { tolerance = _tolerance; } + + /// \brief get_tolerance + /// Get the tolerance of the batched Krylov solver + + KOKKOS_INLINE_FUNCTION + norm_type get_tolerance() const { return tolerance; } + + /// \brief set_max_iteration + /// Set the maximum number of iterations of the batched Krylov solver + /// + /// \param _max_iteration [in]: New maximum number of iterations + + KOKKOS_INLINE_FUNCTION + void set_max_iteration(norm_type _max_iteration) { + max_iteration = _max_iteration; + } + + /// \brief get_max_iteration + /// Get the maximum number of iterations of the batched Krylov solver + + KOKKOS_INLINE_FUNCTION + int get_max_iteration() const { return max_iteration; } +}; + +} // namespace KokkosBatched + +#endif diff --git a/src/batched/sparse/KokkosBatched_Spmv.hpp b/src/batched/sparse/KokkosBatched_Spmv.hpp index aadd7ad168..14ce074e41 100644 --- a/src/batched/sparse/KokkosBatched_Spmv.hpp +++ b/src/batched/sparse/KokkosBatched_Spmv.hpp @@ -88,15 +88,26 @@ namespace KokkosBatched { /// No nested parallel_for is used inside of the function. 
/// -template +template struct SerialSpmv { template KOKKOS_INLINE_FUNCTION static int invoke( const alphaViewType &alpha, const ValuesViewType &values, - const IntView &row_ptr, const IntView &colIndices, const xViewType &X, + const IntView &row_ptr, const IntView &colIndices, const xViewType &x, const betaViewType &beta, const yViewType &Y); + + template + KOKKOS_INLINE_FUNCTION static int invoke( + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type &alpha, + const ValuesViewType &values, const IntView &row_ptr, + const IntView &colIndices, const xViewType &X, + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type &beta, + const yViewType &Y); }; /// \brief Team Batched SPMV: @@ -139,7 +150,7 @@ struct SerialSpmv { /// A nested parallel_for with TeamThreadRange is used. /// -template +template struct TeamSpmv { template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType &member, + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type &alpha, + const ValuesViewType &values, const IntView &row_ptr, + const IntView &colIndices, const xViewType &x, + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type &beta, + const yViewType &y); }; /// \brief TeamVector Batched SPMV: @@ -192,7 +215,7 @@ struct TeamSpmv { /// (or one with TeamVectorRange) are used inside. 
/// -template +template struct TeamVectorSpmv { template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType &member, + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type &alpha, + const ValuesViewType &values, const IntView &row_ptr, + const IntView &colIndices, const xViewType &x, + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type &beta, + const yViewType &y); }; /// \brief Batched SPMV: Selective Interface @@ -261,8 +296,36 @@ struct Spmv { } return r_val; } -}; + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType &member, + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type &alpha, + const ValuesViewType &values, const IntView &row_ptr, + const IntView &colIndices, const xViewType &x, + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type &beta, + const yViewType &y) { + int r_val = 0; + if (std::is_same::value) { + r_val = + SerialSpmv::template invoke( + alpha, values, row_ptr, colIndices, x, beta, y); + } else if (std::is_same::value) { + r_val = TeamSpmv::template invoke< + ValuesViewType, IntView, xViewType, yViewType, dobeta>( + member, alpha, values, row_ptr, colIndices, x, beta, y); + } else if (std::is_same::value) { + r_val = TeamVectorSpmv::template invoke< + ValuesViewType, IntView, xViewType, yViewType, dobeta>( + member, alpha, values, row_ptr, colIndices, x, beta, y); + } + return r_val; + } +}; } // namespace KokkosBatched #include "KokkosBatched_Spmv_Serial_Impl.hpp" diff --git a/src/batched/sparse/impl/KokkosBatched_CG_TeamVector_Impl.hpp b/src/batched/sparse/impl/KokkosBatched_CG_TeamVector_Impl.hpp new file mode 100644 index 0000000000..960780b54d --- /dev/null +++ b/src/batched/sparse/impl/KokkosBatched_CG_TeamVector_Impl.hpp @@ -0,0 +1,198 @@ +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 3.4 +// Copyright (2021) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER
#ifndef __KOKKOSBATCHED_CG_TEAMVECTOR_IMPL_HPP__
#define __KOKKOSBATCHED_CG_TEAMVECTOR_IMPL_HPP__

/// \author Kim Liegeois (knliege@sandia.gov)

#include "KokkosBatched_Util.hpp"

#include "KokkosBatched_Axpy.hpp"
#include "KokkosBatched_Copy_Decl.hpp"
#include "KokkosBatched_Dot.hpp"
#include "KokkosBatched_Spmv.hpp"
#include "KokkosBatched_Xpay.hpp"

namespace KokkosBatched {

///
/// TeamVector CG
///   Two nested parallel_for with both TeamVectorRange and ThreadVectorRange
///   (or one with TeamVectorRange) are used inside.
///
/// NOTE(review): the template parameter/argument lists (after `template`,
/// `KrylovHandle`, `TeamVectorCopy`, `TeamVectorDot`, `TeamVectorAxpy`,
/// `TeamVectorXpay` and `A.template apply`) are absent in this copy of the
/// file; they have to be restored before this header can compile.
///

template
struct TeamVectorCG {
  /// \brief Batched CG iteration run by one team on all systems of the batch.
  ///
  /// \param member [in]: team member; provides team_scratch(0) and the
  ///                     team barriers
  /// \param A [in]: operator; only A.apply(member, in, out[, alpha, beta])
  ///                is used
  /// \param _B [in]: right-hand sides, copied into the residual R
  /// \param _X [in/out]: initial guesses on entry; overwritten with the
  ///                     iterates on exit. extent(0) is taken as the number
  ///                     of systems, extent(1) as the number of rows
  /// \param handle [in]: read for the maximum iteration count and tolerance
  ///
  /// \return 0 if the loop exits because no system failed the relative
  ///         residual test, 1 otherwise.
  ///
  /// Scratch used (all from team_scratch(0)): four numMatrices x numRows
  /// views (P, Q, R, X) and five views of length numMatrices (sqr_norm_0,
  /// sqr_norm_j, alpha, mask, tmp).
  template
  KOKKOS_INLINE_FUNCTION static int invoke(
      const MemberType& member, const OperatorType& A, const VectorViewType& _B,
      const VectorViewType& _X,
      KrylovHandle* handle) {
    typedef int OrdinalType;
    typedef typename Kokkos::Details::ArithTraits<
        typename VectorViewType::non_const_value_type>::mag_type MagnitudeType;

    const size_t maximum_iteration = handle->get_max_iteration();
    const MagnitudeType tolerance = handle->get_tolerance();

    using ScratchPadNormViewType = Kokkos::View<
        MagnitudeType*,
        typename VectorViewType::execution_space::scratch_memory_space>;
    using ScratchPadVectorViewType = Kokkos::View<
        typename VectorViewType::non_const_value_type**,
        typename VectorViewType::array_layout,
        typename VectorViewType::execution_space::scratch_memory_space>;
    using TeamVectorCopy1D = TeamVectorCopy;

    const OrdinalType numMatrices = _X.extent(0);
    const OrdinalType numRows = _X.extent(1);

    ScratchPadVectorViewType P(member.team_scratch(0), numMatrices, numRows);
    ScratchPadVectorViewType Q(member.team_scratch(0), numMatrices, numRows);
    ScratchPadVectorViewType R(member.team_scratch(0), numMatrices, numRows);
    ScratchPadVectorViewType X(member.team_scratch(0), numMatrices, numRows);

    ScratchPadNormViewType sqr_norm_0(member.team_scratch(0), numMatrices);
    ScratchPadNormViewType sqr_norm_j(member.team_scratch(0), numMatrices);
    ScratchPadNormViewType alpha(member.team_scratch(0), numMatrices);
    ScratchPadNormViewType mask(member.team_scratch(0), numMatrices);
    ScratchPadNormViewType tmp(member.team_scratch(0), numMatrices);

    // Work on a scratch copy of the initial guess; it is copied back to _X
    // once, after the loop.
    TeamVectorCopy::invoke(member, _X, X);
    // Deep copy of b into r_0:
    TeamVectorCopy::invoke(member, _B, R);

    // r_0 := b - A x_0
    member.team_barrier();
    A.template apply(member, X, R, -1, 1);
    member.team_barrier();

    // Deep copy of r_0 into p_0:
    TeamVectorCopy::invoke(member, R, P);

    TeamVectorDot::invoke(member, R, R, sqr_norm_0);
    member.team_barrier();

    // mask(i) is 1 for systems whose initial squared residual exceeds
    // tolerance^2 and 0 otherwise. A masked system gets alpha = 0 in the
    // loop, so its X and R are left unchanged by the Axpy calls.
    Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices),
                         [&](const OrdinalType& i) {
                           mask(i) =
                               sqr_norm_0(i) > tolerance * tolerance ? 1. : 0;
                         });

    TeamVectorCopy1D::invoke(member, sqr_norm_0, sqr_norm_j);

    int status = 1;
    int number_not_converged = 0;

    for (size_t j = 0; j < maximum_iteration; ++j) {
      // q := A p_j
      A.template apply(member, P, Q);
      member.team_barrier();

      TeamVectorDot::invoke(member, P, Q, tmp);
      member.team_barrier();

      // Step length: squared residual norm over (p_j . q); forced to 0 for
      // masked systems.
      Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices),
                           [&](const OrdinalType& i) {
                             alpha(i) =
                                 mask(i) != 0. ? sqr_norm_j(i) / tmp(i) : 0.;
                           });
      member.team_barrier();

      // x_{j+1} := alpha p_j + x_j
      TeamVectorAxpy::invoke(member, alpha, P, X);
      member.team_barrier();

      // r_{j+1} := - alpha q + r_j
      Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices),
                           [&](const OrdinalType& i) { alpha(i) = -alpha(i); });
      member.team_barrier();

      TeamVectorAxpy::invoke(member, alpha, Q, R);
      member.team_barrier();

      TeamVectorDot::invoke(member, R, R, tmp);
      member.team_barrier();

      // alpha is reused: from here on it holds the ratio of the new to the
      // previous squared residual norm, consumed by the Xpay call below.
      Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices),
                           [&](const OrdinalType& i) {
                             alpha(i) =
                                 mask(i) != 0. ? tmp(i) / sqr_norm_j(i) : 0.;
                           });

      // NOTE(review): no team_barrier separates the loop above (reads
      // sqr_norm_j) from this copy (writes sqr_norm_j); presumably safe only
      // if both map index i to the same thread -- confirm.
      TeamVectorCopy1D::invoke(member, tmp, sqr_norm_j);

      // Relative convergence check:
      // NOTE(review): a system masked before the loop (non-zero initial
      // residual already below the absolute threshold) is never updated, so
      // its ratio stays at 1 and it appears to be counted as not converged on
      // every iteration -- confirm whether that is intended.
      number_not_converged = 0;
      Kokkos::parallel_reduce(
          Kokkos::TeamVectorRange(member, 0, numMatrices),
          [&](const OrdinalType& i, int& lnumber_not_converged) {
            if (sqr_norm_j(i) / sqr_norm_0(i) > tolerance * tolerance)
              ++lnumber_not_converged;
            else
              mask(i) = 0.;
          },
          number_not_converged);

      member.team_barrier();

      if (number_not_converged == 0) {
        status = 0;
        break;
      }

      // p_{j+1} := alpha p_j + r_{j+1}
      TeamVectorXpay::invoke(member, alpha, R, P);
      member.team_barrier();
    }

    // Write the scratch iterate back to the caller's view.
    TeamVectorCopy::invoke(member, X, _X);
    return status;
  }
};
}  // namespace KokkosBatched

#endif
diff --git a/src/batched/sparse/impl/KokkosBatched_CG_Team_Impl.hpp b/src/batched/sparse/impl/KokkosBatched_CG_Team_Impl.hpp new file mode 100644 index 0000000000..4444562750 --- /dev/null +++ b/src/batched/sparse/impl/KokkosBatched_CG_Team_Impl.hpp @@ -0,0 +1,197 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.4 +// Copyright (2021) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +#ifndef __KOKKOSBATCHED_CG_TEAM_IMPL_HPP__ +#define __KOKKOSBATCHED_CG_TEAM_IMPL_HPP__ + +/// \author Kim Liegeois (knliege@sandia.gov) + +#include "KokkosBatched_Util.hpp" + +#include "KokkosBatched_Axpy.hpp" +#include "KokkosBatched_Copy_Decl.hpp" +#include "KokkosBatched_Dot.hpp" +#include "KokkosBatched_Spmv.hpp" +#include "KokkosBatched_Xpay.hpp" + +namespace KokkosBatched { + +/// +/// Team CG +/// A nested parallel_for with TeamThreadRange is used. 
///
/// NOTE(review): the template parameter/argument lists (after `template`,
/// `KrylovHandle`, `TeamCopy`, `TeamDot`, `TeamAxpy`, `TeamXpay` and
/// `A.template apply`) are absent in this copy of the file; they have to be
/// restored before this header can compile.
///

template
struct TeamCG {
  /// \brief Batched CG iteration run by one team on all systems of the batch,
  ///        using TeamThreadRange loops.
  ///
  /// \param member [in]: team member; provides team_scratch(0) and the
  ///                     team barriers
  /// \param A [in]: operator; only A.apply(member, in, out[, alpha, beta])
  ///                is used
  /// \param _B [in]: right-hand sides, copied into the residual R
  /// \param _X [in/out]: initial guesses on entry; overwritten with the
  ///                     iterates on exit. extent(0) is taken as the number
  ///                     of systems, extent(1) as the number of rows
  /// \param handle [in]: read for the maximum iteration count and tolerance
  ///
  /// \return 0 if the loop exits because no system failed the relative
  ///         residual test, 1 otherwise.
  ///
  /// Scratch used (all from team_scratch(0)): four numMatrices x numRows
  /// views (P, Q, R, X) and five views of length numMatrices (sqr_norm_0,
  /// sqr_norm_j, alpha, mask, tmp).
  template
  KOKKOS_INLINE_FUNCTION static int invoke(
      const MemberType& member, const OperatorType& A, const VectorViewType& _B,
      const VectorViewType& _X,
      KrylovHandle* handle) {
    typedef int OrdinalType;
    typedef typename Kokkos::Details::ArithTraits<
        typename VectorViewType::non_const_value_type>::mag_type MagnitudeType;

    size_t maximum_iteration = handle->get_max_iteration();
    const MagnitudeType tolerance = handle->get_tolerance();

    using ScratchPadNormViewType = Kokkos::View<
        MagnitudeType*,
        typename VectorViewType::execution_space::scratch_memory_space>;
    using ScratchPadVectorViewType = Kokkos::View<
        typename VectorViewType::non_const_value_type**,
        typename VectorViewType::array_layout,
        typename VectorViewType::execution_space::scratch_memory_space>;
    using TeamCopy1D = TeamCopy;

    const OrdinalType numMatrices = _X.extent(0);
    const OrdinalType numRows = _X.extent(1);

    ScratchPadVectorViewType P(member.team_scratch(0), numMatrices, numRows);
    ScratchPadVectorViewType Q(member.team_scratch(0), numMatrices, numRows);
    ScratchPadVectorViewType R(member.team_scratch(0), numMatrices, numRows);
    ScratchPadVectorViewType X(member.team_scratch(0), numMatrices, numRows);

    ScratchPadNormViewType sqr_norm_0(member.team_scratch(0), numMatrices);
    ScratchPadNormViewType sqr_norm_j(member.team_scratch(0), numMatrices);
    ScratchPadNormViewType alpha(member.team_scratch(0), numMatrices);
    ScratchPadNormViewType mask(member.team_scratch(0), numMatrices);
    ScratchPadNormViewType tmp(member.team_scratch(0), numMatrices);

    // Work on a scratch copy of the initial guess; it is copied back to _X
    // once, after the loop.
    TeamCopy::invoke(member, _X, X);
    // Deep copy of b into r_0:
    TeamCopy::invoke(member, _B, R);

    // r_0 := b - A x_0
    member.team_barrier();
    A.template apply(
        member, X, R, -1, 1);
    member.team_barrier();

    // Deep copy of r_0 into p_0:
    TeamCopy::invoke(member, R, P);

    TeamDot::invoke(member, R, R, sqr_norm_0);
    member.team_barrier();

    // mask(i) is 1 for systems whose initial squared residual exceeds
    // tolerance^2 and 0 otherwise. A masked system gets alpha = 0 in the
    // loop, so its X and R are left unchanged by the Axpy calls.
    Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices),
                         [&](const OrdinalType& i) {
                           mask(i) =
                               sqr_norm_0(i) > tolerance * tolerance ? 1. : 0;
                         });

    TeamCopy1D::invoke(member, sqr_norm_0, sqr_norm_j);

    int status = 1;
    int number_not_converged = 0;

    for (size_t j = 0; j < maximum_iteration; ++j) {
      // q := A p_j
      A.template apply(member, P, Q);
      member.team_barrier();

      TeamDot::invoke(member, P, Q, tmp);
      member.team_barrier();

      // Step length: squared residual norm over (p_j . q); forced to 0 for
      // masked systems.
      Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices),
                           [&](const OrdinalType& i) {
                             alpha(i) =
                                 mask(i) != 0. ? sqr_norm_j(i) / tmp(i) : 0.;
                           });
      member.team_barrier();

      // x_{j+1} := alpha p_j + x_j
      TeamAxpy::invoke(member, alpha, P, X);
      member.team_barrier();

      // r_{j+1} := - alpha q + r_j
      Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices),
                           [&](const OrdinalType& i) { alpha(i) = -alpha(i); });
      member.team_barrier();

      TeamAxpy::invoke(member, alpha, Q, R);
      member.team_barrier();

      TeamDot::invoke(member, R, R, tmp);
      member.team_barrier();

      // alpha is reused: from here on it holds the ratio of the new to the
      // previous squared residual norm, consumed by the Xpay call below.
      Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices),
                           [&](const OrdinalType& i) {
                             alpha(i) =
                                 mask(i) != 0. ? tmp(i) / sqr_norm_j(i) : 0.;
                           });

      // NOTE(review): no team_barrier separates the loop above (reads
      // sqr_norm_j) from this copy (writes sqr_norm_j); presumably safe only
      // if both map index i to the same thread -- confirm.
      TeamCopy1D::invoke(member, tmp, sqr_norm_j);

      // Relative convergence check:
      // NOTE(review): a system masked before the loop (non-zero initial
      // residual already below the absolute threshold) is never updated, so
      // its ratio stays at 1 and it appears to be counted as not converged on
      // every iteration -- confirm whether that is intended.
      number_not_converged = 0;
      Kokkos::parallel_reduce(
          Kokkos::TeamThreadRange(member, 0, numMatrices),
          [&](const OrdinalType& i, int& lnumber_not_converged) {
            if (sqr_norm_j(i) / sqr_norm_0(i) > tolerance * tolerance)
              ++lnumber_not_converged;
            else
              mask(i) = 0.;
          },
          number_not_converged);

      member.team_barrier();

      if (number_not_converged == 0) {
        status = 0;
        break;
      }

      // p_{j+1} := alpha p_j + r_{j+1}
      TeamXpay::invoke(member, alpha, R, P);
      member.team_barrier();
    }

    // Write the scratch iterate back to the caller's view.
    TeamCopy::invoke(member, X, _X);
    return status;
  }
};
}  // namespace KokkosBatched

#endif
diff --git a/src/batched/sparse/impl/KokkosBatched_GMRES_TeamVector_Impl.hpp b/src/batched/sparse/impl/KokkosBatched_GMRES_TeamVector_Impl.hpp new file mode 100644 index 0000000000..85bcfa50ab --- /dev/null +++ b/src/batched/sparse/impl/KokkosBatched_GMRES_TeamVector_Impl.hpp @@ -0,0 +1,267 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.4 +// Copyright (2021) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +#ifndef __KOKKOSBATCHED_GMRES_TEAMVECTOR_IMPL_HPP__ +#define __KOKKOSBATCHED_GMRES_TEAMVECTOR_IMPL_HPP__ + +/// \author Kim Liegeois (knliege@sandia.gov) + +#include "KokkosBatched_Util.hpp" + +#include "KokkosBatched_Axpy.hpp" +#include "KokkosBatched_Copy_Decl.hpp" +#include "KokkosBatched_Dot.hpp" +#include "KokkosBatched_Spmv.hpp" +#include "KokkosBatched_Xpay.hpp" +#include "KokkosBatched_Givens_Serial_Internal.hpp" +#include "KokkosBatched_Trsm_Decl.hpp" + +namespace KokkosBatched { + +/// +/// TeamVector GMRES +/// Two nested parallel_for with both TeamVectorRange and ThreadVectorRange +/// (or one with TeamVectorRange) are used inside. 
///
/// NOTE(review): the template parameter/argument lists (after `template`,
/// `KrylovHandle`, `ArithTraits` in the ATM typedef, `TeamVectorCopy`,
/// `TeamVectorDot`, `TeamVectorAxpy`, `getIndices`, `Kokkos::pair`,
/// `SerialTrsm` and `A.template apply`) are absent in this copy of the file;
/// they have to be restored before this header can compile.
///

template
struct TeamVectorGMRES {
  /// \brief Batched GMRES iteration run by one team on all systems of the
  ///        batch, using TeamVectorRange loops.
  ///
  /// \param member [in]: team member; provides team_scratch(0),
  ///                     team_scratch(1) and the team barriers
  /// \param A [in]: operator; only A.apply(member, in, out[, alpha, beta])
  ///                is used
  /// \param _B [in]: right-hand sides, copied into the residual R
  /// \param _X [in/out]: initial guesses on entry; overwritten with the
  ///                     updated solution on exit. extent(0) is taken as the
  ///                     number of systems, extent(1) as the number of rows
  /// \param handle [in]: read for the maximum iteration count and tolerance
  ///
  /// \return The local `status`, which is initialised to 1 and not assigned
  ///         again in this function, so 1 is returned whether or not the
  ///         systems converged.
  ///
  /// The number of iterations is the smaller of handle->get_max_iteration()
  /// and numRows.
  ///
  /// Scratch used: V, H, Givens and G come from team_scratch(1); W, Q, R, X,
  /// beta, mask and tmp come from team_scratch(0).
  template
  KOKKOS_INLINE_FUNCTION static int invoke(
      const MemberType& member, const OperatorType& A, const VectorViewType& _B,
      const VectorViewType& _X,
      KrylovHandle* handle) {
    typedef int OrdinalType;
    typedef typename Kokkos::Details::ArithTraits<
        typename VectorViewType::non_const_value_type>::mag_type MagnitudeType;
    typedef Kokkos::Details::ArithTraits ATM;

    using ScratchPadNormViewType = Kokkos::View<
        MagnitudeType*,
        typename VectorViewType::execution_space::scratch_memory_space>;
    using ScratchPadVectorViewType = Kokkos::View<
        typename VectorViewType::non_const_value_type**,
        typename VectorViewType::array_layout,
        typename VectorViewType::execution_space::scratch_memory_space>;
    using ScratchPadMultiVectorViewType = Kokkos::View<
        typename VectorViewType::non_const_value_type***,
        typename VectorViewType::array_layout,
        typename VectorViewType::execution_space::scratch_memory_space>;
    using TeamVectorCopy1D = TeamVectorCopy;

    const OrdinalType numMatrices = _X.extent(0);
    const OrdinalType numRows = _X.extent(1);

    // Never iterate more times than there are rows.
    size_t maximum_iteration = handle->get_max_iteration() < numRows
                                   ? handle->get_max_iteration()
                                   : numRows;
    const MagnitudeType tolerance = handle->get_tolerance();
    // Threshold below which a norm is treated as zero, so that its reciprocal
    // is replaced by 0 instead of being computed.
    const MagnitudeType max_tolerance = 0.;

    // V: maximum_iteration + 1 vectors of length numRows per system.
    // H: (maximum_iteration + 1) x maximum_iteration coefficients per system.
    // Givens: one (first, second) pair per iteration and system.
    // G: maximum_iteration + 1 entries per system.
    ScratchPadMultiVectorViewType V(member.team_scratch(1), numMatrices,
                                    maximum_iteration + 1, numRows);
    ScratchPadMultiVectorViewType H(member.team_scratch(1), numMatrices,
                                    maximum_iteration + 1, maximum_iteration);
    ScratchPadMultiVectorViewType Givens(member.team_scratch(1), numMatrices,
                                         maximum_iteration, 2);
    ScratchPadVectorViewType G(member.team_scratch(1), numMatrices,
                               maximum_iteration + 1);

    ScratchPadVectorViewType W(member.team_scratch(0), numMatrices, numRows);
    // NOTE(review): Q is allocated here but not referenced again in this
    // function.
    ScratchPadVectorViewType Q(member.team_scratch(0), numMatrices, numRows);
    ScratchPadVectorViewType R(member.team_scratch(0), numMatrices, numRows);
    ScratchPadVectorViewType X(member.team_scratch(0), numMatrices, numRows);

    ScratchPadNormViewType beta(member.team_scratch(0), numMatrices);
    ScratchPadNormViewType mask(member.team_scratch(0), numMatrices);
    ScratchPadNormViewType tmp(member.team_scratch(0), numMatrices);

    // Work on a scratch copy of the initial guess; it is copied back to _X
    // once, at the end.
    TeamVectorCopy::invoke(member, _X, X);
    // Deep copy of b into r_0:
    TeamVectorCopy::invoke(member, _B, R);

    // mask(l) == 1 marks system l as still iterating.
    Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices),
                         [&](const OrdinalType& i) { mask(i) = 1.; });

    // r_0 := b - A x_0
    member.team_barrier();
    A.template apply(member, X, R, -1, 1);
    member.team_barrier();

    TeamVectorDot::invoke(member, R, R, beta);
    member.team_barrier();

    // beta := ||r_0||; G(:, 0) := beta; tmp := 1 / beta (0 when beta is not
    // above max_tolerance).
    Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, numMatrices),
                         [&](const OrdinalType& i) {
                           beta(i) = ATM::sqrt(beta(i));
                           G(i, 0) = beta(i) > max_tolerance ? beta(i) : 0.;
                           tmp(i) = beta(i) > max_tolerance ? 1. / beta(i) : 0.;
                         });

    // V(:, 0, :) := r_0 * tmp
    Kokkos::parallel_for(
        Kokkos::TeamVectorRange(member, 0, numMatrices * numRows),
        [&](const OrdinalType& iTemp) {
          OrdinalType iRow, iMatrix;
          getIndices(
              iTemp, numRows, numMatrices, iRow, iMatrix);
          V(iMatrix, 0, iRow) = R(iMatrix, iRow) * tmp(iMatrix);
        });

    int status = 1;
    // int number_not_converged = 0;

    for (size_t j = 0; j < maximum_iteration; ++j) {
      // w := A v_j
      auto V_j = Kokkos::subview(V, Kokkos::ALL, j, Kokkos::ALL);

      A.template apply(member, V_j, W);
      member.team_barrier();

      // For i = 0..j: H(:, i, j) := w . v_i, then w := w - H(:, i, j) v_i.
      // NOTE(review): there is no team_barrier after the Axpy below, before
      // the next Dot reads W, nor between the Copy1D / negate / Axpy steps
      // that share tmp -- confirm whether barriers are needed here.
      for (size_t i = 0; i < j + 1; ++i) {
        auto V_i = Kokkos::subview(V, Kokkos::ALL, i, Kokkos::ALL);
        TeamVectorDot::invoke(member, W, V_i, tmp);
        member.team_barrier();
        TeamVectorCopy1D::invoke(member, tmp,
                                 Kokkos::subview(H, Kokkos::ALL, i, j));

        Kokkos::parallel_for(
            Kokkos::TeamVectorRange(member, 0, numMatrices),
            [&](const OrdinalType& ii) { tmp(ii) = -tmp(ii); });

        TeamVectorAxpy::invoke(member, tmp, V_i, W);
      }

      // H(:, j + 1, j) := ||w||; tmp := its reciprocal (0 when the norm is
      // not above max_tolerance).
      TeamVectorDot::invoke(member, W, W, tmp);
      member.team_barrier();
      Kokkos::parallel_for(
          Kokkos::TeamVectorRange(member, 0, numMatrices),
          [&](const OrdinalType& i) {
            H(i, j + 1, j) = ATM::sqrt(tmp(i));
            tmp(i) = H(i, j + 1, j) > max_tolerance ? 1. / H(i, j + 1, j) : 0.;
          });
      member.team_barrier();
      // V(:, j + 1, :) := w * tmp
      Kokkos::parallel_for(
          Kokkos::TeamVectorRange(member, 0, numMatrices * numRows),
          [&](const OrdinalType& iTemp) {
            OrdinalType iRow, iMatrix;
            getIndices(
                iTemp, numRows, numMatrices, iRow, iMatrix);
            V(iMatrix, j + 1, iRow) = W(iMatrix, iRow) * tmp(iMatrix);
          });

      // One system per loop index: update column j of H and the vector G.
      Kokkos::parallel_for(
          Kokkos::TeamVectorRange(member, 0, numMatrices),
          [&](const OrdinalType& l) {
            // Apply the previous Givens rotations:
            auto H_j = Kokkos::subview(H, l, Kokkos::ALL, j);

            if (mask(l) == 1.) {
              for (size_t i = 0; i < j; ++i) {
                auto tmp1 =
                    Givens(l, i, 0) * H_j(i) + Givens(l, i, 1) * H_j(i + 1);
                auto tmp2 =
                    -Givens(l, i, 1) * H_j(i) + Givens(l, i, 0) * H_j(i + 1);
                H_j(i) = tmp1;
                H_j(i + 1) = tmp2;
              }

              // Compute the new Givens rotation:
              Kokkos::pair G_new;
              typename VectorViewType::non_const_value_type alpha;
              SerialGivensInternal::invoke(H_j(j), H_j(j + 1), &G_new, &alpha);

              Givens(l, j, 0) = G_new.first;
              Givens(l, j, 1) = G_new.second;

              // Apply the new Givens rotation:
              auto tmp1 =
                  Givens(l, j, 0) * H_j(j) + Givens(l, j, 1) * H_j(j + 1);
              auto tmp2 =
                  -Givens(l, j, 1) * H_j(j) + Givens(l, j, 0) * H_j(j + 1);
              H_j(j) = tmp1;
              H_j(j + 1) = tmp2;

              G(l, j + 1) = -Givens(l, j, 1) * G(l, j);
              G(l, j) *= Givens(l, j, 0);
            } else {
              // System l is no longer iterating: set the diagonal entry to 1
              // and the new G entry to 0.
              H_j(j) = 1.;
              G(l, j + 1) = 0.;
            }

            // Relative test |G(l, j + 1)| / beta(l) < tolerance: on success
            // the system is masked out for the following iterations.
            // NOTE(review): std::abs is used here while the rest of the block
            // goes through ATM -- confirm it is usable in device code.
            if (mask(l) == 1. && std::abs(G(l, j + 1)) / beta(l) < tolerance) {
              mask(l) = 0.;
              G(l, j + 1) = 0.;
            }
          });
    }

    // Per system, SerialTrsm is invoked with alpha = 1 on H(l, :, :) and
    // G(l, :). Presumably an in-place triangular solve that overwrites G with
    // the update coefficients -- the SerialTrsm template arguments are not
    // visible here; confirm.
    Kokkos::parallel_for(
        Kokkos::TeamVectorRange(member, 0, numMatrices),
        [&](const OrdinalType& l) {
          SerialTrsm::template invoke(1,
                                      Kokkos::subview(
                                          H, l,
                                          Kokkos::ALL,
                                          Kokkos::ALL),
                                      Kokkos::subview(
                                          G, l,
                                          Kokkos::ALL));
        });

    // Accumulate G(:, j) * V(:, j, :) into X for every iteration index j.
    for (size_t j = 0; j < maximum_iteration; ++j)
      TeamVectorAxpy::invoke(
          member, Kokkos::subview(G, Kokkos::ALL, j),
          Kokkos::subview(V, Kokkos::ALL, j, Kokkos::ALL), X);

    // Write the scratch solution back to the caller's view.
    TeamVectorCopy::invoke(member, X, _X);
    return status;
  }
};
}  // namespace KokkosBatched

#endif
diff --git a/src/batched/sparse/impl/KokkosBatched_GMRES_Team_Impl.hpp b/src/batched/sparse/impl/KokkosBatched_GMRES_Team_Impl.hpp new file mode 100644 index 0000000000..c4c39eac69 --- /dev/null +++ b/src/batched/sparse/impl/KokkosBatched_GMRES_Team_Impl.hpp @@ -0,0 +1,265 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.4 +// Copyright (2021) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
+// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +#ifndef __KOKKOSBATCHED_GMRES_TEAM_IMPL_HPP__ +#define __KOKKOSBATCHED_GMRES_TEAM_IMPL_HPP__ + +/// \author Kim Liegeois (knliege@sandia.gov) + +#include "KokkosBatched_Util.hpp" + +#include "KokkosBatched_Axpy.hpp" +#include "KokkosBatched_Copy_Decl.hpp" +#include "KokkosBatched_Dot.hpp" +#include "KokkosBatched_Spmv.hpp" +#include "KokkosBatched_Xpay.hpp" +#include "KokkosBatched_Givens_Serial_Internal.hpp" +#include "KokkosBatched_Trsm_Decl.hpp" + +namespace KokkosBatched { + +/// +/// Team GMRES +/// A nested parallel_for with TeamThreadRange is used. +/// + +template +struct TeamGMRES { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const OperatorType& A, const VectorViewType& _B, + const VectorViewType& _X, + KrylovHandle* handle) { + typedef int OrdinalType; + typedef typename Kokkos::Details::ArithTraits< + typename VectorViewType::non_const_value_type>::mag_type MagnitudeType; + typedef Kokkos::Details::ArithTraits ATM; + + using ScratchPadNormViewType = Kokkos::View< + MagnitudeType*, + typename VectorViewType::execution_space::scratch_memory_space>; + using ScratchPadVectorViewType = Kokkos::View< + typename VectorViewType::non_const_value_type**, + typename VectorViewType::array_layout, + typename VectorViewType::execution_space::scratch_memory_space>; + using ScratchPadMultiVectorViewType = Kokkos::View< + typename VectorViewType::non_const_value_type***, + typename VectorViewType::array_layout, + typename VectorViewType::execution_space::scratch_memory_space>; + using TeamCopy1D = TeamCopy; + + const OrdinalType numMatrices = _X.extent(0); + const OrdinalType numRows = _X.extent(1); + + size_t maximum_iteration = handle->get_max_iteration() < numRows + ? 
handle->get_max_iteration() + : numRows; + const MagnitudeType tolerance = handle->get_tolerance(); + const MagnitudeType max_tolerance = 0.; + + ScratchPadMultiVectorViewType V(member.team_scratch(1), numMatrices, + maximum_iteration + 1, numRows); + ScratchPadMultiVectorViewType H(member.team_scratch(1), numMatrices, + maximum_iteration + 1, maximum_iteration); + ScratchPadMultiVectorViewType Givens(member.team_scratch(1), numMatrices, + maximum_iteration, 2); + ScratchPadVectorViewType G(member.team_scratch(1), numMatrices, + maximum_iteration + 1); + + ScratchPadVectorViewType W(member.team_scratch(0), numMatrices, numRows); + ScratchPadVectorViewType Q(member.team_scratch(0), numMatrices, numRows); + ScratchPadVectorViewType R(member.team_scratch(0), numMatrices, numRows); + ScratchPadVectorViewType X(member.team_scratch(0), numMatrices, numRows); + + ScratchPadNormViewType beta(member.team_scratch(0), numMatrices); + ScratchPadNormViewType mask(member.team_scratch(0), numMatrices); + ScratchPadNormViewType tmp(member.team_scratch(0), numMatrices); + + TeamCopy::invoke(member, _X, X); + // Deep copy of b into r_0: + TeamCopy::invoke(member, _B, R); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), + [&](const OrdinalType& i) { mask(i) = 1.; }); + + // r_0 := b - A x_0 + member.team_barrier(); + A.template apply( + member, X, R, -1, 1); + member.team_barrier(); + + TeamDot::invoke(member, R, R, beta); + member.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, numMatrices), + [&](const OrdinalType& i) { + beta(i) = ATM::sqrt(beta(i)); + G(i, 0) = beta(i) > max_tolerance ? beta(i) : 0.; + tmp(i) = beta(i) > max_tolerance ? 1. 
/ beta(i) : 0.; + }); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, 0, numMatrices * numRows), + [&](const OrdinalType& iTemp) { + OrdinalType iRow, iMatrix; + getIndices( + iTemp, numRows, numMatrices, iRow, iMatrix); + V(iMatrix, 0, iRow) = R(iMatrix, iRow) * tmp(iMatrix); + }); + + int status = 1; + // int number_not_converged = 0; + + for (size_t j = 0; j < maximum_iteration; ++j) { + // q := A p_j + auto V_j = Kokkos::subview(V, Kokkos::ALL, j, Kokkos::ALL); + + A.template apply(member, V_j, W); + member.team_barrier(); + + for (size_t i = 0; i < j + 1; ++i) { + auto V_i = Kokkos::subview(V, Kokkos::ALL, i, Kokkos::ALL); + TeamDot::invoke(member, W, V_i, tmp); + member.team_barrier(); + TeamCopy1D::invoke(member, tmp, Kokkos::subview(H, Kokkos::ALL, i, j)); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, 0, numMatrices), + [&](const OrdinalType& ii) { tmp(ii) = -tmp(ii); }); + + TeamAxpy::invoke(member, tmp, V_i, W); + } + + TeamDot::invoke(member, W, W, tmp); + member.team_barrier(); + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, 0, numMatrices), + [&](const OrdinalType& i) { + H(i, j + 1, j) = ATM::sqrt(tmp(i)); + tmp(i) = H(i, j + 1, j) > max_tolerance ? 1. / H(i, j + 1, j) : 0.; + }); + member.team_barrier(); + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, 0, numMatrices * numRows), + [&](const OrdinalType& iTemp) { + OrdinalType iRow, iMatrix; + getIndices( + iTemp, numRows, numMatrices, iRow, iMatrix); + V(iMatrix, j + 1, iRow) = W(iMatrix, iRow) * tmp(iMatrix); + }); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, 0, numMatrices), + [&](const OrdinalType& l) { + // Apply the previous Givens rotations: + auto H_j = Kokkos::subview(H, l, Kokkos::ALL, j); + + if (mask(l) == 1.) 
{ + for (size_t i = 0; i < j; ++i) { + auto tmp1 = + Givens(l, i, 0) * H_j(i) + Givens(l, i, 1) * H_j(i + 1); + auto tmp2 = + -Givens(l, i, 1) * H_j(i) + Givens(l, i, 0) * H_j(i + 1); + H_j(i) = tmp1; + H_j(i + 1) = tmp2; + } + + // Compute the new Givens rotation: + Kokkos::pair + G_new; + typename VectorViewType::non_const_value_type alpha; + SerialGivensInternal::invoke(H_j(j), H_j(j + 1), &G_new, &alpha); + + Givens(l, j, 0) = G_new.first; + Givens(l, j, 1) = G_new.second; + + // Apply the new Givens rotation: + auto tmp1 = + Givens(l, j, 0) * H_j(j) + Givens(l, j, 1) * H_j(j + 1); + auto tmp2 = + -Givens(l, j, 1) * H_j(j) + Givens(l, j, 0) * H_j(j + 1); + H_j(j) = tmp1; + H_j(j + 1) = tmp2; + + G(l, j + 1) = -Givens(l, j, 1) * G(l, j); + G(l, j) *= Givens(l, j, 0); + } else { + H_j(j) = 1.; + G(l, j + 1) = 0.; + } + + if (mask(l) == 1. && std::abs(G(l, j + 1)) / beta(l) < tolerance) { + mask(l) = 0.; + G(l, j + 1) = 0.; + } + }); + } + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, 0, numMatrices), + [&](const OrdinalType& l) { + SerialTrsm::template invoke(1, + Kokkos::subview( + H, l, + Kokkos::ALL, + Kokkos::ALL), + Kokkos::subview( + G, l, + Kokkos::ALL)); + }); + + for (size_t j = 0; j < maximum_iteration; ++j) + TeamAxpy::invoke( + member, Kokkos::subview(G, Kokkos::ALL, j), + Kokkos::subview(V, Kokkos::ALL, j, Kokkos::ALL), X); + + TeamCopy::invoke(member, X, _X); + return status; + } +}; +} // namespace KokkosBatched + +#endif diff --git a/src/batched/sparse/impl/KokkosBatched_Spmv_Serial_Impl.hpp b/src/batched/sparse/impl/KokkosBatched_Spmv_Serial_Impl.hpp index dc4b5c05c0..dd443041ac 100644 --- a/src/batched/sparse/impl/KokkosBatched_Spmv_Serial_Impl.hpp +++ b/src/batched/sparse/impl/KokkosBatched_Spmv_Serial_Impl.hpp @@ -96,6 +96,49 @@ struct SerialSpmvInternal { return 0; } + + template + KOKKOS_INLINE_FUNCTION static int invoke( + const OrdinalType numMatrices, const OrdinalType numRows, + const ScalarType alpha, const ValueType* 
KOKKOS_RESTRICT values, + const OrdinalType valuess0, const OrdinalType valuess1, + const OrdinalType* KOKKOS_RESTRICT row_ptr, const OrdinalType row_ptrs0, + const OrdinalType* KOKKOS_RESTRICT colIndices, + const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, + const OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, + const OrdinalType ys1) { + for (OrdinalType iMatrix = 0; iMatrix < numMatrices; ++iMatrix) { + for (OrdinalType iRow = 0; iRow < numRows; ++iRow) { + const OrdinalType rowLength = + row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; + ValueType sum = 0; +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { + sum += values[iMatrix * valuess0 + + (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * + X[iMatrix * xs0 + + colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * + colIndicess0] * + xs1]; + } + + sum *= alpha; + + if (dobeta == 0) { + Y[iMatrix * ys0 + iRow * ys1] = sum; + } else { + Y[iMatrix * ys0 + iRow * ys1] = + beta * Y[iMatrix * ys0 + iRow * ys1] + sum; + } + } + } + + return 0; + } }; template <> @@ -194,6 +237,82 @@ struct SerialSpmv { X.stride_0(), X.stride_1(), beta.data(), beta.stride_0(), Y.data(), Y.stride_0(), Y.stride_1()); } + + template + KOKKOS_INLINE_FUNCTION static int invoke( + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type& alpha, + const ValuesViewType& values, const IntView& row_ptr, + const IntView& colIndices, const xViewType& X, + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type& beta, + const yViewType& Y) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::spmv: IntView is not a 
Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::spmv: xViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::spmv: yViewType is not a Kokkos::View."); + + static_assert(ValuesViewType::Rank == 2, + "KokkosBatched::spmv: ValuesViewType must have rank 2."); + static_assert(IntView::Rank == 1, + "KokkosBatched::spmv: IntView must have rank 2."); + static_assert(xViewType::Rank == 2, + "KokkosBatched::spmv: xViewType must have rank 2."); + static_assert(yViewType::Rank == 2, + "KokkosBatched::spmv: yViewType must have rank 2."); + + // Check compatibility of dimensions at run time. + if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { + printf( + "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " + "%d, Y: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), + (int)Y.extent(1)); + return 1; + } + if (X.extent(0) != values.extent(0)) { + printf( + "KokkosBatched::spmv: First dimension of X and the first dimension " + "of values do not match: X: %d x %d, values: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), + (int)values.extent(1)); + return 1; + } + if (colIndices.extent(0) != values.extent(1)) { + printf( + "KokkosBatched::spmv: Dimension of colIndices and the second " + "dimension of values do not match: colIndices: %d , values: %d x " + "%d\n", + (int)colIndices.extent(0), (int)values.extent(0), + (int)values.extent(1)); + return 1; + } + if (row_ptr.extent(0) - 1 != X.extent(1)) { + printf( + "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " + "of X do not match: colIndices (-1): %d , values: %d x %d\n", + (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); + return 1; + } +#endif + + return SerialSpmvInternal::template invoke< + typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type, + typename ValuesViewType::non_const_value_type, + typename 
IntView::non_const_value_type, + typename ValuesViewType::array_layout, dobeta>( + X.extent(0), X.extent(1), alpha, values.data(), values.stride_0(), + values.stride_1(), row_ptr.data(), row_ptr.stride_0(), + colIndices.data(), colIndices.stride_0(), X.data(), X.stride_0(), + X.stride_1(), beta, Y.data(), Y.stride_0(), Y.stride_1()); + } }; } // namespace KokkosBatched diff --git a/src/batched/sparse/impl/KokkosBatched_Spmv_TeamVector_Impl.hpp b/src/batched/sparse/impl/KokkosBatched_Spmv_TeamVector_Impl.hpp index d5d5c43404..e0408a1211 100644 --- a/src/batched/sparse/impl/KokkosBatched_Spmv_TeamVector_Impl.hpp +++ b/src/batched/sparse/impl/KokkosBatched_Spmv_TeamVector_Impl.hpp @@ -66,6 +66,20 @@ struct TeamVectorSpmvInternal { const ScalarType* KOKKOS_RESTRICT beta, const OrdinalType betas0, /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, const OrdinalType ys1); + + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const OrdinalType numMatrices, + const OrdinalType numRows, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, + const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, + const OrdinalType row_ptrs0, + const OrdinalType* KOKKOS_RESTRICT colIndices, + const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, + const OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, + const OrdinalType ys1); }; template +KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( + const MemberType& member, const OrdinalType numMatrices, + const OrdinalType numRows, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, + const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, + const OrdinalType row_ptrs0, const OrdinalType* KOKKOS_RESTRICT colIndices, + const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, + const 
OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, + const OrdinalType ys1) { + Kokkos::parallel_for( + Kokkos::TeamVectorRange(member, 0, numMatrices * numRows), + [&](const OrdinalType& iTemp) { + OrdinalType iRow, iMatrix; + getIndices(iTemp, numRows, numMatrices, iRow, + iMatrix); + + const OrdinalType rowLength = + row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; + ValueType sum = 0; +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { + sum += values[iMatrix * valuess0 + + (row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * + X[iMatrix * xs0 + + colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * + colIndicess0] * + xs1]; + } + + sum *= alpha; + + if (dobeta == 0) { + Y[iMatrix * ys0 + iRow * ys1] = sum; + } else { + Y[iMatrix * ys0 + iRow * ys1] = + beta * Y[iMatrix * ys0 + iRow * ys1] + sum; + } + }); + + return 0; +} + template struct TeamVectorSpmv { template { "KokkosBatched::spmv: alphaViewType must have rank 1."); static_assert(betaViewType::Rank == 1, "KokkosBatched::spmv: betaViewType must have rank 1."); + static_assert(alphaViewType::Rank == 1, + "KokkosBatched::spmv: alphaViewType must have rank 1."); + static_assert(betaViewType::Rank == 1, + "KokkosBatched::spmv: betaViewType must have rank 1."); // Check compatibility of dimensions at run time. 
if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { @@ -214,6 +279,84 @@ struct TeamVectorSpmv { X.stride_0(), X.stride_1(), beta.data(), beta.stride_0(), Y.data(), Y.stride_0(), Y.stride_1()); } + + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type& alpha, + const ValuesViewType& values, const IntView& row_ptr, + const IntView& colIndices, const xViewType& X, + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type& beta, + const yViewType& Y) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::spmv: IntView is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::spmv: xViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::spmv: yViewType is not a Kokkos::View."); + + static_assert(ValuesViewType::Rank == 2, + "KokkosBatched::spmv: ValuesViewType must have rank 2."); + static_assert(IntView::Rank == 1, + "KokkosBatched::spmv: IntView must have rank 2."); + static_assert(xViewType::Rank == 2, + "KokkosBatched::spmv: xViewType must have rank 2."); + static_assert(yViewType::Rank == 2, + "KokkosBatched::spmv: yViewType must have rank 2."); + + // Check compatibility of dimensions at run time. 
+ if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { + printf( + "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " + "%d, Y: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), + (int)Y.extent(1)); + return 1; + } + if (X.extent(0) != values.extent(0)) { + printf( + "KokkosBatched::spmv: First dimension of X and the first dimension " + "of values do not match: X: %d x %d, values: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), + (int)values.extent(1)); + return 1; + } + if (colIndices.extent(0) != values.extent(1)) { + printf( + "KokkosBatched::spmv: Dimension of colIndices and the second " + "dimension of values do not match: colIndices: %d , values: %d x " + "%d\n", + (int)colIndices.extent(0), (int)values.extent(0), + (int)values.extent(1)); + return 1; + } + if (row_ptr.extent(0) - 1 != X.extent(1)) { + printf( + "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " + "of X do not match: colIndices (-1): %d , values: %d x %d\n", + (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); + return 1; + } +#endif + + return TeamVectorSpmvInternal::template invoke< + MemberType, + typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type, + typename ValuesViewType::non_const_value_type, + typename IntView::non_const_value_type, + typename ValuesViewType::array_layout, dobeta>( + member, X.extent(0), X.extent(1), alpha, values.data(), + values.stride_0(), values.stride_1(), row_ptr.data(), + row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), + X.stride_0(), X.stride_1(), beta, Y.data(), Y.stride_0(), Y.stride_1()); + } }; } // namespace KokkosBatched diff --git a/src/batched/sparse/impl/KokkosBatched_Spmv_Team_Impl.hpp b/src/batched/sparse/impl/KokkosBatched_Spmv_Team_Impl.hpp index 5f7e09df3b..a54488e1e4 100644 --- a/src/batched/sparse/impl/KokkosBatched_Spmv_Team_Impl.hpp +++ 
b/src/batched/sparse/impl/KokkosBatched_Spmv_Team_Impl.hpp @@ -66,6 +66,20 @@ struct TeamSpmvInternal { const ScalarType* KOKKOS_RESTRICT beta, const OrdinalType betas0, /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, const OrdinalType ys1); + + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const OrdinalType numMatrices, + const OrdinalType numRows, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, + const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, + const OrdinalType row_ptrs0, + const OrdinalType* KOKKOS_RESTRICT colIndices, + const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, + const OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, + const OrdinalType ys1); }; template +KOKKOS_INLINE_FUNCTION int TeamSpmvInternal::invoke( + const MemberType& member, const OrdinalType numMatrices, + const OrdinalType numRows, const ScalarType alpha, + const ValueType* KOKKOS_RESTRICT values, const OrdinalType valuess0, + const OrdinalType valuess1, const OrdinalType* KOKKOS_RESTRICT row_ptr, + const OrdinalType row_ptrs0, const OrdinalType* KOKKOS_RESTRICT colIndices, + const OrdinalType colIndicess0, const ValueType* KOKKOS_RESTRICT X, + const OrdinalType xs0, const OrdinalType xs1, const ScalarType beta, + /**/ ValueType* KOKKOS_RESTRICT Y, const OrdinalType ys0, + const OrdinalType ys1) { + Kokkos::parallel_for( + Kokkos::TeamThreadRange(member, 0, numMatrices * numRows), + [&](const OrdinalType& iTemp) { + OrdinalType iRow, iMatrix; + getIndices(iTemp, numRows, numMatrices, iRow, + iMatrix); + + const OrdinalType rowLength = + row_ptr[(iRow + 1) * row_ptrs0] - row_ptr[iRow * row_ptrs0]; + ValueType sum = 0; +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (OrdinalType iEntry = 0; iEntry < rowLength; ++iEntry) { + sum += values[iMatrix * valuess0 + + 
(row_ptr[iRow * row_ptrs0] + iEntry) * valuess1] * + X[iMatrix * xs0 + + colIndices[(row_ptr[iRow * row_ptrs0] + iEntry) * + colIndicess0] * + xs1]; + } + + sum *= alpha; + + if (dobeta == 0) { + Y[iMatrix * ys0 + iRow * ys1] = sum; + } else { + Y[iMatrix * ys0 + iRow * ys1] = + beta * Y[iMatrix * ys0 + iRow * ys1] + sum; + } + }); + + return 0; +} + template struct TeamSpmv { template { X.stride_0(), X.stride_1(), beta.data(), beta.stride_0(), Y.data(), Y.stride_0(), Y.stride_1()); } + + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type& alpha, + const ValuesViewType& values, const IntView& row_ptr, + const IntView& colIndices, const xViewType& X, + const typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type& beta, + const yViewType& Y) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::spmv: ValuesViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::spmv: IntView is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::spmv: xViewType is not a Kokkos::View."); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBatched::spmv: yViewType is not a Kokkos::View."); + + static_assert(ValuesViewType::Rank == 2, + "KokkosBatched::spmv: ValuesViewType must have rank 2."); + static_assert(IntView::Rank == 1, + "KokkosBatched::spmv: IntView must have rank 2."); + static_assert(xViewType::Rank == 2, + "KokkosBatched::spmv: xViewType must have rank 2."); + static_assert(yViewType::Rank == 2, + "KokkosBatched::spmv: yViewType must have rank 2."); + + // Check compatibility of dimensions at run time. 
+ if (X.extent(0) != Y.extent(0) || X.extent(1) != Y.extent(1)) { + printf( + "KokkosBatched::spmv: Dimensions of X and Y do not match: X: %d x " + "%d, Y: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)Y.extent(0), + (int)Y.extent(1)); + return 1; + } + if (X.extent(0) != values.extent(0)) { + printf( + "KokkosBatched::spmv: First dimension of X and the first dimension " + "of values do not match: X: %d x %d, values: %d x %d\n", + (int)X.extent(0), (int)X.extent(1), (int)values.extent(0), + (int)values.extent(1)); + return 1; + } + if (colIndices.extent(0) != values.extent(1)) { + printf( + "KokkosBatched::spmv: Dimension of colIndices and the second " + "dimension of values do not match: colIndices: %d , values: %d x " + "%d\n", + (int)colIndices.extent(0), (int)values.extent(0), + (int)values.extent(1)); + return 1; + } + if (row_ptr.extent(0) - 1 != X.extent(1)) { + printf( + "KokkosBatched::spmv: Dimension of row_ptr and the second dimension " + "of X do not match: colIndices (-1): %d , values: %d x %d\n", + (int)row_ptr.extent(0) - 1, (int)X.extent(0), (int)X.extent(1)); + return 1; + } +#endif + + return TeamSpmvInternal::template invoke< + MemberType, + typename Kokkos::Details::ArithTraits< + typename ValuesViewType::non_const_value_type>::mag_type, + typename ValuesViewType::non_const_value_type, + typename IntView::non_const_value_type, + typename ValuesViewType::array_layout, dobeta>( + member, X.extent(0), X.extent(1), alpha, values.data(), + values.stride_0(), values.stride_1(), row_ptr.data(), + row_ptr.stride_0(), colIndices.data(), colIndices.stride_0(), X.data(), + X.stride_0(), X.stride_1(), beta, Y.data(), Y.stride_0(), Y.stride_1()); + } }; } // namespace KokkosBatched diff --git a/test_common/KokkosKernels_TestUtils.hpp b/test_common/KokkosKernels_TestUtils.hpp index d472e2cee9..77464e3147 100644 --- a/test_common/KokkosKernels_TestUtils.hpp +++ b/test_common/KokkosKernels_TestUtils.hpp @@ -243,24 +243,21 @@ struct 
Functor_BatchedVanillaGEMM { } }; -//Compute C := alpha * AB + beta * C +// Compute C := alpha * AB + beta * C template void vanillaGEMM(typename ViewTypeC::non_const_value_type alpha, const ViewTypeA& A, const ViewTypeB& B, typename ViewTypeC::non_const_value_type beta, const ViewTypeC& C) { using value_type = typename ViewTypeC::non_const_value_type; - using KAT = Kokkos::ArithTraits; - int m = A.extent(0); - int k = A.extent(1); - int n = B.extent(1); - for(int i = 0; i < m; i++) - { - for(int j = 0; j < n; j++) - { + using KAT = Kokkos::ArithTraits; + int m = A.extent(0); + int k = A.extent(1); + int n = B.extent(1); + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { value_type sum = KAT::zero(); - for(int ii = 0; ii < k; ii++) - { + for (int ii = 0; ii < k; ii++) { sum += A(i, ii) * B(ii, j); } C(i, j) = alpha * sum + beta * C(i, j); @@ -323,103 +320,104 @@ class epsilon { using KokkosKernels::Impl::getRandomBounds; - template - crsMat_t symmetrize(crsMat_t A) - { - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef typename crsMat_t::values_type::non_const_type scalar_view_t; - typedef typename graph_t::row_map_type::non_const_type lno_view_t; - typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t; - auto host_rowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); - auto host_entries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); - auto host_values = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); - lno_t numRows = A.numRows(); - //symmetrize as input_mat + input_mat^T, to still have a diagonally dominant matrix - typedef std::map Row; - std::vector symRows(numRows); - for(lno_t r = 0; r < numRows; r++) - { - auto& row = symRows[r]; - for(size_type i = host_rowmap(r); i < host_rowmap(r + 1); i++) - { - lno_t c = host_entries(i); - auto& col = symRows[c]; - auto it = row.find(c); - if(it == row.end()) - row[c] = host_values(i); - else - 
row[c] += host_values(i); - it = col.find(r); - if(it == col.end()) - col[r] = host_values(i); - else - col[r] += host_values(i); - } - } - //Count entries - Kokkos::View new_host_rowmap("Rowmap", numRows + 1); - size_t accum = 0; - for(lno_t r = 0; r <= numRows; r++) - { - new_host_rowmap(r) = accum; - if(r < numRows) - accum += symRows[r].size(); +template +crsMat_t symmetrize(crsMat_t A) { + typedef typename crsMat_t::StaticCrsGraphType graph_t; + typedef typename crsMat_t::values_type::non_const_type scalar_view_t; + typedef typename graph_t::row_map_type::non_const_type lno_view_t; + typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t; + auto host_rowmap = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); + auto host_entries = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); + auto host_values = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.values); + lno_t numRows = A.numRows(); + // symmetrize as input_mat + input_mat^T, to still have a diagonally dominant + // matrix + typedef std::map Row; + std::vector symRows(numRows); + for (lno_t r = 0; r < numRows; r++) { + auto& row = symRows[r]; + for (size_type i = host_rowmap(r); i < host_rowmap(r + 1); i++) { + lno_t c = host_entries(i); + auto& col = symRows[c]; + auto it = row.find(c); + if (it == row.end()) + row[c] = host_values(i); + else + row[c] += host_values(i); + it = col.find(r); + if (it == col.end()) + col[r] = host_values(i); + else + col[r] += host_values(i); } - //Allocate new entries/values - Kokkos::View new_host_entries("Entries", accum); - Kokkos::View new_host_values("Values", accum); - for(lno_t r = 0; r < numRows; r++) - { - auto rowIt = symRows[r].begin(); - for(size_type i = new_host_rowmap(r); i < new_host_rowmap(r + 1); i++) - { - new_host_entries(i) = rowIt->first; - new_host_values(i) = rowIt->second; - rowIt++; - } + } + // Count entries + Kokkos::View + new_host_rowmap("Rowmap", numRows + 1); + 
size_t accum = 0; + for (lno_t r = 0; r <= numRows; r++) { + new_host_rowmap(r) = accum; + if (r < numRows) accum += symRows[r].size(); + } + // Allocate new entries/values + Kokkos::View new_host_entries( + "Entries", accum); + Kokkos::View + new_host_values("Values", accum); + for (lno_t r = 0; r < numRows; r++) { + auto rowIt = symRows[r].begin(); + for (size_type i = new_host_rowmap(r); i < new_host_rowmap(r + 1); i++) { + new_host_entries(i) = rowIt->first; + new_host_values(i) = rowIt->second; + rowIt++; } - lno_view_t new_rowmap("Rowmap", numRows + 1); - lno_nnz_view_t new_entries("Entries", accum); - scalar_view_t new_values("Values", accum); - Kokkos::deep_copy(new_rowmap, new_host_rowmap); - Kokkos::deep_copy(new_entries, new_host_entries); - Kokkos::deep_copy(new_values, new_host_values); - return crsMat_t("SymA", numRows, numRows, accum, new_values, new_rowmap, new_entries); } + lno_view_t new_rowmap("Rowmap", numRows + 1); + lno_nnz_view_t new_entries("Entries", accum); + scalar_view_t new_values("Values", accum); + Kokkos::deep_copy(new_rowmap, new_host_rowmap); + Kokkos::deep_copy(new_entries, new_host_entries); + Kokkos::deep_copy(new_values, new_host_values); + return crsMat_t("SymA", numRows, numRows, accum, new_values, new_rowmap, + new_entries); +} - //create_random_x_vector and create_random_y_vector can be used together to generate a random - //linear system Ax = y. - template - vec_t create_random_x_vector(vec_t& kok_x, double max_value = 10.0) { - typedef typename vec_t::value_type scalar_t; - auto h_x = Kokkos::create_mirror_view (kok_x); - for (size_t j = 0; j < h_x.extent(1); ++j){ - for (size_t i = 0; i < h_x.extent(0); ++i){ - scalar_t r = - static_cast (rand()) / - static_cast (RAND_MAX / max_value); - h_x.access(i, j) = r; - } +// create_random_x_vector and create_random_y_vector can be used together to +// generate a random linear system Ax = y. 
+template +vec_t create_random_x_vector(vec_t& kok_x, double max_value = 10.0) { + typedef typename vec_t::value_type scalar_t; + auto h_x = Kokkos::create_mirror_view(kok_x); + for (size_t j = 0; j < h_x.extent(1); ++j) { + for (size_t i = 0; i < h_x.extent(0); ++i) { + scalar_t r = static_cast(rand()) / + static_cast(RAND_MAX / max_value); + h_x.access(i, j) = r; } - Kokkos::deep_copy (kok_x, h_x); - return kok_x; } + Kokkos::deep_copy(kok_x, h_x); + return kok_x; +} - template - vector_t create_random_y_vector(crsMat_t crsMat, vector_t x_vector){ - vector_t y_vector (Kokkos::view_alloc(Kokkos::WithoutInitializing, "Y VECTOR"), - crsMat.numRows()); - KokkosSparse::spmv("N", 1, crsMat, x_vector, 0, y_vector); - return y_vector; - } +template +vector_t create_random_y_vector(crsMat_t crsMat, vector_t x_vector) { + vector_t y_vector(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Y VECTOR"), + crsMat.numRows()); + KokkosSparse::spmv("N", 1, crsMat, x_vector, 0, y_vector); + return y_vector; +} - template - vector_t create_random_y_vector_mv(crsMat_t crsMat, vector_t x_vector){ - vector_t y_vector (Kokkos::view_alloc(Kokkos::WithoutInitializing, "Y VECTOR"), - crsMat.numRows(), x_vector.extent(1)); - KokkosSparse::spmv("N", 1, crsMat, x_vector, 0, y_vector); - return y_vector; - } +template +vector_t create_random_y_vector_mv(crsMat_t crsMat, vector_t x_vector) { + vector_t y_vector(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Y VECTOR"), + crsMat.numRows(), x_vector.extent(1)); + KokkosSparse::spmv("N", 1, crsMat, x_vector, 0, y_vector); + return y_vector; +} /// \brief SharedParamTag class used to specify how to invoke templates within /// batched unit tests @@ -433,5 +431,31 @@ struct SharedParamTag { using transB = TB; using batchLayout = BL; }; + +/// \brief value_type_name returns a string with the value type name +template +KOKKOS_INLINE_FUNCTION std::string value_type_name() { + return "::UnknowValueType"; +} + +template <> +KOKKOS_INLINE_FUNCTION 
std::string value_type_name() { + return "::Float"; +} + +template <> +KOKKOS_INLINE_FUNCTION std::string value_type_name() { + return "::Double"; +} + +template <> +KOKKOS_INLINE_FUNCTION std::string value_type_name>() { + return "::ComplexFloat"; +} + +template <> +KOKKOS_INLINE_FUNCTION std::string value_type_name>() { + return "::ComplexDouble"; +} } // namespace Test #endif diff --git a/unit_test/batched/dense/Test_Batched_SerialAxpy.hpp b/unit_test/batched/dense/Test_Batched_SerialAxpy.hpp index 287a6d5b05..0fa1c99935 100644 --- a/unit_test/batched/dense/Test_Batched_SerialAxpy.hpp +++ b/unit_test/batched/dense/Test_Batched_SerialAxpy.hpp @@ -30,25 +30,14 @@ struct Functor_TestBatchedSerialAxpy { auto x = Kokkos::subview(_X, Kokkos::make_pair(k, k + 1), Kokkos::ALL); auto y = Kokkos::subview(_Y, Kokkos::make_pair(k, k + 1), Kokkos::ALL); - KokkosBatched::SerialAxpy::template invoke(alpha, - x, y); + KokkosBatched::SerialAxpy::invoke(alpha, x, y); } inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialAxpy"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _X.extent(0)); Kokkos::parallel_for(name.c_str(), policy, *this); @@ -59,6 +48,8 @@ struct Functor_TestBatchedSerialAxpy { template void impl_test_batched_axpy(const int N, const int BlkSize) { typedef typename ViewType::value_type value_type; + typedef typename ViewType::const_value_type const_value_type; + typedef typename alphaViewType::const_value_type alpha_const_value_type; typedef Kokkos::Details::ArithTraits ats; ViewType X0("x0", N, BlkSize), X1("x1", N, BlkSize), Y0("y0", N, BlkSize), @@ -68,9 +59,9 @@ void impl_test_batched_axpy(const int N, const int BlkSize) { Kokkos::Random_XorShift64_Pool random( 13718); - Kokkos::fill_random(X0, random, value_type(1.0)); - Kokkos::fill_random(Y0, random, value_type(1.0)); - Kokkos::fill_random(alpha, random, value_type(1.0)); + Kokkos::fill_random(X0, random, const_value_type(1.0)); + Kokkos::fill_random(Y0, random, const_value_type(1.0)); + Kokkos::fill_random(alpha, random, alpha_const_value_type(1.0)); Kokkos::fence(); @@ -121,7 +112,7 @@ int test_batched_axpy() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { typedef Kokkos::View ViewType; - typedef Kokkos::View + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { @@ -134,7 +125,7 @@ int test_batched_axpy() { { typedef Kokkos::View ViewType; - typedef Kokkos::View + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { diff --git a/unit_test/batched/dense/Test_Batched_SerialGemm.hpp b/unit_test/batched/dense/Test_Batched_SerialGemm.hpp index 37e171d227..6a8be3fc54 100644 --- a/unit_test/batched/dense/Test_Batched_SerialGemm.hpp +++ b/unit_test/batched/dense/Test_Batched_SerialGemm.hpp @@ -48,18 +48,8 @@ struct Functor_TestBatchedSerialGemm { inline void 
run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialGemm"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _c.extent(0)); Kokkos::parallel_for(name.c_str(), policy, *this); diff --git a/unit_test/batched/dense/Test_Batched_SerialGemv.hpp b/unit_test/batched/dense/Test_Batched_SerialGemv.hpp index c9257f3618..eb1d8f285c 100644 --- a/unit_test/batched/dense/Test_Batched_SerialGemv.hpp +++ b/unit_test/batched/dense/Test_Batched_SerialGemv.hpp @@ -47,18 +47,8 @@ struct Functor_TestBatchedSerialGemv { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialGemv"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _c.extent(0)); Kokkos::parallel_for(name.c_str(), policy, *this); diff --git a/unit_test/batched/dense/Test_Batched_SerialInverseLU.hpp b/unit_test/batched/dense/Test_Batched_SerialInverseLU.hpp index 2442e22dea..fd7d0478fc 100644 --- a/unit_test/batched/dense/Test_Batched_SerialInverseLU.hpp +++ b/unit_test/batched/dense/Test_Batched_SerialInverseLU.hpp @@ -54,18 +54,8 @@ struct Functor_BatchedSerialGemm { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialInverseLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _c.extent(0)); Kokkos::parallel_for((name + "::GemmFunctor").c_str(), policy, *this); @@ -92,18 +82,8 @@ struct Functor_BatchedSerialLU { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialInverseLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _a.extent(0)); Kokkos::parallel_for((name + "::LUFunctor").c_str(), policy, *this); @@ -132,18 +112,8 @@ struct Functor_TestBatchedSerialInverseLU { inline void run() { typedef typename AViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialInverseLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _a.extent(0)); Kokkos::parallel_for((name + "::InverseLUFunctor").c_str(), policy, *this); diff --git a/unit_test/batched/dense/Test_Batched_SerialLU.hpp b/unit_test/batched/dense/Test_Batched_SerialLU.hpp index b42bb2943a..9bb92340a2 100644 --- a/unit_test/batched/dense/Test_Batched_SerialLU.hpp +++ b/unit_test/batched/dense/Test_Batched_SerialLU.hpp @@ -34,18 +34,8 @@ struct Functor_TestBatchedSerialLU { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _a.extent(0)); Kokkos::parallel_for(name.c_str(), policy, *this); diff --git a/unit_test/batched/dense/Test_Batched_SerialMatUtil.hpp b/unit_test/batched/dense/Test_Batched_SerialMatUtil.hpp index 79c9d9b2f7..f9a58f5442 100644 --- a/unit_test/batched/dense/Test_Batched_SerialMatUtil.hpp +++ b/unit_test/batched/dense/Test_Batched_SerialMatUtil.hpp @@ -62,17 +62,7 @@ struct Functor_TestBatchedSerialMatUtil { inline int run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialMatUtil"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); + const std::string name_value_type = Test::value_type_name(); std::string name_work_tag = (std::is_same::value ? "::KokkosBatched" diff --git a/unit_test/batched/dense/Test_Batched_SerialSolveLU.hpp b/unit_test/batched/dense/Test_Batched_SerialSolveLU.hpp index 64362d9140..b6d8e1aecf 100644 --- a/unit_test/batched/dense/Test_Batched_SerialSolveLU.hpp +++ b/unit_test/batched/dense/Test_Batched_SerialSolveLU.hpp @@ -54,18 +54,8 @@ struct Functor_BatchedSerialGemm { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialSolveLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _c.extent(0)); Kokkos::parallel_for((name + "::GemmFunctor").c_str(), policy, *this); @@ -92,18 +82,8 @@ struct Functor_BatchedSerialLU { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialSolveLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _a.extent(0)); Kokkos::parallel_for((name + "::LUFunctor").c_str(), policy, *this); @@ -132,18 +112,8 @@ struct Functor_TestBatchedSerialSolveLU { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialSolveLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _a.extent(0)); Kokkos::parallel_for((name + "::SolveLUFunctor").c_str(), policy, *this); diff --git a/unit_test/batched/dense/Test_Batched_SerialTrmm.hpp b/unit_test/batched/dense/Test_Batched_SerialTrmm.hpp index 227a255707..8df653b48f 100644 --- a/unit_test/batched/dense/Test_Batched_SerialTrmm.hpp +++ b/unit_test/batched/dense/Test_Batched_SerialTrmm.hpp @@ -118,18 +118,8 @@ struct Functor_TestBatchedSerialTrmm { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialTrmm"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _a.extent(0)); Kokkos::parallel_for(name.c_str(), policy, *this); diff --git a/unit_test/batched/dense/Test_Batched_SerialTrsm.hpp b/unit_test/batched/dense/Test_Batched_SerialTrsm.hpp index cbf36c503d..81a0456623 100644 --- a/unit_test/batched/dense/Test_Batched_SerialTrsm.hpp +++ b/unit_test/batched/dense/Test_Batched_SerialTrsm.hpp @@ -47,18 +47,8 @@ struct Functor_TestBatchedSerialTrsm { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialTrsm"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? 
"::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _b.extent(0)); Kokkos::parallel_for(name.c_str(), policy, *this); diff --git a/unit_test/batched/dense/Test_Batched_SerialTrsv.hpp b/unit_test/batched/dense/Test_Batched_SerialTrsv.hpp index f9bcaeb84a..995f33178d 100644 --- a/unit_test/batched/dense/Test_Batched_SerialTrsv.hpp +++ b/unit_test/batched/dense/Test_Batched_SerialTrsv.hpp @@ -46,18 +46,8 @@ struct Functor_TestBatchedSerialTrsv { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialTrsv"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _b.extent(0)); Kokkos::parallel_for(name.c_str(), policy, *this); diff --git a/unit_test/batched/dense/Test_Batched_SerialTrtri.hpp b/unit_test/batched/dense/Test_Batched_SerialTrtri.hpp index 75cb1fe2c8..2af8420be3 100644 --- a/unit_test/batched/dense/Test_Batched_SerialTrtri.hpp +++ b/unit_test/batched/dense/Test_Batched_SerialTrtri.hpp @@ -112,18 +112,8 @@ struct Functor_TestBatchedSerialTrtri { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialTrtri"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? 
"::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _a.extent(0)); Kokkos::parallel_for("Functor_TestBatchedSerialTrtri", policy, *this); diff --git a/unit_test/batched/dense/Test_Batched_TeamAxpy.hpp b/unit_test/batched/dense/Test_Batched_TeamAxpy.hpp index 41bc7aa899..144b1f9390 100644 --- a/unit_test/batched/dense/Test_Batched_TeamAxpy.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamAxpy.hpp @@ -41,26 +41,14 @@ struct Functor_TestBatchedTeamAxpy { auto y = Kokkos::subview(_Y, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - KokkosBatched::TeamAxpy::template invoke( - member, alpha, x, y); + KokkosBatched::TeamAxpy::invoke(member, alpha, x, y); } inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamAxpy"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::TeamPolicy policy(_X.extent(0) / _N_team, Kokkos::AUTO(), Kokkos::AUTO()); @@ -72,6 +60,8 @@ struct Functor_TestBatchedTeamAxpy { template void impl_test_batched_axpy(const int N, const int BlkSize, const int N_team) { typedef typename ViewType::value_type value_type; + typedef typename ViewType::const_value_type const_value_type; + typedef typename alphaViewType::const_value_type alpha_const_value_type; typedef Kokkos::Details::ArithTraits ats; ViewType X0("x0", N, BlkSize), X1("x1", N, BlkSize), Y0("y0", N, BlkSize), @@ -81,9 +71,9 @@ void impl_test_batched_axpy(const int N, const int BlkSize, const int N_team) { Kokkos::Random_XorShift64_Pool random( 13718); - Kokkos::fill_random(X0, random, value_type(1.0)); - Kokkos::fill_random(Y0, random, value_type(1.0)); - Kokkos::fill_random(alpha, random, value_type(1.0)); + Kokkos::fill_random(X0, random, const_value_type(1.0)); + Kokkos::fill_random(Y0, random, const_value_type(1.0)); + Kokkos::fill_random(alpha, random, alpha_const_value_type(1.0)); Kokkos::fence(); @@ -134,7 +124,7 @@ int test_batched_team_axpy() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { typedef Kokkos::View ViewType; - typedef Kokkos::View + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { @@ -147,7 +137,7 @@ int test_batched_team_axpy() { { typedef Kokkos::View ViewType; - typedef Kokkos::View + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { diff --git a/unit_test/batched/dense/Test_Batched_TeamGemm.hpp b/unit_test/batched/dense/Test_Batched_TeamGemm.hpp index 6c5356dc7a..00fb2f4d49 100644 --- a/unit_test/batched/dense/Test_Batched_TeamGemm.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamGemm.hpp @@ -54,18 +54,8 @@ struct 
Functor_TestBatchedTeamGemm { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamGemm"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _c.extent(0); Kokkos::TeamPolicy policy(league_size, diff --git a/unit_test/batched/dense/Test_Batched_TeamGemv.hpp b/unit_test/batched/dense/Test_Batched_TeamGemv.hpp index 138c591ecc..9f19180a3f 100644 --- a/unit_test/batched/dense/Test_Batched_TeamGemv.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamGemv.hpp @@ -52,18 +52,8 @@ struct Functor_TestBatchedTeamGemv { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialGemm"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _c.extent(0); Kokkos::TeamPolicy policy(league_size, diff --git a/unit_test/batched/dense/Test_Batched_TeamInverseLU.hpp b/unit_test/batched/dense/Test_Batched_TeamInverseLU.hpp index 5226775c1b..4db8a69155 100644 --- a/unit_test/batched/dense/Test_Batched_TeamInverseLU.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamInverseLU.hpp @@ -62,18 +62,8 @@ struct Functor_BatchedTeamGemm { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamInverseLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _c.extent(0); @@ -107,18 +97,8 @@ struct Functor_BatchedTeamLU { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamInverseLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _a.extent(0); @@ -151,18 +131,8 @@ struct Functor_TestBatchedTeamInverseLU { inline void run() { typedef typename AViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamInverseLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _a.extent(0); diff --git a/unit_test/batched/dense/Test_Batched_TeamLU.hpp b/unit_test/batched/dense/Test_Batched_TeamLU.hpp index d49472f064..ec9ab01a7d 100644 --- a/unit_test/batched/dense/Test_Batched_TeamLU.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamLU.hpp @@ -40,18 +40,8 @@ struct Functor_TestBatchedTeamLU { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _a.extent(0); diff --git a/unit_test/batched/dense/Test_Batched_TeamMatUtil.hpp b/unit_test/batched/dense/Test_Batched_TeamMatUtil.hpp index ff557d5241..16879444f7 100644 --- a/unit_test/batched/dense/Test_Batched_TeamMatUtil.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamMatUtil.hpp @@ -70,17 +70,7 @@ struct Functor_TestBatchedTeamMatUtil { inline int run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialMatUtil"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); + const std::string name_value_type = Test::value_type_name(); std::string name_work_tag = (std::is_same::value ? "::KokkosBatched" diff --git a/unit_test/batched/dense/Test_Batched_TeamSolveLU.hpp b/unit_test/batched/dense/Test_Batched_TeamSolveLU.hpp index 6f31773269..201cc025fc 100644 --- a/unit_test/batched/dense/Test_Batched_TeamSolveLU.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamSolveLU.hpp @@ -62,18 +62,8 @@ struct Functor_BatchedTeamGemm { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamSolveLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _c.extent(0); Kokkos::TeamPolicy policy(league_size, @@ -104,18 +94,8 @@ struct Functor_BatchedTeamLU { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamSolveLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _a.extent(0); Kokkos::TeamPolicy policy(league_size, Kokkos::AUTO); @@ -146,18 +126,8 @@ struct Functor_TestBatchedTeamSolveLU { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamSolveLU"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _a.extent(0); diff --git a/unit_test/batched/dense/Test_Batched_TeamTrsm.hpp b/unit_test/batched/dense/Test_Batched_TeamTrsm.hpp index eba8fa8ef1..da4b70933a 100644 --- a/unit_test/batched/dense/Test_Batched_TeamTrsm.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamTrsm.hpp @@ -54,18 +54,8 @@ struct Functor_TestBatchedTeamTrsm { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamTrsm"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _b.extent(0); diff --git a/unit_test/batched/dense/Test_Batched_TeamTrsv.hpp b/unit_test/batched/dense/Test_Batched_TeamTrsv.hpp index b354529e72..c33c939488 100644 --- a/unit_test/batched/dense/Test_Batched_TeamTrsv.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamTrsv.hpp @@ -52,18 +52,8 @@ struct Functor_TestBatchedTeamTrsv { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamTrsv"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _b.extent(0); diff --git a/unit_test/batched/dense/Test_Batched_TeamVectorAxpy.hpp b/unit_test/batched/dense/Test_Batched_TeamVectorAxpy.hpp index 1c9c537ba8..7194757687 100644 --- a/unit_test/batched/dense/Test_Batched_TeamVectorAxpy.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamVectorAxpy.hpp @@ -42,26 +42,14 @@ struct Functor_TestBatchedTeamVectorAxpy { auto y = Kokkos::subview(_Y, Kokkos::make_pair(first_matrix, last_matrix), Kokkos::ALL); - KokkosBatched::TeamVectorAxpy::template invoke( - member, alpha, x, y); + KokkosBatched::TeamVectorAxpy::invoke(member, alpha, x, y); } inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamVectorAxpy"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::TeamPolicy policy(_X.extent(0) / _N_team, Kokkos::AUTO(), Kokkos::AUTO()); @@ -73,6 +61,8 @@ struct Functor_TestBatchedTeamVectorAxpy { template void impl_test_batched_axpy(const int N, const int BlkSize, const int N_team) { typedef typename ViewType::value_type value_type; + typedef typename ViewType::const_value_type const_value_type; + typedef typename alphaViewType::const_value_type alpha_const_value_type; typedef Kokkos::Details::ArithTraits ats; ViewType X0("x0", N, BlkSize), X1("x1", N, BlkSize), Y0("y0", N, BlkSize), @@ -82,9 +72,9 @@ void impl_test_batched_axpy(const int N, const int BlkSize, const int N_team) { Kokkos::Random_XorShift64_Pool random( 13718); - Kokkos::fill_random(X0, random, value_type(1.0)); - Kokkos::fill_random(Y0, random, value_type(1.0)); - Kokkos::fill_random(alpha, random, value_type(1.0)); + Kokkos::fill_random(X0, random, const_value_type(1.0)); + Kokkos::fill_random(Y0, random, const_value_type(1.0)); + Kokkos::fill_random(alpha, random, alpha_const_value_type(1.0)); Kokkos::fence(); @@ -135,7 +125,7 @@ int test_batched_teamvector_axpy() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) { typedef Kokkos::View ViewType; - typedef Kokkos::View + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { @@ -148,7 +138,7 @@ int test_batched_teamvector_axpy() { { typedef Kokkos::View ViewType; - typedef Kokkos::View + typedef Kokkos::View alphaViewType; for (int i = 3; i < 10; ++i) { diff --git a/unit_test/batched/dense/Test_Batched_TeamVectorGemm.hpp b/unit_test/batched/dense/Test_Batched_TeamVectorGemm.hpp index 074a1342fa..d104df2b06 100644 --- a/unit_test/batched/dense/Test_Batched_TeamVectorGemm.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamVectorGemm.hpp @@ 
-49,18 +49,8 @@ struct Functor_TestBatchedTeamVector { inline void run() { typedef typename ViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamVector"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _c.extent(0); Kokkos::TeamPolicy policy(league_size, diff --git a/unit_test/batched/dense/Test_Batched_TeamVectorQR.hpp b/unit_test/batched/dense/Test_Batched_TeamVectorQR.hpp index a687ffad20..4ae4ee4133 100644 --- a/unit_test/batched/dense/Test_Batched_TeamVectorQR.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamVectorQR.hpp @@ -79,18 +79,8 @@ struct Functor_TestBatchedTeamVectorQR { inline void run() { typedef typename MatrixViewType::non_const_value_type value_type; std::string name_region("KokkosBatched::Test::TeamVectorQR"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _a.extent(0); diff --git a/unit_test/batched/dense/Test_Batched_TeamVectorQR_WithColumnPivoting.hpp b/unit_test/batched/dense/Test_Batched_TeamVectorQR_WithColumnPivoting.hpp index 61bf8bf7ec..3ae24bda84 100644 --- a/unit_test/batched/dense/Test_Batched_TeamVectorQR_WithColumnPivoting.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamVectorQR_WithColumnPivoting.hpp @@ -89,18 +89,8 @@ struct Functor_TestBatchedTeamVectorQR_WithColumnPivoting { typedef typename MatrixViewType::non_const_value_type value_type; std::string name_region( "KokkosBatched::Test::TeamVectorQR_WithColumnPivoting"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _a.extent(0); diff --git a/unit_test/batched/dense/Test_Batched_TeamVectorSolveUTV.hpp b/unit_test/batched/dense/Test_Batched_TeamVectorSolveUTV.hpp index 4fe2133982..6610383d12 100644 --- a/unit_test/batched/dense/Test_Batched_TeamVectorSolveUTV.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamVectorSolveUTV.hpp @@ -101,18 +101,8 @@ struct Functor_TestBatchedTeamVectorSolveUTV { inline void run() { typedef typename MatrixViewType::non_const_value_type value_type; std::string name_region("KokkosBatched::Test::TeamVectorSolveUTV"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? 
"::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _a.extent(0); diff --git a/unit_test/batched/dense/Test_Batched_TeamVectorSolveUTV2.hpp b/unit_test/batched/dense/Test_Batched_TeamVectorSolveUTV2.hpp index 81bba6fe76..77bec61c28 100644 --- a/unit_test/batched/dense/Test_Batched_TeamVectorSolveUTV2.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamVectorSolveUTV2.hpp @@ -105,18 +105,8 @@ struct Functor_TestBatchedTeamVectorSolveUTV2 { inline void run() { typedef typename MatrixViewType::non_const_value_type value_type; std::string name_region("KokkosBatched::Test::TeamVectorSolveUTV"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _a.extent(0); diff --git a/unit_test/batched/dense/Test_Batched_TeamVectorUTV.hpp b/unit_test/batched/dense/Test_Batched_TeamVectorUTV.hpp index 54512f87d4..0a49db7dce 100644 --- a/unit_test/batched/dense/Test_Batched_TeamVectorUTV.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamVectorUTV.hpp @@ -135,18 +135,8 @@ struct Functor_TestBatchedTeamVectorUTV { inline void run() { typedef typename MatrixViewType::non_const_value_type value_type; std::string name_region("KokkosBatched::Test::TeamVectorUTV"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? 
"::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); const int league_size = _a.extent(0); diff --git a/unit_test/batched/sparse/Test_Batched_SerialSpmv.hpp b/unit_test/batched/sparse/Test_Batched_SerialSpmv.hpp index 46ac10a4d6..c482f8fdb0 100644 --- a/unit_test/batched/sparse/Test_Batched_SerialSpmv.hpp +++ b/unit_test/batched/sparse/Test_Batched_SerialSpmv.hpp @@ -10,6 +10,7 @@ #include "KokkosBatched_Spmv_Serial_Impl.hpp" #include "KokkosKernels_TestUtils.hpp" +#include "Test_Batched_SparseUtils.hpp" using namespace KokkosBatched; @@ -56,18 +57,8 @@ struct Functor_TestBatchedSerialSpmv { inline void run() { typedef typename ValuesViewType::value_type value_type; std::string name_region("KokkosBatched::Test::SerialSpmv"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::RangePolicy policy(0, _D.extent(0)); Kokkos::parallel_for(name.c_str(), policy, *this); @@ -96,46 +87,7 @@ void impl_test_batched_spmv(const int N, const int BlkSize) { Kokkos::deep_copy(alpha, value_type(1.0)); Kokkos::deep_copy(beta, value_type(1.0)); - Kokkos::Random_XorShift64_Pool random( - 13718); - Kokkos::fill_random(X0, random, value_type(1.0)); - Kokkos::fill_random(Y0, random, value_type(1.0)); - - auto D_host = Kokkos::create_mirror_view(D); - auto r_host = Kokkos::create_mirror_view(r); - auto c_host = Kokkos::create_mirror_view(c); - - r_host(0) = 0; - - int current_col = 0; - - for (int i = 0; i < BlkSize; ++i) { - r_host(i + 1) = r_host(i) + (i == 0 || i == (BlkSize - 1) ? 2 : 3); - } - for (int i = 0; i < nnz; ++i) { - if (i % 3 == 0) { - for (int l = 0; l < N; ++l) { - D_host(l, i) = value_type(1.0); - } - c_host(i) = current_col; - ++current_col; - } else { - for (int l = 0; l < N; ++l) { - D_host(l, i) = value_type(0.5); - } - c_host(i) = current_col; - if (i % 3 == 1) - --current_col; - else - ++current_col; - } - } - - Kokkos::fence(); - - Kokkos::deep_copy(D, D_host); - Kokkos::deep_copy(r, r_host); - Kokkos::deep_copy(c, c_host); + create_tridiagonal_batched_matrices(nnz, BlkSize, N, r, c, D, X0, Y0); Kokkos::deep_copy(X1, X0); Kokkos::deep_copy(Y1, Y0); @@ -160,13 +112,13 @@ void impl_test_batched_spmv(const int N, const int BlkSize) { if (i != 0 && i != (BlkSize - 1)) Y0_host(l, i) += alpha_host(l) * - (X0_host(l, i) + 0.5 * X0_host(l, i - 1) + 0.5 * X0_host(l, i + 1)); + (2 * X0_host(l, i) - X0_host(l, i - 1) - X0_host(l, i + 1)); else if (i == 0) Y0_host(l, i) += - alpha_host(l) * (X0_host(l, i) + 0.5 * X0_host(l, i + 1)); + alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i + 
1)); else Y0_host(l, i) += - alpha_host(l) * (X0_host(l, i) + 0.5 * X0_host(l, i - 1)); + alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i - 1)); } Functor_TestBatchedSerialSpmv +void create_tridiagonal_batched_matrices(const int nnz, const int BlkSize, + const int N, const IntView &r, + const IntView &c, + const VectorViewType &D, + const VectorViewType &X, + const VectorViewType &B) { + Kokkos::Random_XorShift64_Pool< + typename VectorViewType::device_type::execution_space> + random(13718); + Kokkos::fill_random( + X, random, + Kokkos::reduction_identity::prod()); + Kokkos::fill_random( + B, random, + Kokkos::reduction_identity::prod()); + + auto D_host = Kokkos::create_mirror_view(D); + auto r_host = Kokkos::create_mirror_view(r); + auto c_host = Kokkos::create_mirror_view(c); + + r_host(0) = 0; + + int current_col = 0; + + for (int i = 0; i < BlkSize; ++i) { + r_host(i + 1) = r_host(i) + (i == 0 || i == (BlkSize - 1) ? 2 : 3); + } + for (int i = 0; i < nnz; ++i) { + if (i % 3 == 0) { + for (int l = 0; l < N; ++l) { + D_host(l, i) = typename VectorViewType::value_type(2.0); + } + c_host(i) = current_col; + ++current_col; + } else { + for (int l = 0; l < N; ++l) { + D_host(l, i) = typename VectorViewType::value_type(-1.0); + } + c_host(i) = current_col; + if (i % 3 == 1) + --current_col; + else + ++current_col; + } + } + + Kokkos::fence(); + + Kokkos::deep_copy(D, D_host); + Kokkos::deep_copy(r, r_host); + Kokkos::deep_copy(c, c_host); + + Kokkos::fence(); +} +} // namespace KokkosBatched + +#endif // TEST_BATCHED_SPARSE_HELPER_HPP diff --git a/unit_test/batched/sparse/Test_Batched_TeamCG.hpp b/unit_test/batched/sparse/Test_Batched_TeamCG.hpp new file mode 100644 index 0000000000..2177264489 --- /dev/null +++ b/unit_test/batched/sparse/Test_Batched_TeamCG.hpp @@ -0,0 +1,188 @@ +/// \author Kim Liegeois (knliege@sandia.gov) + +#include "gtest/gtest.h" +#include "Kokkos_Core.hpp" +#include "Kokkos_Random.hpp" +#include "KokkosBatched_CG.hpp" +#include 
"KokkosKernels_TestUtils.hpp" +#include "KokkosBatched_CrsMatrix.hpp" +#include "Test_Batched_SparseUtils.hpp" + +using namespace KokkosBatched; + +namespace Test { +namespace TeamCG { + +template +struct Functor_TestBatchedTeamCG { + const ValuesViewType _D; + const IntView _r; + const IntView _c; + const VectorViewType _X; + const VectorViewType _B; + const int _N_team; + KrylovHandle *handle; + + KOKKOS_INLINE_FUNCTION + Functor_TestBatchedTeamCG(const ValuesViewType &D, const IntView &r, + const IntView &c, const VectorViewType &X, + const VectorViewType &B, const int N_team) + : _D(D), _r(r), _c(c), _X(X), _B(B), _N_team(N_team) { + handle = new KrylovHandle; + } + + template + KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { + const int first_matrix = static_cast(member.league_rank()) * _N_team; + const int N = _D.extent(0); + const int last_matrix = + (static_cast(member.league_rank() + 1) * _N_team < N + ? static_cast(member.league_rank() + 1) * _N_team + : N); + + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::ALL); + + using Operator = KokkosBatched::CrsMatrix; + + Operator A(d, _r, _c); + + KokkosBatched::TeamCG::template invoke( + member, A, b, x, handle); + } + + inline void run() { + typedef typename ValuesViewType::value_type value_type; + std::string name_region("KokkosBatched::Test::TeamCG"); + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; + Kokkos::Profiling::pushRegion(name.c_str()); + Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, + Kokkos::AUTO(), Kokkos::AUTO()); + + size_t bytes_0 = ValuesViewType::shmem_size(_N_team, _D.extent(1)); + size_t bytes_1 = ValuesViewType::shmem_size(_N_team, 1); + policy.set_scratch_size(0, 
Kokkos::PerTeam(4 * bytes_0 + 5 * bytes_1)); + + Kokkos::parallel_for(name.c_str(), policy, *this); + Kokkos::Profiling::popRegion(); + } +}; + +template +void impl_test_batched_CG(const int N, const int BlkSize, const int N_team) { + typedef typename ValuesViewType::value_type value_type; + typedef Kokkos::Details::ArithTraits ats; + + const int nnz = (BlkSize - 2) * 3 + 2 * 2; + + VectorViewType X("x0", N, BlkSize); + VectorViewType R("r0", N, BlkSize); + VectorViewType B("b", N, BlkSize); + ValuesViewType D("D", N, nnz); + IntView r("r", BlkSize + 1); + IntView c("c", nnz); + + using ScalarType = typename ValuesViewType::non_const_value_type; + using Layout = typename ValuesViewType::array_layout; + using EXSP = typename ValuesViewType::execution_space; + + using MagnitudeType = + typename Kokkos::Details::ArithTraits::mag_type; + using NormViewType = Kokkos::View; + + NormViewType sqr_norm_0("sqr_norm_0", N); + NormViewType sqr_norm_j("sqr_norm_j", N); + + create_tridiagonal_batched_matrices(nnz, BlkSize, N, r, c, D, X, B); + + // Compute initial norm + + Kokkos::deep_copy(R, B); + + auto sqr_norm_0_host = Kokkos::create_mirror_view(sqr_norm_0); + auto sqr_norm_j_host = Kokkos::create_mirror_view(sqr_norm_j); + auto R_host = Kokkos::create_mirror_view(R); + auto X_host = Kokkos::create_mirror_view(X); + auto D_host = Kokkos::create_mirror_view(D); + auto r_host = Kokkos::create_mirror_view(r); + auto c_host = Kokkos::create_mirror_view(c); + + Kokkos::deep_copy(R, B); + Kokkos::deep_copy(R_host, R); + Kokkos::deep_copy(X_host, X); + + Kokkos::deep_copy(c_host, c); + Kokkos::deep_copy(r_host, r); + Kokkos::deep_copy(D_host, D); + + KokkosBatched::SerialSpmv::template invoke< + typename ValuesViewType::HostMirror, typename IntView::HostMirror, + typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, + 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, + sqr_norm_0_host); + 
Functor_TestBatchedTeamCG(D, r, c, X, B, N_team) + .run(); + + Kokkos::fence(); + + Kokkos::deep_copy(R, B); + Kokkos::deep_copy(R_host, R); + Kokkos::deep_copy(X_host, X); + + KokkosBatched::SerialSpmv::template invoke< + typename ValuesViewType::HostMirror, typename IntView::HostMirror, + typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, + 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, + sqr_norm_j_host); + + const MagnitudeType eps = 1.0e3 * ats::epsilon(); + + for (int l = 0; l < N; ++l) + EXPECT_NEAR_KK(sqr_norm_j_host(l) / sqr_norm_0_host(l), 0, eps); +} +} // namespace TeamCG +} // namespace Test + +template +int test_batched_team_CG() { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) + { + typedef Kokkos::View ViewType; + typedef Kokkos::View IntView; + typedef Kokkos::View + VectorViewType; + + for (int i = 3; i < 10; ++i) { + Test::TeamCG::impl_test_batched_CG(1024, i, 2); + } + } +#endif +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) + { + typedef Kokkos::View + ViewType; + typedef Kokkos::View IntView; + typedef Kokkos::View + VectorViewType; + + for (int i = 3; i < 10; ++i) { + Test::TeamCG::impl_test_batched_CG(1024, i, 2); + } + } +#endif + + return 0; +} diff --git a/unit_test/batched/sparse/Test_Batched_TeamCG_Real.hpp b/unit_test/batched/sparse/Test_Batched_TeamCG_Real.hpp new file mode 100644 index 0000000000..1e6586a68e --- /dev/null +++ b/unit_test/batched/sparse/Test_Batched_TeamCG_Real.hpp @@ -0,0 +1,12 @@ + +#if defined(KOKKOSKERNELS_INST_FLOAT) +TEST_F(TestCategory, batched_scalar_team_CG_float) { + test_batched_team_CG(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) +TEST_F(TestCategory, batched_scalar_team_CG_double) { + test_batched_team_CG(); +} +#endif diff --git a/unit_test/batched/sparse/Test_Batched_TeamGMRES.hpp b/unit_test/batched/sparse/Test_Batched_TeamGMRES.hpp new file mode 100644 index 0000000000..11b382c640 --- /dev/null +++ 
b/unit_test/batched/sparse/Test_Batched_TeamGMRES.hpp @@ -0,0 +1,198 @@ +/// \author Kim Liegeois (knliege@sandia.gov) + +#include "gtest/gtest.h" +#include "Kokkos_Core.hpp" +#include "Kokkos_Random.hpp" +#include "KokkosBatched_GMRES.hpp" +#include "KokkosKernels_TestUtils.hpp" +#include "KokkosBatched_CrsMatrix.hpp" +#include "Test_Batched_SparseUtils.hpp" + +using namespace KokkosBatched; + +namespace Test { +namespace TeamGMRES { + +template +struct Functor_TestBatchedTeamGMRES { + const ValuesViewType _D; + const IntView _r; + const IntView _c; + const VectorViewType _X; + const VectorViewType _B; + const int _N_team; + KrylovHandle *handle; + + KOKKOS_INLINE_FUNCTION + Functor_TestBatchedTeamGMRES(const ValuesViewType &D, const IntView &r, + const IntView &c, const VectorViewType &X, + const VectorViewType &B, const int N_team) + : _D(D), _r(r), _c(c), _X(X), _B(B), _N_team(N_team) { + handle = new KrylovHandle; + } + + template + KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { + const int first_matrix = static_cast(member.league_rank()) * _N_team; + const int N = _D.extent(0); + const int last_matrix = + (static_cast(member.league_rank() + 1) * _N_team < N + ? 
static_cast(member.league_rank() + 1) * _N_team + : N); + + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::ALL); + + using Operator = KokkosBatched::CrsMatrix; + + Operator A(d, _r, _c); + + KokkosBatched::TeamGMRES::template invoke( + member, A, b, x, handle); + } + + inline void run() { + typedef typename ValuesViewType::value_type value_type; + std::string name_region("KokkosBatched::Test::TeamGMRES"); + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; + Kokkos::Profiling::pushRegion(name.c_str()); + Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, + Kokkos::AUTO(), Kokkos::AUTO()); + + size_t bytes_0 = ValuesViewType::shmem_size(_N_team, _D.extent(1)); + size_t bytes_1 = ValuesViewType::shmem_size(_N_team, 1); + + handle->set_max_iteration(10); + + int maximum_iteration = handle->get_max_iteration(); + + policy.set_scratch_size(0, Kokkos::PerTeam(5 * bytes_0 + 5 * bytes_1)); + policy.set_scratch_size( + 1, Kokkos::PerTeam(maximum_iteration * bytes_0 + + ((maximum_iteration + 3) * maximum_iteration) * + bytes_1)); + + Kokkos::parallel_for(name.c_str(), policy, *this); + Kokkos::Profiling::popRegion(); + } +}; + +template +void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { + typedef typename ValuesViewType::value_type value_type; + typedef Kokkos::Details::ArithTraits ats; + + const int nnz = (BlkSize - 2) * 3 + 2 * 2; + + VectorViewType X("x0", N, BlkSize); + VectorViewType R("r0", N, BlkSize); + VectorViewType B("b", N, BlkSize); + ValuesViewType D("D", N, nnz); + IntView r("r", BlkSize + 1); + IntView c("c", nnz); + + using ScalarType = typename ValuesViewType::non_const_value_type; + using Layout = typename ValuesViewType::array_layout; + using 
EXSP = typename ValuesViewType::execution_space; + + using MagnitudeType = + typename Kokkos::Details::ArithTraits::mag_type; + using NormViewType = Kokkos::View; + + NormViewType sqr_norm_0("sqr_norm_0", N); + NormViewType sqr_norm_j("sqr_norm_j", N); + + create_tridiagonal_batched_matrices(nnz, BlkSize, N, r, c, D, X, B); + + // Compute initial norm + + Kokkos::deep_copy(R, B); + + auto sqr_norm_0_host = Kokkos::create_mirror_view(sqr_norm_0); + auto sqr_norm_j_host = Kokkos::create_mirror_view(sqr_norm_j); + auto R_host = Kokkos::create_mirror_view(R); + auto X_host = Kokkos::create_mirror_view(X); + auto D_host = Kokkos::create_mirror_view(D); + auto r_host = Kokkos::create_mirror_view(r); + auto c_host = Kokkos::create_mirror_view(c); + + Kokkos::deep_copy(R, B); + Kokkos::deep_copy(R_host, R); + Kokkos::deep_copy(X_host, X); + + Kokkos::deep_copy(c_host, c); + Kokkos::deep_copy(r_host, r); + Kokkos::deep_copy(D_host, D); + + KokkosBatched::SerialSpmv::template invoke< + typename ValuesViewType::HostMirror, typename IntView::HostMirror, + typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, + 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, + sqr_norm_0_host); + Functor_TestBatchedTeamGMRES(D, r, c, X, B, N_team) + .run(); + + Kokkos::fence(); + + Kokkos::deep_copy(R, B); + Kokkos::deep_copy(R_host, R); + Kokkos::deep_copy(X_host, X); + + KokkosBatched::SerialSpmv::template invoke< + typename ValuesViewType::HostMirror, typename IntView::HostMirror, + typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, + 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, + sqr_norm_j_host); + + const MagnitudeType eps = 1.0e5 * ats::epsilon(); + + for (int l = 0; l < N; ++l) + EXPECT_NEAR_KK( + std::sqrt(sqr_norm_j_host(l)) / std::sqrt(sqr_norm_0_host(l)), 0, eps); +} +} // namespace TeamGMRES +} // namespace Test + +template +int 
test_batched_team_GMRES() { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) + { + typedef Kokkos::View ViewType; + typedef Kokkos::View IntView; + typedef Kokkos::View + VectorViewType; + + for (int i = 3; i < 10; ++i) { + Test::TeamGMRES::impl_test_batched_GMRES(1024, i, 2); + } + } +#endif +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) + { + typedef Kokkos::View + ViewType; + typedef Kokkos::View IntView; + typedef Kokkos::View + VectorViewType; + + for (int i = 3; i < 10; ++i) { + Test::TeamGMRES::impl_test_batched_GMRES(1024, i, 2); + } + } +#endif + + return 0; +} diff --git a/unit_test/batched/sparse/Test_Batched_TeamGMRES_Real.hpp b/unit_test/batched/sparse/Test_Batched_TeamGMRES_Real.hpp new file mode 100644 index 0000000000..1003a1d1f2 --- /dev/null +++ b/unit_test/batched/sparse/Test_Batched_TeamGMRES_Real.hpp @@ -0,0 +1,12 @@ + +#if defined(KOKKOSKERNELS_INST_FLOAT) +TEST_F(TestCategory, batched_scalar_team_GMRES_float) { + test_batched_team_GMRES(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) +TEST_F(TestCategory, batched_scalar_team_GMRES_double) { + test_batched_team_GMRES(); +} +#endif diff --git a/unit_test/batched/sparse/Test_Batched_TeamSpmv.hpp b/unit_test/batched/sparse/Test_Batched_TeamSpmv.hpp index 2043788f9c..0b0f96e9b1 100644 --- a/unit_test/batched/sparse/Test_Batched_TeamSpmv.hpp +++ b/unit_test/batched/sparse/Test_Batched_TeamSpmv.hpp @@ -11,6 +11,8 @@ #include "KokkosKernels_TestUtils.hpp" +#include "Test_Batched_SparseUtils.hpp" + using namespace KokkosBatched; namespace Test { @@ -79,18 +81,8 @@ struct Functor_TestBatchedTeamSpmv { inline void run() { typedef typename ValuesViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamSpmv"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? "::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? 
"::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::TeamPolicy policy( _D.extent(0) / _N_team, Kokkos::AUTO(), Kokkos::AUTO()); @@ -120,46 +112,7 @@ void impl_test_batched_spmv(const int N, const int BlkSize, const int N_team) { Kokkos::deep_copy(alpha, value_type(1.0)); Kokkos::deep_copy(beta, value_type(1.0)); - Kokkos::Random_XorShift64_Pool random( - 13718); - Kokkos::fill_random(X0, random, value_type(1.0)); - Kokkos::fill_random(Y0, random, value_type(1.0)); - - auto D_host = Kokkos::create_mirror_view(D); - auto r_host = Kokkos::create_mirror_view(r); - auto c_host = Kokkos::create_mirror_view(c); - - r_host(0) = 0; - - int current_col = 0; - - for (int i = 0; i < BlkSize; ++i) { - r_host(i + 1) = r_host(i) + (i == 0 || i == (BlkSize - 1) ? 2 : 3); - } - for (int i = 0; i < nnz; ++i) { - if (i % 3 == 0) { - for (int l = 0; l < N; ++l) { - D_host(l, i) = value_type(1.0); - } - c_host(i) = current_col; - ++current_col; - } else { - for (int l = 0; l < N; ++l) { - D_host(l, i) = value_type(0.5); - } - c_host(i) = current_col; - if (i % 3 == 1) - --current_col; - else - ++current_col; - } - } - - Kokkos::fence(); - - Kokkos::deep_copy(D, D_host); - Kokkos::deep_copy(r, r_host); - Kokkos::deep_copy(c, c_host); + create_tridiagonal_batched_matrices(nnz, BlkSize, N, r, c, D, X0, Y0); Kokkos::deep_copy(X1, X0); Kokkos::deep_copy(Y1, Y0); @@ -184,13 +137,13 @@ void impl_test_batched_spmv(const int N, const int BlkSize, const int N_team) { if (i != 0 && i != (BlkSize - 1)) Y0_host(l, i) += alpha_host(l) * - (X0_host(l, i) + 0.5 * X0_host(l, i - 1) + 0.5 * X0_host(l, i + 1)); + (2 * X0_host(l, i) - X0_host(l, i - 1) - X0_host(l, i + 1)); else if (i == 0) Y0_host(l, i) += - alpha_host(l) * (X0_host(l, i) + 0.5 * X0_host(l, i + 1)); + alpha_host(l) * (2 * 
X0_host(l, i) - X0_host(l, i + 1)); else Y0_host(l, i) += - alpha_host(l) * (X0_host(l, i) + 0.5 * X0_host(l, i - 1)); + alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i - 1)); } Functor_TestBatchedTeamSpmv +struct Functor_TestBatchedTeamVectorCG { + const ValuesViewType _D; + const IntView _r; + const IntView _c; + const VectorViewType _X; + const VectorViewType _B; + const int _N_team; + KrylovHandle *handle; + + KOKKOS_INLINE_FUNCTION + Functor_TestBatchedTeamVectorCG(const ValuesViewType &D, const IntView &r, + const IntView &c, const VectorViewType &X, + const VectorViewType &B, const int N_team) + : _D(D), _r(r), _c(c), _X(X), _B(B), _N_team(N_team) { + handle = new KrylovHandle(); + } + + template + KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { + const int first_matrix = static_cast(member.league_rank()) * _N_team; + const int N = _D.extent(0); + const int last_matrix = + (static_cast(member.league_rank() + 1) * _N_team < N + ? static_cast(member.league_rank() + 1) * _N_team + : N); + + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::ALL); + + using Operator = KokkosBatched::CrsMatrix; + + Operator A(d, _r, _c); + + KokkosBatched::TeamVectorCG::template invoke( + member, A, b, x, handle); + } + + inline void run() { + typedef typename ValuesViewType::value_type value_type; + std::string name_region("KokkosBatched::Test::TeamVectorCG"); + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; + Kokkos::Profiling::pushRegion(name.c_str()); + Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, + Kokkos::AUTO(), Kokkos::AUTO()); + + size_t bytes_0 = ValuesViewType::shmem_size(_N_team, _D.extent(1)); + size_t bytes_1 = ValuesViewType::shmem_size(_N_team, 1); + 
policy.set_scratch_size(0, Kokkos::PerTeam(4 * bytes_0 + 5 * bytes_1)); + + Kokkos::parallel_for(name.c_str(), policy, *this); + Kokkos::Profiling::popRegion(); + } +}; + +template +void impl_test_batched_CG(const int N, const int BlkSize, const int N_team) { + typedef typename ValuesViewType::value_type value_type; + typedef Kokkos::Details::ArithTraits ats; + + const int nnz = (BlkSize - 2) * 3 + 2 * 2; + + VectorViewType X("x0", N, BlkSize); + VectorViewType R("r0", N, BlkSize); + VectorViewType B("b", N, BlkSize); + ValuesViewType D("D", N, nnz); + IntView r("r", BlkSize + 1); + IntView c("c", nnz); + + using ScalarType = typename ValuesViewType::non_const_value_type; + using Layout = typename ValuesViewType::array_layout; + using EXSP = typename ValuesViewType::execution_space; + + using MagnitudeType = + typename Kokkos::Details::ArithTraits::mag_type; + using NormViewType = Kokkos::View; + + NormViewType sqr_norm_0("sqr_norm_0", N); + NormViewType sqr_norm_j("sqr_norm_j", N); + + create_tridiagonal_batched_matrices(nnz, BlkSize, N, r, c, D, X, B); + + // Compute initial norm + + Kokkos::deep_copy(R, B); + + auto sqr_norm_0_host = Kokkos::create_mirror_view(sqr_norm_0); + auto sqr_norm_j_host = Kokkos::create_mirror_view(sqr_norm_j); + auto R_host = Kokkos::create_mirror_view(R); + auto X_host = Kokkos::create_mirror_view(X); + auto D_host = Kokkos::create_mirror_view(D); + auto r_host = Kokkos::create_mirror_view(r); + auto c_host = Kokkos::create_mirror_view(c); + + Kokkos::deep_copy(R, B); + Kokkos::deep_copy(R_host, R); + Kokkos::deep_copy(X_host, X); + + Kokkos::deep_copy(c_host, c); + Kokkos::deep_copy(r_host, r); + Kokkos::deep_copy(D_host, D); + + KokkosBatched::SerialSpmv::template invoke< + typename ValuesViewType::HostMirror, typename IntView::HostMirror, + typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, + 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, + 
sqr_norm_0_host); + Functor_TestBatchedTeamVectorCG(D, r, c, X, B, N_team) + .run(); + + Kokkos::fence(); + + Kokkos::deep_copy(R, B); + Kokkos::deep_copy(R_host, R); + Kokkos::deep_copy(X_host, X); + + KokkosBatched::SerialSpmv::template invoke< + typename ValuesViewType::HostMirror, typename IntView::HostMirror, + typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, + 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, + sqr_norm_j_host); + + const MagnitudeType eps = 1.0e3 * ats::epsilon(); + + for (int l = 0; l < N; ++l) + EXPECT_NEAR_KK(sqr_norm_j_host(l) / sqr_norm_0_host(l), 0, eps); +} +} // namespace TeamVectorCG +} // namespace Test + +template +int test_batched_teamvector_CG() { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) + { + typedef Kokkos::View ViewType; + typedef Kokkos::View IntView; + typedef Kokkos::View + VectorViewType; + + for (int i = 3; i < 10; ++i) { + Test::TeamVectorCG::impl_test_batched_CG(1024, i, 2); + } + } +#endif +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) + { + typedef Kokkos::View + ViewType; + typedef Kokkos::View IntView; + typedef Kokkos::View + VectorViewType; + + for (int i = 3; i < 10; ++i) { + Test::TeamVectorCG::impl_test_batched_CG(1024, i, 2); + } + } +#endif + + return 0; +} diff --git a/unit_test/batched/sparse/Test_Batched_TeamVectorCG_Real.hpp b/unit_test/batched/sparse/Test_Batched_TeamVectorCG_Real.hpp new file mode 100644 index 0000000000..526f1f7c03 --- /dev/null +++ b/unit_test/batched/sparse/Test_Batched_TeamVectorCG_Real.hpp @@ -0,0 +1,12 @@ + +#if defined(KOKKOSKERNELS_INST_FLOAT) +TEST_F(TestCategory, batched_scalar_teamvector_CG_float) { + test_batched_teamvector_CG(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) +TEST_F(TestCategory, batched_scalar_teamvector_CG_double) { + test_batched_teamvector_CG(); +} +#endif diff --git a/unit_test/batched/sparse/Test_Batched_TeamVectorGMRES.hpp 
b/unit_test/batched/sparse/Test_Batched_TeamVectorGMRES.hpp new file mode 100644 index 0000000000..bb6abdc1a9 --- /dev/null +++ b/unit_test/batched/sparse/Test_Batched_TeamVectorGMRES.hpp @@ -0,0 +1,200 @@ +/// \author Kim Liegeois (knliege@sandia.gov) + +#include "gtest/gtest.h" +#include "Kokkos_Core.hpp" +#include "Kokkos_Random.hpp" +#include "KokkosBatched_GMRES.hpp" +#include "KokkosKernels_TestUtils.hpp" +#include "KokkosBatched_CrsMatrix.hpp" +#include "Test_Batched_SparseUtils.hpp" + +using namespace KokkosBatched; + +namespace Test { +namespace TeamVectorGMRES { + +template +struct Functor_TestBatchedTeamVectorGMRES { + const ValuesViewType _D; + const IntView _r; + const IntView _c; + const VectorViewType _X; + const VectorViewType _B; + const int _N_team; + KrylovHandle *handle; + + KOKKOS_INLINE_FUNCTION + Functor_TestBatchedTeamVectorGMRES(const ValuesViewType &D, const IntView &r, + const IntView &c, const VectorViewType &X, + const VectorViewType &B, const int N_team) + : _D(D), _r(r), _c(c), _X(X), _B(B), _N_team(N_team) { + handle = new KrylovHandle; + } + + template + KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const { + const int first_matrix = static_cast(member.league_rank()) * _N_team; + const int N = _D.extent(0); + const int last_matrix = + (static_cast(member.league_rank() + 1) * _N_team < N + ? 
static_cast(member.league_rank() + 1) * _N_team + : N); + + auto d = Kokkos::subview(_D, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::ALL); + auto x = Kokkos::subview(_X, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::ALL); + auto b = Kokkos::subview(_B, Kokkos::make_pair(first_matrix, last_matrix), + Kokkos::ALL); + + using Operator = KokkosBatched::CrsMatrix; + + Operator A(d, _r, _c); + + KokkosBatched::TeamVectorGMRES::template invoke( + member, A, b, x, handle); + } + + inline void run() { + typedef typename ValuesViewType::value_type value_type; + std::string name_region("KokkosBatched::Test::TeamVectorGMRES"); + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; + Kokkos::Profiling::pushRegion(name.c_str()); + Kokkos::TeamPolicy policy(_D.extent(0) / _N_team, + Kokkos::AUTO(), Kokkos::AUTO()); + + size_t bytes_0 = ValuesViewType::shmem_size(_N_team, _D.extent(1)); + size_t bytes_1 = ValuesViewType::shmem_size(_N_team, 1); + + handle->set_max_iteration(10); + + int maximum_iteration = handle->get_max_iteration(); + + policy.set_scratch_size(0, Kokkos::PerTeam(5 * bytes_0 + 5 * bytes_1)); + policy.set_scratch_size( + 1, Kokkos::PerTeam(maximum_iteration * bytes_0 + + ((maximum_iteration + 3) * maximum_iteration) * + bytes_1)); + + Kokkos::parallel_for(name.c_str(), policy, *this); + Kokkos::Profiling::popRegion(); + } +}; + +template +void impl_test_batched_GMRES(const int N, const int BlkSize, const int N_team) { + typedef typename ValuesViewType::value_type value_type; + typedef Kokkos::Details::ArithTraits ats; + + const int nnz = (BlkSize - 2) * 3 + 2 * 2; + + VectorViewType X("x0", N, BlkSize); + VectorViewType R("r0", N, BlkSize); + VectorViewType B("b", N, BlkSize); + ValuesViewType D("D", N, nnz); + IntView r("r", BlkSize + 1); + IntView c("c", nnz); + + using ScalarType = typename ValuesViewType::non_const_value_type; + using Layout = typename 
ValuesViewType::array_layout; + using EXSP = typename ValuesViewType::execution_space; + + using MagnitudeType = + typename Kokkos::Details::ArithTraits::mag_type; + using NormViewType = Kokkos::View; + + NormViewType sqr_norm_0("sqr_norm_0", N); + NormViewType sqr_norm_j("sqr_norm_j", N); + + create_tridiagonal_batched_matrices(nnz, BlkSize, N, r, c, D, X, B); + + // Compute initial norm + + Kokkos::deep_copy(R, B); + + auto sqr_norm_0_host = Kokkos::create_mirror_view(sqr_norm_0); + auto sqr_norm_j_host = Kokkos::create_mirror_view(sqr_norm_j); + auto R_host = Kokkos::create_mirror_view(R); + auto X_host = Kokkos::create_mirror_view(X); + auto D_host = Kokkos::create_mirror_view(D); + auto r_host = Kokkos::create_mirror_view(r); + auto c_host = Kokkos::create_mirror_view(c); + + Kokkos::deep_copy(R, B); + Kokkos::deep_copy(R_host, R); + Kokkos::deep_copy(X_host, X); + + Kokkos::deep_copy(c_host, c); + Kokkos::deep_copy(r_host, r); + Kokkos::deep_copy(D_host, D); + + KokkosBatched::SerialSpmv::template invoke< + typename ValuesViewType::HostMirror, typename IntView::HostMirror, + typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, + 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, + sqr_norm_0_host); + Functor_TestBatchedTeamVectorGMRES(D, r, c, X, B, N_team) + .run(); + + Kokkos::fence(); + + Kokkos::deep_copy(R, B); + Kokkos::deep_copy(R_host, R); + Kokkos::deep_copy(X_host, X); + + KokkosBatched::SerialSpmv::template invoke< + typename ValuesViewType::HostMirror, typename IntView::HostMirror, + typename VectorViewType::HostMirror, typename VectorViewType::HostMirror, + 1>(-1, D_host, r_host, c_host, X_host, 1, R_host); + KokkosBatched::SerialDot::invoke(R_host, R_host, + sqr_norm_j_host); + + const MagnitudeType eps = 1.0e5 * ats::epsilon(); + + for (int l = 0; l < N; ++l) + EXPECT_NEAR_KK( + std::sqrt(sqr_norm_j_host(l)) / std::sqrt(sqr_norm_0_host(l)), 0, eps); +} +} // namespace 
TeamVectorGMRES +} // namespace Test + +template +int test_batched_teamvector_GMRES() { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) + { + typedef Kokkos::View ViewType; + typedef Kokkos::View IntView; + typedef Kokkos::View + VectorViewType; + + for (int i = 3; i < 10; ++i) { + Test::TeamVectorGMRES::impl_test_batched_GMRES( + 1024, i, 2); + } + } +#endif +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) + { + typedef Kokkos::View + ViewType; + typedef Kokkos::View IntView; + typedef Kokkos::View + VectorViewType; + + for (int i = 3; i < 10; ++i) { + Test::TeamVectorGMRES::impl_test_batched_GMRES( + 1024, i, 2); + } + } +#endif + + return 0; +} diff --git a/unit_test/batched/sparse/Test_Batched_TeamVectorGMRES_Real.hpp b/unit_test/batched/sparse/Test_Batched_TeamVectorGMRES_Real.hpp new file mode 100644 index 0000000000..a7cb9225a0 --- /dev/null +++ b/unit_test/batched/sparse/Test_Batched_TeamVectorGMRES_Real.hpp @@ -0,0 +1,12 @@ + +#if defined(KOKKOSKERNELS_INST_FLOAT) +TEST_F(TestCategory, batched_scalar_teamvector_GMRES_float) { + test_batched_teamvector_GMRES(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) +TEST_F(TestCategory, batched_scalar_teamvector_GMRES_double) { + test_batched_teamvector_GMRES(); +} +#endif diff --git a/unit_test/batched/sparse/Test_Batched_TeamVectorSpmv.hpp b/unit_test/batched/sparse/Test_Batched_TeamVectorSpmv.hpp index 9cd1d0504b..205ad6fa73 100644 --- a/unit_test/batched/sparse/Test_Batched_TeamVectorSpmv.hpp +++ b/unit_test/batched/sparse/Test_Batched_TeamVectorSpmv.hpp @@ -11,6 +11,8 @@ #include "KokkosKernels_TestUtils.hpp" +#include "Test_Batched_SparseUtils.hpp" + using namespace KokkosBatched; namespace Test { @@ -79,18 +81,8 @@ struct Functor_TestBatchedTeamVectorSpmv { inline void run() { typedef typename ValuesViewType::value_type value_type; std::string name_region("KokkosBatched::Test::TeamVectorSpmv"); - std::string name_value_type = - (std::is_same::value - ? "::Float" - : std::is_same::value - ? 
"::Double" - : std::is_same >::value - ? "::ComplexFloat" - : std::is_same >::value - ? "::ComplexDouble" - : "::UnknownValueType"); - std::string name = name_region + name_value_type; + const std::string name_value_type = Test::value_type_name(); + std::string name = name_region + name_value_type; Kokkos::Profiling::pushRegion(name.c_str()); Kokkos::TeamPolicy policy( _D.extent(0) / _N_team, Kokkos::AUTO(), Kokkos::AUTO()); @@ -120,46 +112,7 @@ void impl_test_batched_spmv(const int N, const int BlkSize, const int N_team) { Kokkos::deep_copy(alpha, value_type(1.0)); Kokkos::deep_copy(beta, value_type(1.0)); - Kokkos::Random_XorShift64_Pool random( - 13718); - Kokkos::fill_random(X0, random, value_type(1.0)); - Kokkos::fill_random(Y0, random, value_type(1.0)); - - auto D_host = Kokkos::create_mirror_view(D); - auto r_host = Kokkos::create_mirror_view(r); - auto c_host = Kokkos::create_mirror_view(c); - - r_host(0) = 0; - - int current_col = 0; - - for (int i = 0; i < BlkSize; ++i) { - r_host(i + 1) = r_host(i) + (i == 0 || i == (BlkSize - 1) ? 
2 : 3); - } - for (int i = 0; i < nnz; ++i) { - if (i % 3 == 0) { - for (int l = 0; l < N; ++l) { - D_host(l, i) = value_type(1.0); - } - c_host(i) = current_col; - ++current_col; - } else { - for (int l = 0; l < N; ++l) { - D_host(l, i) = value_type(0.5); - } - c_host(i) = current_col; - if (i % 3 == 1) - --current_col; - else - ++current_col; - } - } - - Kokkos::fence(); - - Kokkos::deep_copy(D, D_host); - Kokkos::deep_copy(r, r_host); - Kokkos::deep_copy(c, c_host); + create_tridiagonal_batched_matrices(nnz, BlkSize, N, r, c, D, X0, Y0); Kokkos::deep_copy(X1, X0); Kokkos::deep_copy(Y1, Y0); @@ -184,13 +137,13 @@ void impl_test_batched_spmv(const int N, const int BlkSize, const int N_team) { if (i != 0 && i != (BlkSize - 1)) Y0_host(l, i) += alpha_host(l) * - (X0_host(l, i) + 0.5 * X0_host(l, i - 1) + 0.5 * X0_host(l, i + 1)); + (2 * X0_host(l, i) - X0_host(l, i - 1) - X0_host(l, i + 1)); else if (i == 0) Y0_host(l, i) += - alpha_host(l) * (X0_host(l, i) + 0.5 * X0_host(l, i + 1)); + alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i + 1)); else Y0_host(l, i) += - alpha_host(l) * (X0_host(l, i) + 0.5 * X0_host(l, i - 1)); + alpha_host(l) * (2 * X0_host(l, i) - X0_host(l, i - 1)); } Functor_TestBatchedTeamVectorSpmv