From 90c0d404debc659fb46b6624ab95f1b4b3181b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Mon, 6 Jun 2022 15:12:54 +0200 Subject: [PATCH 01/15] Add team-vector GEMV to KokkosBlas --- src/blas/KokkosBlas2_teamvector_gemv.hpp | 89 ++++++ unit_test/blas/Test_Blas.hpp | 1 + unit_test/blas/Test_Blas2_teamvector_gemv.hpp | 259 ++++++++++++++++++ 3 files changed, 349 insertions(+) create mode 100644 src/blas/KokkosBlas2_teamvector_gemv.hpp create mode 100644 unit_test/blas/Test_Blas2_teamvector_gemv.hpp diff --git a/src/blas/KokkosBlas2_teamvector_gemv.hpp b/src/blas/KokkosBlas2_teamvector_gemv.hpp new file mode 100644 index 0000000000..67af3b151c --- /dev/null +++ b/src/blas/KokkosBlas2_teamvector_gemv.hpp @@ -0,0 +1,89 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSBLAS2_TEAMVECTOR_GEMV_HPP_ +#define KOKKOSBLAS2_TEAMVECTOR_GEMV_HPP_ + +#include "KokkosBatched_Gemv_Decl.hpp" +#include + +namespace KokkosBlas { +namespace Experimental { + +template +void KOKKOS_INLINE_FUNCTION team_vector_gemv( + const TeamType& team, const char trans, + const typename MatrixType::non_const_value_type& alpha, const MatrixType& A, + const XVector& x, const typename YVector::non_const_value_type& beta, + const YVector& y) { + static_assert( + std::is_same::value && + std::is_same::value, + "TeamVector GEMV requires A,x and y to have same scalar type"); + + const auto run = [&](auto mode) { + using algo = KokkosBatched::Algo::Gemv::Default; + using impl = KokkosBatched::TeamVectorGemv; + impl::invoke(team, alpha, A, x, beta, y); + }; + + if (trans == 'N' || trans == 'n') { + return run(KokkosBatched::Trans::NoTranspose()); + } else if (trans == 'T' || trans == 't') { + return run(KokkosBatched::Trans::Transpose()); + // } else if (trans == 'C' || trans == 'c') { // NOT implemented + // return run(KokkosBatched::Trans::ConjTranspose()); + } else { // conjugate[no-transpose] not supported ? + std::ostringstream os; + os << "Matrix mode not supported: " << trans << std::endl; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } +} + +} // namespace Experimental +} // namespace KokkosBlas + +#endif diff --git a/unit_test/blas/Test_Blas.hpp b/unit_test/blas/Test_Blas.hpp index 3b4f4355c3..4bcbf957c4 100644 --- a/unit_test/blas/Test_Blas.hpp +++ b/unit_test/blas/Test_Blas.hpp @@ -46,6 +46,7 @@ #include "Test_Blas2_serial_gemv.hpp" // Team Blas 2 #include "Test_Blas2_team_gemv.hpp" +#include "Test_Blas2_teamvector_gemv.hpp" // Blas 3 #include "Test_Blas3_gemm.hpp" diff --git a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp new file mode 100644 index 0000000000..19f32014a1 --- /dev/null +++ b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp @@ -0,0 +1,259 @@ +// Note: Luc Berger-Vergiat 04/14/21 +// This tests uses KOKKOS_LAMBDA so we need +// to make sure that these are enabled in +// the CUDA backend before including this test. +#if !defined(TEST_CUDA_BLAS_CPP) || defined(KOKKOS_ENABLE_CUDA_LAMBDA) + +#include +#include +#include +#include +#include +#include + +namespace Test { +template +void impl_test_teamvector_gemv(const char *mode, int N, int M) { + typedef Kokkos::TeamPolicy team_policy; + typedef typename team_policy::member_type team_member; + + // Launch K teams of the maximum number of threads per team + int K = 4; + const team_policy policy(K, Kokkos::AUTO); + const int team_data_size = (N % K == 0) ? (N / K) : (N / K + 1); + + typedef typename ViewTypeA::value_type ScalarA; + typedef typename ViewTypeX::value_type ScalarX; + typedef typename ViewTypeY::value_type ScalarY; + + typedef multivector_layout_adapter vfA_type; + typedef Kokkos::View< + ScalarX * [2], + typename std::conditional::value, + Kokkos::LayoutRight, Kokkos::LayoutLeft>::type, + Device> + BaseTypeX; + typedef Kokkos::View< + ScalarY * [2], + typename std::conditional::value, + Kokkos::LayoutRight, Kokkos::LayoutLeft>::type, + Device> + BaseTypeY; + + ScalarA a = 3; + ScalarX b = 5; + double eps = std::is_same::value ? 2 * 1e-5 : 1e-7; + + typename vfA_type::BaseType b_A("A", N, M); + BaseTypeX b_x("X", M); + BaseTypeY b_y("Y", N); + BaseTypeY b_org_y("Org_Y", N); + + ViewTypeA A = vfA_type::view(b_A); + ViewTypeX x = Kokkos::subview(b_x, Kokkos::ALL(), 0); + ViewTypeY y = Kokkos::subview(b_y, Kokkos::ALL(), 0); + typename ViewTypeX::const_type c_x = x; + typename ViewTypeA::const_type c_A = A; + + typedef multivector_layout_adapter h_vfA_type; + + typename h_vfA_type::BaseType h_b_A = Kokkos::create_mirror_view(b_A); + typename BaseTypeX::HostMirror h_b_x = Kokkos::create_mirror_view(b_x); + typename BaseTypeY::HostMirror h_b_y = Kokkos::create_mirror_view(b_y); + + typename ViewTypeA::HostMirror h_A = h_vfA_type::view(h_b_A); + typename ViewTypeX::HostMirror h_x = Kokkos::subview(h_b_x, Kokkos::ALL(), 0); + typename ViewTypeY::HostMirror h_y = Kokkos::subview(h_b_y, Kokkos::ALL(), 0); + + Kokkos::Random_XorShift64_Pool rand_pool( + 13718); + + Kokkos::fill_random(b_x, rand_pool, ScalarX(10)); + Kokkos::fill_random(b_y, rand_pool, ScalarY(10)); + Kokkos::fill_random(b_A, rand_pool, ScalarA(10)); + + Kokkos::deep_copy(b_org_y, b_y); + + Kokkos::deep_copy(h_b_x, b_x); + Kokkos::deep_copy(h_b_y, b_y); + Kokkos::deep_copy(h_b_A, b_A); + + ScalarY expected_result = 0; + if (mode[0] != 'N' && mode[0] != 'T' && mode[0] != 'C') { + throw std::runtime_error("incorrect matrix mode letter !"); + } + typedef Kokkos::Details::ArithTraits ATV; + for (int i = 0; i < N; i++) { + ScalarY y_i = ScalarY(); + for (int j = 0; j < M; j++) { + const auto a_val = mode[0] == 'C' + ? ATV::conj(h_A(j, i)) + : (mode[0] == 'T' ? h_A(j, i) : h_A(i, j)); + y_i += a_val * h_x(j); + } + expected_result += (b * h_y(i) + a * y_i) * (b * h_y(i) + a * y_i); + } + + char trans = mode[0]; + + const auto team_rows = [&](auto teamId) { + return Kokkos::make_pair( + teamId * team_data_size, + (teamId < K - 1) ? (teamId + 1) * team_data_size : N); + }; + + // KokkosBlas::gemv(mode,a,A,x,b,y); + Kokkos::parallel_for( + "KokkosBlas::Test::TeamVectorGemm", policy, + KOKKOS_LAMBDA(const team_member &teamMember) { + const int teamId = teamMember.league_rank(); + KokkosBlas::Experimental::team_vector_gemv( + teamMember, trans, a, + Kokkos::subview(A, team_rows(teamId), Kokkos::ALL()), x, b, + Kokkos::subview(y, team_rows(teamId))); + }); + + ScalarY nonconst_nonconst_result = KokkosBlas::dot(y, y); + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, + eps * expected_result); + + Kokkos::deep_copy(b_y, b_org_y); + + // KokkosBlas::gemv(mode,a,A,c_x,b,y); + Kokkos::parallel_for( + "KokkosBlas::Test::TeamVectorGemm", policy, + KOKKOS_LAMBDA(const team_member &teamMember) { + const int teamId = teamMember.league_rank(); + KokkosBlas::Experimental::team_vector_gemv( + teamMember, trans, a, + Kokkos::subview(A, team_rows(teamId), Kokkos::ALL()), c_x, b, + Kokkos::subview(y, team_rows(teamId))); + }); + + ScalarY const_nonconst_result = KokkosBlas::dot(y, y); + EXPECT_NEAR_KK(const_nonconst_result, expected_result, eps * expected_result); + + Kokkos::deep_copy(b_y, b_org_y); + + // KokkosBlas::gemv(mode,a,c_A,c_x,b,y); + Kokkos::parallel_for( + "KokkosBlas::Test::TeamVectorGemm", policy, + KOKKOS_LAMBDA(const team_member &teamMember) { + const int teamId = teamMember.league_rank(); + KokkosBlas::Experimental::team_vector_gemv( + teamMember, trans, a, + Kokkos::subview(c_A, team_rows(teamId), Kokkos::ALL()), c_x, b, + Kokkos::subview(y, team_rows(teamId))); + }); + + ScalarY const_const_result = KokkosBlas::dot(y, y); + EXPECT_NEAR_KK(const_const_result, expected_result, eps * expected_result); +} +} // namespace Test + +template +int test_teamvector_gemv(const char *mode) { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + typedef Kokkos::View view_type_a_ll; + typedef Kokkos::View view_type_b_ll; + typedef Kokkos::View view_type_c_ll; + Test::impl_test_teamvector_gemv(mode, 0, 1024); + Test::impl_test_teamvector_gemv(mode, 13, 1024); + Test::impl_test_teamvector_gemv(mode, 124, 124); + // Test::impl_test_teamvector_gemv(mode,132231,1024); +#endif + +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + typedef Kokkos::View view_type_a_lr; + typedef Kokkos::View view_type_b_lr; + typedef Kokkos::View view_type_c_lr; + Test::impl_test_teamvector_gemv(mode, 0, 1024); + Test::impl_test_teamvector_gemv(mode, 13, 1024); + Test::impl_test_teamvector_gemv(mode, 124, 124); + // Test::impl_test_teamvector_gemv(mode,132231,1024); +#endif + +#if defined(KOKKOSKERNELS_INST_LAYOUTSTRIDE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + typedef Kokkos::View view_type_a_ls; + typedef Kokkos::View view_type_b_ls; + typedef Kokkos::View view_type_c_ls; + Test::impl_test_teamvector_gemv(mode, 0, 1024); + Test::impl_test_teamvector_gemv(mode, 13, 1024); + Test::impl_test_teamvector_gemv(mode, 124, 124); + // Test::impl_test_teamvector_gemv(mode,132231,1024); +#endif + +#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) + Test::impl_test_teamvector_gemv(mode, 124, 124); + Test::impl_test_teamvector_gemv(mode, 124, 124); +#endif + + return 1; +} + +#if defined(KOKKOSKERNELS_INST_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, teamvector_gemv_float) { + test_teamvector_gemv("N"); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, teamvector_gemv_double) { + test_teamvector_gemv("N"); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, teamvector_gemv_complex_double) { + test_teamvector_gemv, Kokkos::complex, + Kokkos::complex, TestExecSpace>("N"); +} +#endif + +#if defined(KOKKOSKERNELS_INST_INT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, teamvector_gemv_int) { + test_teamvector_gemv("N"); +} +#endif + +#if 0 // mixed scalar types not allowed in batched impl +#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +TEST_F(TestCategory, teamvector_gemv_double_int) { + test_teamvector_gemv("N"); +} +#endif +#endif + +#endif // Check for lambda availability on CUDA backend From ca48c86f119b3775f6c6a82958e5eba4fd5fac4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Wed, 8 Jun 2022 23:13:42 +0200 Subject: [PATCH 02/15] fix: avoid lambda inside kernels --- src/blas/KokkosBlas2_teamvector_gemv.hpp | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/blas/KokkosBlas2_teamvector_gemv.hpp b/src/blas/KokkosBlas2_teamvector_gemv.hpp index 67af3b151c..99315a5be9 100644 --- a/src/blas/KokkosBlas2_teamvector_gemv.hpp +++ b/src/blas/KokkosBlas2_teamvector_gemv.hpp @@ -63,20 +63,17 @@ void KOKKOS_INLINE_FUNCTION team_vector_gemv( std::is_same::value, "TeamVector GEMV requires A,x and y to have same scalar type"); - - const auto run = [&](auto mode) { - using algo = KokkosBatched::Algo::Gemv::Default; - using impl = KokkosBatched::TeamVectorGemv; - impl::invoke(team, alpha, A, x, beta, y); - }; + using algo = KokkosBatched::Algo::Gemv::Default; if (trans == 'N' || trans == 'n') { - return run(KokkosBatched::Trans::NoTranspose()); + using mode = KokkosBatched::Trans::NoTranspose; + using tv_gemv_impl = KokkosBatched::TeamVectorGemv; + tv_gemv_impl::invoke(team, alpha, A, x, beta, y); } else if (trans == 'T' || trans == 't') { - return run(KokkosBatched::Trans::Transpose()); - // } else if (trans == 'C' || trans == 'c') { // NOT implemented - // return run(KokkosBatched::Trans::ConjTranspose()); - } else { // conjugate[no-transpose] not supported ? + using mode = KokkosBatched::Trans::Transpose; + using tv_gemv_impl = KokkosBatched::TeamVectorGemv; + tv_gemv_impl::invoke(team, alpha, A, x, beta, y); + } else { // NOT supported: Conjugate, ConjTranspose std::ostringstream os; os << "Matrix mode not supported: " << trans << std::endl; KokkosKernels::Impl::throw_runtime_exception(os.str()); From 8c354d428817195775ae4365111e77e103fa610c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Fri, 12 Aug 2022 12:41:33 +0200 Subject: [PATCH 03/15] Move TeamGemv and TeamVector from Batched to Blas: - move implementations - refactor/merge unit tests - support different sclar types for A, x and y - support arbitrary types for alpha and beta --- .../KokkosBatched_Gemv_TeamVector_Impl.hpp | 58 +-- ...KokkosBatched_Gemv_TeamVector_Internal.hpp | 61 +-- .../impl/KokkosBatched_Gemv_Team_Impl.hpp | 58 +-- .../impl/KokkosBatched_Gemv_Team_Internal.hpp | 94 +---- ...osBatched_SolveUTV_TeamVector_Internal.hpp | 8 +- .../impl/KokkosBatched_Trsv_Team_Internal.hpp | 6 +- src/blas/KokkosBlas2_team_gemv.hpp | 59 ++- src/blas/KokkosBlas2_teamvector_gemv.hpp | 86 ---- src/blas/impl/KokkosBlas2_team_gemv_impl.hpp | 212 ++++++++++ src/blas/impl/KokkosBlas2_team_gemv_spec.hpp | 184 +++++++++ .../impl/KokkosSparse_spmv_bsrmatrix_impl.hpp | 33 +- .../impl/KokkosSparse_sptrsv_solve_impl.hpp | 75 ++-- .../batched/dense/Test_Batched_Dense.hpp | 3 - .../batched/dense/Test_Batched_TeamGemv.hpp | 155 ------- .../dense/Test_Batched_TeamGemv_Complex.hpp | 45 -- .../dense/Test_Batched_TeamGemv_Real.hpp | 30 -- .../dense/Test_Batched_TeamVectorQR.hpp | 8 +- ...atched_TeamVectorQR_WithColumnPivoting.hpp | 8 +- .../dense/Test_Batched_TeamVectorSolveUTV.hpp | 8 +- .../dense/Test_Batched_TeamVectorUTV.hpp | 26 +- unit_test/blas/Test_Blas.hpp | 1 + unit_test/blas/Test_Blas2_gemv_util.hpp | 337 +++++++++++++++ unit_test/blas/Test_Blas2_serial_gemv.hpp | 390 +++--------------- unit_test/blas/Test_Blas2_team_gemv.hpp | 295 ++----------- unit_test/blas/Test_Blas2_teamvector_gemv.hpp | 284 +++---------- 25 files changed, 1086 insertions(+), 1438 deletions(-) delete mode 100644 src/blas/KokkosBlas2_teamvector_gemv.hpp create mode 100644 src/blas/impl/KokkosBlas2_team_gemv_impl.hpp delete mode 100644 unit_test/batched/dense/Test_Batched_TeamGemv.hpp delete mode 100644 unit_test/batched/dense/Test_Batched_TeamGemv_Complex.hpp delete mode 100644 unit_test/batched/dense/Test_Batched_TeamGemv_Real.hpp create mode 100644 unit_test/blas/Test_Blas2_gemv_util.hpp diff --git a/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp b/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp index 0cad2c6c80..45eb828aa8 100644 --- a/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp +++ b/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp @@ -30,17 +30,15 @@ struct TeamVectorGemv { KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { - if (AViewType::Rank == 2) - return TeamVectorGemvInternal::invoke( - member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); - else - return TeamVectorGemvInternal::template invoke< - MemberType, ScalarType, typename AViewType::array_layout, - typename AViewType::non_const_value_type>( - member, A.extent(0), A.extent(1), A.extent(2), alpha, A.data(), - A.stride_0(), A.stride_1(), A.stride_2(), x.data(), x.stride_0(), - x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); + static_assert(AViewType::Rank == 3, + "Batched TeamVectorGemv requires rank-3 A matrix (use " + "KokkosBlas::TeamVectorGemv for regular rank-2 matrix)"); + return TeamVectorGemvInternal::template invoke< + MemberType, ScalarType, typename AViewType::array_layout, + typename AViewType::non_const_value_type>( + member, A.extent(0), A.extent(1), A.extent(2), alpha, A.data(), + A.stride_0(), A.stride_1(), A.stride_2(), x.data(), x.stride_0(), + x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); } }; @@ -51,9 +49,12 @@ struct TeamVectorGemv { KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { - return TeamVectorGemvInternal::invoke( - member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + static_assert(AViewType::Rank == 3, + "Batched TeamVectorGemv requires rank-3 A matrix (use " + "KokkosBlas::TeamVectorGemv for regular rank-2 matrix)"); + Kokkos::abort( + "KokkosBatched::TeamVectorGemv for rank-3 matrix " + "is NOT implemented"); } }; @@ -68,17 +69,15 @@ struct TeamVectorGemv { KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { - if (AViewType::Rank == 2) - return TeamVectorGemvInternal::invoke( - member, A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), - A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); - else - return TeamVectorGemvInternal::template invoke< - MemberType, ScalarType, typename AViewType::array_layout, - typename AViewType::non_const_value_type>( - member, A.extent(0), A.extent(2), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_2(), A.stride_1(), x.data(), x.stride_0(), - x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); + static_assert(AViewType::Rank == 3, + "Batched TeamVectorGemv requires rank-3 A matrix (use " + "KokkosBlas::TeamVectorGemv for regular rank-2 matrix)"); + return TeamVectorGemvInternal::template invoke< + MemberType, ScalarType, typename AViewType::array_layout, + typename AViewType::non_const_value_type>( + member, A.extent(0), A.extent(2), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_2(), A.stride_1(), x.data(), x.stride_0(), + x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); } }; @@ -89,9 +88,12 @@ struct TeamVectorGemv { KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { - return TeamVectorGemvInternal::invoke( - member, A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), - A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + static_assert(AViewType::Rank == 3, + "Batched TeamVectorGemv requires rank-3 A matrix (use " + "KokkosBlas::TeamVectorGemv for regular rank-2 matrix)"); + Kokkos::abort( + "KokkosBatched::TeamVectorGemv for rank-3 matrix " + "is NOT implemented"); } }; diff --git a/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Internal.hpp b/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Internal.hpp index 4f47212b94..f3b71196f1 100644 --- a/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Internal.hpp +++ b/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Internal.hpp @@ -4,10 +4,9 @@ /// \author Kyungjoo Kim (kyukim@sandia.gov) #include "KokkosBatched_Util.hpp" - -#include "KokkosBlas1_set_impl.hpp" -#include "KokkosBlas1_team_scal_impl.hpp" -#include "KokkosBlas2_serial_gemv_inner_multiple_dot.hpp" +// #include "KokkosBlas1_set_impl.hpp" +// #include "KokkosBlas1_team_scal_impl.hpp" +// #include "KokkosBlas2_serial_gemv_inner_multiple_dot.hpp" namespace KokkosBatched { @@ -16,17 +15,6 @@ namespace KokkosBatched { /// ==================== template struct TeamVectorGemvInternal { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType & /*member*/, const int /*m*/, const int /*n*/, - const ScalarType /*alpha*/, const ValueType *KOKKOS_RESTRICT /*A*/, - const int /*as0*/, const int /*as1*/, - const ValueType *KOKKOS_RESTRICT /*x*/, const int /*xs0*/, - const ScalarType /*beta*/, - /**/ ValueType *KOKKOS_RESTRICT /*y*/, const int /*ys0*/) { - assert(false && "Error: encounter dummy impl"); - return 0; - } template KOKKOS_INLINE_FUNCTION static int invoke( @@ -43,45 +31,6 @@ struct TeamVectorGemvInternal { } }; -template <> -template -KOKKOS_INLINE_FUNCTION int -TeamVectorGemvInternal::invoke( - const MemberType &member, const int m, const int n, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, - /**/ ValueType *KOKKOS_RESTRICT y, const int ys0) { - const ScalarType one(1.0), zero(0.0); - - // y = beta y + alpha A x - // y (m), A(m x n), B(n) - - if (beta == zero) - KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, zero, y, ys0); - else if (beta != one) - KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, beta, y, ys0); - - if (alpha != zero) { - if (m <= 0 || n <= 0) return 0; - - if (beta != one) member.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { - ValueType t(0); - const ValueType *KOKKOS_RESTRICT tA = (A + i * as0); - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(member, n), - [&](const int &j, ValueType &update) { - update += tA[j * as1] * x[j * xs0]; - }, - t); - Kokkos::single(Kokkos::PerThread(member), - [&]() { y[i * ys0] += alpha * t; }); - }); - } - return 0; -} - template <> template @@ -98,6 +47,8 @@ TeamVectorGemvInternal::invoke( // y_l (m), A_l(m x n), B_l(n) if (beta == zero) + // TODO: KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, zero, y, + // ys0); Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, N * m), [&](const int &iTemp) { int iRow, iMatrix; @@ -105,6 +56,8 @@ TeamVectorGemvInternal::invoke( Y[ys0 * iMatrix + ys1 * iRow] = zero; }); else if (beta != one) + // TODO: KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, beta, + // y, ys0); Kokkos::parallel_for(Kokkos::TeamVectorRange(member, 0, N * m), [&](const int &iTemp) { int iRow, iMatrix; diff --git a/src/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp b/src/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp index d32232524a..6220ab2916 100644 --- a/src/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp +++ b/src/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp @@ -30,17 +30,15 @@ struct TeamGemv { KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { - if (AViewType::Rank == 2) - return TeamGemvInternal::invoke( - member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); - else - return TeamGemvInternal::template invoke< - MemberType, ScalarType, typename AViewType::array_layout, - typename AViewType::non_const_value_type>( - member, A.extent(0), A.extent(1), A.extent(2), alpha, A.data(), - A.stride_0(), A.stride_1(), A.stride_2(), x.data(), x.stride_0(), - x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); + static_assert(AViewType::Rank == 3, + "Batched TeamGemv requires rank-3 A matrix (use " + "KokkosBlas::TeamGemv for regular rank-2 matrix)"); + return TeamGemvInternal::template invoke< + MemberType, ScalarType, typename AViewType::array_layout, + typename AViewType::non_const_value_type>( + member, A.extent(0), A.extent(1), A.extent(2), alpha, A.data(), + A.stride_0(), A.stride_1(), A.stride_2(), x.data(), x.stride_0(), + x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); } }; @@ -51,9 +49,12 @@ struct TeamGemv { KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { - return TeamGemvInternal::invoke( - member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + static_assert(AViewType::Rank == 3, + "Batched TeamGemv requires rank-3 A matrix (use " + "KokkosBlas::TeamGemv for regular rank-2 matrix)"); + Kokkos::abort( + "KokkosBlas::TeamGemv for rank-3 matrix is NOT " + "implemented"); } }; @@ -68,17 +69,15 @@ struct TeamGemv { KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { - if (AViewType::Rank == 2) - return TeamGemvInternal::invoke( - member, A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), - A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); - else - return TeamGemvInternal::template invoke< - MemberType, ScalarType, typename AViewType::array_layout, - typename AViewType::non_const_value_type>( - member, A.extent(0), A.extent(2), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_2(), A.stride_1(), x.data(), x.stride_0(), - x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); + static_assert(AViewType::Rank == 3, + "Batched TeamGemv requires rank-3 A matrix (use " + "KokkosBlas::TeamGemv for regular rank-2 matrix)"); + return TeamGemvInternal::template invoke< + MemberType, ScalarType, typename AViewType::array_layout, + typename AViewType::non_const_value_type>( + member, A.extent(0), A.extent(2), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_2(), A.stride_1(), x.data(), x.stride_0(), + x.stride_1(), beta, y.data(), y.stride_0(), y.stride_1()); } }; @@ -89,9 +88,12 @@ struct TeamGemv { KOKKOS_INLINE_FUNCTION static int invoke( const MemberType &member, const ScalarType alpha, const AViewType &A, const xViewType &x, const ScalarType beta, const yViewType &y) { - return TeamGemvInternal::invoke( - member, A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), - A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + static_assert(AViewType::Rank == 3, + "Batched TeamGemv requires rank-3 A matrix (use " + "KokkosBlas::TeamGemv for regular rank-2 matrix)"); + Kokkos::abort( + "KokkosBlas::TeamGemv for rank-3 matrix is NOT " + "implemented"); } }; diff --git a/src/batched/dense/impl/KokkosBatched_Gemv_Team_Internal.hpp b/src/batched/dense/impl/KokkosBatched_Gemv_Team_Internal.hpp index c48a2cd866..5ee01069d5 100644 --- a/src/batched/dense/impl/KokkosBatched_Gemv_Team_Internal.hpp +++ b/src/batched/dense/impl/KokkosBatched_Gemv_Team_Internal.hpp @@ -5,9 +5,9 @@ #include "KokkosBatched_Util.hpp" -#include "KokkosBlas1_set_impl.hpp" -#include "KokkosBlas1_team_scal_impl.hpp" -#include "KokkosBlas2_serial_gemv_inner_multiple_dot.hpp" +// #include "KokkosBlas1_set_impl.hpp" +// #include "KokkosBlas1_team_scal_impl.hpp" +// #include "KokkosBlas2_serial_gemv_inner_multiple_dot.hpp" namespace KokkosBatched { @@ -16,14 +16,6 @@ namespace KokkosBatched { /// ==================== template struct TeamGemvInternal { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const int m, const int n, - const ScalarType alpha, const ValueType *KOKKOS_RESTRICT A, const int as0, - const int as1, const ValueType *KOKKOS_RESTRICT x, const int xs0, - const ScalarType beta, - /**/ ValueType *KOKKOS_RESTRICT y, const int ys0); - template KOKKOS_INLINE_FUNCTION static int invoke( @@ -34,86 +26,6 @@ struct TeamGemvInternal { /**/ ValueType *KOKKOS_RESTRICT y, const int ys0, const int ys1); }; -template <> -template -KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( - const MemberType &member, const int m, const int n, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, - /**/ ValueType *KOKKOS_RESTRICT y, const int ys0) { - const ScalarType one(1.0), zero(0.0); - - // y = beta y + alpha A x - // y (m), A(m x n), B(n) - - if (beta == zero) - KokkosBlas::Impl::TeamSetInternal::invoke(member, m, zero, y, ys0); - else if (beta != one) - KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, beta, y, ys0); - - if (alpha != zero) { - if (m <= 0 || n <= 0) return 0; - - if (beta != one) member.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, m), - [&](const int &i) { - ValueType t(0); - const ValueType *KOKKOS_RESTRICT tA = (A + i * as0); -#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) -#pragma unroll -#endif - for (int j = 0; j < n; ++j) - t += tA[j * as1] * x[j * xs0]; - y[i * ys0] += alpha * t; - }); - } - return 0; -} - -template <> -template -KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( - const MemberType &member, const int m, const int n, const ScalarType alpha, - const ValueType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, - /**/ ValueType *KOKKOS_RESTRICT y, const int ys0) { - const ScalarType one(1.0), zero(0.0); - - // y = beta y + alpha A x - // y (m), A(m x n), B(n) - - constexpr int mbAlgo = Algo::Gemv::Blocked::mb(); - - if (beta == zero) - KokkosBlas::Impl::TeamSetInternal::invoke(member, m, zero, y, ys0); - else if (beta != one) - KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, beta, y, ys0); - - if (alpha != zero) { - if (m <= 0 || n <= 0) return 0; - - if (beta != one) member.team_barrier(); - - KokkosBlas::Impl::InnerMultipleDotProduct inner(as0, as1, xs0, ys0); - const int tsize = member.team_size(); - const int mb_a = m / tsize + (m % tsize > 0), mb_b = mbAlgo; - // Made this non-const in order to WORKAROUND issue #349 - int mb = mb_a < mb_b ? mb_a : mb_b, mp = m % mb; - - Kokkos::parallel_for(Kokkos::TeamThreadRange(member, (m / mb) + (mp > 0)), - [&](const int &ii) { - const int i = ii * mb; - inner.serial_invoke( - alpha, A + i * as0, x, - (i + mb) > m ? (m - i) : mb, n, y + i * ys0); - }); - member.team_barrier(); - } - - return 0; -} - template <> template diff --git a/src/batched/dense/impl/KokkosBatched_SolveUTV_TeamVector_Internal.hpp b/src/batched/dense/impl/KokkosBatched_SolveUTV_TeamVector_Internal.hpp index fa0d4c2a31..88d0bfe561 100644 --- a/src/batched/dense/impl/KokkosBatched_SolveUTV_TeamVector_Internal.hpp +++ b/src/batched/dense/impl/KokkosBatched_SolveUTV_TeamVector_Internal.hpp @@ -5,7 +5,7 @@ #include "KokkosBatched_Util.hpp" -#include "KokkosBatched_Gemv_TeamVector_Internal.hpp" +#include "KokkosBlas2_team_gemv_impl.hpp" #include "KokkosBatched_Trsv_TeamVector_Internal.hpp" #include "KokkosBatched_Gemm_TeamVector_Internal.hpp" @@ -34,7 +34,7 @@ struct TeamVectorSolveUTV_Internal { if (matrix_rank < m) { /// w = U^T b - TeamVectorGemvInternal::invoke( + KokkosBlas::Impl::TeamVectorGemvInternal::invoke( member, matrix_rank, m, one, U, us1, us0, b, bs0, zero, w, ws0); /// w = T^{-1} w @@ -42,10 +42,10 @@ struct TeamVectorSolveUTV_Internal { member, false, matrix_rank, one, T, ts0, ts1, w, ws0); /// x = V^T w - TeamVectorGemvInternal::invoke( + KokkosBlas::Impl::TeamVectorGemvInternal::invoke( member, m, matrix_rank, one, V, vs1, vs0, w, ws0, zero, x, xs0); } else { - TeamVectorGemvInternal::invoke( + KokkosBlas::Impl::TeamVectorGemvInternal::invoke( member, matrix_rank, m, one, U, us1, us0, b, bs0, zero, x, xs0); TeamVectorTrsvInternalUpper::invoke( diff --git a/src/batched/dense/impl/KokkosBatched_Trsv_Team_Internal.hpp b/src/batched/dense/impl/KokkosBatched_Trsv_Team_Internal.hpp index aaf72e9876..5583b58537 100644 --- a/src/batched/dense/impl/KokkosBatched_Trsv_Team_Internal.hpp +++ b/src/batched/dense/impl/KokkosBatched_Trsv_Team_Internal.hpp @@ -8,7 +8,7 @@ #include "KokkosBlas1_set_impl.hpp" #include "KokkosBlas1_team_scal_impl.hpp" #include "KokkosBatched_InnerTrsm_Serial_Impl.hpp" -#include "KokkosBatched_Gemv_Team_Internal.hpp" +#include "KokkosBlas2_team_gemv_spec.hpp" namespace KokkosBatched { @@ -119,7 +119,7 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalLower::invoke( // gemv update member.team_barrier(); - TeamGemvInternal::invoke( + KokkosBlas::Impl::TeamGemvInternal::invoke( member, m - p - pb, pb, minus_one, Ap + pb * as0, as0, as1, bp, 1, one, bp + pb * bs0, bs0); } @@ -227,7 +227,7 @@ KOKKOS_INLINE_FUNCTION int TeamTrsvInternalUpper::invoke( // gemv update member.team_barrier(); - TeamGemvInternal::invoke( + KokkosBlas::Impl::TeamGemvInternal::invoke( member, p, pb, minus_one, Ap - p * as0, as0, as1, bp, 1, one, b, bs0); } } diff --git a/src/blas/KokkosBlas2_team_gemv.hpp b/src/blas/KokkosBlas2_team_gemv.hpp index 874f8919df..e886de4f99 100644 --- a/src/blas/KokkosBlas2_team_gemv.hpp +++ b/src/blas/KokkosBlas2_team_gemv.hpp @@ -50,21 +50,62 @@ namespace KokkosBlas { namespace Experimental { -template +template +void KOKKOS_INLINE_FUNCTION gemv(const TeamType& team, const char trans, + const ScalarType& alpha, const MatrixType& A, + const XVector& x, const ScalarType& beta, + const YVector& y) { + if (trans == 'N' || trans == 'n') + TeamGemv::invoke(team, alpha, A, x, + beta, y); + if (trans == 'T' || trans == 't') + TeamGemv::invoke(team, alpha, A, x, + beta, y); + if (trans == 'C' || trans == 'c') + TeamGemv::invoke(team, alpha, A, x, + beta, y); +} + +// default AlgoTag +template void KOKKOS_INLINE_FUNCTION gemv(const TeamType& team, const char trans, - const typename MatrixType::non_const_value_type& alpha, + const ScalarType& alpha, const MatrixType& A, const XVector& x, - const typename YVector::non_const_value_type& beta, const YVector& y) { - if (trans == 'N' || trans == 'n') - return Impl::TeamGEMV::team_gemv( + const ScalarType& beta, const YVector& y) { + gemv(team, trans, alpha, A, x, beta, y); +} + +template +void KOKKOS_INLINE_FUNCTION +team_vector_gemv(const TeamType& team, const char trans, + const ScalarType& alpha, const MatrixType& A, const XVector& x, + const ScalarType& beta, const YVector& y) { + if (trans == 'N' || trans == 'n') { + KokkosBlas::TeamVectorGemv::invoke( team, alpha, A, x, beta, y); - if (trans == 'T' || trans == 't') - return Impl::TeamGEMV::team_gemv( + } else if (trans == 'T' || trans == 't') { + KokkosBlas::TeamVectorGemv::invoke( team, alpha, A, x, beta, y); - if (trans == 'C' || trans == 'c') - return Impl::TeamGEMV::team_gemv( + } else if (trans == 'C' || trans == 'c') { + KokkosBlas::TeamVectorGemv::invoke( team, alpha, A, x, beta, y); + } else { + Kokkos::abort("Matrix mode not supported"); + } +} + +// default AlgoTag +template +void KOKKOS_INLINE_FUNCTION team_vector_gemv( + const TeamType& team, const char trans, + const ScalarType& alpha, const MatrixType& A, + const XVector& x, const ScalarType& beta, + const YVector& y) { + team_vector_gemv(team, trans, alpha, A, x, + beta, y); } } // namespace Experimental diff --git a/src/blas/KokkosBlas2_teamvector_gemv.hpp b/src/blas/KokkosBlas2_teamvector_gemv.hpp deleted file mode 100644 index 99315a5be9..0000000000 --- a/src/blas/KokkosBlas2_teamvector_gemv.hpp +++ /dev/null @@ -1,86 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOSBLAS2_TEAMVECTOR_GEMV_HPP_ -#define KOKKOSBLAS2_TEAMVECTOR_GEMV_HPP_ - -#include "KokkosBatched_Gemv_Decl.hpp" -#include - -namespace KokkosBlas { -namespace Experimental { - -template -void KOKKOS_INLINE_FUNCTION team_vector_gemv( - const TeamType& team, const char trans, - const typename MatrixType::non_const_value_type& alpha, const MatrixType& A, - const XVector& x, const typename YVector::non_const_value_type& beta, - const YVector& y) { - static_assert( - std::is_same::value && - std::is_same::value, - "TeamVector GEMV requires A,x and y to have same scalar type"); - using algo = KokkosBatched::Algo::Gemv::Default; - - if (trans == 'N' || trans == 'n') { - using mode = KokkosBatched::Trans::NoTranspose; - using tv_gemv_impl = KokkosBatched::TeamVectorGemv; - tv_gemv_impl::invoke(team, alpha, A, x, beta, y); - } else if (trans == 'T' || trans == 't') { - using mode = KokkosBatched::Trans::Transpose; - using tv_gemv_impl = KokkosBatched::TeamVectorGemv; - tv_gemv_impl::invoke(team, alpha, A, x, beta, y); - } else { // NOT supported: Conjugate, ConjTranspose - std::ostringstream os; - os << "Matrix mode not supported: " << trans << std::endl; - KokkosKernels::Impl::throw_runtime_exception(os.str()); - } -} - -} // namespace Experimental -} // namespace KokkosBlas - -#endif diff --git a/src/blas/impl/KokkosBlas2_team_gemv_impl.hpp b/src/blas/impl/KokkosBlas2_team_gemv_impl.hpp new file mode 100644 index 0000000000..fd73e27443 --- /dev/null +++ b/src/blas/impl/KokkosBlas2_team_gemv_impl.hpp @@ -0,0 +1,212 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSBLAS2_TEAM_GEMV_IMPL_HPP_ +#define KOKKOSBLAS2_TEAM_GEMV_IMPL_HPP_ + +#include "KokkosBlas1_set_impl.hpp" +#include "KokkosBlas1_team_scal_impl.hpp" +#include "KokkosBlas2_serial_gemv_inner_multiple_dot.hpp" + +namespace KokkosBlas { +namespace Impl { + +template +struct TeamGemvInternal { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType &member, const int m, const int n, + const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, + const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, + const int xs0, const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0); +}; + +template +struct TeamVectorGemvInternal { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType &member, const int m, const int n, + const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, + const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, + const int xs0, const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0); +}; + +/// +/// Team Internal Impl +/// ==================== + +template <> +template +KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( + const MemberType &member, const int m, const int n, const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, + const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { + const ScalarType one(1.0), zero(0.0); + + // y = beta y + alpha A x + // y (m), A(m x n), B(n) + + if (beta == zero) + KokkosBlas::Impl::TeamSetInternal::invoke(member, m, zero, y, ys0); + else if (beta != one) + KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, beta, y, ys0); + + if (alpha != zero) { + if (m <= 0 || n <= 0) return 0; + + if (beta != one) member.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, m), + [&](const int &i) { + ValueYType t(0); + const ValueAType *KOKKOS_RESTRICT tA = (A + i * as0); +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) +#pragma unroll +#endif + for (int j = 0; j < n; ++j) + t += tA[j * as1] * x[j * xs0]; + y[i * ys0] += alpha * t; + }); + } + return 0; +} + +template <> +template +KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( + const MemberType &member, const int m, const int n, const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, + const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { + const ScalarType one(1.0), zero(0.0); + + // y = beta y + alpha A x + // y (m), A(m x n), B(n) + + constexpr int mbAlgo = Algo::Gemv::Blocked::mb(); + + if (beta == zero) + KokkosBlas::Impl::TeamSetInternal::invoke(member, m, zero, y, ys0); + else if (beta != one) + KokkosBlas::Impl::TeamScaleInternal::invoke(member, m, beta, y, ys0); + + if (alpha != zero) { + if (m <= 0 || n <= 0) return 0; + + if (beta != one) member.team_barrier(); + + KokkosBlas::Impl::InnerMultipleDotProduct inner(as0, as1, xs0, ys0); + const int tsize = member.team_size(); + const int mb_a = m / tsize + (m % tsize > 0), mb_b = mbAlgo; + // Made this non-const in order to WORKAROUND issue #349 + int mb = mb_a < mb_b ? mb_a : mb_b, mp = m % mb; + + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, (m / mb) + (mp > 0)), + [&](const int &ii) { + const int i = ii * mb; + inner.serial_invoke( + alpha, A + i * as0, x, + (i + mb) > m ? (m - i) : mb, n, y + i * ys0); + }); + member.team_barrier(); + } + + return 0; +} + +/// +/// TeamVector Internal Impl +/// ==================== + +template <> +template +KOKKOS_INLINE_FUNCTION int +TeamVectorGemvInternal::invoke( + const MemberType &member, const int m, const int n, const ScalarType alpha, + const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, + const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { + const ScalarType one(1.0), zero(0.0); + + // y = beta y + alpha A x + // y (m), A(m x n), B(n) + + if (beta == zero) + KokkosBlas::Impl::TeamVectorSetInternal::invoke(member, m, zero, y, ys0); + else if (beta != one) + KokkosBlas::Impl::TeamVectorScaleInternal::invoke(member, m, beta, y, ys0); + + if (alpha != zero) { + if (m <= 0 || n <= 0) return 0; + + if (beta != one) member.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, m), [&](const int &i) { + ValueYType t(0); + const ValueAType *KOKKOS_RESTRICT tA = (A + i * as0); + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(member, n), + [&](const int &j, ValueYType &update) { + update += tA[j * as1] * x[j * xs0]; + }, + t); + Kokkos::single(Kokkos::PerThread(member), + [&]() { y[i * ys0] += alpha * t; }); + }); + } + return 0; +} + +} // namespace Impl +} // namespace KokkosBlas + +#endif diff --git a/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp b/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp index b6cc7bf125..dee023d721 100644 --- a/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp +++ b/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp @@ -49,6 +49,7 @@ #include #include #include +#include namespace KokkosBlas { namespace Experimental { @@ -162,6 +163,189 @@ struct TeamGEMV { }; } // namespace Impl } // namespace Experimental + +template +struct TeamGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& /*member*/, + const ScalarType /*alpha*/, + const AViewType& /*A*/, + const xViewType& /*x*/, + const ScalarType /*beta*/, + const yViewType& /*y*/); +}; + +template +struct TeamVectorGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType& /*member*/, + const ScalarType /*alpha*/, + const AViewType& /*A*/, + const xViewType& /*x*/, + const ScalarType /*beta*/, + const yViewType& /*y*/); +}; + +/// +/// NT +/// + +template +struct TeamGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::Rank == 2, + "KokkosBlas::TeamGemv requires rank-2 A matrix"); + return Impl::TeamGemvInternal::template invoke< + MemberType, ScalarType, typename AViewType::non_const_value_type>( + member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), + A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + } +}; + +template +struct TeamGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::Rank == 2, + "KokkosBlas::TeamGemv requires rank-2 A matrix"); + return Impl::TeamGemvInternal::template invoke< + MemberType, ScalarType, typename AViewType::non_const_value_type>( + member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), + A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + } +}; + +/// +/// T +/// + +template +struct TeamGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::Rank == 2, + "BLAS TeamGemv requires rank-2 A matrix"); + return Impl::TeamGemvInternal::template invoke< + MemberType, ScalarType, typename AViewType::non_const_value_type>( + member, A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), + A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + } +}; + +template +struct TeamGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::Rank == 2, + "BLAS TeamGemv requires rank-2 A matrix"); + return Impl::TeamGemvInternal::template invoke< + MemberType, ScalarType, typename AViewType::non_const_value_type>( + member, A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), + A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + } +}; + +/// +/// CT +/// + +template +struct TeamGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::Rank == 2, + "BLAS TeamGemv requires rank-2 A matrix"); + Kokkos::abort("TODO: implement conjugate-transpose !"); + } +}; + +template +struct TeamGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::Rank == 2, + "BLAS TeamGemv requires rank-2 A matrix"); + Kokkos::abort("TODO: implement conjugate-transpose !"); + } +}; + +/// +/// NT +/// + +template +struct TeamVectorGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::Rank == 2, + "Batched TeamVectorGemv requires rank-2 A matrix"); + return Impl::TeamVectorGemvInternal::invoke( + member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), + A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + } +}; + +/// +/// T +/// + +template +struct TeamVectorGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::Rank == 2, + "Batched TeamVectorGemv requires rank-2 A matrix"); + return Impl::TeamVectorGemvInternal::invoke( + member, A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), + A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); + } +}; + +/// +/// CT +/// + +template +struct TeamVectorGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::Rank == 2, + "Batched TeamVectorGemv requires rank-2 A matrix"); + Kokkos::abort("TODO: implement conjugate-transpose !"); + } +}; + } // namespace KokkosBlas #endif diff --git a/src/sparse/impl/KokkosSparse_spmv_bsrmatrix_impl.hpp b/src/sparse/impl/KokkosSparse_spmv_bsrmatrix_impl.hpp index 959982ca30..9796c12352 100644 --- a/src/sparse/impl/KokkosSparse_spmv_bsrmatrix_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spmv_bsrmatrix_impl.hpp @@ -672,13 +672,13 @@ struct BSR_GEMV_Functor { const auto X_ptBeg = X_blkCol * block_dim; const auto X_cur = Kokkos::subview( m_x, ::Kokkos::make_pair(X_ptBeg, X_ptBeg + block_dim)); - KokkosBatched::TeamVectorGemvInternal< - KokkosBatched::Algo::Gemv::Unblocked>:: - invoke(dev, block_dim, block_dim, alpha, A_cur.data(), - static_cast(A_cur.stride_0()), - static_cast(A_cur.stride_1()), X_cur.data(), - static_cast(X_cur.stride_0()), val_one, Y_cur.data(), - static_cast(Y_cur.stride_0())); + KokkosBlas::Impl:: + TeamVectorGemvInternal::invoke( + dev, block_dim, block_dim, alpha, A_cur.data(), + static_cast(A_cur.stride_0()), + static_cast(A_cur.stride_1()), X_cur.data(), + static_cast(X_cur.stride_0()), val_one, Y_cur.data(), + static_cast(Y_cur.stride_0())); } } } @@ -976,19 +976,12 @@ struct BSR_GEMV_Transpose_Functor { for (ordinal_type jBlock = 0; jBlock < count; ++jBlock) { const auto A_cur = myRow.block(jBlock); // - KokkosBatched::TeamVectorGemvInternal< - KokkosBatched::Algo::Gemv::Unblocked>::invoke(dev, block_dim, - block_dim, alpha, - A_cur.data(), - static_cast( - A_cur.stride_1()), - static_cast( - A_cur.stride_0()), - X_cur.data(), - static_cast( - X_cur.stride_0()), - val_zero, shared_y, - 1); + KokkosBlas::Impl:: + TeamVectorGemvInternal::invoke( + dev, block_dim, block_dim, alpha, A_cur.data(), + static_cast(A_cur.stride_1()), + static_cast(A_cur.stride_0()), X_cur.data(), + static_cast(X_cur.stride_0()), val_zero, shared_y, 1); // dev.team_barrier(); // diff --git a/src/sparse/impl/KokkosSparse_sptrsv_solve_impl.hpp b/src/sparse/impl/KokkosSparse_sptrsv_solve_impl.hpp index 49b9d69b24..0e3ed0235a 100644 --- a/src/sparse/impl/KokkosSparse_sptrsv_solve_impl.hpp +++ b/src/sparse/impl/KokkosSparse_sptrsv_solve_impl.hpp @@ -57,15 +57,12 @@ #ifdef KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV // Enable supernodal sptrsv -#include "KokkosBlas2_gemv.hpp" -#include "KokkosBlas2_team_gemv.hpp" #include "KokkosBlas3_trsm.hpp" #include "KokkosSparse_spmv.hpp" #include "KokkosBatched_Util.hpp" -#include "KokkosBatched_Gemv_Decl.hpp" -#include "KokkosBatched_Gemv_Team_Impl.hpp" +#include "KokkosBlas2_team_gemv_spec.hpp" #include "KokkosBatched_Trsm_Team_Impl.hpp" #endif @@ -897,10 +894,12 @@ struct LowerTriSupernodalFunctor { workoffset, workoffset + nsrow)); // needed for gemv instead of trmv/trsv auto Ljj = Kokkos::subview(viewL, range_type(0, nsrow), Kokkos::ALL()); - KokkosBatched::TeamGemv< - member_type, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemv::Unblocked>::invoke(team, one, Ljj, Xj, - zero, Y); + KokkosBlas::TeamGemv::invoke(team, + one, + Ljj, Xj, + zero, + Y); team.team_barrier(); for (int ii = team_rank; ii < nscol; ii += team_size) { Xj(ii) = Y(ii); @@ -923,10 +922,13 @@ struct LowerTriSupernodalFunctor { team.team_barrier(); // calling team-level "Unblocked" gemv on small-size diagonal in // KokkosBatched - KokkosBatched::TeamGemv< - member_type, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemv::Unblocked>::invoke(team, one, Ljj, Y, - zero, Xj); + KokkosBlas::TeamGemv::invoke(team, + one, + Ljj, + Y, + zero, + Xj); } else { // NOTE: we currently supports only default_layout = LayoutLeft Kokkos::View::invoke(team, one, Lij, Xj, - zero, Z); + KokkosBlas::TeamGemv::invoke(team, + one, + Lij, Xj, + zero, + Z); team.team_barrier(); } } @@ -1103,8 +1107,8 @@ struct UpperTriSupernodalFunctor { auto Uij = Kokkos::subview(viewU, range_type(nscol, nsrow), Kokkos::ALL()); using Uij_type = decltype(Uij); - KokkosBatched::TeamGemv:: + KokkosBlas::TeamGemv:: template invoke( team, -one, Uij, Z, one, Xj); team.team_barrier(); @@ -1128,8 +1132,8 @@ struct UpperTriSupernodalFunctor { team.team_barrier(); // caling team-level kernel in KokkosBatched on a small-size diagonal - KokkosBatched::TeamGemv:: + KokkosBlas::TeamGemv:: template invoke( team, one, Ujj, Y, zero, Xj); } else { @@ -1267,10 +1271,12 @@ struct UpperTriTranSupernodalFunctor { workoffset, workoffset + nsrow)); // needed with gemv for update&scatter auto Uij = Kokkos::subview(viewU, range_type(0, nsrow), Kokkos::ALL()); - KokkosBatched::TeamGemv< - member_type, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemv::Unblocked>::invoke(team, one, Uij, Xj, - zero, Y); + KokkosBlas::TeamGemv::invoke(team, + one, + Uij, Xj, + zero, + Y); team.team_barrier(); // copy the diagonal back to output for (int ii = team_rank; ii < nscol; ii += team_size) { @@ -1289,10 +1295,13 @@ struct UpperTriTranSupernodalFunctor { Y(ii) = Xj(ii); } team.team_barrier(); - KokkosBatched::TeamGemv< - member_type, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemv::Unblocked>::invoke(team, one, Ujj, Y, - zero, Xj); + KokkosBlas::TeamGemv::invoke(team, + one, + Ujj, + Y, + zero, + Xj); } else { // NOTE: we currently supports only default_layout = LayoutLeft Kokkos::View::invoke(team, one, Uij, Xj, - zero, Z); + KokkosBlas::TeamGemv::invoke(team, + one, + Uij, Xj, + zero, + Z); team.team_barrier(); } diff --git a/unit_test/batched/dense/Test_Batched_Dense.hpp b/unit_test/batched/dense/Test_Batched_Dense.hpp index a771bcada1..a154e9e14f 100644 --- a/unit_test/batched/dense/Test_Batched_Dense.hpp +++ b/unit_test/batched/dense/Test_Batched_Dense.hpp @@ -45,9 +45,6 @@ #include "Test_Batched_TeamGemm.hpp" #include "Test_Batched_TeamGemm_Real.hpp" #include "Test_Batched_TeamGemm_Complex.hpp" -#include "Test_Batched_TeamGemv.hpp" -#include "Test_Batched_TeamGemv_Real.hpp" -#include "Test_Batched_TeamGemv_Complex.hpp" #include "Test_Batched_TeamGesv.hpp" #include "Test_Batched_TeamGesv_Real.hpp" #include "Test_Batched_TeamInverseLU.hpp" diff --git a/unit_test/batched/dense/Test_Batched_TeamGemv.hpp b/unit_test/batched/dense/Test_Batched_TeamGemv.hpp deleted file mode 100644 index 103aa069ce..0000000000 --- a/unit_test/batched/dense/Test_Batched_TeamGemv.hpp +++ /dev/null @@ -1,155 +0,0 @@ -/// \author Kyungjoo Kim (kyukim@sandia.gov) - -#include "gtest/gtest.h" -#include "Kokkos_Core.hpp" -#include "Kokkos_Random.hpp" - -//#include "KokkosBatched_Vector.hpp" - -#include "KokkosBatched_Gemv_Decl.hpp" - -#include "KokkosKernels_TestUtils.hpp" - -using namespace KokkosBatched; - -namespace Test { -namespace TeamGemv { - -template -struct ParamTag { - typedef T trans; -}; - -template -struct Functor_TestBatchedTeamGemv { - ViewType _a, _b, _c; - - ScalarType _alpha, _beta; - - KOKKOS_INLINE_FUNCTION - Functor_TestBatchedTeamGemv(const ScalarType alpha, const ViewType &a, - const ViewType &b, const ScalarType beta, - const ViewType &c) - : _a(a), _b(b), _c(c), _alpha(alpha), _beta(beta) {} - - template - KOKKOS_INLINE_FUNCTION void operator()(const ParamTagType &, - const MemberType &member) const { - const int k = member.league_rank(); - - auto aa = Kokkos::subview(_a, k, Kokkos::ALL(), Kokkos::ALL()); - auto bb = Kokkos::subview(_b, k, Kokkos::ALL(), 0); - auto cc = Kokkos::subview(_c, k, Kokkos::ALL(), 0); - - KokkosBatched::TeamGemv::invoke(member, _alpha, aa, bb, _beta, - cc); - } - - inline void run() { - typedef typename ViewType::value_type value_type; - std::string name_region("KokkosBatched::Test::SerialGemm"); - const std::string name_value_type = Test::value_type_name(); - std::string name = name_region + name_value_type; - Kokkos::Profiling::pushRegion(name.c_str()); - const int league_size = _c.extent(0); - Kokkos::TeamPolicy policy(league_size, - Kokkos::AUTO); - Kokkos::parallel_for(name.c_str(), policy, *this); - Kokkos::Profiling::popRegion(); - } -}; - -template -void impl_test_batched_gemv(const int N, const int BlkSize) { - typedef typename ViewType::value_type value_type; - typedef Kokkos::Details::ArithTraits ats; - - /// randomized input testing views - ScalarType alpha = 1.5, beta = 3.0; - - ViewType a0("a0", N, BlkSize, BlkSize), a1("a1", N, BlkSize, BlkSize), - b0("b0", N, BlkSize, 1), b1("b1", N, BlkSize, 1), c0("c0", N, BlkSize, 1), - c1("c1", N, BlkSize, 1); - - Kokkos::Random_XorShift64_Pool random( - 13718); - Kokkos::fill_random(a0, random, value_type(1.0)); - Kokkos::fill_random(b0, random, value_type(1.0)); - Kokkos::fill_random(c0, random, value_type(1.0)); - - Kokkos::fence(); - - Kokkos::deep_copy(a1, a0); - Kokkos::deep_copy(b1, b0); - Kokkos::deep_copy(c1, c0); - - /// test body - Functor_TestBatchedTeamGemv(alpha, a0, b0, beta, c0) - .run(); - Functor_TestBatchedTeamGemv(alpha, a1, b1, beta, c1) - .run(); - - Kokkos::fence(); - - /// for comparison send it to host - typename ViewType::HostMirror c0_host = Kokkos::create_mirror_view(c0); - typename ViewType::HostMirror c1_host = Kokkos::create_mirror_view(c1); - - Kokkos::deep_copy(c0_host, c0); - Kokkos::deep_copy(c1_host, c1); - - /// check c0 = c1 ; this eps is about 10^-14 - typedef typename ats::mag_type mag_type; - mag_type sum(1), diff(0); - const mag_type eps = 1.0e3 * ats::epsilon(); - - for (int k = 0; k < N; ++k) - for (int i = 0; i < BlkSize; ++i) - for (int j = 0; j < 1; ++j) { - sum += ats::abs(c0_host(k, i, j)); - diff += ats::abs(c0_host(k, i, j) - c1_host(k, i, j)); - } - EXPECT_NEAR_KK(diff / sum, 0, eps); -} -} // namespace TeamGemv -} // namespace Test - -template -int test_batched_team_gemv() { -#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) - { - typedef Kokkos::View - ViewType; - Test::TeamGemv::impl_test_batched_gemv(0, 10); - for (int i = 0; i < 10; ++i) { - // printf("Testing: LayoutLeft, Blksize %d\n", i); - Test::TeamGemv::impl_test_batched_gemv(1024, - i); - } - } -#endif -#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) - { - typedef Kokkos::View - ViewType; - Test::TeamGemv::impl_test_batched_gemv(0, 10); - for (int i = 0; i < 10; ++i) { - // printf("Testing: LayoutRight, Blksize %d\n", i); - Test::TeamGemv::impl_test_batched_gemv(1024, - i); - } - } -#endif - - return 0; -} diff --git a/unit_test/batched/dense/Test_Batched_TeamGemv_Complex.hpp b/unit_test/batched/dense/Test_Batched_TeamGemv_Complex.hpp deleted file mode 100644 index 3ffc34db23..0000000000 --- a/unit_test/batched/dense/Test_Batched_TeamGemv_Complex.hpp +++ /dev/null @@ -1,45 +0,0 @@ -#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) - -/// dcomplex, dcomplex - -TEST_F(TestCategory, batched_scalar_team_gemv_nt_dcomplex_dcomplex) { - typedef ::Test::TeamGemv::ParamTag param_tag_type; - typedef Algo::Gemv::Blocked algo_tag_type; - test_batched_team_gemv, - Kokkos::complex, param_tag_type, - algo_tag_type>(); -} -TEST_F(TestCategory, batched_scalar_team_gemv_t_dcomplex_dcomplex) { - typedef ::Test::TeamGemv::ParamTag param_tag_type; - typedef Algo::Gemv::Blocked algo_tag_type; - test_batched_team_gemv, - Kokkos::complex, param_tag_type, - algo_tag_type>(); -} -// TEST_F( TestCategory, batched_scalar_team_gemv_ct_dcomplex_dcomplex ) { -// typedef ::Test::TeamGemv::ParamTag param_tag_type; -// typedef Algo::Gemv::Blocked algo_tag_type; -// test_batched_team_gemv,Kokkos::complex,param_tag_type,algo_tag_type>(); -// } - -/// dcomplex, double - -TEST_F(TestCategory, batched_scalar_team_gemv_nt_dcomplex_double) { - typedef ::Test::TeamGemv::ParamTag param_tag_type; - typedef Algo::Gemv::Blocked algo_tag_type; - test_batched_team_gemv, double, - param_tag_type, algo_tag_type>(); -} -TEST_F(TestCategory, batched_scalar_team_gemv_t_dcomplex_double) { - typedef ::Test::TeamGemv::ParamTag param_tag_type; - typedef Algo::Gemv::Blocked algo_tag_type; - test_batched_team_gemv, double, - param_tag_type, algo_tag_type>(); -} -// TEST_F( TestCategory, batched_scalar_team_gemv_ct_dcomplex_double ) { -// typedef ::Test::TeamGemv::ParamTag param_tag_type; -// typedef Algo::Gemv::Blocked algo_tag_type; -// test_batched_team_gemv,double,param_tag_type,algo_tag_type>(); -// } - -#endif diff --git a/unit_test/batched/dense/Test_Batched_TeamGemv_Real.hpp b/unit_test/batched/dense/Test_Batched_TeamGemv_Real.hpp deleted file mode 100644 index 2c4db11b2d..0000000000 --- a/unit_test/batched/dense/Test_Batched_TeamGemv_Real.hpp +++ /dev/null @@ -1,30 +0,0 @@ - -#if defined(KOKKOSKERNELS_INST_FLOAT) -TEST_F(TestCategory, batched_scalar_team_gemv_nt_float_float) { - typedef ::Test::TeamGemv::ParamTag param_tag_type; - typedef Algo::Gemv::Blocked algo_tag_type; - test_batched_team_gemv(); -} -TEST_F(TestCategory, batched_scalar_team_gemv_t_float_float) { - typedef ::Test::TeamGemv::ParamTag param_tag_type; - typedef Algo::Gemv::Blocked algo_tag_type; - test_batched_team_gemv(); -} -#endif - -#if defined(KOKKOSKERNELS_INST_DOUBLE) -TEST_F(TestCategory, batched_scalar_team_gemv_nt_double_double) { - typedef ::Test::TeamGemv::ParamTag param_tag_type; - typedef Algo::Gemv::Blocked algo_tag_type; - test_batched_team_gemv(); -} -TEST_F(TestCategory, batched_scalar_team_gemv_t_double_double) { - typedef ::Test::TeamGemv::ParamTag param_tag_type; - typedef Algo::Gemv::Blocked algo_tag_type; - test_batched_team_gemv(); -} -#endif diff --git a/unit_test/batched/dense/Test_Batched_TeamVectorQR.hpp b/unit_test/batched/dense/Test_Batched_TeamVectorQR.hpp index 80bc7b246a..bb5cd89c9b 100644 --- a/unit_test/batched/dense/Test_Batched_TeamVectorQR.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamVectorQR.hpp @@ -6,7 +6,7 @@ #include "KokkosBlas1_set.hpp" #include "KokkosBatched_Copy_Decl.hpp" -#include "KokkosBatched_Gemv_Decl.hpp" +#include "KokkosBlas2_team_gemv_spec.hpp" #include "KokkosBatched_Trsv_Decl.hpp" #include "KokkosBatched_QR_Decl.hpp" #include "KokkosBatched_ApplyQ_Decl.hpp" @@ -53,9 +53,9 @@ struct Functor_TestBatchedTeamVectorQR { member.team_barrier(); /// bb = AA*xx - TeamVectorGemv::invoke(member, one, aa, xx, zero, - bb); + KokkosBlas::TeamVectorGemv::invoke(member, one, aa, + xx, zero, bb); member.team_barrier(); /// AA = QR diff --git a/unit_test/batched/dense/Test_Batched_TeamVectorQR_WithColumnPivoting.hpp b/unit_test/batched/dense/Test_Batched_TeamVectorQR_WithColumnPivoting.hpp index 72754a5e00..743810d4ce 100644 --- a/unit_test/batched/dense/Test_Batched_TeamVectorQR_WithColumnPivoting.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamVectorQR_WithColumnPivoting.hpp @@ -6,7 +6,7 @@ #include "KokkosBatched_Copy_Decl.hpp" #include "KokkosBatched_ApplyPivot_Decl.hpp" -#include "KokkosBatched_Gemv_Decl.hpp" +#include "KokkosBlas2_team_gemv_spec.hpp" #include "KokkosBatched_Trsv_Decl.hpp" #include "KokkosBatched_QR_WithColumnPivoting_Decl.hpp" #include "KokkosBatched_ApplyQ_Decl.hpp" @@ -53,9 +53,9 @@ struct Functor_TestBatchedTeamVectorQR_WithColumnPivoting { member.team_barrier(); /// bb = AA*xx - TeamVectorGemv::invoke(member, one, aa, xx, zero, - bb); + KokkosBlas::TeamVectorGemv::invoke(member, one, aa, + xx, zero, bb); member.team_barrier(); /// AA P^T = QR diff --git a/unit_test/batched/dense/Test_Batched_TeamVectorSolveUTV.hpp b/unit_test/batched/dense/Test_Batched_TeamVectorSolveUTV.hpp index 6610383d12..08375a95f5 100644 --- a/unit_test/batched/dense/Test_Batched_TeamVectorSolveUTV.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamVectorSolveUTV.hpp @@ -6,7 +6,7 @@ #include "KokkosBatched_Copy_Decl.hpp" #include "KokkosBatched_ApplyPivot_Decl.hpp" -#include "KokkosBatched_Gemv_Decl.hpp" +#include "KokkosBlas2_team_gemv_spec.hpp" #include "KokkosBatched_Trsv_Decl.hpp" #include "KokkosBatched_UTV_Decl.hpp" #include "KokkosBatched_SolveUTV_Decl.hpp" @@ -79,9 +79,9 @@ struct Functor_TestBatchedTeamVectorSolveUTV { TeamVectorCopy::invoke(member, aa, ac); /// bb = AA*xx - TeamVectorGemv::invoke(member, one, aa, xx, zero, - bb); + KokkosBlas::TeamVectorGemv::invoke(member, one, aa, + xx, zero, bb); member.team_barrier(); /// Solving Ax = b using UTV transformation diff --git a/unit_test/batched/dense/Test_Batched_TeamVectorUTV.hpp b/unit_test/batched/dense/Test_Batched_TeamVectorUTV.hpp index 0a49db7dce..06ca4b2fb8 100644 --- a/unit_test/batched/dense/Test_Batched_TeamVectorUTV.hpp +++ b/unit_test/batched/dense/Test_Batched_TeamVectorUTV.hpp @@ -6,7 +6,7 @@ #include "KokkosBatched_Copy_Decl.hpp" #include "KokkosBatched_ApplyPivot_Decl.hpp" -#include "KokkosBatched_Gemv_Decl.hpp" +#include "KokkosBlas2_team_gemv_spec.hpp" #include "KokkosBatched_Trsv_Decl.hpp" #include "KokkosBatched_UTV_Decl.hpp" @@ -78,9 +78,9 @@ struct Functor_TestBatchedTeamVectorUTV { TeamVectorCopy::invoke(member, aa, ac); /// bb = AA*xx - TeamVectorGemv::invoke(member, one, aa, xx, zero, - bb); + KokkosBlas::TeamVectorGemv::invoke(member, one, aa, + xx, zero, bb); member.team_barrier(); /// Solving Ax = b using UTV transformation @@ -98,9 +98,9 @@ struct Functor_TestBatchedTeamVectorUTV { auto vm = Kokkos::subview(vv, range_upto_rank, Kokkos::ALL()); if (matrix_rank < m) { /// w = U^T b - TeamVectorGemv::invoke(member, one, um, bb, zero, - ww); + KokkosBlas::TeamVectorGemv::invoke(member, one, um, + bb, zero, ww); member.team_barrier(); /// w = T^{-1} w @@ -109,15 +109,15 @@ struct Functor_TestBatchedTeamVectorUTV { member.team_barrier(); /// x = V^T w - TeamVectorGemv::invoke(member, one, vm, ww, zero, - xx); + KokkosBlas::TeamVectorGemv::invoke(member, one, vm, + ww, zero, xx); member.team_barrier(); } else { /// x = U^T b - TeamVectorGemv::invoke(member, one, um, bb, zero, - xx); + KokkosBlas::TeamVectorGemv::invoke(member, one, um, + bb, zero, xx); member.team_barrier(); /// x = T^{-1} x diff --git a/unit_test/blas/Test_Blas.hpp b/unit_test/blas/Test_Blas.hpp index 4bcbf957c4..b6d4f88314 100644 --- a/unit_test/blas/Test_Blas.hpp +++ b/unit_test/blas/Test_Blas.hpp @@ -44,6 +44,7 @@ // Serial Blas 2 #include "Test_Blas2_serial_gemv.hpp" + // Team Blas 2 #include "Test_Blas2_team_gemv.hpp" #include "Test_Blas2_teamvector_gemv.hpp" diff --git a/unit_test/blas/Test_Blas2_gemv_util.hpp b/unit_test/blas/Test_Blas2_gemv_util.hpp new file mode 100644 index 0000000000..0ef31d5ab2 --- /dev/null +++ b/unit_test/blas/Test_Blas2_gemv_util.hpp @@ -0,0 +1,337 @@ +#ifndef TEST_BLAS2_GEMV_UTIL_HPP +#define TEST_BLAS2_GEMV_UTIL_HPP + +#include +#include +#include +#include + +namespace Test { + +template ::value> +using simd_vector = + KokkosBatched::Vector, length>; + +template +struct GemvOpBase { + GemvOpBase(char trans_, ScalarType alpha_, AType A_, XType x_, + ScalarType beta_, YType y_) + : trans(trans_), alpha(alpha_), beta(beta_), A(A_), x(x_), y(y_) {} + + protected: + // parameters + char trans; + ScalarType alpha; + ScalarType beta; + // data + AType A; + XType x; + YType y; +}; + +#define KK_DEFINE_BLAS2_GEMV_TEST_OP_CLASS(ClassName) \ + struct ClassName : public GemvOpBase { \ + using params = GemvOpBase; \ + ClassName(char trans_, ScalarType alpha_, AType A_, XType x_, \ + ScalarType beta_, YType y_) \ + : params(trans_, alpha_, A_, x_, beta_, y_) {} +#define KK_END_BLAS2_GEMV_TEST_OP_CLASS \ + } \ + ; + +// Note: vanillaGEMV is called on device here - alternatively one can move +// _strided_ data using safe_device_to_host_deep_copy() etc. +template +KK_DEFINE_BLAS2_GEMV_TEST_OP_CLASS(RefGEMVOp) +template +KOKKOS_INLINE_FUNCTION void operator()(const TeamMember & /* member */) const { + vanillaGEMV(params::trans, params::alpha, params::A, params::x, params::beta, + params::y); +} +KK_END_BLAS2_GEMV_TEST_OP_CLASS + +// fill regular view with random values +template +typename std::enable_if::value>::type +fill_random_view(ViewType A, PoolType &rand_pool, + const ScalarType max_val = 10.0) { + Kokkos::fill_random(A, rand_pool, max_val); + Kokkos::fence(); +} + +// fill rank-1 view of SIMD vectors with random values +template +void fill_random_view( + Kokkos::View< + KokkosBatched::Vector, VecLength> *, + Layout, Props...> + x, + PoolType &rand_pool, const ValueType max_val = 10.0) { + // the view can be strided and have Vector values, so randoms + // are generated in a plain, linear view first and then copied + using device_type = typename decltype(x)::device_type; + Kokkos::View rnd("random_vals", + x.extent(0) * VecLength); + Kokkos::fill_random(rnd, rand_pool, max_val); + using size_type = decltype(x.extent(0)); + for (size_type i = 0; i < x.extent(0); ++i) { + x(i).loadUnaligned(&rnd(i * VecLength)); + } +} + +// fill rank-2 view of SIMD vectors with random values +template +static void fill_random_view( + Kokkos::View< + KokkosBatched::Vector, VecLength> **, + Layout, Props...> + A, + PoolType &rand_pool, const ValueType max_val = 10.0) { + // the view can be strided and have Vector values, so randoms + // are generated in a plain, linear view first and then copied + using device_type = typename decltype(A)::device_type; + Kokkos::View rnd( + "random_vals", A.extent(0) * A.extent(1) * VecLength); + Kokkos::fill_random(rnd, rand_pool, max_val); + using size_type = decltype(A.extent(0)); + size_type idx = 0; + for (size_type i = 0; i < A.extent(0); ++i) { + for (size_type j = 0; j < A.extent(1); ++j) { + A(i, j).loadUnaligned(&rnd(idx)); + idx += VecLength; + } + } +} + +template +struct GEMVTest { + static void run(const char *mode) { + run_algorithms<0, typename GemvFunc::algorithms>(mode); + } + + private: + // ScalarCoef==void default behavior is to derive alpha/beta scalar types + // from A and X scalar types + using ScalarType = typename std::conditional< + !std::is_void::value, ScalarCoef, + typename std::common_type::type>::type; + + template + static std::enable_if_t::value> + run_algorithms(const char *mode) {} + + template + static + typename std::enable_if<(Idx < + std::tuple_size::value)>::type + run_algorithms(const char *mode) { + run_layouts::type>(mode); + run_algorithms(mode); + } + + template + static constexpr bool allow_algorithm = + GemvFunc::template allow_algorithm; + + template + static typename std::enable_if>::type run_layouts( + const char *mode) { + // skip unsupported combinations + } + + template + static typename std::enable_if>::type run_layouts( + const char *mode) { + // Note: all layouts listed here are subview'ed to test Kokkos::LayoutStride +#ifdef KOKKOSKERNELS_TEST_LAYOUTLEFT + run_view_types(mode); +#endif +#ifdef KOKKOSKERNELS_TEST_LAYOUTRIGHT + run_view_types(mode); +#endif +#if defined(KOKKOSKERNELS_TEST_LAYOUTLEFT) && \ + defined(KOKKOSKERNELS_TEST_LAYOUTRIGHT) + using A_t = typename Kokkos::View; + using x_t = typename Kokkos::View; + using y_t = typename Kokkos::View; + run_sizes(mode); +#endif + } + + template + static void run_view_types(const char *mode) { + typedef Kokkos::View view_type_A; + typedef Kokkos::View view_type_x; + typedef Kokkos::View view_type_y; + run_sizes(mode); + } + + template + static void run_sizes(const char *mode) { + // zero cases + run_size(mode, 0, 0); + run_size(mode, 0, 4); + run_size(mode, 4, 0); + // small block sizes + for (int n = 1; n <= 16; ++n) { + run_size(mode, n, n); + } + // other cases + run_size(mode, 1024, 1); + run_size(mode, 1024, 13); + run_size(mode, 1024, 124); + } + + template + static void run_size(const char *mode, int N, int M) { + using A_layout = typename ViewTypeA::array_layout; + using x_layout = typename ViewTypeX::array_layout; + using y_layout = typename ViewTypeY::array_layout; + static_assert(!std::is_same::value, ""); + static_assert(!std::is_same::value, ""); + static_assert(!std::is_same::value, ""); + + const auto trans = mode[0]; + const bool transposed = trans == (char)'T' || trans == (char)'C'; + const auto Nt = transposed ? M : N; + const auto Mt = transposed ? N : M; + + // 1. run on regular (non-strided) views + ViewTypeA A1("A1", Nt, Mt); + ViewTypeX x1("X1", M); + ViewTypeY y1("Y1", N); + run_views(trans, A1, x1, y1); + + // 2. run on strided subviews (enforced by adding extra rank on both sides) + // Note: strided views are not supported by MKL routines + if (!std::is_same::value) { + typedef Kokkos::View BaseTypeA; + typedef Kokkos::View BaseTypeX; + typedef Kokkos::View BaseTypeY; + + BaseTypeA b_A("A", 2, Nt, Mt, 2); + BaseTypeX b_x("X", 2, M, 2); + BaseTypeY b_y("Y", 2, N, 2); + auto A = Kokkos::subview(b_A, 0, Kokkos::ALL(), Kokkos::ALL(), 0); + auto x = Kokkos::subview(b_x, 0, Kokkos::ALL(), 0); + auto y = Kokkos::subview(b_y, 0, Kokkos::ALL(), 0); + + // make sure it's actually LayoutStride there + static_assert(std::is_same::value, + ""); + static_assert(std::is_same::value, + ""); + static_assert(std::is_same::value, + ""); + run_views(trans, A, x, y); + } + } + + template + static void run_views(const char trans, ViewTypeA A, ViewTypeX x, + ViewTypeY y) { + Kokkos::TeamPolicy teams(1, 1); // just run on device + fill_inputs(A, x, y); + ScalarType alpha = 3; // TODO: test also with zero alpha/beta ? + ScalarType beta = 5; + + // get reference results + Kokkos::View y_ref("Y_ref", y.extent(0)); + Kokkos::deep_copy(y_ref, y); + RefGEMVOp gemv_ref( + trans, alpha, A, x, beta, y_ref); + Kokkos::parallel_for(teams, gemv_ref); + + // 1. check non-consts + run_case(trans, alpha, A, x, beta, y, y_ref); + + // 2. check const x + typename ViewTypeX::const_type c_x = x; + run_case(trans, alpha, A, c_x, beta, y, y_ref); + + // 3. check const A and x + typename ViewTypeA::const_type c_A = A; + run_case(trans, alpha, c_A, c_x, beta, y, y_ref); + } + + template + static void run_case(const char trans, ScalarType alpha, ViewTypeA A, + ViewTypeX x, ScalarType beta, ViewTypeY y, + ViewTypeYRef y_ref) { + // run on original y view (not to alter the test) + // but backup it and restore, so it can be reused + Kokkos::View y_backup("Y2", y.extent(0)); + Kokkos::deep_copy(y_backup, y); + + // fetch GEMV functor from the factory + using op_type = + typename GemvFunc::template functor_type; + + op_type gemv_op(trans, alpha, A, x, beta, y); + Kokkos::parallel_for(Kokkos::TeamPolicy(1, 1), gemv_op); + + const double eps = epsilon(ScalarY{}); + EXPECT_NEAR_KK_REL_1DVIEW(y, y_ref, eps); + Kokkos::deep_copy(y, y_backup); + } + + //----- utilities -----// + + // GEMV tolerance for scalar types + static double epsilon(float) { return 2 * 1e-5; } + static double epsilon(double) { return 1e-7; } + static double epsilon(int) { return 0; } + // tolerance for derived types + template + static double epsilon(Kokkos::complex) { + return epsilon(ScalarType{}); + } + template + static double epsilon(simd_vector) { + return epsilon(ScalarType{}); + } + + template + static void fill_inputs(ViewTypeA A, ViewTypeX x, ViewTypeY y) { + using exec_space = typename Device::execution_space; + Kokkos::Random_XorShift64_Pool rand_pool(13718); + fill_random_view(A, rand_pool); + fill_random_view(x, rand_pool); + fill_random_view(y, rand_pool); + } +}; + +} // namespace Test + +#define TEST_CASE4(PREFIX, FACTORY, NAME, SCALAR_A, SCALAR_X, SCALAR_Y, \ + SCALAR_COEF) \ + using PREFIX##_##NAME##_gemv_test = \ + ::Test::GEMVTest<::Test::FACTORY, SCALAR_A, SCALAR_X, SCALAR_Y, \ + TestExecSpace, SCALAR_COEF>; \ + TEST_F(TestCategory, PREFIX##_gemv_nt_##NAME) { \ + PREFIX##_##NAME##_gemv_test::run("N"); \ + } \ + TEST_F(TestCategory, PREFIX##_gemv_t_##NAME) { \ + PREFIX##_##NAME##_gemv_test::run("T"); \ + } \ + TEST_F(TestCategory, PREFIX##_gemv_ct_##NAME) { \ + PREFIX##_##NAME##_gemv_test::run("C"); \ + } + +#define TEST_CASE2(PREFIX, FACTORY, NAME, SCALAR, SCALAR_COEF) \ + TEST_CASE4(PREFIX, FACTORY, NAME, SCALAR, SCALAR, SCALAR, SCALAR_COEF) +#define TEST_CASE(PREFIX, FACTORY, NAME, SCALAR) \ + TEST_CASE2(PREFIX, FACTORY, NAME, SCALAR, SCALAR) + +#endif // TEST_BLAS2_GEMV_UTIL_HPP \ No newline at end of file diff --git a/unit_test/blas/Test_Blas2_serial_gemv.hpp b/unit_test/blas/Test_Blas2_serial_gemv.hpp index e97f8e71a9..99ad6da6a8 100644 --- a/unit_test/blas/Test_Blas2_serial_gemv.hpp +++ b/unit_test/blas/Test_Blas2_serial_gemv.hpp @@ -1,388 +1,100 @@ -#include -#include -#include +#include // for ETI test guards +// Note: include serial gemv before util so it knows if CompactMKL is available #include -#include -#include -#include +#include namespace Test { -template ::value> -using simd_vector = - KokkosBatched::Vector, length>; - -// Note: vanillaGEMV is called on device here - alternatively one can move -// _strided_ data using safe_device_to_host_deep_copy() etc. -template -struct RefGEMVOp { - RefGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, - ScalarType beta_, YType y_) - : trans(trans_), alpha(alpha_), beta(beta_), A(A_), x(x_), y(y_) {} - - template - KOKKOS_INLINE_FUNCTION void operator()( - const TeamMember & /* member */) const { - vanillaGEMV(trans, alpha, A, x, beta, y); - } - - private: - // parameters - char trans; - ScalarType alpha; - ScalarType beta; - // data - AType A; - XType x; - YType y; -}; - template -struct SerialGEMVOp { - SerialGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, - ScalarType beta_, YType y_) - : trans(trans_), alpha(alpha_), beta(beta_), A(A_), x(x_), y(y_) {} - - template - KOKKOS_INLINE_FUNCTION void operator()( - const TeamMember & /* member */) const { - KokkosBlas::Experimental::gemv(trans, alpha, A, x, beta, y); - } - - private: - // parameters - char trans; - ScalarType alpha; - ScalarType beta; - // data - AType A; - XType x; - YType y; -}; - -// fill regular view with random values -template -typename std::enable_if::value>::type -fill_random_view(ViewType A, PoolType &rand_pool, - const ScalarType max_val = 10.0) { - Kokkos::fill_random(A, rand_pool, max_val); - Kokkos::fence(); -} - -// fill rank-1 view of SIMD vectors with random values -template -void fill_random_view( - Kokkos::View< - KokkosBatched::Vector, VecLength> *, - Layout, Props...> - x, - PoolType &rand_pool, const ValueType max_val = 10.0) { - // the view can be strided and have Vector values, so randoms - // are generated in a plain, linear view first and then copied - using device_type = typename decltype(x)::device_type; - Kokkos::View rnd("random_vals", - x.extent(0) * VecLength); - Kokkos::fill_random(rnd, rand_pool, max_val); - using size_type = decltype(x.extent(0)); - for (size_type i = 0; i < x.extent(0); ++i) { - x(i).loadUnaligned(&rnd(i * VecLength)); - } +KK_DEFINE_BLAS2_GEMV_TEST_OP_CLASS(SerialGEMVOp) +template +KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& /* member */) const { + KokkosBlas::Experimental::gemv(params::trans, params::alpha, + params::A, params::x, params::beta, + params::y); } +KK_END_BLAS2_GEMV_TEST_OP_CLASS -// fill rank-2 view of SIMD vectors with random values -template -static void fill_random_view( - Kokkos::View< - KokkosBatched::Vector, VecLength> **, - Layout, Props...> - A, - PoolType &rand_pool, const ValueType max_val = 10.0) { - // the view can be strided and have Vector values, so randoms - // are generated in a plain, linear view first and then copied - using device_type = typename decltype(A)::device_type; - Kokkos::View rnd( - "random_vals", A.extent(0) * A.extent(1) * VecLength); - Kokkos::fill_random(rnd, rand_pool, max_val); - using size_type = decltype(A.extent(0)); - size_type idx = 0; - for (size_type i = 0; i < A.extent(0); ++i) { - for (size_type j = 0; j < A.extent(1); ++j) { - A(i, j).loadUnaligned(&rnd(idx)); - idx += VecLength; - } - } -} - -// -template -struct SerialGEMVTestBase { - // ScalarCoef==void default behavior is to derive alpha/beta scalar types - // from A and X scalar types - using ScalarType = typename std::conditional< - !std::is_void::value, ScalarCoef, - typename std::common_type::type>::type; - - template - static void run_layouts(const char *mode) { - // Note: all layouts listed here are subview'ed to test Kokkos::LayoutStride -#ifdef KOKKOSKERNELS_TEST_LAYOUTLEFT - run_view_types(mode); -#endif -#ifdef KOKKOSKERNELS_TEST_LAYOUTRIGHT - run_view_types(mode); -#endif -#if defined(KOKKOSKERNELS_TEST_LAYOUTLEFT) && \ - defined(KOKKOSKERNELS_TEST_LAYOUTRIGHT) - using A_t = typename Kokkos::View; - using x_t = typename Kokkos::View; - using y_t = typename Kokkos::View; - run_sizes(mode); -#endif - } - - template - static void run_view_types(const char *mode) { - typedef Kokkos::View view_type_A; - typedef Kokkos::View view_type_x; - typedef Kokkos::View view_type_y; - run_sizes(mode); - } - - template - static void run_sizes(const char *mode) { - // zero cases - run_size(mode, 0, 0); - run_size(mode, 0, 4); - run_size(mode, 4, 0); - // small block sizes - for (int n = 1; n <= 16; ++n) { - run_size(mode, n, n); - } - // other cases - run_size(mode, 1024, 1); - run_size(mode, 1024, 13); - run_size(mode, 1024, 124); - } - - template - static void run_size(const char *mode, int N, int M) { - using A_layout = typename ViewTypeA::array_layout; - using x_layout = typename ViewTypeX::array_layout; - using y_layout = typename ViewTypeY::array_layout; - static_assert(!std::is_same::value, ""); - static_assert(!std::is_same::value, ""); - static_assert(!std::is_same::value, ""); - - const auto trans = mode[0]; - const bool transposed = trans == (char)'T' || trans == (char)'C'; - const auto Nt = transposed ? M : N; - const auto Mt = transposed ? N : M; - - // 1. run on regular (non-strided) views - ViewTypeA A1("A1", Nt, Mt); - ViewTypeX x1("X1", M); - ViewTypeY y1("Y1", N); - run_views(trans, A1, x1, y1); - - // 2. run on strided subviews (enforced by adding extra rank on both sides) - // Note: strided views are not supported by MKL routines - if (!std::is_same::value) { - typedef Kokkos::View BaseTypeA; - typedef Kokkos::View BaseTypeX; - typedef Kokkos::View BaseTypeY; - - BaseTypeA b_A("A", 2, Nt, Mt, 2); - BaseTypeX b_x("X", 2, M, 2); - BaseTypeY b_y("Y", 2, N, 2); - auto A = Kokkos::subview(b_A, 0, Kokkos::ALL(), Kokkos::ALL(), 0); - auto x = Kokkos::subview(b_x, 0, Kokkos::ALL(), 0); - auto y = Kokkos::subview(b_y, 0, Kokkos::ALL(), 0); - - // make sure it's actually LayoutStride there - static_assert(std::is_same::value, - ""); - static_assert(std::is_same::value, - ""); - static_assert(std::is_same::value, - ""); - run_views(trans, A, x, y); - } - } - - template - static void run_views(const char trans, ViewTypeA A, ViewTypeX x, - ViewTypeY y) { - Kokkos::TeamPolicy teams(1, 1); // just run on device - fill_inputs(A, x, y); - ScalarType alpha = 3; // TODO: test also with zero alpha/beta ? - ScalarType beta = 5; - - // get reference results - Kokkos::View y_ref("Y_ref", y.extent(0)); - Kokkos::deep_copy(y_ref, y); - RefGEMVOp gemv_ref( - trans, alpha, A, x, beta, y_ref); - Kokkos::parallel_for(teams, gemv_ref); - - // 1. check non-consts - run_case(trans, alpha, A, x, beta, y, y_ref); - - // 2. check const x - typename ViewTypeX::const_type c_x = x; - run_case(trans, alpha, A, c_x, beta, y, y_ref); - - // 3. check const A and x - typename ViewTypeA::const_type c_A = A; - run_case(trans, alpha, c_A, c_x, beta, y, y_ref); - } - +struct SerialGemvFactory { template - static void run_case(const char trans, ScalarType alpha, ViewTypeA A, - ViewTypeX x, ScalarType beta, ViewTypeY y, - ViewTypeYRef y_ref) { - // run on original y view (not to alter the test) - // but backup it and restore, so it can be reused - Kokkos::View y_backup("Y2", y.extent(0)); - Kokkos::deep_copy(y_backup, y); + class Device, class ScalarType> + using functor_type = + SerialGEMVOp; - SerialGEMVOp gemv_op( - trans, alpha, A, x, beta, y); - Kokkos::parallel_for(Kokkos::TeamPolicy(1, 1), gemv_op); - - const double eps = epsilon(ScalarY{}); - EXPECT_NEAR_KK_REL_1DVIEW(y, y_ref, eps); - Kokkos::deep_copy(y, y_backup); - } - - //----- utilities -----// - - // GEMV tolerance for scalar types - static double epsilon(float) { return 2 * 1e-5; } - static double epsilon(double) { return 1e-7; } - static double epsilon(int) { return 0; } - // tolerance for derived types - template - static double epsilon(Kokkos::complex) { - return epsilon(ScalarType{}); - } - template - static double epsilon(simd_vector) { - return epsilon(ScalarType{}); - } - - template - static void fill_inputs(ViewTypeA A, ViewTypeX x, ViewTypeY y) { - using exec_space = typename Device::execution_space; - Kokkos::Random_XorShift64_Pool rand_pool(13718); - fill_random_view(A, rand_pool); - fill_random_view(x, rand_pool); - fill_random_view(y, rand_pool); - } -}; - -template -struct SerialGEMVTest { - static void run(const char *mode) { - using base = - SerialGEMVTestBase; - base::template run_layouts(mode); - base::template run_layouts(mode); - } -}; - -// Special handling of Vector> (instead of plain scalars) -// Note: MKL compact routines don't allow mixed scalar types -template -struct SerialGEMVTest, - simd_vector, - simd_vector, Device, ScalarCoef> { - static void run(const char *mode) { - using vector_type = simd_vector; - using base = SerialGEMVTestBase; - // run all usual, non-vector tests - base::template run_layouts(mode); - base::template run_layouts(mode); - // run vector tests + using algorithms = std::tuple(mode); + , + KokkosBlas::Algo::Gemv::CompactMKL #endif - } + >; + + // block testing of CompackMKL on non-vector scalars + // (they are not supported by the implementation) + template + static constexpr bool allow_algorithm = + !std::is_same::value || + (KokkosBatched::is_vector::value && + KokkosBatched::is_vector::value && + KokkosBatched::is_vector::value); }; } // namespace Test -#define TEST_CASE4(NAME, SCALAR_A, SCALAR_X, SCALAR_Y, SCALAR_COEF) \ - TEST_F(TestCategory, serial_gemv_nt_##NAME) { \ - ::Test::SerialGEMVTest::run("N"); \ - } \ - TEST_F(TestCategory, serial_gemv_t_##NAME) { \ - ::Test::SerialGEMVTest::run("T"); \ - } \ - TEST_F(TestCategory, serial_gemv_ct_##NAME) { \ - ::Test::SerialGEMVTest::run("C"); \ - } - -#define TEST_CASE2(NAME, SCALAR, SCALAR_COEF) \ - TEST_CASE4(NAME, SCALAR, SCALAR, SCALAR, SCALAR_COEF) -#define TEST_CASE(NAME, SCALAR) TEST_CASE2(NAME, SCALAR, SCALAR) +#define TEST_SERIAL_CASE4(N, A, X, Y, SC) \ + TEST_CASE4(serial, SerialGemvFactory, N, A, X, Y, SC) +#define TEST_SERIAL_CASE2(N, S, SC) \ + TEST_CASE2(serial, SerialGemvFactory, N, S, SC) +#define TEST_SERIAL_CASE(N, S) TEST_CASE(serial, SerialGemvFactory, N, S) #ifdef KOKKOSKERNELS_TEST_FLOAT -TEST_CASE(float, float) +TEST_SERIAL_CASE(float, float) // MKL vector types #ifdef __KOKKOSBLAS_ENABLE_INTEL_MKL_COMPACT__ using simd_float_sse = ::Test::simd_vector; using simd_float_avx = ::Test::simd_vector; using simd_float_avx512 = ::Test::simd_vector; -TEST_CASE2(mkl_float_sse, simd_float_sse, float) -TEST_CASE2(mkl_float_avx, simd_float_avx, float) -TEST_CASE2(mkl_float_avx512, simd_float_avx512, float) +TEST_SERIAL_CASE2(mkl_float_sse, simd_float_sse, float) +TEST_SERIAL_CASE2(mkl_float_avx, simd_float_avx, float) +TEST_SERIAL_CASE2(mkl_float_avx512, simd_float_avx512, float) #endif #endif #ifdef KOKKOSKERNELS_TEST_DOUBLE -TEST_CASE(double, double) +TEST_SERIAL_CASE(double, double) // MKL vector types #ifdef __KOKKOSBLAS_ENABLE_INTEL_MKL_COMPACT__ using simd_double_sse = ::Test::simd_vector; using simd_double_avx = ::Test::simd_vector; using simd_double_avx512 = ::Test::simd_vector; -TEST_CASE2(mkl_double_sse, simd_double_sse, double) -TEST_CASE2(mkl_double_avx, simd_double_avx, double) -TEST_CASE2(mkl_double_avx512, simd_double_avx512, double) +TEST_SERIAL_CASE2(mkl_double_sse, simd_double_sse, double) +TEST_SERIAL_CASE2(mkl_double_avx, simd_double_avx, double) +TEST_SERIAL_CASE2(mkl_double_avx512, simd_double_avx512, double) #endif #endif #ifdef KOKKOSKERNELS_TEST_COMPLEX_DOUBLE -TEST_CASE(complex_double, Kokkos::complex) +TEST_SERIAL_CASE(complex_double, Kokkos::complex) #endif #ifdef KOKKOSKERNELS_TEST_COMPLEX_FLOAT -TEST_CASE(complex_float, Kokkos::complex) +TEST_SERIAL_CASE(complex_float, Kokkos::complex) #endif #ifdef KOKKOSKERNELS_TEST_INT -TEST_CASE(int, int) +TEST_SERIAL_CASE(int, int) #endif #ifdef KOKKOSKERNELS_TEST_ALL_TYPES // test mixed scalar types (void -> default alpha/beta) -TEST_CASE4(mixed, double, int, float, void) +TEST_SERIAL_CASE4(mixed, double, int, float, void) // test arbitrary double alpha/beta with complex values -TEST_CASE2(alphabeta, Kokkos::complex, double) +TEST_SERIAL_CASE2(alphabeta, Kokkos::complex, double) #endif + +#undef TEST_SERIAL_CASE4 +#undef TEST_SERIAL_CASE2 +#undef TEST_SERIAL_CASE diff --git a/unit_test/blas/Test_Blas2_team_gemv.hpp b/unit_test/blas/Test_Blas2_team_gemv.hpp index dc2d158a4c..580ae8fa23 100644 --- a/unit_test/blas/Test_Blas2_team_gemv.hpp +++ b/unit_test/blas/Test_Blas2_team_gemv.hpp @@ -1,274 +1,69 @@ -// Note: Luc Berger-Vergiat 04/14/21 -// This tests uses KOKKOS_LAMBDA so we need -// to make sure that these are enabled in -// the CUDA backend before including this test. -#if !defined(TEST_CUDA_BLAS_CPP) || defined(KOKKOS_ENABLE_CUDA_LAMBDA) - -#include -#include -#include +#include // for test/inst guards +// Note: include serial gemv before util so it knows if CompactMKL is available +#include #include -#include -#include namespace Test { -template -void impl_test_team_gemv(const char *mode, int N, int M) { - typedef Kokkos::TeamPolicy team_policy; - typedef typename team_policy::member_type team_member; - - // Launch K teams of the maximum number of threads per team - int K = 4; - const team_policy policy(K, Kokkos::AUTO); - const int team_data_siz = (N % K == 0) ? (N / K) : (N / K + 1); - - typedef typename ViewTypeA::value_type ScalarA; - typedef typename ViewTypeX::value_type ScalarX; - typedef typename ViewTypeY::value_type ScalarY; - - typedef multivector_layout_adapter vfA_type; - typedef Kokkos::View< - ScalarX * [2], - typename std::conditional::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type, - Device> - BaseTypeX; - typedef Kokkos::View< - ScalarY * [2], - typename std::conditional::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type, - Device> - BaseTypeY; - - ScalarA a = 3; - ScalarX b = 5; - double eps = std::is_same::value ? 2 * 1e-5 : 1e-7; - - typename vfA_type::BaseType b_A("A", N, M); - BaseTypeX b_x("X", M); - BaseTypeY b_y("Y", N); - BaseTypeY b_org_y("Org_Y", N); - - ViewTypeA A = vfA_type::view(b_A); - ViewTypeX x = Kokkos::subview(b_x, Kokkos::ALL(), 0); - ViewTypeY y = Kokkos::subview(b_y, Kokkos::ALL(), 0); - typename ViewTypeX::const_type c_x = x; - typename ViewTypeA::const_type c_A = A; - - typedef multivector_layout_adapter h_vfA_type; - - typename h_vfA_type::BaseType h_b_A = Kokkos::create_mirror_view(b_A); - typename BaseTypeX::HostMirror h_b_x = Kokkos::create_mirror_view(b_x); - typename BaseTypeY::HostMirror h_b_y = Kokkos::create_mirror_view(b_y); - - typename ViewTypeA::HostMirror h_A = h_vfA_type::view(h_b_A); - typename ViewTypeX::HostMirror h_x = Kokkos::subview(h_b_x, Kokkos::ALL(), 0); - typename ViewTypeY::HostMirror h_y = Kokkos::subview(h_b_y, Kokkos::ALL(), 0); - - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); - - Kokkos::fill_random(b_x, rand_pool, ScalarX(10)); - Kokkos::fill_random(b_y, rand_pool, ScalarY(10)); - Kokkos::fill_random(b_A, rand_pool, ScalarA(10)); - - Kokkos::deep_copy(b_org_y, b_y); - - Kokkos::deep_copy(h_b_x, b_x); - Kokkos::deep_copy(h_b_y, b_y); - Kokkos::deep_copy(h_b_A, b_A); - - ScalarY expected_result = 0; - - if (mode[0] == 'N') { - for (int i = 0; i < N; i++) { - ScalarY y_i = ScalarY(); - for (int j = 0; j < M; j++) { - y_i += h_A(i, j) * h_x(j); - } - expected_result += (b * h_y(i) + a * y_i) * (b * h_y(i) + a * y_i); - } - } - - char trans = mode[0]; - // KokkosBlas::gemv(mode,a,A,x,b,y); - Kokkos::parallel_for( - "KokkosBlas::Test::TeamGemm", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { - const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::gemv( - teamMember, trans, a, - Kokkos::subview( - A, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < K - 1) ? (teamId + 1) * team_data_siz : N), - Kokkos::ALL()), - x, b, - Kokkos::subview( - y, Kokkos::make_pair( - teamId * team_data_siz, - (teamId < K - 1) ? (teamId + 1) * team_data_siz : N))); - }); - - ScalarY nonconst_nonconst_result = KokkosBlas::dot(y, y); - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, - eps * expected_result); - - Kokkos::deep_copy(b_y, b_org_y); - - // KokkosBlas::gemv(mode,a,A,c_x,b,y); - Kokkos::parallel_for( - "KokkosBlas::Test::TeamGemm", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { - const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::gemv( - teamMember, trans, a, - Kokkos::subview( - A, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < K - 1) ? (teamId + 1) * team_data_siz : N), - Kokkos::ALL()), - c_x, b, - Kokkos::subview( - y, Kokkos::make_pair( - teamId * team_data_siz, - (teamId < K - 1) ? (teamId + 1) * team_data_siz : N))); - }); +template +KK_DEFINE_BLAS2_GEMV_TEST_OP_CLASS(TeamGEMVOp) +template +KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { + KokkosBlas::Experimental::gemv(member, params::trans, params::alpha, + params::A, params::x, params::beta, + params::y); +} +KK_END_BLAS2_GEMV_TEST_OP_CLASS - ScalarY const_nonconst_result = KokkosBlas::dot(y, y); - EXPECT_NEAR_KK(const_nonconst_result, expected_result, eps * expected_result); +struct TeamGemvFactory { + template + using functor_type = + TeamGEMVOp; - Kokkos::deep_copy(b_y, b_org_y); + using algorithms = std::tuple; - // KokkosBlas::gemv(mode,a,c_A,c_x,b,y); - Kokkos::parallel_for( - "KokkosBlas::Test::TeamGemm", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { - const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::gemv( - teamMember, trans, a, - Kokkos::subview( - c_A, - Kokkos::make_pair( - teamId * team_data_siz, - (teamId < K - 1) ? (teamId + 1) * team_data_siz : N), - Kokkos::ALL()), - c_x, b, - Kokkos::subview( - y, Kokkos::make_pair( - teamId * team_data_siz, - (teamId < K - 1) ? (teamId + 1) * team_data_siz : N))); - }); + template + static constexpr bool allow_algorithm = true; +}; - ScalarY const_const_result = KokkosBlas::dot(y, y); - EXPECT_NEAR_KK(const_const_result, expected_result, eps * expected_result); -} } // namespace Test -template -int test_team_gemv(const char *mode) { -#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - typedef Kokkos::View view_type_a_ll; - typedef Kokkos::View view_type_b_ll; - typedef Kokkos::View view_type_c_ll; - Test::impl_test_team_gemv(mode, 0, 1024); - Test::impl_test_team_gemv(mode, 13, 1024); - Test::impl_test_team_gemv(mode, 124, 124); - // Test::impl_test_team_gemv(mode,132231,1024); -#endif - -#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - typedef Kokkos::View view_type_a_lr; - typedef Kokkos::View view_type_b_lr; - typedef Kokkos::View view_type_c_lr; - Test::impl_test_team_gemv(mode, 0, 1024); - Test::impl_test_team_gemv(mode, 13, 1024); - Test::impl_test_team_gemv(mode, 124, 124); - // Test::impl_test_team_gemv(mode,132231,1024); -#endif +#define TEST_TEAM_CASE4(N, A, X, Y, SC) \ + TEST_CASE4(team, TeamGemvFactory, N, A, X, Y, SC) +#define TEST_TEAM_CASE2(N, S, SC) TEST_CASE2(team, TeamGemvFactory, N, S, SC) +#define TEST_TEAM_CASE(N, S) TEST_CASE(team, TeamGemvFactory, N, S) -#if defined(KOKKOSKERNELS_INST_LAYOUTSTRIDE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - typedef Kokkos::View view_type_a_ls; - typedef Kokkos::View view_type_b_ls; - typedef Kokkos::View view_type_c_ls; - Test::impl_test_team_gemv(mode, 0, 1024); - Test::impl_test_team_gemv(mode, 13, 1024); - Test::impl_test_team_gemv(mode, 124, 124); - // Test::impl_test_team_gemv(mode,132231,1024); +#ifdef KOKKOSKERNELS_TEST_FLOAT +TEST_TEAM_CASE(float, float) #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) - Test::impl_test_team_gemv(mode, 124, 124); - Test::impl_test_team_gemv(mode, 124, 124); +#ifdef KOKKOSKERNELS_TEST_DOUBLE +TEST_TEAM_CASE(double, double) #endif - return 1; -} - -#if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_gemv_float) { - test_team_gemv("N"); -} +#ifdef KOKKOSKERNELS_TEST_COMPLEX_DOUBLE +TEST_TEAM_CASE(complex_double, Kokkos::complex) #endif -#if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_gemv_double) { - test_team_gemv("N"); -} +#ifdef KOKKOSKERNELS_TEST_COMPLEX_FLOAT +TEST_TEAM_CASE(complex_float, Kokkos::complex) #endif -#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_gemv_complex_double) { - test_team_gemv, Kokkos::complex, - Kokkos::complex, TestExecSpace>("N"); -} +#ifdef KOKKOSKERNELS_TEST_INT +TEST_TEAM_CASE(int, int) #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, team_gemv_int) { - test_team_gemv("N"); -} -#endif +#ifdef KOKKOSKERNELS_TEST_ALL_TYPES +// test mixed scalar types (void -> default alpha/beta) +TEST_TEAM_CASE4(mixed, double, int, float, void) -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) -TEST_F(TestCategory, team_gemv_double_int) { - test_team_gemv("N"); -} +// test arbitrary double alpha/beta with complex values +TEST_TEAM_CASE2(alphabeta, Kokkos::complex, double) #endif -#endif // Check for lambda availability on CUDA backend +#undef TEST_TEAM_CASE4 +#undef TEST_TEAM_CASE2 +#undef TEST_TEAM_CASE diff --git a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp index 19f32014a1..12dd066198 100644 --- a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp +++ b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp @@ -1,259 +1,71 @@ -// Note: Luc Berger-Vergiat 04/14/21 -// This tests uses KOKKOS_LAMBDA so we need -// to make sure that these are enabled in -// the CUDA backend before including this test. -#if !defined(TEST_CUDA_BLAS_CPP) || defined(KOKKOS_ENABLE_CUDA_LAMBDA) - -#include -#include -#include -#include -#include -#include +#include // for test/inst guards +// Note: include serial gemv before util so it knows if CompactMKL is available +#include +#include namespace Test { -template -void impl_test_teamvector_gemv(const char *mode, int N, int M) { - typedef Kokkos::TeamPolicy team_policy; - typedef typename team_policy::member_type team_member; - - // Launch K teams of the maximum number of threads per team - int K = 4; - const team_policy policy(K, Kokkos::AUTO); - const int team_data_size = (N % K == 0) ? (N / K) : (N / K + 1); - - typedef typename ViewTypeA::value_type ScalarA; - typedef typename ViewTypeX::value_type ScalarX; - typedef typename ViewTypeY::value_type ScalarY; - - typedef multivector_layout_adapter vfA_type; - typedef Kokkos::View< - ScalarX * [2], - typename std::conditional::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type, - Device> - BaseTypeX; - typedef Kokkos::View< - ScalarY * [2], - typename std::conditional::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type, - Device> - BaseTypeY; - - ScalarA a = 3; - ScalarX b = 5; - double eps = std::is_same::value ? 2 * 1e-5 : 1e-7; - - typename vfA_type::BaseType b_A("A", N, M); - BaseTypeX b_x("X", M); - BaseTypeY b_y("Y", N); - BaseTypeY b_org_y("Org_Y", N); - - ViewTypeA A = vfA_type::view(b_A); - ViewTypeX x = Kokkos::subview(b_x, Kokkos::ALL(), 0); - ViewTypeY y = Kokkos::subview(b_y, Kokkos::ALL(), 0); - typename ViewTypeX::const_type c_x = x; - typename ViewTypeA::const_type c_A = A; - - typedef multivector_layout_adapter h_vfA_type; - - typename h_vfA_type::BaseType h_b_A = Kokkos::create_mirror_view(b_A); - typename BaseTypeX::HostMirror h_b_x = Kokkos::create_mirror_view(b_x); - typename BaseTypeY::HostMirror h_b_y = Kokkos::create_mirror_view(b_y); - - typename ViewTypeA::HostMirror h_A = h_vfA_type::view(h_b_A); - typename ViewTypeX::HostMirror h_x = Kokkos::subview(h_b_x, Kokkos::ALL(), 0); - typename ViewTypeY::HostMirror h_y = Kokkos::subview(h_b_y, Kokkos::ALL(), 0); - - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); - - Kokkos::fill_random(b_x, rand_pool, ScalarX(10)); - Kokkos::fill_random(b_y, rand_pool, ScalarY(10)); - Kokkos::fill_random(b_A, rand_pool, ScalarA(10)); - - Kokkos::deep_copy(b_org_y, b_y); - - Kokkos::deep_copy(h_b_x, b_x); - Kokkos::deep_copy(h_b_y, b_y); - Kokkos::deep_copy(h_b_A, b_A); - - ScalarY expected_result = 0; - if (mode[0] != 'N' && mode[0] != 'T' && mode[0] != 'C') { - throw std::runtime_error("incorrect matrix mode letter !"); - } - typedef Kokkos::Details::ArithTraits ATV; - for (int i = 0; i < N; i++) { - ScalarY y_i = ScalarY(); - for (int j = 0; j < M; j++) { - const auto a_val = mode[0] == 'C' - ? ATV::conj(h_A(j, i)) - : (mode[0] == 'T' ? h_A(j, i) : h_A(i, j)); - y_i += a_val * h_x(j); - } - expected_result += (b * h_y(i) + a * y_i) * (b * h_y(i) + a * y_i); - } - - char trans = mode[0]; - - const auto team_rows = [&](auto teamId) { - return Kokkos::make_pair( - teamId * team_data_size, - (teamId < K - 1) ? (teamId + 1) * team_data_size : N); - }; - // KokkosBlas::gemv(mode,a,A,x,b,y); - Kokkos::parallel_for( - "KokkosBlas::Test::TeamVectorGemm", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { - const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::team_vector_gemv( - teamMember, trans, a, - Kokkos::subview(A, team_rows(teamId), Kokkos::ALL()), x, b, - Kokkos::subview(y, team_rows(teamId))); - }); - - ScalarY nonconst_nonconst_result = KokkosBlas::dot(y, y); - EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, - eps * expected_result); - - Kokkos::deep_copy(b_y, b_org_y); - - // KokkosBlas::gemv(mode,a,A,c_x,b,y); - Kokkos::parallel_for( - "KokkosBlas::Test::TeamVectorGemm", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { - const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::team_vector_gemv( - teamMember, trans, a, - Kokkos::subview(A, team_rows(teamId), Kokkos::ALL()), c_x, b, - Kokkos::subview(y, team_rows(teamId))); - }); +template +KK_DEFINE_BLAS2_GEMV_TEST_OP_CLASS(TeamVectorGEMVOp) +template +KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { + KokkosBlas::Experimental::team_vector_gemv( + member, params::trans, params::alpha, params::A, params::x, params::beta, + params::y); +} +KK_END_BLAS2_GEMV_TEST_OP_CLASS - ScalarY const_nonconst_result = KokkosBlas::dot(y, y); - EXPECT_NEAR_KK(const_nonconst_result, expected_result, eps * expected_result); +struct TeamVectorGemvFactory { + template + using functor_type = + TeamVectorGEMVOp; - Kokkos::deep_copy(b_y, b_org_y); + // no Blocked implementation + using algorithms = std::tuple; - // KokkosBlas::gemv(mode,a,c_A,c_x,b,y); - Kokkos::parallel_for( - "KokkosBlas::Test::TeamVectorGemm", policy, - KOKKOS_LAMBDA(const team_member &teamMember) { - const int teamId = teamMember.league_rank(); - KokkosBlas::Experimental::team_vector_gemv( - teamMember, trans, a, - Kokkos::subview(c_A, team_rows(teamId), Kokkos::ALL()), c_x, b, - Kokkos::subview(y, team_rows(teamId))); - }); + template + static constexpr bool allow_algorithm = true; +}; - ScalarY const_const_result = KokkosBlas::dot(y, y); - EXPECT_NEAR_KK(const_const_result, expected_result, eps * expected_result); -} } // namespace Test -template -int test_teamvector_gemv(const char *mode) { -#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - typedef Kokkos::View view_type_a_ll; - typedef Kokkos::View view_type_b_ll; - typedef Kokkos::View view_type_c_ll; - Test::impl_test_teamvector_gemv(mode, 0, 1024); - Test::impl_test_teamvector_gemv(mode, 13, 1024); - Test::impl_test_teamvector_gemv(mode, 124, 124); - // Test::impl_test_teamvector_gemv(mode,132231,1024); -#endif +#define TEST_TEAMVECTOR_CASE4(N, A, X, Y, SC) \ + TEST_CASE4(teamvector, TeamVectorGemvFactory, N, A, X, Y, SC) +#define TEST_TEAMVECTOR_CASE2(N, S, SC) \ + TEST_CASE2(teamvector, TeamVectorGemvFactory, N, S, SC) +#define TEST_TEAMVECTOR_CASE(N, S) \ + TEST_CASE(teamvector, TeamVectorGemvFactory, N, S) -#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - typedef Kokkos::View view_type_a_lr; - typedef Kokkos::View view_type_b_lr; - typedef Kokkos::View view_type_c_lr; - Test::impl_test_teamvector_gemv(mode, 0, 1024); - Test::impl_test_teamvector_gemv(mode, 13, 1024); - Test::impl_test_teamvector_gemv(mode, 124, 124); - // Test::impl_test_teamvector_gemv(mode,132231,1024); +#ifdef KOKKOSKERNELS_TEST_FLOAT +TEST_TEAMVECTOR_CASE(float, float) #endif -#if defined(KOKKOSKERNELS_INST_LAYOUTSTRIDE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) - typedef Kokkos::View view_type_a_ls; - typedef Kokkos::View view_type_b_ls; - typedef Kokkos::View view_type_c_ls; - Test::impl_test_teamvector_gemv(mode, 0, 1024); - Test::impl_test_teamvector_gemv(mode, 13, 1024); - Test::impl_test_teamvector_gemv(mode, 124, 124); - // Test::impl_test_teamvector_gemv(mode,132231,1024); +#ifdef KOKKOSKERNELS_TEST_DOUBLE +TEST_TEAMVECTOR_CASE(double, double) #endif -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) - Test::impl_test_teamvector_gemv(mode, 124, 124); - Test::impl_test_teamvector_gemv(mode, 124, 124); +#ifdef KOKKOSKERNELS_TEST_COMPLEX_DOUBLE +TEST_TEAMVECTOR_CASE(complex_double, Kokkos::complex) #endif - return 1; -} - -#if defined(KOKKOSKERNELS_INST_FLOAT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, teamvector_gemv_float) { - test_teamvector_gemv("N"); -} +#ifdef KOKKOSKERNELS_TEST_COMPLEX_FLOAT +TEST_TEAMVECTOR_CASE(complex_float, Kokkos::complex) #endif -#if defined(KOKKOSKERNELS_INST_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, teamvector_gemv_double) { - test_teamvector_gemv("N"); -} -#endif - -#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, teamvector_gemv_complex_double) { - test_teamvector_gemv, Kokkos::complex, - Kokkos::complex, TestExecSpace>("N"); -} +#ifdef KOKKOSKERNELS_TEST_INT +TEST_TEAMVECTOR_CASE(int, int) #endif -#if defined(KOKKOSKERNELS_INST_INT) || \ - (!defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) -TEST_F(TestCategory, teamvector_gemv_int) { - test_teamvector_gemv("N"); -} -#endif +#ifdef KOKKOSKERNELS_TEST_ALL_TYPES +// test mixed scalar types (void -> default alpha/beta) +TEST_TEAMVECTOR_CASE4(mixed, double, int, float, void) -#if 0 // mixed scalar types not allowed in batched impl -#if !defined(KOKKOSKERNELS_ETI_ONLY) && \ - !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) -TEST_F(TestCategory, teamvector_gemv_double_int) { - test_teamvector_gemv("N"); -} -#endif +// test arbitrary double alpha/beta with complex values +TEST_TEAMVECTOR_CASE2(alphabeta, Kokkos::complex, double) #endif -#endif // Check for lambda availability on CUDA backend +#undef TEST_TEAMVECTOR_CASE4 +#undef TEST_TEAMVECTOR_CASE2 +#undef TEST_TEAMVECTOR_CASE From 0af9691fc923ca7cc598946d26f3eacaecb5cc59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Sat, 13 Aug 2022 13:19:16 +0200 Subject: [PATCH 04/15] implement ConjTranspose in {Team,TeamVector}Gemv --- src/blas/impl/KokkosBlas2_team_gemv_impl.hpp | 73 ++++++++++++++------ src/blas/impl/KokkosBlas2_team_gemv_spec.hpp | 27 +++++--- 2 files changed, 67 insertions(+), 33 deletions(-) diff --git a/src/blas/impl/KokkosBlas2_team_gemv_impl.hpp b/src/blas/impl/KokkosBlas2_team_gemv_impl.hpp index fd73e27443..a4cf662cc9 100644 --- a/src/blas/impl/KokkosBlas2_team_gemv_impl.hpp +++ b/src/blas/impl/KokkosBlas2_team_gemv_impl.hpp @@ -54,6 +54,16 @@ namespace Impl { template struct TeamGemvInternal { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType &member, OpA op, const int m, const int n, + const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, + const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, + const int xs0, const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0); + + // default OpA = OpID template KOKKOS_INLINE_FUNCTION static int invoke( @@ -61,11 +71,24 @@ struct TeamGemvInternal { const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, - /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0); + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { + return invoke(member, OpID{}, m, n, alpha, A, as0, as1, x, xs0, beta, y, + ys0); + } }; template struct TeamVectorGemvInternal { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType &member, OpA op, const int m, const int n, + const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, + const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, + const int xs0, const ScalarType beta, + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0); + + // default OpA = OpID template KOKKOS_INLINE_FUNCTION static int invoke( @@ -73,7 +96,10 @@ struct TeamVectorGemvInternal { const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, - /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0); + /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { + return invoke(member, OpID{}, m, n, alpha, A, as0, as1, x, xs0, beta, y, + ys0); + } }; /// @@ -81,12 +107,13 @@ struct TeamVectorGemvInternal { /// ==================== template <> -template +template KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( - const MemberType &member, const int m, const int n, const ScalarType alpha, - const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, + const MemberType &member, OpA op, const int m, const int n, + const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, const int as0, + const int as1, const ValueXType *KOKKOS_RESTRICT x, const int xs0, + const ScalarType beta, /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { const ScalarType one(1.0), zero(0.0); @@ -111,7 +138,7 @@ KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( #pragma unroll #endif for (int j = 0; j < n; ++j) - t += tA[j * as1] * x[j * xs0]; + t += op(tA[j * as1]) * x[j * xs0]; y[i * ys0] += alpha * t; }); } @@ -119,12 +146,13 @@ KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( } template <> -template +template KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( - const MemberType &member, const int m, const int n, const ScalarType alpha, - const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, + const MemberType &member, OpA /* op */, const int m, const int n, + const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, const int as0, + const int as1, const ValueXType *KOKKOS_RESTRICT x, const int xs0, + const ScalarType beta, /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { const ScalarType one(1.0), zero(0.0); @@ -152,9 +180,9 @@ KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( Kokkos::parallel_for(Kokkos::TeamThreadRange(member, (m / mb) + (mp > 0)), [&](const int &ii) { const int i = ii * mb; - inner.serial_invoke( - alpha, A + i * as0, x, - (i + mb) > m ? (m - i) : mb, n, y + i * ys0); + inner.serial_invoke(alpha, A + i * as0, x, + (i + mb) > m ? (m - i) : mb, + n, y + i * ys0); }); member.team_barrier(); } @@ -167,13 +195,14 @@ KOKKOS_INLINE_FUNCTION int TeamGemvInternal::invoke( /// ==================== template <> -template +template KOKKOS_INLINE_FUNCTION int TeamVectorGemvInternal::invoke( - const MemberType &member, const int m, const int n, const ScalarType alpha, - const ValueAType *KOKKOS_RESTRICT A, const int as0, const int as1, - const ValueXType *KOKKOS_RESTRICT x, const int xs0, const ScalarType beta, + const MemberType &member, OpA op, const int m, const int n, + const ScalarType alpha, const ValueAType *KOKKOS_RESTRICT A, const int as0, + const int as1, const ValueXType *KOKKOS_RESTRICT x, const int xs0, + const ScalarType beta, /**/ ValueYType *KOKKOS_RESTRICT y, const int ys0) { const ScalarType one(1.0), zero(0.0); @@ -196,7 +225,7 @@ TeamVectorGemvInternal::invoke( Kokkos::parallel_reduce( Kokkos::ThreadVectorRange(member, n), [&](const int &j, ValueYType &update) { - update += tA[j * as1] * x[j * xs0]; + update += op(tA[j * as1]) * x[j * xs0]; }, t); Kokkos::single(Kokkos::PerThread(member), diff --git a/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp b/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp index dee023d721..d1c85e9827 100644 --- a/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp +++ b/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp @@ -202,8 +202,7 @@ struct TeamGemv { const xViewType& x, const ScalarType beta, const yViewType& y) { static_assert(AViewType::Rank == 2, "KokkosBlas::TeamGemv requires rank-2 A matrix"); - return Impl::TeamGemvInternal::template invoke< - MemberType, ScalarType, typename AViewType::non_const_value_type>( + return Impl::TeamGemvInternal::invoke( member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); } @@ -218,8 +217,7 @@ struct TeamGemv { const xViewType& x, const ScalarType beta, const yViewType& y) { static_assert(AViewType::Rank == 2, "KokkosBlas::TeamGemv requires rank-2 A matrix"); - return Impl::TeamGemvInternal::template invoke< - MemberType, ScalarType, typename AViewType::non_const_value_type>( + return Impl::TeamGemvInternal::invoke( member, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); } @@ -238,8 +236,7 @@ struct TeamGemv { const xViewType& x, const ScalarType beta, const yViewType& y) { static_assert(AViewType::Rank == 2, "BLAS TeamGemv requires rank-2 A matrix"); - return Impl::TeamGemvInternal::template invoke< - MemberType, ScalarType, typename AViewType::non_const_value_type>( + return Impl::TeamGemvInternal::invoke( member, A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); } @@ -254,8 +251,7 @@ struct TeamGemv { const xViewType& x, const ScalarType beta, const yViewType& y) { static_assert(AViewType::Rank == 2, "BLAS TeamGemv requires rank-2 A matrix"); - return Impl::TeamGemvInternal::template invoke< - MemberType, ScalarType, typename AViewType::non_const_value_type>( + return Impl::TeamGemvInternal::invoke( member, A.extent(1), A.extent(0), alpha, A.data(), A.stride_1(), A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); } @@ -274,7 +270,10 @@ struct TeamGemv { const xViewType& x, const ScalarType beta, const yViewType& y) { static_assert(AViewType::Rank == 2, "BLAS TeamGemv requires rank-2 A matrix"); - Kokkos::abort("TODO: implement conjugate-transpose !"); + return Impl::TeamGemvInternal::invoke( + member, Impl::OpConj{}, A.extent(1), A.extent(0), alpha, A.data(), + A.stride_1(), A.stride_0(), x.data(), x.stride_0(), beta, y.data(), + y.stride_0()); } }; @@ -287,7 +286,10 @@ struct TeamGemv { const xViewType& x, const ScalarType beta, const yViewType& y) { static_assert(AViewType::Rank == 2, "BLAS TeamGemv requires rank-2 A matrix"); - Kokkos::abort("TODO: implement conjugate-transpose !"); + return Impl::TeamGemvInternal::invoke( + member, Impl::OpConj{}, A.extent(1), A.extent(0), alpha, A.data(), + A.stride_1(), A.stride_0(), x.data(), x.stride_0(), beta, y.data(), + y.stride_0()); } }; @@ -342,7 +344,10 @@ struct TeamVectorGemv { const xViewType& x, const ScalarType beta, const yViewType& y) { static_assert(AViewType::Rank == 2, "Batched TeamVectorGemv requires rank-2 A matrix"); - Kokkos::abort("TODO: implement conjugate-transpose !"); + return Impl::TeamVectorGemvInternal::invoke( + member, Impl::OpConj{}, A.extent(1), A.extent(0), alpha, A.data(), + A.stride_1(), A.stride_0(), x.data(), x.stride_0(), beta, y.data(), + y.stride_0()); } }; From 9741625c72c80e99b44ea22478d52a30e339573b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Sat, 13 Aug 2022 14:20:36 +0200 Subject: [PATCH 05/15] implement ConjNoTranspose in GEMV --- src/blas/KokkosBlas2_serial_gemv.hpp | 8 +++ src/blas/KokkosBlas2_team_gemv.hpp | 14 +++++ .../impl/KokkosBlas2_serial_gemv_impl.hpp | 29 ++++++++++ src/blas/impl/KokkosBlas2_team_gemv_spec.hpp | 57 +++++++++++++++++++ src/blas/impl/KokkosBlas_util.hpp | 1 + .../KokkosBlas2_serial_gemv_tpl_spec_decl.hpp | 17 ++++++ test_common/KokkosKernels_TestUtils.hpp | 2 +- unit_test/blas/Test_Blas2_gemv_util.hpp | 8 ++- unit_test/blas/Test_Blas2_serial_gemv.hpp | 17 ++++-- unit_test/blas/Test_Blas2_team_gemv.hpp | 5 ++ unit_test/blas/Test_Blas2_teamvector_gemv.hpp | 5 ++ 11 files changed, 157 insertions(+), 6 deletions(-) diff --git a/src/blas/KokkosBlas2_serial_gemv.hpp b/src/blas/KokkosBlas2_serial_gemv.hpp index 7b26fdeeb7..4c6cec5012 100644 --- a/src/blas/KokkosBlas2_serial_gemv.hpp +++ b/src/blas/KokkosBlas2_serial_gemv.hpp @@ -65,11 +65,19 @@ void KOKKOS_INLINE_FUNCTION gemv(const char trans, const ScalarType& alpha, } else if (trans == 'C' || trans == 'c') { using mode = KokkosBlas::Trans::ConjTranspose; KokkosBlas::SerialGemv::invoke(alpha, A, x, beta, y); + // + // TODO: what letter should be used here ? + // * in blas "C" means conjugate-transpose + // * in sparse "C" meanse conjugate and "H" conjugate-transpose... + } else if (trans == 'X' || trans == 'x') { + using mode = KokkosBlas::Trans::ConjNoTranspose; + KokkosBlas::SerialGemv::invoke(alpha, A, x, beta, y); } else { Kokkos::abort("Matrix mode not supported"); } } +// default AlgoTag template void KOKKOS_INLINE_FUNCTION gemv(const char trans, const ScalarType& alpha, const MatrixType& A, const XVector& x, diff --git a/src/blas/KokkosBlas2_team_gemv.hpp b/src/blas/KokkosBlas2_team_gemv.hpp index e886de4f99..afe04aebfd 100644 --- a/src/blas/KokkosBlas2_team_gemv.hpp +++ b/src/blas/KokkosBlas2_team_gemv.hpp @@ -65,6 +65,13 @@ void KOKKOS_INLINE_FUNCTION gemv(const TeamType& team, const char trans, if (trans == 'C' || trans == 'c') TeamGemv::invoke(team, alpha, A, x, beta, y); + // + // TODO: what letter should be used here ? + // * in blas "C" means conjugate-transpose + // * in sparse "C" meanse conjugate and "H" conjugate-transpose... + if (trans == 'X' || trans == 'x') + TeamGemv::invoke(team, alpha, A, + x, beta, y); } // default AlgoTag @@ -92,6 +99,13 @@ team_vector_gemv(const TeamType& team, const char trans, } else if (trans == 'C' || trans == 'c') { KokkosBlas::TeamVectorGemv::invoke( team, alpha, A, x, beta, y); + // + // TODO: what letter should be used here ? + // * in blas "C" means conjugate-transpose + // * in sparse "C" meanse conjugate and "H" conjugate-transpose... + } else if (trans == 'X' || trans == 'x') { + KokkosBlas::TeamVectorGemv::invoke(team, alpha, A, x, beta, y); } else { Kokkos::abort("Matrix mode not supported"); } diff --git a/src/blas/impl/KokkosBlas2_serial_gemv_impl.hpp b/src/blas/impl/KokkosBlas2_serial_gemv_impl.hpp index 75164f87ab..9a0734d5e4 100644 --- a/src/blas/impl/KokkosBlas2_serial_gemv_impl.hpp +++ b/src/blas/impl/KokkosBlas2_serial_gemv_impl.hpp @@ -99,6 +99,7 @@ SerialGemv::invoke( A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); } + /// /// T /// @@ -155,6 +156,34 @@ SerialGemv::invoke( A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); } +/// +/// CNT +/// + +template <> +template +KOKKOS_INLINE_FUNCTION int +SerialGemv::invoke( + const ScalarType alpha, const AViewType &A, const xViewType &x, + const ScalarType beta, const yViewType &y) { + return Impl::SerialGemvInternal::invoke( + Impl::OpConj{}, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), + A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); +} + +template <> +template +KOKKOS_INLINE_FUNCTION int +SerialGemv::invoke( + const ScalarType alpha, const AViewType &A, const xViewType &x, + const ScalarType beta, const yViewType &y) { + return Impl::SerialGemvInternal::invoke( + Impl::OpConj{}, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), + A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); +} + } // namespace KokkosBlas #endif diff --git a/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp b/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp index d1c85e9827..e14bca9225 100644 --- a/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp +++ b/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp @@ -293,6 +293,42 @@ struct TeamGemv { } }; +/// +/// CNT +/// + +template +struct TeamGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::Rank == 2, + "BLAS TeamGemv requires rank-2 A matrix"); + return Impl::TeamGemvInternal::invoke( + member, Impl::OpConj{}, A.extent(0), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), x.data(), x.stride_0(), beta, y.data(), + y.stride_0()); + } +}; + +template +struct TeamGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::Rank == 2, + "BLAS TeamGemv requires rank-2 A matrix"); + return Impl::TeamGemvInternal::invoke( + member, Impl::OpConj{}, A.extent(0), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), x.data(), x.stride_0(), beta, y.data(), + y.stride_0()); + } +}; + /// /// NT /// @@ -351,6 +387,27 @@ struct TeamVectorGemv { } }; +/// +/// CNT +/// + +template +struct TeamVectorGemv { + template + KOKKOS_INLINE_FUNCTION static int invoke( + const MemberType& member, const ScalarType alpha, const AViewType& A, + const xViewType& x, const ScalarType beta, const yViewType& y) { + static_assert(AViewType::Rank == 2, + "Batched TeamVectorGemv requires rank-2 A matrix"); + return Impl::TeamVectorGemvInternal::invoke( + member, Impl::OpConj{}, A.extent(0), A.extent(1), alpha, A.data(), + A.stride_0(), A.stride_1(), x.data(), x.stride_0(), beta, y.data(), + y.stride_0()); + } +}; + } // namespace KokkosBlas #endif diff --git a/src/blas/impl/KokkosBlas_util.hpp b/src/blas/impl/KokkosBlas_util.hpp index 3ddf0afbd5..0b1bdd5f44 100644 --- a/src/blas/impl/KokkosBlas_util.hpp +++ b/src/blas/impl/KokkosBlas_util.hpp @@ -54,6 +54,7 @@ struct Trans { struct Transpose {}; struct NoTranspose {}; struct ConjTranspose {}; + struct ConjNoTranspose {}; // Note: usually not available in BLAS, MKL, etc. }; #if !defined(KOKKOS_IF_ON_HOST) diff --git a/src/impl/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp index c994929b5b..aba0902664 100644 --- a/src/impl/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp @@ -199,6 +199,23 @@ SerialGemv::invoke( return 0; } +/// +/// CNT (not supported) +/// + +template <> +template +KOKKOS_INLINE_FUNCTION int +SerialGemv::invoke( + const ScalarType alpha, const AViewType &A, const xViewType &x, + const ScalarType beta, const yViewType &y) { + Kokkos::abort( + "CompactMKL implementation of serial GEMV does not support " + "ConjNoTranspose mode"); + return 0; +} + } // namespace KokkosBlas #endif // __KOKKOSBLAS_ENABLE_INTEL_MKL_COMPACT__ diff --git a/test_common/KokkosKernels_TestUtils.hpp b/test_common/KokkosKernels_TestUtils.hpp index b8ce0eb595..8a51139c5f 100644 --- a/test_common/KokkosKernels_TestUtils.hpp +++ b/test_common/KokkosKernels_TestUtils.hpp @@ -370,7 +370,7 @@ KOKKOS_INLINE_FUNCTION void vanillaGEMV(char mode, AlphaType alpha, using ScalarY = typename ViewTypeY::non_const_value_type; using KAT_A = Kokkos::ArithTraits; const bool transposed = mode == 'T' || mode == 'C'; - const bool conjugated = mode == 'C'; + const bool conjugated = mode == 'C' || mode == 'X'; const bool has_beta = beta != Kokkos::ArithTraits::zero(); int M = A.extent(transposed ? 1 : 0); int N = A.extent(transposed ? 0 : 1); diff --git a/unit_test/blas/Test_Blas2_gemv_util.hpp b/unit_test/blas/Test_Blas2_gemv_util.hpp index 0ef31d5ab2..dbb0ebe98c 100644 --- a/unit_test/blas/Test_Blas2_gemv_util.hpp +++ b/unit_test/blas/Test_Blas2_gemv_util.hpp @@ -145,10 +145,13 @@ struct GEMVTest { // skip unsupported combinations } + // Note: all layouts listed here are subview'ed to test Kokkos::LayoutStride template static typename std::enable_if>::type run_layouts( const char *mode) { - // Note: all layouts listed here are subview'ed to test Kokkos::LayoutStride + if (!GemvFunc::template allow_mode(mode[0])) + return; // skip matrix modes not supported by the algorithm #ifdef KOKKOSKERNELS_TEST_LAYOUTLEFT run_view_types(mode); #endif @@ -327,6 +330,9 @@ struct GEMVTest { } \ TEST_F(TestCategory, PREFIX##_gemv_ct_##NAME) { \ PREFIX##_##NAME##_gemv_test::run("C"); \ + } \ + TEST_F(TestCategory, PREFIX##_gemv_cnt_##NAME) { \ + PREFIX##_##NAME##_gemv_test::run("X"); \ } #define TEST_CASE2(PREFIX, FACTORY, NAME, SCALAR, SCALAR_COEF) \ diff --git a/unit_test/blas/Test_Blas2_serial_gemv.hpp b/unit_test/blas/Test_Blas2_serial_gemv.hpp index 99ad6da6a8..9561208950 100644 --- a/unit_test/blas/Test_Blas2_serial_gemv.hpp +++ b/unit_test/blas/Test_Blas2_serial_gemv.hpp @@ -30,15 +30,24 @@ struct SerialGemvFactory { #endif >; + template + static constexpr bool is_mkl = + std::is_same::value; + // block testing of CompackMKL on non-vector scalars // (they are not supported by the implementation) template static constexpr bool allow_algorithm = - !std::is_same::value || - (KokkosBatched::is_vector::value && - KokkosBatched::is_vector::value && - KokkosBatched::is_vector::value); + !is_mkl || (KokkosBatched::is_vector::value && + KokkosBatched::is_vector::value && + KokkosBatched::is_vector::value); + + // block testing of ConjNoTranspose mode on CompactMKL + template + static bool allow_mode(char trans) { + return !is_mkl || toupper(trans) != 'X'; + } }; } // namespace Test diff --git a/unit_test/blas/Test_Blas2_team_gemv.hpp b/unit_test/blas/Test_Blas2_team_gemv.hpp index 580ae8fa23..33f22baa2e 100644 --- a/unit_test/blas/Test_Blas2_team_gemv.hpp +++ b/unit_test/blas/Test_Blas2_team_gemv.hpp @@ -27,6 +27,11 @@ struct TeamGemvFactory { template static constexpr bool allow_algorithm = true; + + template + static bool allow_mode(char trans) { + return true; + } }; } // namespace Test diff --git a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp index 12dd066198..2356426ae1 100644 --- a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp +++ b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp @@ -27,6 +27,11 @@ struct TeamVectorGemvFactory { template static constexpr bool allow_algorithm = true; + + template + static bool allow_mode(char trans) { + return true; + } }; } // namespace Test From 8f3cc4828b7ae80a46d170461f1675f0afe01130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Sat, 13 Aug 2022 16:45:08 +0200 Subject: [PATCH 06/15] fix linker conflicts in MKL impl of SerialGemv --- .../KokkosBlas2_serial_gemv_tpl_spec_decl.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/impl/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp index aba0902664..f35fa4fb34 100644 --- a/src/impl/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp @@ -64,7 +64,7 @@ namespace Impl { // Note: using GEMM because there is no GEMV in MKL compact routines #define __IMPL_KK_MKL_DGEMM_COMPACT(SCALAR, MKL_ROUTINE) \ - void kk_mkl_gemm_compact( \ + inline void kk_mkl_gemm_compact( \ MKL_LAYOUT layout, MKL_TRANSPOSE transa, MKL_TRANSPOSE transb, \ MKL_INT m, MKL_INT n, MKL_INT k, SCALAR alpha, const SCALAR *a, \ MKL_INT ldap, const SCALAR *b, MKL_INT ldbp, SCALAR beta, SCALAR *c, \ @@ -81,31 +81,31 @@ __IMPL_KK_MKL_DGEMM_COMPACT(float, mkl_sgemm_compact) #undef __IMPL_KK_MKL_DGEMM_COMPACT template -MKL_COMPACT_PACK mkl_compact_format() { +inline MKL_COMPACT_PACK mkl_compact_format() { Kokkos::abort("vector size not supported"); } template <> -MKL_COMPACT_PACK mkl_compact_format() { +inline MKL_COMPACT_PACK mkl_compact_format() { return MKL_COMPACT_SSE; } template <> -MKL_COMPACT_PACK mkl_compact_format() { +inline MKL_COMPACT_PACK mkl_compact_format() { return MKL_COMPACT_SSE; } template <> -MKL_COMPACT_PACK mkl_compact_format() { +inline MKL_COMPACT_PACK mkl_compact_format() { return MKL_COMPACT_AVX; } template <> -MKL_COMPACT_PACK mkl_compact_format() { +inline MKL_COMPACT_PACK mkl_compact_format() { return MKL_COMPACT_AVX; } template <> -MKL_COMPACT_PACK mkl_compact_format() { +inline MKL_COMPACT_PACK mkl_compact_format() { return MKL_COMPACT_AVX512; } template <> -MKL_COMPACT_PACK mkl_compact_format() { +inline MKL_COMPACT_PACK mkl_compact_format() { return MKL_COMPACT_AVX512; } From 2a05bb391225df8a11fb8053a7ae8f9027ef97e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Sat, 13 Aug 2022 14:37:52 +0200 Subject: [PATCH 07/15] remove obsolete/duplicated TeamGEMV --- src/blas/impl/KokkosBlas2_team_gemv_spec.hpp | 111 ------------------ .../impl/KokkosSparse_spmv_bsrmatrix_impl.hpp | 23 ++-- 2 files changed, 8 insertions(+), 126 deletions(-) diff --git a/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp b/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp index e14bca9225..273a9cddb2 100644 --- a/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp +++ b/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp @@ -52,117 +52,6 @@ #include namespace KokkosBlas { -namespace Experimental { -namespace Impl { - -template -struct team_gemv_tpl_spec_avail { - constexpr static bool value = false; -}; - -// Unification and Specialization layer -template ::value> -struct TeamGEMV { - static KOKKOS_INLINE_FUNCTION void team_gemv( - const TeamType& team, const typename XVector::non_const_value_type& alpha, - const MatrixType& A, const XVector& x, - const typename YVector::non_const_value_type& beta, const YVector& y); -}; - -template -struct TeamGEMV { - typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename MatrixType::non_const_value_type>::dot_type dot_type; - static KOKKOS_INLINE_FUNCTION void team_gemv( - const TeamType& team, const typename XVector::non_const_value_type& alpha, - const MatrixType& A, const XVector& x, - const typename YVector::non_const_value_type& beta, const YVector& y) { - const int N = A.extent(0); - const int M = A.extent(1); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) { - dot_type Ax_i; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, M), - [&](const int& j, dot_type& val) { val += A(i, j) * x(j); }, Ax_i); - y(i) = beta * y(i) + alpha * Ax_i; - }); - } -}; - -template -struct TeamGEMV { - typedef typename MatrixType::non_const_value_type value_type; - typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename MatrixType::non_const_value_type>::dot_type dot_type; - static KOKKOS_INLINE_FUNCTION void team_gemv( - const TeamType& team, const typename XVector::non_const_value_type& alpha, - const MatrixType& A, const XVector& x, - const typename YVector::non_const_value_type& beta, const YVector& y) { - const int N = A.extent(0); - const int M = A.extent(1); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) { - dot_type Ax_i; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, M), - [&](const int& j, dot_type& val) { - val += Kokkos::ArithTraits::conj(A(i, j)) * x(j); - }, - Ax_i); - y(i) = beta * y(i) + alpha * Ax_i; - }); - } -}; - -template -struct TeamGEMV { - typedef typename Kokkos::Details::InnerProductSpaceTraits< - typename MatrixType::non_const_value_type>::dot_type dot_type; - static KOKKOS_INLINE_FUNCTION void team_gemv( - const TeamType& team, const typename XVector::non_const_value_type& alpha, - const MatrixType& A, const XVector& x, - const typename YVector::non_const_value_type& beta, const YVector& y) { - const int N = A.extent(1); - const int M = A.extent(0); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) { - dot_type Ax_i; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, M), - [&](const int& j, dot_type& val) { val += A(j, i) * x(j); }, Ax_i); - y(i) = beta * y(i) + alpha * Ax_i; - }); - } -}; - -template -struct TeamGEMV { - typedef typename MatrixType::non_const_value_type value_type; - typedef - typename Kokkos::Details::InnerProductSpaceTraits::dot_type - dot_type; - static KOKKOS_INLINE_FUNCTION void team_gemv( - const TeamType& team, const typename XVector::non_const_value_type& alpha, - const MatrixType& A, const XVector& x, - const typename YVector::non_const_value_type& beta, const YVector& y) { - const int N = A.extent(1); - const int M = A.extent(0); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&](const int& i) { - dot_type Ax_i; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, M), - [&](const int& j, dot_type& val) { - val += Kokkos::ArithTraits::conj(A(j, i)) * x(j); - }, - Ax_i); - y(i) = beta * y(i) + alpha * Ax_i; - }); - } -}; -} // namespace Impl -} // namespace Experimental template diff --git a/src/sparse/impl/KokkosSparse_spmv_bsrmatrix_impl.hpp b/src/sparse/impl/KokkosSparse_spmv_bsrmatrix_impl.hpp index 9796c12352..89a5f79ba9 100644 --- a/src/sparse/impl/KokkosSparse_spmv_bsrmatrix_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spmv_bsrmatrix_impl.hpp @@ -650,20 +650,16 @@ struct BSR_GEMV_Functor { const auto count = myRow.length; if (conjugate) { - typedef Kokkos::View - block_values_type; - for (ordinal_type jBlock = 0; jBlock < count; ++jBlock) { const auto A_cur = myRow.block(jBlock); const auto X_blkCol = myRow.block_colidx(jBlock); const auto X_ptBeg = X_blkCol * block_dim; const auto X_cur = Kokkos::subview( m_x, ::Kokkos::make_pair(X_ptBeg, X_ptBeg + block_dim)); - KokkosBlas::Experimental::Impl::TeamGEMV< - team_member, block_values_type, XVector, YVector, -1, - false>::team_gemv(dev, alpha, A_cur, X_cur, val_one, Y_cur); + KokkosBlas::TeamVectorGemv< + team_member, KokkosBlas::Trans::ConjNoTranspose, + KokkosBlas::Algo::Gemv::Default>::invoke(dev, alpha, A_cur, X_cur, + val_one, Y_cur); } } else { for (ordinal_type jBlock = 0; jBlock < count; ++jBlock) { @@ -944,19 +940,16 @@ struct BSR_GEMV_Transpose_Functor { block_dim * sizeof(y_value_type)); if (conjugate) { - typedef Kokkos::View - block_values_type; Kokkos::View shared_view(shared_y, block_dim); for (ordinal_type jBlock = 0; jBlock < count; ++jBlock) { const auto A_cur = myRow.block(jBlock); // - KokkosBlas::Experimental::Impl::TeamGEMV< - team_member, block_values_type, XVector, YVector, 2, - false>::team_gemv(dev, alpha, A_cur, X_cur, val_zero, shared_view); + KokkosBlas::TeamVectorGemv< + team_member, KokkosBlas::Trans::ConjTranspose, + KokkosBlas::Algo::Gemv::Default>::invoke(dev, alpha, A_cur, X_cur, + val_zero, shared_view); // dev.team_barrier(); // From 4bc8797d4c4459110c30ecefd53f41944933adb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Sat, 13 Aug 2022 16:09:05 +0200 Subject: [PATCH 08/15] Add top-level selective GEMV interface --- src/batched/KokkosBatched_Util.hpp | 13 +---- src/blas/KokkosBlas2_gemv.hpp | 53 +++++++++++++++++++ src/blas/KokkosBlas2_serial_gemv.hpp | 18 ++++--- src/blas/KokkosBlas2_team_gemv.hpp | 46 ++++++++-------- src/blas/impl/KokkosBlas_util.hpp | 13 +++++ unit_test/blas/Test_Blas2_serial_gemv.hpp | 11 ++-- unit_test/blas/Test_Blas2_team_gemv.hpp | 7 +-- unit_test/blas/Test_Blas2_teamvector_gemv.hpp | 3 +- 8 files changed, 114 insertions(+), 50 deletions(-) diff --git a/src/batched/KokkosBatched_Util.hpp b/src/batched/KokkosBatched_Util.hpp index 6022b1b607..3e97c97d72 100644 --- a/src/batched/KokkosBatched_Util.hpp +++ b/src/batched/KokkosBatched_Util.hpp @@ -283,19 +283,8 @@ struct Direct { struct Backward {}; }; -struct Mode { - struct Serial { - static const char *name() { return "Serial"; } - }; - struct Team { - static const char *name() { return "Team"; } - }; - struct TeamVector { - static const char *name() { return "TeamVector"; } - }; -}; - using KokkosBlas::Algo; +using KokkosBlas::Mode; struct Util { template diff --git a/src/blas/KokkosBlas2_gemv.hpp b/src/blas/KokkosBlas2_gemv.hpp index 5c37b74c9b..6acf25ada7 100644 --- a/src/blas/KokkosBlas2_gemv.hpp +++ b/src/blas/KokkosBlas2_gemv.hpp @@ -49,6 +49,8 @@ /// Tpetra::MultiVector use cases. #include +#include +#include #include #include #include @@ -206,6 +208,57 @@ void gemv(const char trans[], typename AViewType::const_value_type& alpha, gemv(space, trans, alpha, A, x, beta, y); } +namespace Experimental { +/// +/// Selective Interface +/// +template +struct Gemv { + template + static void KOKKOS_INLINE_FUNCTION + invoke(const MemberType& member, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, + const YVector& y); +}; + +template +struct Gemv { + template + static void KOKKOS_INLINE_FUNCTION + invoke(const MemberType& member, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, + const YVector& y) { + serial_gemv(trans, alpha, A, x, beta, y); + } +}; + +template +struct Gemv { + template + static void KOKKOS_INLINE_FUNCTION + invoke(const MemberType& member, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, + const YVector& y) { + team_gemv(member, trans, alpha, A, x, beta, y); + } +}; + +template +struct Gemv { + template + static void KOKKOS_INLINE_FUNCTION + invoke(const MemberType& member, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, + const YVector& y) { + teamvector_gemv(member, trans, alpha, A, x, beta, y); + } +}; + +} // namespace Experimental } // namespace KokkosBlas #endif // KOKKOS_BLAS2_MV_HPP_ diff --git a/src/blas/KokkosBlas2_serial_gemv.hpp b/src/blas/KokkosBlas2_serial_gemv.hpp index 4c6cec5012..1f53fba87e 100644 --- a/src/blas/KokkosBlas2_serial_gemv.hpp +++ b/src/blas/KokkosBlas2_serial_gemv.hpp @@ -53,9 +53,11 @@ namespace Experimental { template -void KOKKOS_INLINE_FUNCTION gemv(const char trans, const ScalarType& alpha, - const MatrixType& A, const XVector& x, - const ScalarType& beta, const YVector& y) { +void KOKKOS_INLINE_FUNCTION serial_gemv(const char trans, + const ScalarType& alpha, + const MatrixType& A, const XVector& x, + const ScalarType& beta, + const YVector& y) { if (trans == 'N' || trans == 'n') { using mode = KokkosBlas::Trans::NoTranspose; KokkosBlas::SerialGemv::invoke(alpha, A, x, beta, y); @@ -79,10 +81,12 @@ void KOKKOS_INLINE_FUNCTION gemv(const char trans, const ScalarType& alpha, // default AlgoTag template -void KOKKOS_INLINE_FUNCTION gemv(const char trans, const ScalarType& alpha, - const MatrixType& A, const XVector& x, - const ScalarType& beta, const YVector& y) { - gemv(trans, alpha, A, x, beta, y); +void KOKKOS_INLINE_FUNCTION serial_gemv(const char trans, + const ScalarType& alpha, + const MatrixType& A, const XVector& x, + const ScalarType& beta, + const YVector& y) { + serial_gemv(trans, alpha, A, x, beta, y); } } // namespace Experimental diff --git a/src/blas/KokkosBlas2_team_gemv.hpp b/src/blas/KokkosBlas2_team_gemv.hpp index afe04aebfd..1950356804 100644 --- a/src/blas/KokkosBlas2_team_gemv.hpp +++ b/src/blas/KokkosBlas2_team_gemv.hpp @@ -52,10 +52,11 @@ namespace Experimental { template -void KOKKOS_INLINE_FUNCTION gemv(const TeamType& team, const char trans, - const ScalarType& alpha, const MatrixType& A, - const XVector& x, const ScalarType& beta, - const YVector& y) { +void KOKKOS_INLINE_FUNCTION team_gemv(const TeamType& team, const char trans, + const ScalarType& alpha, + const MatrixType& A, const XVector& x, + const ScalarType& beta, + const YVector& y) { if (trans == 'N' || trans == 'n') TeamGemv::invoke(team, alpha, A, x, beta, y); @@ -75,21 +76,22 @@ void KOKKOS_INLINE_FUNCTION gemv(const TeamType& team, const char trans, } // default AlgoTag -template -void KOKKOS_INLINE_FUNCTION -gemv(const TeamType& team, const char trans, - const ScalarType& alpha, - const MatrixType& A, const XVector& x, - const ScalarType& beta, const YVector& y) { - gemv(team, trans, alpha, A, x, beta, y); +template +void KOKKOS_INLINE_FUNCTION team_gemv(const TeamType& team, const char trans, + const ScalarType& alpha, + const MatrixType& A, const XVector& x, + const ScalarType& beta, + const YVector& y) { + team_gemv(team, trans, alpha, A, x, beta, y); } template void KOKKOS_INLINE_FUNCTION -team_vector_gemv(const TeamType& team, const char trans, - const ScalarType& alpha, const MatrixType& A, const XVector& x, - const ScalarType& beta, const YVector& y) { +teamvector_gemv(const TeamType& team, const char trans, const ScalarType& alpha, + const MatrixType& A, const XVector& x, const ScalarType& beta, + const YVector& y) { if (trans == 'N' || trans == 'n') { KokkosBlas::TeamVectorGemv::invoke( team, alpha, A, x, beta, y); @@ -112,14 +114,14 @@ team_vector_gemv(const TeamType& team, const char trans, } // default AlgoTag -template -void KOKKOS_INLINE_FUNCTION team_vector_gemv( - const TeamType& team, const char trans, - const ScalarType& alpha, const MatrixType& A, - const XVector& x, const ScalarType& beta, - const YVector& y) { - team_vector_gemv(team, trans, alpha, A, x, - beta, y); +template +void KOKKOS_INLINE_FUNCTION +team_vector_gemv(const TeamType& team, const char trans, + const ScalarType& alpha, const MatrixType& A, const XVector& x, + const ScalarType& beta, const YVector& y) { + teamvector_gemv(team, trans, alpha, A, x, + beta, y); } } // namespace Experimental diff --git a/src/blas/impl/KokkosBlas_util.hpp b/src/blas/impl/KokkosBlas_util.hpp index 0b1bdd5f44..7ed3afa9c1 100644 --- a/src/blas/impl/KokkosBlas_util.hpp +++ b/src/blas/impl/KokkosBlas_util.hpp @@ -50,6 +50,19 @@ namespace KokkosBlas { //////// Tags for BLAS //////// + +struct Mode { + struct Serial { + static const char *name() { return "Serial"; } + }; + struct Team { + static const char *name() { return "Team"; } + }; + struct TeamVector { + static const char *name() { return "TeamVector"; } + }; +}; + struct Trans { struct Transpose {}; struct NoTranspose {}; diff --git a/unit_test/blas/Test_Blas2_serial_gemv.hpp b/unit_test/blas/Test_Blas2_serial_gemv.hpp index 9561208950..5c98c82b6b 100644 --- a/unit_test/blas/Test_Blas2_serial_gemv.hpp +++ b/unit_test/blas/Test_Blas2_serial_gemv.hpp @@ -1,6 +1,7 @@ +#include #include // for ETI test guards // Note: include serial gemv before util so it knows if CompactMKL is available -#include +#include #include namespace Test { @@ -9,10 +10,10 @@ template KK_DEFINE_BLAS2_GEMV_TEST_OP_CLASS(SerialGEMVOp) template -KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& /* member */) const { - KokkosBlas::Experimental::gemv(params::trans, params::alpha, - params::A, params::x, params::beta, - params::y); +KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { + KokkosBlas::Experimental::Gemv::invoke( + member, params::trans, params::alpha, params::A, params::x, params::beta, + params::y); } KK_END_BLAS2_GEMV_TEST_OP_CLASS diff --git a/unit_test/blas/Test_Blas2_team_gemv.hpp b/unit_test/blas/Test_Blas2_team_gemv.hpp index 33f22baa2e..0ea3762c34 100644 --- a/unit_test/blas/Test_Blas2_team_gemv.hpp +++ b/unit_test/blas/Test_Blas2_team_gemv.hpp @@ -1,3 +1,4 @@ +#include #include // for test/inst guards // Note: include serial gemv before util so it knows if CompactMKL is available #include @@ -10,9 +11,9 @@ template KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { - KokkosBlas::Experimental::gemv(member, params::trans, params::alpha, - params::A, params::x, params::beta, - params::y); + KokkosBlas::Experimental::Gemv::invoke( + member, params::trans, params::alpha, params::A, params::x, params::beta, + params::y); } KK_END_BLAS2_GEMV_TEST_OP_CLASS diff --git a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp index 2356426ae1..79d138c922 100644 --- a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp +++ b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp @@ -1,3 +1,4 @@ +#include #include // for test/inst guards // Note: include serial gemv before util so it knows if CompactMKL is available #include @@ -10,7 +11,7 @@ template KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { - KokkosBlas::Experimental::team_vector_gemv( + KokkosBlas::Experimental::Gemv::invoke( member, params::trans, params::alpha, params::A, params::x, params::beta, params::y); } From 5b463fd1cdc0707b8e4eefe4a1d4200a45ba4f0d Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Mon, 15 Aug 2022 14:35:36 -0600 Subject: [PATCH 09/15] Team/TeamVector GEMV: fixing issues with CI testing Mostly fixing some small issues with unused parameters/variables in specific code paths. Also changing the logic around the MKL compact library testing since Intel 18 had an internal compiler error with the original implementation. --- .../KokkosBatched_Gemv_TeamVector_Impl.hpp | 8 +-- .../impl/KokkosBatched_Gemv_Team_Impl.hpp | 8 +-- src/blas/KokkosBlas2_gemv.hpp | 2 +- unit_test/blas/Test_Blas2_gemv_util.hpp | 53 +++++----------- unit_test/blas/Test_Blas2_serial_gemv.hpp | 63 +++++++++---------- unit_test/blas/Test_Blas2_team_gemv.hpp | 30 +++++---- unit_test/blas/Test_Blas2_teamvector_gemv.hpp | 30 +++++---- 7 files changed, 82 insertions(+), 112 deletions(-) diff --git a/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp b/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp index 45eb828aa8..a7e63e583e 100644 --- a/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp +++ b/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp @@ -47,8 +47,8 @@ struct TeamVectorGemv { template KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const xViewType &x, const ScalarType beta, const yViewType &y) { + const MemberType &/*member*/, const ScalarType /*alpha*/, const AViewType &/*A*/, + const xViewType &/*x*/, const ScalarType /*beta*/, const yViewType &/*y*/) { static_assert(AViewType::Rank == 3, "Batched TeamVectorGemv requires rank-3 A matrix (use " "KokkosBlas::TeamVectorGemv for regular rank-2 matrix)"); @@ -86,8 +86,8 @@ struct TeamVectorGemv { template KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const xViewType &x, const ScalarType beta, const yViewType &y) { + const MemberType &/*member*/, const ScalarType /*alpha*/, const AViewType &/*A*/, + const xViewType &/*x*/, const ScalarType /*beta*/, const yViewType &/*y*/) { static_assert(AViewType::Rank == 3, "Batched TeamVectorGemv requires rank-3 A matrix (use " "KokkosBlas::TeamVectorGemv for regular rank-2 matrix)"); diff --git a/src/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp b/src/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp index 6220ab2916..20a9fbb329 100644 --- a/src/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp +++ b/src/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp @@ -47,8 +47,8 @@ struct TeamGemv { template KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const xViewType &x, const ScalarType beta, const yViewType &y) { + const MemberType &/*member*/, const ScalarType /*alpha*/, const AViewType &/*A*/, + const xViewType &/*x*/, const ScalarType /*beta*/, const yViewType &/*y*/) { static_assert(AViewType::Rank == 3, "Batched TeamGemv requires rank-3 A matrix (use " "KokkosBlas::TeamGemv for regular rank-2 matrix)"); @@ -86,8 +86,8 @@ struct TeamGemv { template KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &member, const ScalarType alpha, const AViewType &A, - const xViewType &x, const ScalarType beta, const yViewType &y) { + const MemberType &/*member*/, const ScalarType /*alpha*/, const AViewType &/*A*/, + const xViewType &/*x*/, const ScalarType /*beta*/, const yViewType &/*y*/) { static_assert(AViewType::Rank == 3, "Batched TeamGemv requires rank-3 A matrix (use " "KokkosBlas::TeamGemv for regular rank-2 matrix)"); diff --git a/src/blas/KokkosBlas2_gemv.hpp b/src/blas/KokkosBlas2_gemv.hpp index 6acf25ada7..fb5e008d4d 100644 --- a/src/blas/KokkosBlas2_gemv.hpp +++ b/src/blas/KokkosBlas2_gemv.hpp @@ -227,7 +227,7 @@ struct Gemv { template static void KOKKOS_INLINE_FUNCTION - invoke(const MemberType& member, const char trans, const ScalarType& alpha, + invoke(const MemberType& /*member*/, const char trans, const ScalarType& alpha, const MatrixType& A, const XVector& x, const ScalarType& beta, const YVector& y) { serial_gemv(trans, alpha, A, x, beta, y); diff --git a/unit_test/blas/Test_Blas2_gemv_util.hpp b/unit_test/blas/Test_Blas2_gemv_util.hpp index dbb0ebe98c..29969525a8 100644 --- a/unit_test/blas/Test_Blas2_gemv_util.hpp +++ b/unit_test/blas/Test_Blas2_gemv_util.hpp @@ -30,26 +30,22 @@ struct GemvOpBase { YType y; }; -#define KK_DEFINE_BLAS2_GEMV_TEST_OP_CLASS(ClassName) \ - struct ClassName : public GemvOpBase { \ - using params = GemvOpBase; \ - ClassName(char trans_, ScalarType alpha_, AType A_, XType x_, \ - ScalarType beta_, YType y_) \ - : params(trans_, alpha_, A_, x_, beta_, y_) {} -#define KK_END_BLAS2_GEMV_TEST_OP_CLASS \ - } \ - ; - // Note: vanillaGEMV is called on device here - alternatively one can move // _strided_ data using safe_device_to_host_deep_copy() etc. template -KK_DEFINE_BLAS2_GEMV_TEST_OP_CLASS(RefGEMVOp) -template -KOKKOS_INLINE_FUNCTION void operator()(const TeamMember & /* member */) const { - vanillaGEMV(params::trans, params::alpha, params::A, params::x, params::beta, - params::y); -} -KK_END_BLAS2_GEMV_TEST_OP_CLASS +struct RefGEMVOp : public GemvOpBase { + using params = GemvOpBase; + + RefGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, + ScalarType beta_, YType y_) + : params(trans_, alpha_, A_, x_, beta_, y_) {} + + template + KOKKOS_INLINE_FUNCTION void operator()(const TeamMember & /* member */) const { + vanillaGEMV(params::trans, params::alpha, params::A, params::x, params::beta, + params::y); + } +}; // RefGEMVOp // fill regular view with random values template static std::enable_if_t::value> - run_algorithms(const char *mode) {} + run_algorithms(const char */*mode*/) {} template static @@ -134,24 +130,9 @@ struct GEMVTest { run_algorithms(mode); } - template - static constexpr bool allow_algorithm = - GemvFunc::template allow_algorithm; - - template - static typename std::enable_if>::type run_layouts( - const char *mode) { - // skip unsupported combinations - } - // Note: all layouts listed here are subview'ed to test Kokkos::LayoutStride template - static typename std::enable_if>::type run_layouts( - const char *mode) { - if (!GemvFunc::template allow_mode(mode[0])) - return; // skip matrix modes not supported by the algorithm + static void run_layouts(const char *mode) { #ifdef KOKKOSKERNELS_TEST_LAYOUTLEFT run_view_types(mode); #endif @@ -313,7 +294,7 @@ struct GEMVTest { fill_random_view(x, rand_pool); fill_random_view(y, rand_pool); } -}; +}; // struct GEMVTest } // namespace Test @@ -340,4 +321,4 @@ struct GEMVTest { #define TEST_CASE(PREFIX, FACTORY, NAME, SCALAR) \ TEST_CASE2(PREFIX, FACTORY, NAME, SCALAR, SCALAR) -#endif // TEST_BLAS2_GEMV_UTIL_HPP \ No newline at end of file +#endif // TEST_BLAS2_GEMV_UTIL_HPP diff --git a/unit_test/blas/Test_Blas2_serial_gemv.hpp b/unit_test/blas/Test_Blas2_serial_gemv.hpp index 5c98c82b6b..30617e14e6 100644 --- a/unit_test/blas/Test_Blas2_serial_gemv.hpp +++ b/unit_test/blas/Test_Blas2_serial_gemv.hpp @@ -8,14 +8,20 @@ namespace Test { template -KK_DEFINE_BLAS2_GEMV_TEST_OP_CLASS(SerialGEMVOp) -template -KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { - KokkosBlas::Experimental::Gemv::invoke( - member, params::trans, params::alpha, params::A, params::x, params::beta, - params::y); -} -KK_END_BLAS2_GEMV_TEST_OP_CLASS +struct SerialGEMVOp : public GemvOpBase { + using params = GemvOpBase; + + SerialGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, + ScalarType beta_, YType y_) + : params(trans_, alpha_, A_, x_, beta_, y_) {} + + template + KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { + KokkosBlas::Experimental::Gemv::invoke( + member, params::trans, params::alpha, params::A, params::x, params::beta, + params::y); + } +}; struct SerialGemvFactory { template ; using algorithms = std::tuple; +}; + #ifdef __KOKKOSBLAS_ENABLE_INTEL_MKL_COMPACT__ - , - KokkosBlas::Algo::Gemv::CompactMKL -#endif - >; - - template - static constexpr bool is_mkl = - std::is_same::value; - - // block testing of CompackMKL on non-vector scalars - // (they are not supported by the implementation) - template - static constexpr bool allow_algorithm = - !is_mkl || (KokkosBatched::is_vector::value && - KokkosBatched::is_vector::value && - KokkosBatched::is_vector::value); - - // block testing of ConjNoTranspose mode on CompactMKL - template - static bool allow_mode(char trans) { - return !is_mkl || toupper(trans) != 'X'; - } +struct SerialMKLGemvFactory { + template + using functor_type = + SerialGEMVOp; + + using algorithms = std::tuple; }; +#endif } // namespace Test @@ -66,9 +59,9 @@ TEST_SERIAL_CASE(float, float) using simd_float_sse = ::Test::simd_vector; using simd_float_avx = ::Test::simd_vector; using simd_float_avx512 = ::Test::simd_vector; -TEST_SERIAL_CASE2(mkl_float_sse, simd_float_sse, float) -TEST_SERIAL_CASE2(mkl_float_avx, simd_float_avx, float) -TEST_SERIAL_CASE2(mkl_float_avx512, simd_float_avx512, float) +TEST_CASE2(serial, SerialMKLGemvFactory, mkl_float_sse, simd_float_sse, float) +TEST_CASE2(serial, SerialMKLGemvFactory, mkl_float_avx, simd_float_avx, float) +TEST_CASE2(serial, SerialMKLGemvFactory, mkl_float_avx512, simd_float_avx512, float) #endif #endif diff --git a/unit_test/blas/Test_Blas2_team_gemv.hpp b/unit_test/blas/Test_Blas2_team_gemv.hpp index 0ea3762c34..7c4588fa33 100644 --- a/unit_test/blas/Test_Blas2_team_gemv.hpp +++ b/unit_test/blas/Test_Blas2_team_gemv.hpp @@ -8,14 +8,20 @@ namespace Test { template -KK_DEFINE_BLAS2_GEMV_TEST_OP_CLASS(TeamGEMVOp) -template -KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { - KokkosBlas::Experimental::Gemv::invoke( - member, params::trans, params::alpha, params::A, params::x, params::beta, - params::y); -} -KK_END_BLAS2_GEMV_TEST_OP_CLASS +struct TeamGEMVOp : public GemvOpBase { + using params = GemvOpBase; + + TeamGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, + ScalarType beta_, YType y_) + : params(trans_, alpha_, A_, x_, beta_, y_) {} + + template + KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { + KokkosBlas::Experimental::Gemv::invoke( + member, params::trans, params::alpha, params::A, params::x, params::beta, + params::y); + } +}; struct TeamGemvFactory { template ; - - template - static constexpr bool allow_algorithm = true; - - template - static bool allow_mode(char trans) { - return true; - } }; } // namespace Test diff --git a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp index 79d138c922..ea17d12d86 100644 --- a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp +++ b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp @@ -8,14 +8,20 @@ namespace Test { template -KK_DEFINE_BLAS2_GEMV_TEST_OP_CLASS(TeamVectorGEMVOp) -template -KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { - KokkosBlas::Experimental::Gemv::invoke( - member, params::trans, params::alpha, params::A, params::x, params::beta, - params::y); -} -KK_END_BLAS2_GEMV_TEST_OP_CLASS +struct TeamVectorGEMVOp : public GemvOpBase { + using params = GemvOpBase; + + TeamVectorGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, + ScalarType beta_, YType y_) + : params(trans_, alpha_, A_, x_, beta_, y_) {} + + template + KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { + KokkosBlas::Experimental::Gemv::invoke( + member, params::trans, params::alpha, params::A, params::x, params::beta, + params::y); + } +}; struct TeamVectorGemvFactory { template ; - - template - static constexpr bool allow_algorithm = true; - - template - static bool allow_mode(char trans) { - return true; - } }; } // namespace Test From 134fc069e3fac4a2d6d8e576dcaf5aaf64f1cf3e Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Mon, 15 Aug 2022 14:47:55 -0600 Subject: [PATCH 10/15] Team/TeamVector GEMV: applying clang-format after fixes for CI --- .../KokkosBatched_Gemv_TeamVector_Impl.hpp | 18 ++++++++++++------ .../impl/KokkosBatched_Gemv_Team_Impl.hpp | 18 ++++++++++++------ src/blas/KokkosBlas2_gemv.hpp | 6 +++--- unit_test/blas/Test_Blas2_gemv_util.hpp | 17 +++++++++-------- unit_test/blas/Test_Blas2_serial_gemv.hpp | 11 ++++++----- unit_test/blas/Test_Blas2_team_gemv.hpp | 8 ++++---- unit_test/blas/Test_Blas2_teamvector_gemv.hpp | 12 +++++++----- 7 files changed, 53 insertions(+), 37 deletions(-) diff --git a/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp b/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp index a7e63e583e..73e146ab01 100644 --- a/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp +++ b/src/batched/dense/impl/KokkosBatched_Gemv_TeamVector_Impl.hpp @@ -46,9 +46,12 @@ template struct TeamVectorGemv { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &/*member*/, const ScalarType /*alpha*/, const AViewType &/*A*/, - const xViewType &/*x*/, const ScalarType /*beta*/, const yViewType &/*y*/) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, + const ScalarType /*alpha*/, + const AViewType & /*A*/, + const xViewType & /*x*/, + const ScalarType /*beta*/, + const yViewType & /*y*/) { static_assert(AViewType::Rank == 3, "Batched TeamVectorGemv requires rank-3 A matrix (use " "KokkosBlas::TeamVectorGemv for regular rank-2 matrix)"); @@ -85,9 +88,12 @@ template struct TeamVectorGemv { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &/*member*/, const ScalarType /*alpha*/, const AViewType &/*A*/, - const xViewType &/*x*/, const ScalarType /*beta*/, const yViewType &/*y*/) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, + const ScalarType /*alpha*/, + const AViewType & /*A*/, + const xViewType & /*x*/, + const ScalarType /*beta*/, + const yViewType & /*y*/) { static_assert(AViewType::Rank == 3, "Batched TeamVectorGemv requires rank-3 A matrix (use " "KokkosBlas::TeamVectorGemv for regular rank-2 matrix)"); diff --git a/src/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp b/src/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp index 20a9fbb329..274cfab523 100644 --- a/src/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp +++ b/src/batched/dense/impl/KokkosBatched_Gemv_Team_Impl.hpp @@ -46,9 +46,12 @@ template struct TeamGemv { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &/*member*/, const ScalarType /*alpha*/, const AViewType &/*A*/, - const xViewType &/*x*/, const ScalarType /*beta*/, const yViewType &/*y*/) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, + const ScalarType /*alpha*/, + const AViewType & /*A*/, + const xViewType & /*x*/, + const ScalarType /*beta*/, + const yViewType & /*y*/) { static_assert(AViewType::Rank == 3, "Batched TeamGemv requires rank-3 A matrix (use " "KokkosBlas::TeamGemv for regular rank-2 matrix)"); @@ -85,9 +88,12 @@ template struct TeamGemv { template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType &/*member*/, const ScalarType /*alpha*/, const AViewType &/*A*/, - const xViewType &/*x*/, const ScalarType /*beta*/, const yViewType &/*y*/) { + KOKKOS_INLINE_FUNCTION static int invoke(const MemberType & /*member*/, + const ScalarType /*alpha*/, + const AViewType & /*A*/, + const xViewType & /*x*/, + const ScalarType /*beta*/, + const yViewType & /*y*/) { static_assert(AViewType::Rank == 3, "Batched TeamGemv requires rank-3 A matrix (use " "KokkosBlas::TeamGemv for regular rank-2 matrix)"); diff --git a/src/blas/KokkosBlas2_gemv.hpp b/src/blas/KokkosBlas2_gemv.hpp index fb5e008d4d..fe8418cc40 100644 --- a/src/blas/KokkosBlas2_gemv.hpp +++ b/src/blas/KokkosBlas2_gemv.hpp @@ -227,9 +227,9 @@ struct Gemv { template static void KOKKOS_INLINE_FUNCTION - invoke(const MemberType& /*member*/, const char trans, const ScalarType& alpha, - const MatrixType& A, const XVector& x, const ScalarType& beta, - const YVector& y) { + invoke(const MemberType& /*member*/, const char trans, + const ScalarType& alpha, const MatrixType& A, const XVector& x, + const ScalarType& beta, const YVector& y) { serial_gemv(trans, alpha, A, x, beta, y); } }; diff --git a/unit_test/blas/Test_Blas2_gemv_util.hpp b/unit_test/blas/Test_Blas2_gemv_util.hpp index 29969525a8..ad232ed79c 100644 --- a/unit_test/blas/Test_Blas2_gemv_util.hpp +++ b/unit_test/blas/Test_Blas2_gemv_util.hpp @@ -37,15 +37,16 @@ struct RefGEMVOp : public GemvOpBase { using params = GemvOpBase; RefGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, - ScalarType beta_, YType y_) - : params(trans_, alpha_, A_, x_, beta_, y_) {} + ScalarType beta_, YType y_) + : params(trans_, alpha_, A_, x_, beta_, y_) {} template - KOKKOS_INLINE_FUNCTION void operator()(const TeamMember & /* member */) const { - vanillaGEMV(params::trans, params::alpha, params::A, params::x, params::beta, - params::y); + KOKKOS_INLINE_FUNCTION void operator()( + const TeamMember & /* member */) const { + vanillaGEMV(params::trans, params::alpha, params::A, params::x, + params::beta, params::y); } -}; // RefGEMVOp +}; // RefGEMVOp // fill regular view with random values template static std::enable_if_t::value> - run_algorithms(const char */*mode*/) {} + run_algorithms(const char * /*mode*/) {} template static @@ -294,7 +295,7 @@ struct GEMVTest { fill_random_view(x, rand_pool); fill_random_view(y, rand_pool); } -}; // struct GEMVTest +}; // struct GEMVTest } // namespace Test diff --git a/unit_test/blas/Test_Blas2_serial_gemv.hpp b/unit_test/blas/Test_Blas2_serial_gemv.hpp index 30617e14e6..5e50a72189 100644 --- a/unit_test/blas/Test_Blas2_serial_gemv.hpp +++ b/unit_test/blas/Test_Blas2_serial_gemv.hpp @@ -12,14 +12,14 @@ struct SerialGEMVOp : public GemvOpBase { using params = GemvOpBase; SerialGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, - ScalarType beta_, YType y_) - : params(trans_, alpha_, A_, x_, beta_, y_) {} + ScalarType beta_, YType y_) + : params(trans_, alpha_, A_, x_, beta_, y_) {} template KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { KokkosBlas::Experimental::Gemv::invoke( - member, params::trans, params::alpha, params::A, params::x, params::beta, - params::y); + member, params::trans, params::alpha, params::A, params::x, + params::beta, params::y); } }; @@ -61,7 +61,8 @@ using simd_float_avx = ::Test::simd_vector; using simd_float_avx512 = ::Test::simd_vector; TEST_CASE2(serial, SerialMKLGemvFactory, mkl_float_sse, simd_float_sse, float) TEST_CASE2(serial, SerialMKLGemvFactory, mkl_float_avx, simd_float_avx, float) -TEST_CASE2(serial, SerialMKLGemvFactory, mkl_float_avx512, simd_float_avx512, float) +TEST_CASE2(serial, SerialMKLGemvFactory, mkl_float_avx512, simd_float_avx512, + float) #endif #endif diff --git a/unit_test/blas/Test_Blas2_team_gemv.hpp b/unit_test/blas/Test_Blas2_team_gemv.hpp index 7c4588fa33..0228bd3b04 100644 --- a/unit_test/blas/Test_Blas2_team_gemv.hpp +++ b/unit_test/blas/Test_Blas2_team_gemv.hpp @@ -12,14 +12,14 @@ struct TeamGEMVOp : public GemvOpBase { using params = GemvOpBase; TeamGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, - ScalarType beta_, YType y_) - : params(trans_, alpha_, A_, x_, beta_, y_) {} + ScalarType beta_, YType y_) + : params(trans_, alpha_, A_, x_, beta_, y_) {} template KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { KokkosBlas::Experimental::Gemv::invoke( - member, params::trans, params::alpha, params::A, params::x, params::beta, - params::y); + member, params::trans, params::alpha, params::A, params::x, + params::beta, params::y); } }; diff --git a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp index ea17d12d86..2f3ee80d59 100644 --- a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp +++ b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp @@ -12,14 +12,16 @@ struct TeamVectorGEMVOp : public GemvOpBase { using params = GemvOpBase; TeamVectorGEMVOp(char trans_, ScalarType alpha_, AType A_, XType x_, - ScalarType beta_, YType y_) - : params(trans_, alpha_, A_, x_, beta_, y_) {} + ScalarType beta_, YType y_) + : params(trans_, alpha_, A_, x_, beta_, y_) {} template KOKKOS_INLINE_FUNCTION void operator()(const TeamMember& member) const { - KokkosBlas::Experimental::Gemv::invoke( - member, params::trans, params::alpha, params::A, params::x, params::beta, - params::y); + KokkosBlas::Experimental::Gemv::invoke(member, params::trans, + params::alpha, params::A, + params::x, params::beta, + params::y); } }; From 37c074ed6179e26e84bf5f1d673771a95cc72103 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Tue, 16 Aug 2022 10:38:41 +0200 Subject: [PATCH 11/15] Team/TeamVector GEMV: extend MKL fixes to double --- unit_test/blas/Test_Blas2_serial_gemv.hpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/unit_test/blas/Test_Blas2_serial_gemv.hpp b/unit_test/blas/Test_Blas2_serial_gemv.hpp index 5e50a72189..fd73707c9a 100644 --- a/unit_test/blas/Test_Blas2_serial_gemv.hpp +++ b/unit_test/blas/Test_Blas2_serial_gemv.hpp @@ -73,9 +73,12 @@ TEST_SERIAL_CASE(double, double) using simd_double_sse = ::Test::simd_vector; using simd_double_avx = ::Test::simd_vector; using simd_double_avx512 = ::Test::simd_vector; -TEST_SERIAL_CASE2(mkl_double_sse, simd_double_sse, double) -TEST_SERIAL_CASE2(mkl_double_avx, simd_double_avx, double) -TEST_SERIAL_CASE2(mkl_double_avx512, simd_double_avx512, double) +TEST_CASE2(serial, SerialMKLGemvFactory, mkl_double_sse, simd_double_sse, + double) +TEST_CASE2(serial, SerialMKLGemvFactory, mkl_double_avx, simd_double_avx, + double) +TEST_CASE2(serial, SerialMKLGemvFactory, mkl_double_avx512, simd_double_avx512, + double) #endif #endif From 11f12f94f7657485db2dcb05e95c2fe04974f247 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Tue, 16 Aug 2022 10:30:33 +0200 Subject: [PATCH 12/15] Team/TeamVector GEMV: restore CUDA_LAMBDA guards --- unit_test/blas/Test_Blas2_team_gemv.hpp | 8 ++++++++ unit_test/blas/Test_Blas2_teamvector_gemv.hpp | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/unit_test/blas/Test_Blas2_team_gemv.hpp b/unit_test/blas/Test_Blas2_team_gemv.hpp index 0228bd3b04..b985382954 100644 --- a/unit_test/blas/Test_Blas2_team_gemv.hpp +++ b/unit_test/blas/Test_Blas2_team_gemv.hpp @@ -1,3 +1,9 @@ +// Note: Luc Berger-Vergiat 04/14/21 +// This tests uses KOKKOS_LAMBDA so we need +// to make sure that these are enabled in +// the CUDA backend before including this test. +#if !defined(TEST_CUDA_BLAS_CPP) || defined(KOKKOS_ENABLE_CUDA_LAMBDA) + #include #include // for test/inst guards // Note: include serial gemv before util so it knows if CompactMKL is available @@ -71,3 +77,5 @@ TEST_TEAM_CASE2(alphabeta, Kokkos::complex, double) #undef TEST_TEAM_CASE4 #undef TEST_TEAM_CASE2 #undef TEST_TEAM_CASE + +#endif // Check for lambda availability on CUDA backend diff --git a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp index 2f3ee80d59..f5991888ad 100644 --- a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp +++ b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp @@ -1,3 +1,9 @@ +// Note: Luc Berger-Vergiat 04/14/21 +// This tests uses KOKKOS_LAMBDA so we need +// to make sure that these are enabled in +// the CUDA backend before including this test. +#if !defined(TEST_CUDA_BLAS_CPP) || defined(KOKKOS_ENABLE_CUDA_LAMBDA) + #include #include // for test/inst guards // Note: include serial gemv before util so it knows if CompactMKL is available @@ -75,3 +81,5 @@ TEST_TEAMVECTOR_CASE2(alphabeta, Kokkos::complex, double) #undef TEST_TEAMVECTOR_CASE4 #undef TEST_TEAMVECTOR_CASE2 #undef TEST_TEAMVECTOR_CASE + +#endif // Check for lambda availability on CUDA backend From 8448abbf20004736a2abeb0aeaa0d0647cb36d9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Tue, 16 Aug 2022 10:58:39 +0200 Subject: [PATCH 13/15] Team/TeamVector GEMV: skip unsupported ConjNoTranspose in MKL unit tests --- unit_test/blas/Test_Blas2_gemv_util.hpp | 3 +++ unit_test/blas/Test_Blas2_serial_gemv.hpp | 2 ++ unit_test/blas/Test_Blas2_team_gemv.hpp | 1 + unit_test/blas/Test_Blas2_teamvector_gemv.hpp | 1 + 4 files changed, 7 insertions(+) diff --git a/unit_test/blas/Test_Blas2_gemv_util.hpp b/unit_test/blas/Test_Blas2_gemv_util.hpp index ad232ed79c..47a435d50b 100644 --- a/unit_test/blas/Test_Blas2_gemv_util.hpp +++ b/unit_test/blas/Test_Blas2_gemv_util.hpp @@ -134,6 +134,9 @@ struct GEMVTest { // Note: all layouts listed here are subview'ed to test Kokkos::LayoutStride template static void run_layouts(const char *mode) { + if (!GemvFunc::is_supported_mode(mode[0])) { + return; + } #ifdef KOKKOSKERNELS_TEST_LAYOUTLEFT run_view_types(mode); #endif diff --git a/unit_test/blas/Test_Blas2_serial_gemv.hpp b/unit_test/blas/Test_Blas2_serial_gemv.hpp index fd73707c9a..d4bf4cbd28 100644 --- a/unit_test/blas/Test_Blas2_serial_gemv.hpp +++ b/unit_test/blas/Test_Blas2_serial_gemv.hpp @@ -31,6 +31,7 @@ struct SerialGemvFactory { using algorithms = std::tuple; + static bool is_supported_mode(char /* trans */) { return true; } }; #ifdef __KOKKOSBLAS_ENABLE_INTEL_MKL_COMPACT__ @@ -41,6 +42,7 @@ struct SerialMKLGemvFactory { SerialGEMVOp; using algorithms = std::tuple; + static bool is_supported_mode(char trans) { return trans != 'X'; } }; #endif diff --git a/unit_test/blas/Test_Blas2_team_gemv.hpp b/unit_test/blas/Test_Blas2_team_gemv.hpp index b985382954..c2f9ba2a07 100644 --- a/unit_test/blas/Test_Blas2_team_gemv.hpp +++ b/unit_test/blas/Test_Blas2_team_gemv.hpp @@ -37,6 +37,7 @@ struct TeamGemvFactory { using algorithms = std::tuple; + static bool is_supported_mode(char /* trans */) { return true; } }; } // namespace Test diff --git a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp index f5991888ad..ae899c2c78 100644 --- a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp +++ b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp @@ -39,6 +39,7 @@ struct TeamVectorGemvFactory { // no Blocked implementation using algorithms = std::tuple; + static bool is_supported_mode(char /* trans */) { return true; } }; } // namespace Test From 2a2a84ef876dab0cb9da243fba8612508db02033 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Tue, 16 Aug 2022 18:23:41 +0200 Subject: [PATCH 14/15] Team/TeamVector GEMV: retract ConjNoTranspose --- src/blas/KokkosBlas2_serial_gemv.hpp | 7 --- src/blas/KokkosBlas2_team_gemv.hpp | 21 ++----- .../impl/KokkosBlas2_serial_gemv_impl.hpp | 28 --------- src/blas/impl/KokkosBlas2_team_gemv_spec.hpp | 57 ------------------- src/blas/impl/KokkosBlas_util.hpp | 1 - .../KokkosBlas2_serial_gemv_tpl_spec_decl.hpp | 17 ------ .../impl/KokkosSparse_spmv_bsrmatrix_impl.hpp | 14 ++--- test_common/KokkosKernels_TestUtils.hpp | 2 +- unit_test/blas/Test_Blas2_gemv_util.hpp | 6 -- unit_test/blas/Test_Blas2_serial_gemv.hpp | 2 - unit_test/blas/Test_Blas2_team_gemv.hpp | 1 - unit_test/blas/Test_Blas2_teamvector_gemv.hpp | 1 - 12 files changed, 13 insertions(+), 144 deletions(-) diff --git a/src/blas/KokkosBlas2_serial_gemv.hpp b/src/blas/KokkosBlas2_serial_gemv.hpp index 1f53fba87e..cb568095b2 100644 --- a/src/blas/KokkosBlas2_serial_gemv.hpp +++ b/src/blas/KokkosBlas2_serial_gemv.hpp @@ -67,13 +67,6 @@ void KOKKOS_INLINE_FUNCTION serial_gemv(const char trans, } else if (trans == 'C' || trans == 'c') { using mode = KokkosBlas::Trans::ConjTranspose; KokkosBlas::SerialGemv::invoke(alpha, A, x, beta, y); - // - // TODO: what letter should be used here ? - // * in blas "C" means conjugate-transpose - // * in sparse "C" meanse conjugate and "H" conjugate-transpose... - } else if (trans == 'X' || trans == 'x') { - using mode = KokkosBlas::Trans::ConjNoTranspose; - KokkosBlas::SerialGemv::invoke(alpha, A, x, beta, y); } else { Kokkos::abort("Matrix mode not supported"); } diff --git a/src/blas/KokkosBlas2_team_gemv.hpp b/src/blas/KokkosBlas2_team_gemv.hpp index 1950356804..ddc216b8af 100644 --- a/src/blas/KokkosBlas2_team_gemv.hpp +++ b/src/blas/KokkosBlas2_team_gemv.hpp @@ -60,19 +60,15 @@ void KOKKOS_INLINE_FUNCTION team_gemv(const TeamType& team, const char trans, if (trans == 'N' || trans == 'n') TeamGemv::invoke(team, alpha, A, x, beta, y); - if (trans == 'T' || trans == 't') + else if (trans == 'T' || trans == 't') TeamGemv::invoke(team, alpha, A, x, beta, y); - if (trans == 'C' || trans == 'c') + else if (trans == 'C' || trans == 'c') TeamGemv::invoke(team, alpha, A, x, beta, y); - // - // TODO: what letter should be used here ? - // * in blas "C" means conjugate-transpose - // * in sparse "C" meanse conjugate and "H" conjugate-transpose... - if (trans == 'X' || trans == 'x') - TeamGemv::invoke(team, alpha, A, - x, beta, y); + else { + Kokkos::abort("Matrix mode not supported"); + } } // default AlgoTag @@ -101,13 +97,6 @@ teamvector_gemv(const TeamType& team, const char trans, const ScalarType& alpha, } else if (trans == 'C' || trans == 'c') { KokkosBlas::TeamVectorGemv::invoke( team, alpha, A, x, beta, y); - // - // TODO: what letter should be used here ? - // * in blas "C" means conjugate-transpose - // * in sparse "C" meanse conjugate and "H" conjugate-transpose... - } else if (trans == 'X' || trans == 'x') { - KokkosBlas::TeamVectorGemv::invoke(team, alpha, A, x, beta, y); } else { Kokkos::abort("Matrix mode not supported"); } diff --git a/src/blas/impl/KokkosBlas2_serial_gemv_impl.hpp b/src/blas/impl/KokkosBlas2_serial_gemv_impl.hpp index 9a0734d5e4..0d7f52702b 100644 --- a/src/blas/impl/KokkosBlas2_serial_gemv_impl.hpp +++ b/src/blas/impl/KokkosBlas2_serial_gemv_impl.hpp @@ -156,34 +156,6 @@ SerialGemv::invoke( A.stride_0(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); } -/// -/// CNT -/// - -template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemv::invoke( - const ScalarType alpha, const AViewType &A, const xViewType &x, - const ScalarType beta, const yViewType &y) { - return Impl::SerialGemvInternal::invoke( - Impl::OpConj{}, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); -} - -template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemv::invoke( - const ScalarType alpha, const AViewType &A, const xViewType &x, - const ScalarType beta, const yViewType &y) { - return Impl::SerialGemvInternal::invoke( - Impl::OpConj{}, A.extent(0), A.extent(1), alpha, A.data(), A.stride_0(), - A.stride_1(), x.data(), x.stride_0(), beta, y.data(), y.stride_0()); -} - } // namespace KokkosBlas #endif diff --git a/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp b/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp index 273a9cddb2..92aac23f26 100644 --- a/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp +++ b/src/blas/impl/KokkosBlas2_team_gemv_spec.hpp @@ -182,42 +182,6 @@ struct TeamGemv { } }; -/// -/// CNT -/// - -template -struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const ScalarType alpha, const AViewType& A, - const xViewType& x, const ScalarType beta, const yViewType& y) { - static_assert(AViewType::Rank == 2, - "BLAS TeamGemv requires rank-2 A matrix"); - return Impl::TeamGemvInternal::invoke( - member, Impl::OpConj{}, A.extent(0), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), x.data(), x.stride_0(), beta, y.data(), - y.stride_0()); - } -}; - -template -struct TeamGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const ScalarType alpha, const AViewType& A, - const xViewType& x, const ScalarType beta, const yViewType& y) { - static_assert(AViewType::Rank == 2, - "BLAS TeamGemv requires rank-2 A matrix"); - return Impl::TeamGemvInternal::invoke( - member, Impl::OpConj{}, A.extent(0), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), x.data(), x.stride_0(), beta, y.data(), - y.stride_0()); - } -}; - /// /// NT /// @@ -276,27 +240,6 @@ struct TeamVectorGemv { } }; -/// -/// CNT -/// - -template -struct TeamVectorGemv { - template - KOKKOS_INLINE_FUNCTION static int invoke( - const MemberType& member, const ScalarType alpha, const AViewType& A, - const xViewType& x, const ScalarType beta, const yViewType& y) { - static_assert(AViewType::Rank == 2, - "Batched TeamVectorGemv requires rank-2 A matrix"); - return Impl::TeamVectorGemvInternal::invoke( - member, Impl::OpConj{}, A.extent(0), A.extent(1), alpha, A.data(), - A.stride_0(), A.stride_1(), x.data(), x.stride_0(), beta, y.data(), - y.stride_0()); - } -}; - } // namespace KokkosBlas #endif diff --git a/src/blas/impl/KokkosBlas_util.hpp b/src/blas/impl/KokkosBlas_util.hpp index 7ed3afa9c1..dcee8283d6 100644 --- a/src/blas/impl/KokkosBlas_util.hpp +++ b/src/blas/impl/KokkosBlas_util.hpp @@ -67,7 +67,6 @@ struct Trans { struct Transpose {}; struct NoTranspose {}; struct ConjTranspose {}; - struct ConjNoTranspose {}; // Note: usually not available in BLAS, MKL, etc. }; #if !defined(KOKKOS_IF_ON_HOST) diff --git a/src/impl/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp index f35fa4fb34..77aa5a6713 100644 --- a/src/impl/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas2_serial_gemv_tpl_spec_decl.hpp @@ -199,23 +199,6 @@ SerialGemv::invoke( return 0; } -/// -/// CNT (not supported) -/// - -template <> -template -KOKKOS_INLINE_FUNCTION int -SerialGemv::invoke( - const ScalarType alpha, const AViewType &A, const xViewType &x, - const ScalarType beta, const yViewType &y) { - Kokkos::abort( - "CompactMKL implementation of serial GEMV does not support " - "ConjNoTranspose mode"); - return 0; -} - } // namespace KokkosBlas #endif // __KOKKOSBLAS_ENABLE_INTEL_MKL_COMPACT__ diff --git a/src/sparse/impl/KokkosSparse_spmv_bsrmatrix_impl.hpp b/src/sparse/impl/KokkosSparse_spmv_bsrmatrix_impl.hpp index 89a5f79ba9..a0bf8c96ec 100644 --- a/src/sparse/impl/KokkosSparse_spmv_bsrmatrix_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spmv_bsrmatrix_impl.hpp @@ -539,14 +539,12 @@ struct BsrMatrixSpMVTensorCoreDispatcher { #include "KokkosBlas.hpp" #include "KokkosBlas2_serial_gemv_internal.hpp" -#include "KokkosBatched_Gemv_TeamVector_Internal.hpp" +#include "KokkosBlas2_team_gemv_impl.hpp" #include "KokkosBatched_Gemm_Serial_Internal.hpp" #include "KokkosBatched_Gemm_TeamVector_Internal.hpp" #include "KokkosBlas1_team_scal_impl.hpp" #include "KokkosKernels_ExecSpaceUtils.hpp" -#include "KokkosBlas2_team_gemv_spec.hpp" - namespace KokkosSparse { namespace Experimental { namespace Impl { @@ -656,10 +654,12 @@ struct BSR_GEMV_Functor { const auto X_ptBeg = X_blkCol * block_dim; const auto X_cur = Kokkos::subview( m_x, ::Kokkos::make_pair(X_ptBeg, X_ptBeg + block_dim)); - KokkosBlas::TeamVectorGemv< - team_member, KokkosBlas::Trans::ConjNoTranspose, - KokkosBlas::Algo::Gemv::Default>::invoke(dev, alpha, A_cur, X_cur, - val_one, Y_cur); + KokkosBlas::Impl:: + TeamVectorGemvInternal::invoke( + dev, KokkosBlas::Impl::OpConj{}, A_cur.extent(0), + A_cur.extent(1), alpha, A_cur.data(), A_cur.stride_0(), + A_cur.stride_1(), X_cur.data(), X_cur.stride_0(), val_one, + Y_cur.data(), Y_cur.stride_0()); } } else { for (ordinal_type jBlock = 0; jBlock < count; ++jBlock) { diff --git a/test_common/KokkosKernels_TestUtils.hpp b/test_common/KokkosKernels_TestUtils.hpp index 8a51139c5f..b8ce0eb595 100644 --- a/test_common/KokkosKernels_TestUtils.hpp +++ b/test_common/KokkosKernels_TestUtils.hpp @@ -370,7 +370,7 @@ KOKKOS_INLINE_FUNCTION void vanillaGEMV(char mode, AlphaType alpha, using ScalarY = typename ViewTypeY::non_const_value_type; using KAT_A = Kokkos::ArithTraits; const bool transposed = mode == 'T' || mode == 'C'; - const bool conjugated = mode == 'C' || mode == 'X'; + const bool conjugated = mode == 'C'; const bool has_beta = beta != Kokkos::ArithTraits::zero(); int M = A.extent(transposed ? 1 : 0); int N = A.extent(transposed ? 0 : 1); diff --git a/unit_test/blas/Test_Blas2_gemv_util.hpp b/unit_test/blas/Test_Blas2_gemv_util.hpp index 47a435d50b..635f02c558 100644 --- a/unit_test/blas/Test_Blas2_gemv_util.hpp +++ b/unit_test/blas/Test_Blas2_gemv_util.hpp @@ -134,9 +134,6 @@ struct GEMVTest { // Note: all layouts listed here are subview'ed to test Kokkos::LayoutStride template static void run_layouts(const char *mode) { - if (!GemvFunc::is_supported_mode(mode[0])) { - return; - } #ifdef KOKKOSKERNELS_TEST_LAYOUTLEFT run_view_types(mode); #endif @@ -315,9 +312,6 @@ struct GEMVTest { } \ TEST_F(TestCategory, PREFIX##_gemv_ct_##NAME) { \ PREFIX##_##NAME##_gemv_test::run("C"); \ - } \ - TEST_F(TestCategory, PREFIX##_gemv_cnt_##NAME) { \ - PREFIX##_##NAME##_gemv_test::run("X"); \ } #define TEST_CASE2(PREFIX, FACTORY, NAME, SCALAR, SCALAR_COEF) \ diff --git a/unit_test/blas/Test_Blas2_serial_gemv.hpp b/unit_test/blas/Test_Blas2_serial_gemv.hpp index d4bf4cbd28..fd73707c9a 100644 --- a/unit_test/blas/Test_Blas2_serial_gemv.hpp +++ b/unit_test/blas/Test_Blas2_serial_gemv.hpp @@ -31,7 +31,6 @@ struct SerialGemvFactory { using algorithms = std::tuple; - static bool is_supported_mode(char /* trans */) { return true; } }; #ifdef __KOKKOSBLAS_ENABLE_INTEL_MKL_COMPACT__ @@ -42,7 +41,6 @@ struct SerialMKLGemvFactory { SerialGEMVOp; using algorithms = std::tuple; - static bool is_supported_mode(char trans) { return trans != 'X'; } }; #endif diff --git a/unit_test/blas/Test_Blas2_team_gemv.hpp b/unit_test/blas/Test_Blas2_team_gemv.hpp index c2f9ba2a07..b985382954 100644 --- a/unit_test/blas/Test_Blas2_team_gemv.hpp +++ b/unit_test/blas/Test_Blas2_team_gemv.hpp @@ -37,7 +37,6 @@ struct TeamGemvFactory { using algorithms = std::tuple; - static bool is_supported_mode(char /* trans */) { return true; } }; } // namespace Test diff --git a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp index ae899c2c78..f5991888ad 100644 --- a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp +++ b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp @@ -39,7 +39,6 @@ struct TeamVectorGemvFactory { // no Blocked implementation using algorithms = std::tuple; - static bool is_supported_mode(char /* trans */) { return true; } }; } // namespace Test From da2149ad02237b0e77997e5b35b7b5767a680c64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Zuzek?= Date: Tue, 16 Aug 2022 18:53:22 +0200 Subject: [PATCH 15/15] Team/TeamVector GEMV: fix test headers for top-level interface --- unit_test/blas/Test_Blas2_team_gemv.hpp | 2 +- unit_test/blas/Test_Blas2_teamvector_gemv.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/unit_test/blas/Test_Blas2_team_gemv.hpp b/unit_test/blas/Test_Blas2_team_gemv.hpp index b985382954..722aca1938 100644 --- a/unit_test/blas/Test_Blas2_team_gemv.hpp +++ b/unit_test/blas/Test_Blas2_team_gemv.hpp @@ -8,7 +8,7 @@ #include // for test/inst guards // Note: include serial gemv before util so it knows if CompactMKL is available #include -#include +#include namespace Test { diff --git a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp index f5991888ad..5814541bb2 100644 --- a/unit_test/blas/Test_Blas2_teamvector_gemv.hpp +++ b/unit_test/blas/Test_Blas2_teamvector_gemv.hpp @@ -8,7 +8,7 @@ #include // for test/inst guards // Note: include serial gemv before util so it knows if CompactMKL is available #include -#include +#include namespace Test {