From 7a875284e3edfda3d91f3ef716852713130ed6eb Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 6 May 2021 06:42:06 -0600 Subject: [PATCH 01/35] src/blas/impl: Fix LayoutRight link errors Fix link errors when building with LayoutRight ON but LayoutLeft OFF. - Conditionally define GetUnifiedLayout based on which layout types are pre instantiated. - Fixes link errors in: - KokkosBlas::Impl::Abs - KokkosBlas::Impl::Axpby - KokkosBlas::Impl::Reciprocal - KokkosBlas::Impl::Update --- src/impl/KokkosKernels_helpers.hpp | 48 ++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/impl/KokkosKernels_helpers.hpp b/src/impl/KokkosKernels_helpers.hpp index 797435c51e..2cc5db6590 100644 --- a/src/impl/KokkosKernels_helpers.hpp +++ b/src/impl/KokkosKernels_helpers.hpp @@ -44,31 +44,55 @@ #ifndef KOKKOSKERNELS_HELPERS_HPP_ #define KOKKOSKERNELS_HELPERS_HPP_ +#include "KokkosKernels_config.h" // KOKKOSKERNELS_INST_LAYOUTLEFT, KOKKOSKERNELS_INST_LAYOUTRIGHT + namespace KokkosKernels { namespace Impl { // Unify Layout of a View to LayoutLeft if possible. // Used to reduce number of code instantiations +template +struct GetUnifiedLayoutInternal { + typedef typename std::conditional< + ((ViewType::rank == 1) && (!std::is_same::value)) || + ((ViewType::rank == 0)), + UnifiedLayoutType, typename ViewType::array_layout>::type array_layout; +}; -template +// If LayoutLeft kernels are pre instantiated, try to unify layout to LayoutLeft +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) +template struct GetUnifiedLayout { - typedef typename std::conditional< - ( (ViewType::rank == 1) && - (!std::is_same::value) ) || - ( (ViewType::rank == 0) ) - ,Kokkos::LayoutLeft,typename ViewType::array_layout>::type array_layout; + using array_layout = + typename GetUnifiedLayoutInternal::array_layout; +}; +#else +// If LayoutLeft kernels are not pre instantiated, try to unify layout to +// LayoutRight +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) +template +struct GetUnifiedLayout { + using array_layout = + typename GetUnifiedLayoutInternal::array_layout; }; +#endif +#endif -template::value> +template ::value> struct GetUnifiedScalarViewType { typedef typename TX::non_const_value_type type; }; -template -struct GetUnifiedScalarViewType { - typedef Kokkos::View::array_layout, - typename T::device_type, +template +struct GetUnifiedScalarViewType { + typedef Kokkos::View< + typename T::non_const_value_type*, + typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename T::device_type, Kokkos::MemoryTraits > type; }; From edd75473347d6822aa50aa523df7a415d473715b Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 6 May 2021 13:43:06 -0600 Subject: [PATCH 02/35] src/blas: Fix LayoutRight link errors - Fix Kokkos::Impl::GEMV --- src/blas/impl/KokkosBlas2_gemv_spec.hpp | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/blas/impl/KokkosBlas2_gemv_spec.hpp b/src/blas/impl/KokkosBlas2_gemv_spec.hpp index da7983b07a..cb8c616e10 100644 --- a/src/blas/impl/KokkosBlas2_gemv_spec.hpp +++ b/src/blas/impl/KokkosBlas2_gemv_spec.hpp @@ -76,13 +76,11 @@ struct gemv_eti_spec_avail { Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits > \ > { enum : bool { value = true }; }; @@ -170,13 +168,11 @@ extern template struct GEMV< \ Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ false, true>; @@ -186,13 +182,11 @@ template struct GEMV< \ Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ false, true>; From 0b7f10b446f76d83397afd041e6067b8fed0d940 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 6 May 2021 14:25:43 -0600 Subject: [PATCH 03/35] src/blas: Fix LayoutRight link errors - Fix Kokkos::Impl::Sum --- src/blas/KokkosBlas1_sum.hpp | 2 +- src/blas/impl/KokkosBlas1_sum_spec.hpp | 17 +++++++---------- unit_test/blas/Test_Blas1_sum.hpp | 20 +++++++++----------- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/src/blas/KokkosBlas1_sum.hpp b/src/blas/KokkosBlas1_sum.hpp index 3908207682..616e7649ab 100644 --- a/src/blas/KokkosBlas1_sum.hpp +++ b/src/blas/KokkosBlas1_sum.hpp @@ -73,7 +73,7 @@ sum (const XVector& x) typedef Kokkos::View< typename XVector::non_const_value_type, - Kokkos::LayoutLeft, + typename XVector_Internal::array_layout, Kokkos::HostSpace, Kokkos::MemoryTraits > RVector_Internal; diff --git a/src/blas/impl/KokkosBlas1_sum_spec.hpp b/src/blas/impl/KokkosBlas1_sum_spec.hpp index b0fc2b634c..01e2e2eb8e 100644 --- a/src/blas/impl/KokkosBlas1_sum_spec.hpp +++ b/src/blas/impl/KokkosBlas1_sum_spec.hpp @@ -50,7 +50,7 @@ #include // Include the actual functors -#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY #include #endif @@ -74,7 +74,7 @@ struct sum_eti_spec_avail { #define KOKKOSBLAS1_SUM_ETI_SPEC_AVAIL( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ template<> \ struct sum_eti_spec_avail< \ - Kokkos::View >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -91,8 +91,7 @@ struct sum_eti_spec_avail { template<> \ struct sum_eti_spec_avail< \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -205,7 +204,7 @@ struct Sum { // #define KOKKOSBLAS1_SUM_ETI_SPEC_DECL( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ extern template struct Sum< \ - Kokkos::View >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -218,7 +217,7 @@ extern template struct Sum< \ // #define KOKKOSBLAS1_SUM_ETI_SPEC_INST( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ template struct Sum< \ - Kokkos::View >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -234,8 +233,7 @@ template struct Sum< \ #define KOKKOSBLAS1_SUM_MV_ETI_SPEC_DECL( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ extern template struct Sum< \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -250,8 +248,7 @@ extern template struct Sum< \ #define KOKKOSBLAS1_SUM_MV_ETI_SPEC_INST( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ template struct Sum< \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ diff --git a/unit_test/blas/Test_Blas1_sum.hpp b/unit_test/blas/Test_Blas1_sum.hpp index 2c68d1e10a..c61f66b5b0 100644 --- a/unit_test/blas/Test_Blas1_sum.hpp +++ b/unit_test/blas/Test_Blas1_sum.hpp @@ -11,9 +11,7 @@ namespace Test { typedef typename ViewTypeA::value_type ScalarA; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeA; + typename ViewTypeA::array_layout,Device> BaseTypeA; BaseTypeA b_a("A",N); @@ -163,12 +161,12 @@ int test_sum_mv() { #if defined(KOKKOSKERNELS_INST_FLOAT) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F( TestCategory, sum_float ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_float"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_float"); test_sum (); Kokkos::Profiling::popRegion(); } TEST_F( TestCategory, sum_mv_float ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_mv_float"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_mv_float"); test_sum_mv (); Kokkos::Profiling::popRegion(); } @@ -176,12 +174,12 @@ TEST_F( TestCategory, sum_mv_float ) { #if defined(KOKKOSKERNELS_INST_DOUBLE) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F( TestCategory, sum_double ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_double"); test_sum (); Kokkos::Profiling::popRegion(); } TEST_F( TestCategory, sum_mv_double ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_mv_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_mv_double"); test_sum_mv (); Kokkos::Profiling::popRegion(); } @@ -189,12 +187,12 @@ TEST_F( TestCategory, sum_mv_double ) { #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F( TestCategory, sum_complex_double ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_complex_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_complex_double"); test_sum,TestExecSpace> (); Kokkos::Profiling::popRegion(); } TEST_F( TestCategory, sum_mv_complex_double ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_mv_complex_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_mv_complex_double"); test_sum_mv,TestExecSpace> (); Kokkos::Profiling::popRegion(); } @@ -202,12 +200,12 @@ TEST_F( TestCategory, sum_mv_complex_double ) { #if defined(KOKKOSKERNELS_INST_INT) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F( TestCategory, sum_int ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_int"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_int"); test_sum (); Kokkos::Profiling::popRegion(); } TEST_F( TestCategory, sum_mv_int ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_mv_int"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::sum_mv_int"); test_sum_mv (); Kokkos::Profiling::popRegion(); } From 9e62a84dd3f56073f562ba2d9bb86fab9b78ab12 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 6 May 2021 14:39:36 -0600 Subject: [PATCH 04/35] src/blas: Fix LayoutRight link errors - Fix Kokkos::Impl::Scal --- src/blas/impl/KokkosBlas1_scal_spec.hpp | 8 +++---- unit_test/blas/Test_Blas1_scal.hpp | 30 +++++++++++-------------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/src/blas/impl/KokkosBlas1_scal_spec.hpp b/src/blas/impl/KokkosBlas1_scal_spec.hpp index abbf928f58..fe5ebf89a8 100644 --- a/src/blas/impl/KokkosBlas1_scal_spec.hpp +++ b/src/blas/impl/KokkosBlas1_scal_spec.hpp @@ -49,7 +49,7 @@ #include // Include the actual functors -#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY #include #include #endif @@ -93,7 +93,7 @@ struct scal_eti_spec_avail { struct scal_eti_spec_avail< \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -327,7 +327,7 @@ template struct Scal< \ extern template struct Scal< \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -344,7 +344,7 @@ extern template struct Scal< \ template struct Scal< \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ diff --git a/unit_test/blas/Test_Blas1_scal.hpp b/unit_test/blas/Test_Blas1_scal.hpp index 254850f1ae..b862f92b36 100644 --- a/unit_test/blas/Test_Blas1_scal.hpp +++ b/unit_test/blas/Test_Blas1_scal.hpp @@ -14,13 +14,9 @@ namespace Test { typedef Kokkos::Details::ArithTraits AT; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeA; + typename ViewTypeA::array_layout,Device> BaseTypeA; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeB; + typename ViewTypeB::array_layout,Device> BaseTypeB; ScalarA a(3); @@ -65,7 +61,7 @@ namespace Test { { EXPECT_NEAR_KK(a * h_x(i), h_y(i), eps); } - + Kokkos::deep_copy(b_y,b_org_y); KokkosBlas::scal(y,a,c_x); Kokkos::deep_copy(h_b_y, b_y); @@ -261,12 +257,12 @@ int test_scal_mv() { #if defined(KOKKOSKERNELS_INST_FLOAT) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F( TestCategory, scal_float ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_float"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_float"); test_scal (); Kokkos::Profiling::popRegion(); } TEST_F( TestCategory, scal_mv_float ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_mv_float"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_mv_float"); test_scal_mv (); Kokkos::Profiling::popRegion(); } @@ -274,12 +270,12 @@ TEST_F( TestCategory, scal_mv_float ) { #if defined(KOKKOSKERNELS_INST_DOUBLE) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F( TestCategory, scal_double ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_double"); test_scal (); Kokkos::Profiling::popRegion(); } TEST_F( TestCategory, scal_mv_double ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_mv_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_mv_double"); test_scal_mv (); Kokkos::Profiling::popRegion(); } @@ -287,12 +283,12 @@ TEST_F( TestCategory, scal_mv_double ) { #if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F( TestCategory, scal_complex_double ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_complex_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_complex_double"); test_scal,Kokkos::complex,TestExecSpace> (); Kokkos::Profiling::popRegion(); } TEST_F( TestCategory, scal_mv_complex_double ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_mv_complex_double"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_mv_complex_double"); test_scal_mv,Kokkos::complex,TestExecSpace> (); Kokkos::Profiling::popRegion(); } @@ -300,12 +296,12 @@ TEST_F( TestCategory, scal_mv_complex_double ) { #if defined(KOKKOSKERNELS_INST_INT) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) TEST_F( TestCategory, scal_int ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_int"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_int"); test_scal (); Kokkos::Profiling::popRegion(); } TEST_F( TestCategory, scal_mv_int ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_mv_int"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_mv_int"); test_scal_mv (); Kokkos::Profiling::popRegion(); } @@ -313,12 +309,12 @@ TEST_F( TestCategory, scal_mv_int ) { #if !defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) TEST_F( TestCategory, scal_double_int ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_double_int"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_double_int"); test_scal (); Kokkos::Profiling::popRegion(); } TEST_F( TestCategory, scal_mv_double_int ) { - Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_mv_double_int"); + Kokkos::Profiling::pushRegion("KokkosBlas::Test::scal_mv_double_int"); test_scal_mv (); Kokkos::Profiling::popRegion(); } From f834b9fa7c2bc2019265f1d1ffc228c1d6b1affa Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 6 May 2021 14:48:58 -0600 Subject: [PATCH 05/35] src/blas: Fix LayoutRight link errors - Fix Kokkos::Impl::Nrminf --- src/blas/KokkosBlas1_nrminf.hpp | 2 +- src/blas/impl/KokkosBlas1_nrminf_spec.hpp | 11 ++++------- unit_test/blas/Test_Blas1_nrminf.hpp | 4 +--- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/blas/KokkosBlas1_nrminf.hpp b/src/blas/KokkosBlas1_nrminf.hpp index 0d72e32b48..39d97cfc1c 100644 --- a/src/blas/KokkosBlas1_nrminf.hpp +++ b/src/blas/KokkosBlas1_nrminf.hpp @@ -73,7 +73,7 @@ nrminf (const XVector& x) Kokkos::MemoryTraits > XVector_Internal; typedef Kokkos::View > RVector_Internal; diff --git a/src/blas/impl/KokkosBlas1_nrminf_spec.hpp b/src/blas/impl/KokkosBlas1_nrminf_spec.hpp index b6101b7f8a..686c5aec20 100644 --- a/src/blas/impl/KokkosBlas1_nrminf_spec.hpp +++ b/src/blas/impl/KokkosBlas1_nrminf_spec.hpp @@ -50,7 +50,7 @@ #include // Include the actual functors -#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY #include #endif @@ -91,8 +91,7 @@ struct nrminf_eti_spec_avail { template<> \ struct nrminf_eti_spec_avail< \ Kokkos::View::mag_type*, \ - typename std::conditional::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -235,8 +234,7 @@ template struct NrmInf< \ #define KOKKOSBLAS1_NRMINF_MV_ETI_SPEC_DECL( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ extern template struct NrmInf< \ Kokkos::View::mag_type*, \ - typename std::conditional::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -251,8 +249,7 @@ extern template struct NrmInf< \ #define KOKKOSBLAS1_NRMINF_MV_ETI_SPEC_INST( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ template struct NrmInf< \ Kokkos::View::mag_type*, \ - typename std::conditional::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ diff --git a/unit_test/blas/Test_Blas1_nrminf.hpp b/unit_test/blas/Test_Blas1_nrminf.hpp index 0893045dee..c95c199120 100644 --- a/unit_test/blas/Test_Blas1_nrminf.hpp +++ b/unit_test/blas/Test_Blas1_nrminf.hpp @@ -12,9 +12,7 @@ namespace Test { typedef Kokkos::Details::ArithTraits AT; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeA; + typename ViewTypeA::array_layout,Device> BaseTypeA; BaseTypeA b_a("A",N); From 7ccea8df6abe9f7a01aa84d0679cebb42e8f60b9 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 6 May 2021 15:50:49 -0600 Subject: [PATCH 06/35] src/blas: Fix LayoutRight link errors - Fix Kokkos::Impl::Nrm2 --- src/blas/KokkosBlas1_nrm2.hpp | 2 +- src/blas/KokkosBlas1_nrm2_squared.hpp | 2 +- src/blas/impl/KokkosBlas1_nrm2_spec.hpp | 11 ++++------- unit_test/blas/Test_Blas1_nrm2.hpp | 6 ++---- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/blas/KokkosBlas1_nrm2.hpp b/src/blas/KokkosBlas1_nrm2.hpp index ff32b82cc3..bd3a4bf806 100644 --- a/src/blas/KokkosBlas1_nrm2.hpp +++ b/src/blas/KokkosBlas1_nrm2.hpp @@ -73,7 +73,7 @@ nrm2 (const XVector& x) Kokkos::MemoryTraits > XVector_Internal; typedef Kokkos::View > RVector_Internal; diff --git a/src/blas/KokkosBlas1_nrm2_squared.hpp b/src/blas/KokkosBlas1_nrm2_squared.hpp index 6b13fcebe0..e2011064c1 100644 --- a/src/blas/KokkosBlas1_nrm2_squared.hpp +++ b/src/blas/KokkosBlas1_nrm2_squared.hpp @@ -74,7 +74,7 @@ nrm2_squared (const XVector& x) Kokkos::MemoryTraits > XVector_Internal; typedef Kokkos::View > RVector_Internal; diff --git a/src/blas/impl/KokkosBlas1_nrm2_spec.hpp b/src/blas/impl/KokkosBlas1_nrm2_spec.hpp index 4123966ecc..30f4e00d95 100644 --- a/src/blas/impl/KokkosBlas1_nrm2_spec.hpp +++ b/src/blas/impl/KokkosBlas1_nrm2_spec.hpp @@ -50,7 +50,7 @@ #include // Include the actual functors -#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY #include #endif @@ -91,8 +91,7 @@ struct nrm2_eti_spec_avail { template<> \ struct nrm2_eti_spec_avail< \ Kokkos::View::mag_type*, \ - typename std::conditional::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -235,8 +234,7 @@ template struct Nrm2< \ #define KOKKOSBLAS1_NRM2_MV_ETI_SPEC_DECL( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ extern template struct Nrm2< \ Kokkos::View::mag_type*, \ - typename std::conditional::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -251,8 +249,7 @@ extern template struct Nrm2< \ #define KOKKOSBLAS1_NRM2_MV_ETI_SPEC_INST( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ template struct Nrm2< \ Kokkos::View::mag_type*, \ - typename std::conditional::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ diff --git a/unit_test/blas/Test_Blas1_nrm2.hpp b/unit_test/blas/Test_Blas1_nrm2.hpp index ba8ecef0ef..651337fc6d 100644 --- a/unit_test/blas/Test_Blas1_nrm2.hpp +++ b/unit_test/blas/Test_Blas1_nrm2.hpp @@ -12,9 +12,7 @@ namespace Test { typedef Kokkos::Details::ArithTraits AT; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeA; + typename ViewTypeA::array_layout,Device> BaseTypeA; BaseTypeA b_a("A",N); @@ -87,7 +85,7 @@ namespace Test { double eps = std::is_same::value?2*1e-5:1e-7; - Kokkos::View r("Dot::Result",K); + Kokkos::View r("Dot::Result",K); KokkosBlas::nrm2(r,a); for(int k=0;k Date: Thu, 6 May 2021 16:15:42 -0600 Subject: [PATCH 07/35] src/blas: Fix LayoutRight link errors - Fix Kokkos::Impl::Nrm1 --- src/blas/KokkosBlas1_nrm1.hpp | 2 +- src/blas/impl/KokkosBlas1_nrm1_spec.hpp | 11 ++++------- unit_test/blas/Test_Blas1_nrm1.hpp | 4 +--- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/blas/KokkosBlas1_nrm1.hpp b/src/blas/KokkosBlas1_nrm1.hpp index 639ca2c3d6..350818e2b1 100644 --- a/src/blas/KokkosBlas1_nrm1.hpp +++ b/src/blas/KokkosBlas1_nrm1.hpp @@ -73,7 +73,7 @@ nrm1 (const XVector& x) Kokkos::MemoryTraits > XVector_Internal; typedef Kokkos::View > RVector_Internal; diff --git a/src/blas/impl/KokkosBlas1_nrm1_spec.hpp b/src/blas/impl/KokkosBlas1_nrm1_spec.hpp index 074153b025..a469c1a6a8 100644 --- a/src/blas/impl/KokkosBlas1_nrm1_spec.hpp +++ b/src/blas/impl/KokkosBlas1_nrm1_spec.hpp @@ -50,7 +50,7 @@ #include // Include the actual functors -#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY #include #endif @@ -91,8 +91,7 @@ struct nrm1_eti_spec_avail { template<> \ struct nrm1_eti_spec_avail< \ Kokkos::View::mag_type*, \ - typename std::conditional::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -235,8 +234,7 @@ template struct Nrm1< \ #define KOKKOSBLAS1_NRM1_MV_ETI_SPEC_DECL( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ extern template struct Nrm1< \ Kokkos::View::mag_type*, \ - typename std::conditional::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -251,8 +249,7 @@ extern template struct Nrm1< \ #define KOKKOSBLAS1_NRM1_MV_ETI_SPEC_INST( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ template struct Nrm1< \ Kokkos::View::mag_type*, \ - typename std::conditional::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ diff --git a/unit_test/blas/Test_Blas1_nrm1.hpp b/unit_test/blas/Test_Blas1_nrm1.hpp index 6644a14c15..ce46bfeec5 100644 --- a/unit_test/blas/Test_Blas1_nrm1.hpp +++ b/unit_test/blas/Test_Blas1_nrm1.hpp @@ -14,9 +14,7 @@ namespace Test { typedef Kokkos::ArithTraits MAT; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeA; + typename ViewTypeA::array_layout,Device> BaseTypeA; BaseTypeA b_a("A",N); From 521ec587505916e5ebf9899d96b4b5c9fffc99c4 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 6 May 2021 16:51:09 -0600 Subject: [PATCH 08/35] src/blas: Fix LayoutRight link errors - Fix Kokkos::Impl::Mult --- src/blas/impl/KokkosBlas1_mult_spec.hpp | 14 +++++--------- unit_test/blas/Test_Blas1_mult.hpp | 18 +++++------------- unit_test/blas/Test_Blas1_nrm2.hpp | 2 +- 3 files changed, 11 insertions(+), 23 deletions(-) diff --git a/src/blas/impl/KokkosBlas1_mult_spec.hpp b/src/blas/impl/KokkosBlas1_mult_spec.hpp index 2bd8f79422..bb45594352 100644 --- a/src/blas/impl/KokkosBlas1_mult_spec.hpp +++ b/src/blas/impl/KokkosBlas1_mult_spec.hpp @@ -99,8 +99,7 @@ struct mult_eti_spec_avail { Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -228,7 +227,7 @@ struct Mult printf("KokkosBlas1::mult<> non-ETI specialization for < %s , %s , %s >\n",typeid(YV).name(),typeid(AV).name(),typeid(XV).name()); } #endif - + const size_type numRows = Y.extent(0); if (numRows < static_cast (INT_MAX)) { V_Mult_Generic (gamma, Y, alpha, A, X); @@ -265,8 +264,7 @@ extern template struct Mult< \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ 1, false, true>; @@ -300,8 +298,7 @@ extern template struct Mult< \ Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -313,8 +310,7 @@ template struct Mult< \ Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ diff --git a/unit_test/blas/Test_Blas1_mult.hpp b/unit_test/blas/Test_Blas1_mult.hpp index 1f6856a934..b0839279f6 100644 --- a/unit_test/blas/Test_Blas1_mult.hpp +++ b/unit_test/blas/Test_Blas1_mult.hpp @@ -14,17 +14,11 @@ namespace Test { typedef typename ViewTypeC::value_type ScalarC; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeA; + typename ViewTypeA::array_layout,Device> BaseTypeA; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeB; + typename ViewTypeB::array_layout,Device> BaseTypeB; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeC; + typename ViewTypeC::array_layout,Device> BaseTypeC; ScalarA a = 3; @@ -35,7 +29,7 @@ namespace Test { BaseTypeB b_y("Y",N); BaseTypeC b_z("Y",N); BaseTypeC b_org_z("Org_Z",N); - + ViewTypeA x = Kokkos::subview(b_x,Kokkos::ALL(),0); ViewTypeB y = Kokkos::subview(b_y,Kokkos::ALL(),0); @@ -109,9 +103,7 @@ namespace Test { typedef typename ViewTypeC::value_type ScalarC; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeA; + typename ViewTypeA::array_layout,Device> BaseTypeA; typedef multivector_layout_adapter vfB_type; typedef multivector_layout_adapter vfC_type; diff --git a/unit_test/blas/Test_Blas1_nrm2.hpp b/unit_test/blas/Test_Blas1_nrm2.hpp index 651337fc6d..af3d71b9fe 100644 --- a/unit_test/blas/Test_Blas1_nrm2.hpp +++ b/unit_test/blas/Test_Blas1_nrm2.hpp @@ -85,7 +85,7 @@ namespace Test { double eps = std::is_same::value?2*1e-5:1e-7; - Kokkos::View r("Dot::Result",K); + Kokkos::View r("Dot::Result",K); KokkosBlas::nrm2(r,a); for(int k=0;k Date: Thu, 6 May 2021 17:01:48 -0600 Subject: [PATCH 09/35] src/blas: Fix LayoutRight link errors - Fix Kokkos::Impl::Iamax --- src/blas/KokkosBlas1_iamax.hpp | 12 +++--- src/blas/impl/KokkosBlas1_iamax_spec.hpp | 22 ++++------- unit_test/blas/Test_Blas1_iamax.hpp | 48 ++++++++++++------------ 3 files changed, 37 insertions(+), 45 deletions(-) diff --git a/src/blas/KokkosBlas1_iamax.hpp b/src/blas/KokkosBlas1_iamax.hpp index 9346061d29..234192348f 100644 --- a/src/blas/KokkosBlas1_iamax.hpp +++ b/src/blas/KokkosBlas1_iamax.hpp @@ -50,14 +50,14 @@ namespace KokkosBlas { -/// \brief Return the (smallest) index of the element of the maximum magnitude of the vector x. +/// \brief Return the (smallest) index of the element of the maximum magnitude of the vector x. /// /// \tparam XVector Type of the first vector x; a 1-D Kokkos::View. /// /// \param x [in] Input 1-D View. /// /// \return The (smallest) index of the element of the maximum magnitude; a single value. -/// Note: Returned index is 1-based for compatibility with Fortran. +/// Note: Returned index is 1-based for compatibility with Fortran. template typename XVector::size_type iamax (const XVector& x) { @@ -74,7 +74,7 @@ typename XVector::size_type iamax (const XVector& x) Kokkos::MemoryTraits > XVector_Internal; typedef Kokkos::View > RVector_Internal; @@ -130,12 +130,12 @@ iamax (const RV& R, const XMV& X, Kokkos::Impl::throw_runtime_exception (os.str ()); } - // Create unmanaged versions of the input Views. RV may be rank 0 or rank 2. + // Create unmanaged versions of the input Views. RV may be rank 0 or rank 2. // XMV may be rank 1 or rank 2. typedef Kokkos::View< typename std::conditional< - RV::rank == 0, - typename RV::non_const_value_type, + RV::rank == 0, + typename RV::non_const_value_type, typename RV::non_const_value_type* >::type, typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, typename std::conditional< diff --git a/src/blas/impl/KokkosBlas1_iamax_spec.hpp b/src/blas/impl/KokkosBlas1_iamax_spec.hpp index cff38eed17..d130428f1a 100644 --- a/src/blas/impl/KokkosBlas1_iamax_spec.hpp +++ b/src/blas/impl/KokkosBlas1_iamax_spec.hpp @@ -50,7 +50,7 @@ #include // Include the actual functors -#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY #include #endif @@ -103,8 +103,7 @@ struct iamax_eti_spec_avail { template<> \ struct iamax_eti_spec_avail< \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -113,8 +112,7 @@ struct iamax_eti_spec_avail { template<> \ struct iamax_eti_spec_avail< \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -203,7 +201,7 @@ struct Iamax { printf("KokkosBlas1::iamax<> non-ETI specialization for < %s , %s >\n",typeid(RV).name(),typeid(XMV).name()); } #endif - + const size_type numRows = X.extent(0); const size_type numCols = X.extent(1); if (numRows < static_cast (INT_MAX) && @@ -282,8 +280,7 @@ template struct Iamax< \ #define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_DECL_INDEX( INDEX_TYPE, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ extern template struct Iamax< \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -291,8 +288,7 @@ extern template struct Iamax< \ 2, false, true>; \ extern template struct Iamax< \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -312,8 +308,7 @@ extern template struct Iamax< \ #define KOKKOSBLAS1_IAMAX_MV_ETI_SPEC_INST_INDEX( INDEX_TYPE, SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ template struct Iamax< \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::HostSpace, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -321,8 +316,7 @@ template struct Iamax< \ 2, false, true>; \ template struct Iamax< \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ diff --git a/unit_test/blas/Test_Blas1_iamax.hpp b/unit_test/blas/Test_Blas1_iamax.hpp index 4651f9fdcc..65ca3601a4 100644 --- a/unit_test/blas/Test_Blas1_iamax.hpp +++ b/unit_test/blas/Test_Blas1_iamax.hpp @@ -13,9 +13,7 @@ namespace Test { typedef typename AT::mag_type mag_type; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeA; + typename ViewTypeA::array_layout,Device> BaseTypeA; typedef typename BaseTypeA::size_type size_type; @@ -39,31 +37,31 @@ namespace Test { mag_type expected_result = Kokkos::Details::ArithTraits::min(); size_type expected_max_loc = 0; - for(int i=0;i expected_result) { expected_result = val; expected_max_loc = i+1;} } - + if(N == 0) {expected_result = typename AT::mag_type(0); expected_max_loc = 0;} { //printf("impl_test_iamax -- return result as a scalar on host -- N %d\n", N); size_type nonconst_max_loc = KokkosBlas::iamax(a); ASSERT_EQ( nonconst_max_loc, expected_max_loc); - + size_type const_max_loc = KokkosBlas::iamax(c_a); ASSERT_EQ( const_max_loc, expected_max_loc); } { //printf("impl_test_iamax -- return result as a 0-D View on host -- N %d\n", N); - typedef Kokkos::View ViewType0D; - ViewType0D r("Iamax::Result 0-D View on host"); - + typedef Kokkos::View ViewType0D; + ViewType0D r("Iamax::Result 0-D View on host"); + KokkosBlas::iamax(r,a); size_type nonconst_max_loc = r(); ASSERT_EQ( nonconst_max_loc, expected_max_loc); - + KokkosBlas::iamax(r,c_a); size_type const_max_loc = r(); ASSERT_EQ( const_max_loc, expected_max_loc); @@ -71,12 +69,12 @@ namespace Test { { //printf("impl_test_iamax -- return result as a 0-D View on device -- N %d\n", N); - typedef Kokkos::View ViewType0D; - ViewType0D r("Iamax::Result 0-D View on device"); + typedef Kokkos::View ViewType0D; + ViewType0D r("Iamax::Result 0-D View on device"); typename ViewType0D::HostMirror h_r = Kokkos::create_mirror_view(r); - + size_type nonconst_max_loc, const_max_loc; - + KokkosBlas::iamax(r,a); Kokkos::deep_copy(h_r,r); @@ -137,18 +135,18 @@ namespace Test { { //printf("impl_test_iamax_mv -- return results as a 1-D View on host -- N %d\n", N); - Kokkos::View r("Iamax::Result View on host",K); - + Kokkos::View r("Iamax::Result View on host",K); + KokkosBlas::iamax(r,a); - + for(int k=0;k r("Iamax::Result View on device",K); - typename Kokkos::View::HostMirror h_r= Kokkos::create_mirror_view(r); - + Kokkos::View r("Iamax::Result View on device",K); + typename Kokkos::View::HostMirror h_r= Kokkos::create_mirror_view(r); + KokkosBlas::iamax(r,a); Kokkos::deep_copy(h_r,r); - + for(int k=0;k Date: Thu, 6 May 2021 17:13:09 -0600 Subject: [PATCH 10/35] src/blas: Fix LayoutRight link errors - Fix Kokkos::Impl::Dot --- src/blas/KokkosBlas1_dot.hpp | 4 ++-- src/blas/impl/KokkosBlas1_dot_spec.hpp | 27 +++++++++----------------- unit_test/blas/Test_Blas1_dot.hpp | 8 ++------ 3 files changed, 13 insertions(+), 26 deletions(-) diff --git a/src/blas/KokkosBlas1_dot.hpp b/src/blas/KokkosBlas1_dot.hpp index 8b746ea9fd..520177ae05 100644 --- a/src/blas/KokkosBlas1_dot.hpp +++ b/src/blas/KokkosBlas1_dot.hpp @@ -101,11 +101,11 @@ dot (const XVector& x, const YVector& y) using result_type = typename KokkosBlas::Impl::DotAccumulatingScalar::type; using RVector_Internal = Kokkos::View>; using RVector_Result = Kokkos::View>; diff --git a/src/blas/impl/KokkosBlas1_dot_spec.hpp b/src/blas/impl/KokkosBlas1_dot_spec.hpp index d5a0efb04a..adb078dfbb 100644 --- a/src/blas/impl/KokkosBlas1_dot_spec.hpp +++ b/src/blas/impl/KokkosBlas1_dot_spec.hpp @@ -134,8 +134,7 @@ struct dot_eti_spec_avail { #define KOKKOSBLAS1_DOT_MV_ETI_SPEC_AVAIL( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ template<> \ struct dot_eti_spec_avail< \ - Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -145,8 +144,7 @@ struct dot_eti_spec_avail { 2,2> { enum : bool { value = true }; }; \ template<> \ struct dot_eti_spec_avail< \ - Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -157,8 +155,7 @@ struct dot_eti_spec_avail { template<> \ struct dot_eti_spec_avail< \ Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -441,8 +438,7 @@ template struct DotSpecialAccumulator< \ // #define KOKKOSBLAS1_DOT_MV_ETI_SPEC_DECL( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ extern template struct Dot< \ - Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -451,8 +447,7 @@ extern template struct Dot< \ Kokkos::MemoryTraits >, \ 2,2,false,true>; \ extern template struct Dot< \ - Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -461,8 +456,7 @@ extern template struct Dot< \ Kokkos::MemoryTraits >, \ 2,1,false,true>; \ extern template struct Dot< \ - Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -473,8 +467,7 @@ extern template struct Dot< \ #define KOKKOSBLAS1_DOT_MV_ETI_SPEC_INST( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ template struct Dot< \ - Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -483,8 +476,7 @@ template struct Dot< \ Kokkos::MemoryTraits >, \ 2,2,false,true>; \ template struct Dot< \ - Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -493,8 +485,7 @@ template struct Dot< \ Kokkos::MemoryTraits >, \ 2,1,false,true>; \ template struct Dot< \ - Kokkos::View::value, \ - Kokkos::LayoutLeft, LAYOUT>::type, \ + Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ diff --git a/unit_test/blas/Test_Blas1_dot.hpp b/unit_test/blas/Test_Blas1_dot.hpp index 930a7dd40e..e1615155aa 100644 --- a/unit_test/blas/Test_Blas1_dot.hpp +++ b/unit_test/blas/Test_Blas1_dot.hpp @@ -14,13 +14,9 @@ namespace Test { typedef Kokkos::ArithTraits ats; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeA; + typename ViewTypeA::array_layout,Device> BaseTypeA; typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeB; + typename ViewTypeB::array_layout,Device> BaseTypeB; BaseTypeA b_a("A",N); From 77e9657b9b21a0c990599f2a2479cbac19914c51 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 6 May 2021 17:32:38 -0600 Subject: [PATCH 11/35] src/sparse: Fix LayoutRight link errors - Fix Kokkos::Sparse::GUASS_SEIDEL_APPLY --- .../impl/KokkosSparse_gauss_seidel_spec.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_gauss_seidel_spec.hpp b/src/sparse/impl/KokkosSparse_gauss_seidel_spec.hpp index 9146decc64..1ca45ece80 100644 --- a/src/sparse/impl/KokkosSparse_gauss_seidel_spec.hpp +++ b/src/sparse/impl/KokkosSparse_gauss_seidel_spec.hpp @@ -112,13 +112,13 @@ namespace KokkosSparse { KokkosKernels::Experimental::KokkosKernelsHandle< \ const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ EXEC_SPACE_TYPE, MEM_SPACE_TYPE, SLOW_MEM_SPACE> , \ - Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View< SCALAR_TYPE **, LAYOUT_TYPE, \ @@ -483,13 +483,13 @@ namespace KokkosSparse { KokkosKernels::Experimental::KokkosKernelsHandle< \ const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ EXEC_SPACE_TYPE, MEM_SPACE_TYPE, SLOW_MEM_SPACE> , \ - Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View , \ - Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View Date: Thu, 6 May 2021 17:53:42 -0600 Subject: [PATCH 12/35] src/sparse: Fix LayoutRight link errors - Fix Kokkos::Sparse::SPMV_MV --- src/sparse/KokkosSparse_CrsMatrix.hpp | 5 +++-- unit_test/sparse/Test_Sparse_spmv.hpp | 9 +++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/sparse/KokkosSparse_CrsMatrix.hpp b/src/sparse/KokkosSparse_CrsMatrix.hpp index 6ea67666e7..4a32d55ac3 100644 --- a/src/sparse/KokkosSparse_CrsMatrix.hpp +++ b/src/sparse/KokkosSparse_CrsMatrix.hpp @@ -58,6 +58,7 @@ #include #include #include "KokkosSparse_findRelOffset.hpp" +#include "KokkosKernels_default_types.hpp" namespace KokkosSparse { @@ -413,9 +414,9 @@ class CrsMatrix { //! Type of a host-memory mirror of the sparse matrix. typedef CrsMatrix HostMirror; //! Type of the graph structure of the sparse matrix. - typedef Kokkos::StaticCrsGraph StaticCrsGraphType; + typedef Kokkos::StaticCrsGraph StaticCrsGraphType; //! Type of the graph structure of the sparse matrix - consistent with Kokkos. - typedef Kokkos::StaticCrsGraph staticcrsgraph_type; + typedef Kokkos::StaticCrsGraph staticcrsgraph_type; //! Type of column indices in the sparse matrix. typedef typename staticcrsgraph_type::entries_type index_type; //! Const version of the type of column indices in the sparse matrix. diff --git a/unit_test/sparse/Test_Sparse_spmv.hpp b/unit_test/sparse/Test_Sparse_spmv.hpp index aaca85b512..d85e2338c5 100644 --- a/unit_test/sparse/Test_Sparse_spmv.hpp +++ b/unit_test/sparse/Test_Sparse_spmv.hpp @@ -9,6 +9,7 @@ #include #include "KokkosKernels_Controls.hpp" +#include "KokkosKernels_default_types.hpp" // #ifndef kokkos_complex_double // #define kokkos_complex_double Kokkos::complex @@ -778,9 +779,9 @@ void test_github_issue_101 () // vectors. Include a little extra in case the implementers decide // to strip-mine that. constexpr int numVecs = 22; - Kokkos::View X ("X", numCols, numVecs); + Kokkos::View X ("X", numCols, numVecs); Kokkos::deep_copy (X, static_cast (1.0)); - Kokkos::View Y ("Y", numRows, numVecs); + Kokkos::View Y ("Y", numRows, numVecs); auto Y_h = Kokkos::create_mirror_view (Y); // we'll want this later // Start with the easy test case, where the matrix and the vectors @@ -1043,7 +1044,7 @@ TEST_F( TestCategory,sparse ## _ ## spmv_mv_struct ## _ ## SCALAR ## _ ## ORDINA #endif - +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) #if (defined (KOKKOSKERNELS_INST_DOUBLE) \ && defined (KOKKOSKERNELS_INST_ORDINAL_INT) && defined(KOKKOSKERNELS_INST_LAYOUTLEFT) \ && defined (KOKKOSKERNELS_INST_OFFSET_INT)) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) @@ -1156,7 +1157,7 @@ TEST_F( TestCategory,sparse ## _ ## spmv_mv_struct ## _ ## SCALAR ## _ ## ORDINA EXECUTE_TEST_MV(kokkos_complex_float, int64_t, size_t, LayoutLeft, TestExecSpace) EXECUTE_TEST_MV_STRUCT(kokkos_complex_float, int64_t, size_t, LayoutLeft, TestExecSpace) #endif - +#endif // defined(KOKKOSKERNELS_INST_LAYOUTLEFT) From a3a89cb52f3694c7fa0757504c0f63874a90ac28 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Fri, 7 May 2021 05:41:58 -0600 Subject: [PATCH 13/35] src/sparse: Fix LayoutRight link errors - Fix KokkosSparse::Impl::SPMV_MV - Fix KokkosSparse::Impl::GUASS_SEIDEL_APPLY --- src/common/KokkosKernels_Handle.hpp | 4 ++-- src/sparse/KokkosSparse_gauss_seidel_handle.hpp | 6 +++--- src/sparse/KokkosSparse_spmv.hpp | 12 ++++-------- unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp | 6 +++--- unit_test/sparse/Test_Sparse_gauss_seidel.hpp | 12 ++++++------ 5 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/common/KokkosKernels_Handle.hpp b/src/common/KokkosKernels_Handle.hpp index 39ac62267c..08d04cc0c3 100644 --- a/src/common/KokkosKernels_Handle.hpp +++ b/src/common/KokkosKernels_Handle.hpp @@ -216,7 +216,7 @@ class KokkosKernelsHandle typedef typename size_type_persistent_work_view_t::HostMirror size_type_persistent_work_host_view_t; //Host view type typedef typename Kokkos::View scalar_temp_work_view_t; typedef typename Kokkos::View scalar_persistent_work_view_t; - typedef typename Kokkos::View scalar_persistent_work_view2d_t; + typedef typename Kokkos::View scalar_persistent_work_view2d_t; typedef typename Kokkos::View nnz_lno_temp_work_view_t; typedef typename Kokkos::View nnz_lno_persistent_work_view_t; typedef typename nnz_lno_persistent_work_view_t::HostMirror nnz_lno_persistent_work_host_view_t; //Host view type @@ -810,7 +810,7 @@ class KokkosKernelsHandle this->spilukHandle = nullptr; } } - + }; // end class KokkosKernelsHandle } diff --git a/src/sparse/KokkosSparse_gauss_seidel_handle.hpp b/src/sparse/KokkosSparse_gauss_seidel_handle.hpp index 9176809115..b7022dde0e 100644 --- a/src/sparse/KokkosSparse_gauss_seidel_handle.hpp +++ b/src/sparse/KokkosSparse_gauss_seidel_handle.hpp @@ -226,7 +226,7 @@ namespace KokkosSparse{ typedef typename Kokkos::View scalar_temp_work_view_t; typedef typename Kokkos::View scalar_persistent_work_view_t; - typedef typename Kokkos::View scalar_persistent_work_view2d_t; + typedef typename Kokkos::View scalar_persistent_work_view2d_t; typedef typename scalar_persistent_work_view_t::HostMirror scalar_persistent_work_host_view_t; //Host view type typedef typename Kokkos::View nnz_lno_temp_work_view_t; @@ -514,7 +514,7 @@ namespace KokkosSparse{ throw std::runtime_error("inverse diagonal does not exist until after numeric setup."); return inverse_diagonal; } - + bool use_teams() const { return KokkosKernels::Impl::kk_is_gpu_exec_space(); @@ -562,7 +562,7 @@ namespace KokkosSparse{ using const_ordinal_t = typename const_entries_view_t::value_type; using const_scalar_t = typename const_values_view_t::value_type; - using vector_view_t = Kokkos::View; + using vector_view_t = Kokkos::View; using GSHandle = GaussSeidelHandle; diff --git a/src/sparse/KokkosSparse_spmv.hpp b/src/sparse/KokkosSparse_spmv.hpp index 15e91e6363..dd3f02086e 100644 --- a/src/sparse/KokkosSparse_spmv.hpp +++ b/src/sparse/KokkosSparse_spmv.hpp @@ -374,13 +374,11 @@ spmv (KokkosKernels::Experimental::Controls /*controls*/, // Call single-vector version if appropriate if (x.extent(1) == 1) { typedef Kokkos::View::value, - Kokkos::LayoutLeft, Kokkos::LayoutStride>::type, + typename YVector::array_layout, typename XVector::device_type, Kokkos::MemoryTraits > XVector_SubInternal; typedef Kokkos::View::value, - Kokkos::LayoutLeft,Kokkos::LayoutStride>::type, + typename YVector::array_layout, typename YVector::device_type, Kokkos::MemoryTraits > YVector_SubInternal; @@ -735,13 +733,11 @@ void spmv(const char mode[], // Call single-vector version if appropriate if (x.extent(1) == 1) { typedef Kokkos::View::value, - Kokkos::LayoutLeft, Kokkos::LayoutStride>::type, + typename YVector::array_layout, typename XVector::device_type, Kokkos::MemoryTraits > XVector_SubInternal; typedef Kokkos::View::value, - Kokkos::LayoutLeft,Kokkos::LayoutStride>::type, + typename YVector::array_layout, typename YVector::device_type, Kokkos::MemoryTraits > YVector_SubInternal; diff --git a/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp b/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp index c1a01945fd..60267d356f 100644 --- a/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp +++ b/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp @@ -255,7 +255,7 @@ void test_block_gauss_seidel_rank1(lno_t numRows, size_type nnz, lno_t bandwidth bool is_symmetric_graph = true; size_t shmem_size = 32128; - + for(int i = 0; i < 2; ++i) { if (i == 1) shmem_size = 2008; //make the shmem small on gpus so that it will test 2 level algorithm. @@ -292,7 +292,7 @@ void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth typedef typename crsMat_t::values_type::non_const_type scalar_view_t; typedef typename crsMat_t::StaticCrsGraphType::row_map_type::non_const_type lno_view_t; typedef typename crsMat_t::StaticCrsGraphType::entries_type::non_const_type lno_nnz_view_t; - typedef Kokkos::View scalar_view2d_t; + typedef Kokkos::View scalar_view2d_t; typedef typename Kokkos::Details::ArithTraits::mag_type mag_t; lno_t numCols = numRows; @@ -378,7 +378,7 @@ void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth scalar_view_t res_norms("Residuals", numVecs); auto h_res_norms = Kokkos::create_mirror_view(res_norms); - + for(int i = 0; i < 2; ++i) { if (i == 1) shmem_size = 2008; //make the shmem small on gpus so that it will test 2 level algorithm. diff --git a/unit_test/sparse/Test_Sparse_gauss_seidel.hpp b/unit_test/sparse/Test_Sparse_gauss_seidel.hpp index 713fe5644d..db39d3fbd9 100644 --- a/unit_test/sparse/Test_Sparse_gauss_seidel.hpp +++ b/unit_test/sparse/Test_Sparse_gauss_seidel.hpp @@ -87,7 +87,7 @@ int run_gauss_seidel( int apply_type = 0, // 0 for symmetric, 1 for forward, 2 for backward. int cluster_size = 1, bool classic = false, // only with two-stage, true for sptrsv instead of richardson - ClusteringAlgorithm clusterAlgo = CLUSTER_DEFAULT) + ClusteringAlgorithm clusterAlgo = CLUSTER_DEFAULT) { typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type lno_view_t; @@ -221,7 +221,7 @@ crsMat_t symmetrize(crsMat_t A) } } //Count entries - Kokkos::View new_host_rowmap("Rowmap", numRows + 1); + Kokkos::View new_host_rowmap("Rowmap", numRows + 1); size_t accum = 0; for(lno_t r = 0; r <= numRows; r++) { @@ -230,8 +230,8 @@ crsMat_t symmetrize(crsMat_t A) accum += symRows[r].size(); } //Allocate new entries/values - Kokkos::View new_host_entries("Entries", accum); - Kokkos::View new_host_values("Values", accum); + Kokkos::View new_host_entries("Entries", accum); + Kokkos::View new_host_values("Values", accum); for(lno_t r = 0; r < numRows; r++) { auto rowIt = symRows[r].begin(); @@ -338,8 +338,8 @@ void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, lno_ using namespace Test; srand(245); typedef typename KokkosSparse::CrsMatrix crsMat_t; - typedef Kokkos::View scalar_view2d_t; - typedef Kokkos::View host_scalar_view2d_t; + typedef Kokkos::View scalar_view2d_t; + typedef Kokkos::View host_scalar_view2d_t; typedef typename Kokkos::Details::ArithTraits::mag_type mag_t; lno_t numCols = numRows; From b4219fb76a974732b3113955f4ad32321f7064b3 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Fri, 7 May 2021 05:43:18 -0600 Subject: [PATCH 14/35] scripts: Update cm_test_all_sandia for LayoutRight testing --- scripts/cm_test_all_sandia | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scripts/cm_test_all_sandia b/scripts/cm_test_all_sandia index e386fb7fc7..4ef5f5ed75 100755 --- a/scripts/cm_test_all_sandia +++ b/scripts/cm_test_all_sandia @@ -74,6 +74,8 @@ print_help() { echo " Valid items:" echo " LayoutLeft,LayoutRight" echo "" + echo "--no-default-eti: Do not include default ETI types for Kokkos Kernels" + echo "" echo "ARGS: list of expressions matching compilers to test" echo " supported compilers sems" @@ -202,6 +204,7 @@ TEST_SCRIPT=False TEST_SPACK=False SKIP_HWLOC=False SPOT_CHECK=False +NO_DEFAULT_ETI=False PRINT_HELP=False OPT_FLAG="" @@ -352,6 +355,9 @@ do --with-layouts*) KOKKOSKERNELS_LAYOUTS="${key#*=}" ;; + --no-default-eti*) + NO_DEFAULT_ETI=True + ;; --with-tpls*) KOKKOSKERNELS_ENABLE_TPLS="${key#*=}" ;; @@ -1191,6 +1197,10 @@ single_build_and_test() { local cxx_standard="${CXX_STANDARD}" + if [ "${NO_DEFAULT_ETI}" = "True" ]; then + local extra_args="$extra_args --no-default-eti" + fi + echo " Starting job $desc" From c806fb32d3f13adcae0503a6e6da7e49899e68ba Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Fri, 7 May 2021 05:58:58 -0600 Subject: [PATCH 15/35] src/common: Add default_types include --- src/common/KokkosKernels_Handle.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/KokkosKernels_Handle.hpp b/src/common/KokkosKernels_Handle.hpp index 08d04cc0c3..1f27ae6291 100644 --- a/src/common/KokkosKernels_Handle.hpp +++ b/src/common/KokkosKernels_Handle.hpp @@ -49,6 +49,7 @@ #include "KokkosSparse_spadd_handle.hpp" #include "KokkosSparse_sptrsv_handle.hpp" #include "KokkosSparse_spiluk_handle.hpp" +#include "KokkosKernels_default_types.hpp" #ifndef _KOKKOSKERNELHANDLE_HPP #define _KOKKOSKERNELHANDLE_HPP From 24714bd0214c0cb920d43bfbba7a37fc8f465ac9 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Fri, 7 May 2021 07:12:12 -0600 Subject: [PATCH 16/35] scripts: Add --with-spaces to cm_test_all --- scripts/cm_test_all_sandia | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/scripts/cm_test_all_sandia b/scripts/cm_test_all_sandia index 4ef5f5ed75..1ebaf0fdc4 100755 --- a/scripts/cm_test_all_sandia +++ b/scripts/cm_test_all_sandia @@ -76,6 +76,9 @@ print_help() { echo "" echo "--no-default-eti: Do not include default ETI types for Kokkos Kernels" echo "" + echo "--with-spaces=SPACES: Set spaces to be instantiated." + echo " Options: hostspace, cudaspace, cudauvmspace" + echo "" echo "ARGS: list of expressions matching compilers to test" echo " supported compilers sems" @@ -358,6 +361,9 @@ do --no-default-eti*) NO_DEFAULT_ETI=True ;; + --with-layouts*) + KOKKOSKERNELS_SPACES="${key#*=}" + ;; --with-tpls*) KOKKOSKERNELS_ENABLE_TPLS="${key#*=}" ;; @@ -1158,6 +1164,10 @@ single_build_and_test() { if [ ! -z "$KOKKOSKERNELS_ORDINALS" ]; then kernels_variants="$kernels_variants ordinals=$KOKKOSKERNELS_ORDINALS" fi + if [ ! -z "$KOKKOSKERNELS_SPACES" ]; then + kernels_variants="$kernels_variants spaces=$KOKKOSKERNELS_SPACES" + KOKKOSKERNELS_SPACES="--with-spaces=$KOKKOSKERNELS_SPACES" + fi echo " # Load modules:" &> reload_modules.sh @@ -1251,13 +1261,13 @@ single_build_and_test() { # KOKKOS_OPTIONS and KOKKOS_CUDA_OPTIONS are exported and detected by kokkos' generate_makefile.sh during install of kokkos; we pass them to the reproducer script instructions echo " # Use generate_makefile line below to call cmake which generates makefile for this build:" &> call_generate_makefile.sh - echo " ${KOKKOSKERNELS_PATH}/cm_generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD $HIP_ENABLE_CMD --kokkos-path=${KOKKOS_PATH} --kokkoskernels-path=${KOKKOSKERNELS_PATH} --with-scalars=$kk_scalars --with-ordinals=${KOKKOSKERNELS_ORDINALS} --with-offsets=${KOKKOSKERNELS_OFFSETS} --with-layouts=${KOKKOSKERNELS_LAYOUTS} ${KOKKOSKERNELS_ENABLE_TPL_CMD} ${KOKKOSKERNELS_TPL_PATH_CMD} ${KOKKOSKERNELS_TPL_LIBS_CMD} ${KOKKOSKERNELS_EXTRA_LINKER_FLAGS_CMD} --with-options=${KOKKOS_OPTIONS} --with-cuda-options=${KOKKOS_CUDA_OPTIONS} ${KOKKOS_BOUNDS_CHECK} --no-examples $extra_args" &>> call_generate_makefile.sh + echo " ${KOKKOSKERNELS_PATH}/cm_generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD $HIP_ENABLE_CMD --kokkos-path=${KOKKOS_PATH} --kokkoskernels-path=${KOKKOSKERNELS_PATH} --with-scalars=$kk_scalars --with-ordinals=${KOKKOSKERNELS_ORDINALS} --with-offsets=${KOKKOSKERNELS_OFFSETS} --with-layouts=${KOKKOSKERNELS_LAYOUTS} ${KOKKOSKERNELS_ENABLE_TPL_CMD} ${KOKKOSKERNELS_TPL_PATH_CMD} ${KOKKOSKERNELS_TPL_LIBS_CMD} ${KOKKOSKERNELS_EXTRA_LINKER_FLAGS_CMD} --with-options=${KOKKOS_OPTIONS} --with-cuda-options=${KOKKOS_CUDA_OPTIONS} ${KOKKOS_BOUNDS_CHECK} ${KOKKOSKERNELS_SPACES} --no-examples $extra_args" &>> call_generate_makefile.sh chmod +x call_generate_makefile.sh # script command with generic path for faster copy/paste of reproducer into issues - echo " # \$KOKKOSKERNELS_PATH/cm_generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD $HIP_ENABLE_CMD --kokkos-path=\$KOKKOS_PATH --kokkoskernels-path=\$KOKKOSKERNELS_PATH --with-scalars=$kk_scalars --with-ordinals=${KOKKOSKERNELS_ORDINALS} --with-offsets=${KOKKOSKERNELS_OFFSETS} --with-layouts=${KOKKOSKERNELS_LAYOUTS} ${KOKKOSKERNELS_ENABLE_TPL_CMD} ${KOKKOSKERNELS_TPL_PATH_CMD} ${KOKKOSKERNELS_TPL_LIBS_CMD} ${KOKKOSKERNELS_EXTRA_LINKER_FLAGS_CMD} --with-options=${KOKKOS_OPTIONS} --with-cuda-options=${KOKKOS_CUDA_OPTIONS} ${KOKKOS_BOUNDS_CHECK} --no-examples $extra_args" &> call_generate_makefile_genericpath.sh + echo " # \$KOKKOSKERNELS_PATH/cm_generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD $HIP_ENABLE_CMD --kokkos-path=\$KOKKOS_PATH --kokkoskernels-path=\$KOKKOSKERNELS_PATH --with-scalars=$kk_scalars --with-ordinals=${KOKKOSKERNELS_ORDINALS} --with-offsets=${KOKKOSKERNELS_OFFSETS} --with-layouts=${KOKKOSKERNELS_LAYOUTS} ${KOKKOSKERNELS_ENABLE_TPL_CMD} ${KOKKOSKERNELS_TPL_PATH_CMD} ${KOKKOSKERNELS_TPL_LIBS_CMD} ${KOKKOSKERNELS_EXTRA_LINKER_FLAGS_CMD} --with-options=${KOKKOS_OPTIONS} --with-cuda-options=${KOKKOS_CUDA_OPTIONS} ${KOKKOS_BOUNDS_CHECK} ${KOKKOSKERNELS_SPACES} --no-examples $extra_args" &> call_generate_makefile_genericpath.sh - run_cmd ${KOKKOSKERNELS_PATH}/cm_generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD $HIP_ENABLE_CMD --kokkos-path=${KOKKOS_PATH} --kokkoskernels-path=${KOKKOSKERNELS_PATH} --with-scalars=$kk_scalars --with-ordinals=${KOKKOSKERNELS_ORDINALS} --with-offsets=${KOKKOSKERNELS_OFFSETS} --with-layouts=${KOKKOSKERNELS_LAYOUTS} ${KOKKOSKERNELS_ENABLE_TPL_CMD} ${KOKKOSKERNELS_TPL_PATH_CMD} ${KOKKOSKERNELS_TPL_LIBS_CMD} ${KOKKOSKERNELS_EXTRA_LINKER_FLAGS_CMD} ${KOKKOS_BOUNDS_CHECK} --no-examples $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + run_cmd ${KOKKOSKERNELS_PATH}/cm_generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD $HIP_ENABLE_CMD --kokkos-path=${KOKKOS_PATH} --kokkoskernels-path=${KOKKOSKERNELS_PATH} --with-scalars=$kk_scalars --with-ordinals=${KOKKOSKERNELS_ORDINALS} --with-offsets=${KOKKOSKERNELS_OFFSETS} --with-layouts=${KOKKOSKERNELS_LAYOUTS} ${KOKKOSKERNELS_ENABLE_TPL_CMD} ${KOKKOSKERNELS_TPL_PATH_CMD} ${KOKKOSKERNELS_TPL_LIBS_CMD} ${KOKKOSKERNELS_EXTRA_LINKER_FLAGS_CMD} ${KOKKOS_BOUNDS_CHECK} ${KOKKOSKERNELS_SPACES} --no-examples $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } local make_par_lvl=12 if [[ "$MACHINE" = white* ]]; then From 366ee3431fa328a55c192952440237bcaebe6a75 Mon Sep 17 00:00:00 2001 From: Evan Harvey <57234914+e10harvey@users.noreply.github.com> Date: Mon, 10 May 2021 07:35:03 -0600 Subject: [PATCH 17/35] Update scripts/cm_test_all_sandia Co-authored-by: Nathan Ellingwood --- scripts/cm_test_all_sandia | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cm_test_all_sandia b/scripts/cm_test_all_sandia index 1ebaf0fdc4..9e8040f4fa 100755 --- a/scripts/cm_test_all_sandia +++ b/scripts/cm_test_all_sandia @@ -361,7 +361,7 @@ do --no-default-eti*) NO_DEFAULT_ETI=True ;; - --with-layouts*) + --with-spaces*) KOKKOSKERNELS_SPACES="${key#*=}" ;; --with-tpls*) From a80001bd71edd21a3f5f79e22e33e1848b2012e3 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Mon, 10 May 2021 13:36:26 -0600 Subject: [PATCH 18/35] unit_test/blas: Fix dot test when LayoutLeft=OFF. --- unit_test/blas/Test_Blas1_dot.hpp | 72 +++++++++++++------------------ 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/unit_test/blas/Test_Blas1_dot.hpp b/unit_test/blas/Test_Blas1_dot.hpp index e1615155aa..63b3b717e0 100644 --- a/unit_test/blas/Test_Blas1_dot.hpp +++ b/unit_test/blas/Test_Blas1_dot.hpp @@ -8,56 +8,44 @@ namespace Test { template void impl_test_dot(int N) { + typedef typename ViewTypeA::value_type ScalarA; + typedef typename ViewTypeB::value_type ScalarB; + typedef Kokkos::ArithTraits ats; - typedef typename ViewTypeA::value_type ScalarA; - typedef typename ViewTypeB::value_type ScalarB; - typedef Kokkos::ArithTraits ats; - - typedef Kokkos::View BaseTypeA; - typedef Kokkos::View BaseTypeB; - - - BaseTypeA b_a("A",N); - BaseTypeB b_b("B",N); - - ViewTypeA a = Kokkos::subview(b_a,Kokkos::ALL(),0); - ViewTypeB b = Kokkos::subview(b_b,Kokkos::ALL(),0); + ViewTypeA a("a", N); + ViewTypeB b("b", N); - typename BaseTypeA::HostMirror h_b_a = Kokkos::create_mirror_view(b_a); - typename BaseTypeB::HostMirror h_b_b = Kokkos::create_mirror_view(b_b); + typename ViewTypeA::HostMirror h_a = Kokkos::create_mirror_view(a); + typename ViewTypeB::HostMirror h_b = Kokkos::create_mirror_view(b); - typename ViewTypeA::HostMirror h_a = Kokkos::subview(h_b_a,Kokkos::ALL(),0); - typename ViewTypeB::HostMirror h_b = Kokkos::subview(h_b_b,Kokkos::ALL(),0); + Kokkos::Random_XorShift64_Pool rand_pool( + 13718); - Kokkos::Random_XorShift64_Pool rand_pool(13718); - - { - ScalarA randStart, randEnd; - Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); - } - { - ScalarB randStart, randEnd; - Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_b,rand_pool,randStart,randEnd); - } + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(a, rand_pool, randStart, randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b, rand_pool, randStart, randEnd); + } - Kokkos::deep_copy(h_b_a,b_a); - Kokkos::deep_copy(h_b_b,b_b); + Kokkos::deep_copy(h_a, a); + Kokkos::deep_copy(h_b, b); - ScalarA expected_result = 0; - for(int i=0;i::value?2*1e-5:1e-7; - EXPECT_NEAR_KK( nonconst_nonconst_result, expected_result, eps*expected_result); - typename ViewTypeA::const_type c_a = a; - typename ViewTypeB::const_type c_b = b; + ScalarA nonconst_nonconst_result = KokkosBlas::dot(a, b); + double eps = std::is_same::value ? 2 * 1e-5 : 1e-7; + EXPECT_NEAR_KK(nonconst_nonconst_result, expected_result, + eps * expected_result); + typename ViewTypeA::const_type c_a = a; + typename ViewTypeB::const_type c_b = b; - ScalarA const_const_result = KokkosBlas::dot(c_a,c_b); + ScalarA const_const_result = KokkosBlas::dot(c_a,c_b); EXPECT_NEAR_KK( const_const_result, expected_result, eps*expected_result); ScalarA nonconst_const_result = KokkosBlas::dot(a,c_b); From 65fa4fed957ef24ed93b81ca2f2eb74f7e2e036e Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Mon, 10 May 2021 13:44:16 -0600 Subject: [PATCH 19/35] unit_test/blas: Fix iamax test when LayoutLeft=OFF. --- unit_test/blas/Test_Blas1_iamax.hpp | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/unit_test/blas/Test_Blas1_iamax.hpp b/unit_test/blas/Test_Blas1_iamax.hpp index 65ca3601a4..36550a317d 100644 --- a/unit_test/blas/Test_Blas1_iamax.hpp +++ b/unit_test/blas/Test_Blas1_iamax.hpp @@ -11,27 +11,19 @@ namespace Test { typedef typename ViewTypeA::non_const_value_type ScalarA; typedef Kokkos::Details::ArithTraits AT; typedef typename AT::mag_type mag_type; + using size_type = typename ViewTypeA::size_type; - typedef Kokkos::View BaseTypeA; + ViewTypeA a("a", N); - typedef typename BaseTypeA::size_type size_type; - - BaseTypeA b_a("A",N); - - ViewTypeA a = Kokkos::subview(b_a,Kokkos::ALL(),0); - - typename BaseTypeA::HostMirror h_b_a = Kokkos::create_mirror_view(b_a); - - typename ViewTypeA::HostMirror h_a = Kokkos::subview(h_b_a,Kokkos::ALL(),0); + typename ViewTypeA::HostMirror h_a = Kokkos::create_mirror_view(a); Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); + Kokkos::fill_random(a, rand_pool, randStart, randEnd); - Kokkos::deep_copy(h_b_a,b_a); + Kokkos::deep_copy(h_a, a); typename ViewTypeA::const_type c_a = a; From 9b3f56645e5597e5ae2a6e46dbce3991e849fa26 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Mon, 10 May 2021 13:52:05 -0600 Subject: [PATCH 20/35] unit_test/blas: Fix mult test when LayoutLeft=OFF. --- unit_test/blas/Test_Blas1_mult.hpp | 126 +++++++++++++---------------- 1 file changed, 55 insertions(+), 71 deletions(-) diff --git a/unit_test/blas/Test_Blas1_mult.hpp b/unit_test/blas/Test_Blas1_mult.hpp index b0839279f6..9ceae76691 100644 --- a/unit_test/blas/Test_Blas1_mult.hpp +++ b/unit_test/blas/Test_Blas1_mult.hpp @@ -8,91 +8,75 @@ namespace Test { template void impl_test_mult(int N) { + typedef typename ViewTypeA::value_type ScalarA; + typedef typename ViewTypeB::value_type ScalarB; + typedef typename ViewTypeC::value_type ScalarC; - typedef typename ViewTypeA::value_type ScalarA; - typedef typename ViewTypeB::value_type ScalarB; - typedef typename ViewTypeC::value_type ScalarC; + ScalarA a = 3; + ScalarB b = 5; + double eps = std::is_same::value ? 1e-4 : 1e-7; - typedef Kokkos::View BaseTypeA; - typedef Kokkos::View BaseTypeB; - typedef Kokkos::View BaseTypeC; + ViewTypeA x("X", N); + ViewTypeB y("Y", N); + ViewTypeC z("Y", N); + ViewTypeC b_org_z("Org_Z", N); + typename ViewTypeA::const_type c_x = x; + typename ViewTypeB::const_type c_y = y; - ScalarA a = 3; - ScalarB b = 5; - double eps = std::is_same::value?1e-4:1e-7; + typename ViewTypeA::HostMirror h_x = Kokkos::create_mirror_view(x); + typename ViewTypeB::HostMirror h_y = Kokkos::create_mirror_view(y); + typename ViewTypeC::HostMirror h_z = Kokkos::create_mirror_view(z); - BaseTypeA b_x("X",N); - BaseTypeB b_y("Y",N); - BaseTypeC b_z("Y",N); - BaseTypeC b_org_z("Org_Z",N); - - - ViewTypeA x = Kokkos::subview(b_x,Kokkos::ALL(),0); - ViewTypeB y = Kokkos::subview(b_y,Kokkos::ALL(),0); - ViewTypeC z = Kokkos::subview(b_z,Kokkos::ALL(),0); - typename ViewTypeA::const_type c_x = x; - typename ViewTypeB::const_type c_y = y; - - typename BaseTypeA::HostMirror h_b_x = Kokkos::create_mirror_view(b_x); - typename BaseTypeB::HostMirror h_b_y = Kokkos::create_mirror_view(b_y); - typename BaseTypeC::HostMirror h_b_z = Kokkos::create_mirror_view(b_z); + Kokkos::Random_XorShift64_Pool rand_pool( + 13718); - typename ViewTypeA::HostMirror h_x = Kokkos::subview(h_b_x,Kokkos::ALL(),0); - typename ViewTypeB::HostMirror h_y = Kokkos::subview(h_b_y,Kokkos::ALL(),0); - typename ViewTypeC::HostMirror h_z = Kokkos::subview(h_b_z,Kokkos::ALL(),0); - - Kokkos::Random_XorShift64_Pool rand_pool(13718); - - { - ScalarA randStart, randEnd; - Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); - } - { - ScalarB randStart, randEnd; - Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); - } - { - ScalarC randStart, randEnd; - Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_z,rand_pool,randStart,randEnd); - } + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(x, rand_pool, randStart, randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(y, rand_pool, randStart, randEnd); + } + { + ScalarC randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(z, rand_pool, randStart, randEnd); + } - Kokkos::deep_copy(b_org_z,b_z); - auto h_b_org_z = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_z); + Kokkos::deep_copy(b_org_z, z); + auto h_b_org_z = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_z); - Kokkos::deep_copy(h_b_x,b_x); - Kokkos::deep_copy(h_b_y,b_y); + Kokkos::deep_copy(h_x, x); + Kokkos::deep_copy(h_y, y); - //expected_result = ScalarC(b*h_z(i) + a*h_x(i)*h_y(i)) + // expected_result = ScalarC(b*h_z(i) + a*h_x(i)*h_y(i)) - KokkosBlas::mult(b,z,a,x,y); - Kokkos::deep_copy(h_b_z, b_z); - for(int i = 0; i < N; i++) - { - EXPECT_NEAR_KK(a * h_x(i) * h_y(i) + b * h_b_org_z(i, 0), h_z(i), eps); - } + KokkosBlas::mult(b, z, a, x, y); + Kokkos::deep_copy(h_z, z); + for (int i = 0; i < N; i++) { + EXPECT_NEAR_KK(a * h_x(i) * h_y(i) + b * h_b_org_z(i), h_z(i), eps); + } - Kokkos::deep_copy(b_z,b_org_z); - KokkosBlas::mult(b,z,a,x,c_y); - Kokkos::deep_copy(h_b_z, b_z); - for(int i = 0; i < N; i++) + Kokkos::deep_copy(z, b_org_z); + KokkosBlas::mult(b, z, a, x, c_y); + Kokkos::deep_copy(h_z, z); + for(int i = 0; i < N; i++) { - EXPECT_NEAR_KK(a * h_x(i) * h_y(i) + b * h_b_org_z(i, 0), h_z(i), eps); - } + EXPECT_NEAR_KK(a * h_x(i) * h_y(i) + b * h_b_org_z(i), h_z(i), eps); + } - Kokkos::deep_copy(b_z,b_org_z); - KokkosBlas::mult(b,z,a,c_x,c_y); - Kokkos::deep_copy(h_b_z, b_z); - for(int i = 0; i < N; i++) + Kokkos::deep_copy(z, b_org_z); + KokkosBlas::mult(b, z, a, c_x, c_y); + Kokkos::deep_copy(h_z, z); + for(int i = 0; i < N; i++) { - EXPECT_NEAR_KK(a * h_x(i) * h_y(i) + b * h_b_org_z(i, 0), h_z(i), eps); - } + EXPECT_NEAR_KK(a * h_x(i) * h_y(i) + b * h_b_org_z(i), h_z(i), eps); + } } template From 93b7eaafbe314f2ef905c8577520674e3adbf65a Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Mon, 10 May 2021 13:56:00 -0600 Subject: [PATCH 21/35] unit_test/blas: Fix mult_mv test when LayoutLeft=OFF. --- unit_test/blas/Test_Blas1_mult.hpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/unit_test/blas/Test_Blas1_mult.hpp b/unit_test/blas/Test_Blas1_mult.hpp index 9ceae76691..60c86b242a 100644 --- a/unit_test/blas/Test_Blas1_mult.hpp +++ b/unit_test/blas/Test_Blas1_mult.hpp @@ -86,28 +86,24 @@ namespace Test { typedef typename ViewTypeB::value_type ScalarB; typedef typename ViewTypeC::value_type ScalarC; - typedef Kokkos::View BaseTypeA; typedef multivector_layout_adapter vfB_type; typedef multivector_layout_adapter vfC_type; - BaseTypeA b_x("X",N); - typename vfB_type::BaseType b_y("Y",N,K); + ViewTypeA x("X", N); + typename vfB_type::BaseType b_y("Y", N, K); typename vfC_type::BaseType b_z("Z",N,K); typename vfC_type::BaseType b_org_z("Z",N,K); - ViewTypeA x = Kokkos::subview(b_x,Kokkos::ALL(),0); ViewTypeB y = vfB_type::view(b_y); ViewTypeC z = vfC_type::view(b_z); typedef multivector_layout_adapter h_vfB_type; typedef multivector_layout_adapter h_vfC_type; - typename BaseTypeA::HostMirror h_b_x = Kokkos::create_mirror_view(b_x); typename h_vfB_type::BaseType h_b_y = Kokkos::create_mirror_view(b_y); typename h_vfC_type::BaseType h_b_z = Kokkos::create_mirror_view(b_z); - typename ViewTypeA::HostMirror h_x = Kokkos::subview(h_b_x,Kokkos::ALL(),0); + typename ViewTypeA::HostMirror h_x = Kokkos::create_mirror_view(x); typename ViewTypeB::HostMirror h_y = h_vfB_type::view(h_b_y); typename ViewTypeC::HostMirror h_z = h_vfC_type::view(h_b_z); @@ -116,7 +112,7 @@ namespace Test { { ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + Kokkos::fill_random(x, rand_pool, randStart, randEnd); } { ScalarB randStart, randEnd; @@ -132,8 +128,8 @@ namespace Test { Kokkos::deep_copy(b_org_z,b_z); auto h_b_org_z = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_z); - Kokkos::deep_copy(h_b_x,b_x); - Kokkos::deep_copy(h_b_y,b_y); + Kokkos::deep_copy(h_x, x); + Kokkos::deep_copy(h_b_y, b_y); Kokkos::deep_copy(h_b_z,b_z); ScalarA a = 3; From 8cea4ef313b1bcf85aa13833f9364599e9459992 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Mon, 10 May 2021 13:58:49 -0600 Subject: [PATCH 22/35] unit_test/blas: Fix nrm1 test when LayoutLeft=OFF. --- unit_test/blas/Test_Blas1_nrm1.hpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/unit_test/blas/Test_Blas1_nrm1.hpp b/unit_test/blas/Test_Blas1_nrm1.hpp index ce46bfeec5..e711a9c649 100644 --- a/unit_test/blas/Test_Blas1_nrm1.hpp +++ b/unit_test/blas/Test_Blas1_nrm1.hpp @@ -13,25 +13,17 @@ namespace Test { typedef typename AT::mag_type mag_type; typedef Kokkos::ArithTraits MAT; - typedef Kokkos::View BaseTypeA; + ViewTypeA a("A", N); - - BaseTypeA b_a("A",N); - - ViewTypeA a = Kokkos::subview(b_a,Kokkos::ALL(),0); - - typename BaseTypeA::HostMirror h_b_a = Kokkos::create_mirror_view(b_a); - - typename ViewTypeA::HostMirror h_a = Kokkos::subview(h_b_a,Kokkos::ALL(),0); + typename ViewTypeA::HostMirror h_a = Kokkos::create_mirror_view(a); Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); + Kokkos::fill_random(a, rand_pool, randStart, randEnd); - Kokkos::deep_copy(h_b_a,b_a); + Kokkos::deep_copy(h_a, a); typename ViewTypeA::const_type c_a = a; double eps = (std::is_same::mag_type, float>::value ? 1e-4 : 1e-7); From 05a9bd106eae41338ebedc98793b136de1721219 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Mon, 10 May 2021 14:01:14 -0600 Subject: [PATCH 23/35] unit_test/blas: Fix nrm2 test when LayoutLeft=OFF. --- unit_test/blas/Test_Blas1_nrm2.hpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/unit_test/blas/Test_Blas1_nrm2.hpp b/unit_test/blas/Test_Blas1_nrm2.hpp index af3d71b9fe..7ab94aa759 100644 --- a/unit_test/blas/Test_Blas1_nrm2.hpp +++ b/unit_test/blas/Test_Blas1_nrm2.hpp @@ -11,25 +11,17 @@ namespace Test { typedef typename ViewTypeA::value_type ScalarA; typedef Kokkos::Details::ArithTraits AT; - typedef Kokkos::View BaseTypeA; + ViewTypeA a("A", N); - - BaseTypeA b_a("A",N); - - ViewTypeA a = Kokkos::subview(b_a,Kokkos::ALL(),0); - - typename BaseTypeA::HostMirror h_b_a = Kokkos::create_mirror_view(b_a); - - typename ViewTypeA::HostMirror h_a = Kokkos::subview(h_b_a,Kokkos::ALL(),0); + typename ViewTypeA::HostMirror h_a = Kokkos::create_mirror_view(a); Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(1.0, randStart, randEnd); - Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); + Kokkos::fill_random(a, rand_pool, randStart, randEnd); - Kokkos::deep_copy(h_b_a,b_a); + Kokkos::deep_copy(h_a, a); typename ViewTypeA::const_type c_a = a; double eps = std::is_same::value?2*1e-5:1e-7; From f4491acfd93e10fe9c97fd84b486fad443109f18 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Mon, 10 May 2021 14:03:13 -0600 Subject: [PATCH 24/35] unit_test/blas: Fix nrminf test when LayoutLeft=OFF. --- unit_test/blas/Test_Blas1_nrminf.hpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/unit_test/blas/Test_Blas1_nrminf.hpp b/unit_test/blas/Test_Blas1_nrminf.hpp index c95c199120..5bb1f2810f 100644 --- a/unit_test/blas/Test_Blas1_nrminf.hpp +++ b/unit_test/blas/Test_Blas1_nrminf.hpp @@ -11,25 +11,17 @@ namespace Test { typedef typename ViewTypeA::non_const_value_type ScalarA; typedef Kokkos::Details::ArithTraits AT; - typedef Kokkos::View BaseTypeA; + ViewTypeA a("A", N); - - BaseTypeA b_a("A",N); - - ViewTypeA a = Kokkos::subview(b_a,Kokkos::ALL(),0); - - typename BaseTypeA::HostMirror h_b_a = Kokkos::create_mirror_view(b_a); - - typename ViewTypeA::HostMirror h_a = Kokkos::subview(h_b_a,Kokkos::ALL(),0); + typename ViewTypeA::HostMirror h_a = Kokkos::create_mirror_view(a); Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); + Kokkos::fill_random(a, rand_pool, randStart, randEnd); - Kokkos::deep_copy(h_b_a,b_a); + Kokkos::deep_copy(h_a, a); typename ViewTypeA::const_type c_a = a; double eps = std::is_same::value?2*1e-5:1e-7; From 6c2a36ba05d20738adc4d9e66754ed53a65da49b Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Mon, 10 May 2021 14:06:53 -0600 Subject: [PATCH 25/35] unit_test/blas: Fix scal test when LayoutLeft=OFF. --- unit_test/blas/Test_Blas1_scal.hpp | 76 +++++++++++++----------------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/unit_test/blas/Test_Blas1_scal.hpp b/unit_test/blas/Test_Blas1_scal.hpp index b862f92b36..d142db3543 100644 --- a/unit_test/blas/Test_Blas1_scal.hpp +++ b/unit_test/blas/Test_Blas1_scal.hpp @@ -8,63 +8,51 @@ namespace Test { template void impl_test_scal(int N) { + typedef typename ViewTypeA::value_type ScalarA; + typedef typename ViewTypeB::value_type ScalarB; + typedef Kokkos::Details::ArithTraits AT; - typedef typename ViewTypeA::value_type ScalarA; - typedef typename ViewTypeB::value_type ScalarB; - typedef Kokkos::Details::ArithTraits AT; - - typedef Kokkos::View BaseTypeA; - typedef Kokkos::View BaseTypeB; + ScalarA a(3); + typename AT::mag_type eps = AT::epsilon() * 1000; + ViewTypeA x("X", N); + ViewTypeB y("Y", N); + ViewTypeB org_y("Org_Y", N); - ScalarA a(3); - typename AT::mag_type eps = AT::epsilon()*1000; - - BaseTypeA b_x("X",N); - BaseTypeB b_y("Y",N); - BaseTypeB b_org_y("Org_Y",N); - - ViewTypeA x = Kokkos::subview(b_x,Kokkos::ALL(),0); - ViewTypeB y = Kokkos::subview(b_y,Kokkos::ALL(),0); - typename ViewTypeA::const_type c_x = x; - typename ViewTypeB::const_type c_y = y; + typename ViewTypeA::const_type c_x = x; + typename ViewTypeB::const_type c_y = y; - typename BaseTypeA::HostMirror h_b_x = Kokkos::create_mirror_view(b_x); - typename BaseTypeB::HostMirror h_b_y = Kokkos::create_mirror_view(b_y); + typename ViewTypeA::HostMirror h_x = Kokkos::create_mirror_view(x); + typename ViewTypeB::HostMirror h_y = Kokkos::create_mirror_view(y); - typename ViewTypeA::HostMirror h_x = Kokkos::subview(h_b_x,Kokkos::ALL(),0); - typename ViewTypeB::HostMirror h_y = Kokkos::subview(h_b_y,Kokkos::ALL(),0); + Kokkos::Random_XorShift64_Pool rand_pool( + 13718); - Kokkos::Random_XorShift64_Pool rand_pool(13718); - - { - ScalarA randStart, randEnd; - Test::getRandomBounds(1.0, randStart, randEnd); - Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); - } - { - ScalarB randStart, randEnd; - Test::getRandomBounds(1.0, randStart, randEnd); - Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); - } + { + ScalarA randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(x, rand_pool, randStart, randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(y, rand_pool, randStart, randEnd); + } - Kokkos::deep_copy(b_org_y,b_y); + Kokkos::deep_copy(org_y, y); - Kokkos::deep_copy(h_b_x,b_x); - Kokkos::deep_copy(h_b_y,b_y); + Kokkos::deep_copy(h_x, x); - KokkosBlas::scal(y,a,x); - Kokkos::deep_copy(h_b_y, b_y); - for(int i = 0; i < N; i++) + KokkosBlas::scal(y, a, x); + Kokkos::deep_copy(h_y, y); + for(int i = 0; i < N; i++) { EXPECT_NEAR_KK(a * h_x(i), h_y(i), eps); } - Kokkos::deep_copy(b_y,b_org_y); - KokkosBlas::scal(y,a,c_x); - Kokkos::deep_copy(h_b_y, b_y); + Kokkos::deep_copy(y, org_y); + KokkosBlas::scal(y, a, c_x); + Kokkos::deep_copy(h_y, y); for(int i = 0; i < N; i++) { EXPECT_NEAR_KK(a * h_x(i), h_y(i), eps); From 581460097bd9df4c4dd3cefc7df698feb30a62cb Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Mon, 10 May 2021 14:09:37 -0600 Subject: [PATCH 26/35] unit_test/blas: Fix sum test when LayoutLeft=OFF. --- unit_test/blas/Test_Blas1_sum.hpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/unit_test/blas/Test_Blas1_sum.hpp b/unit_test/blas/Test_Blas1_sum.hpp index c61f66b5b0..71f964f58d 100644 --- a/unit_test/blas/Test_Blas1_sum.hpp +++ b/unit_test/blas/Test_Blas1_sum.hpp @@ -10,25 +10,17 @@ namespace Test { typedef typename ViewTypeA::value_type ScalarA; - typedef Kokkos::View BaseTypeA; + ViewTypeA a("A", N); - - BaseTypeA b_a("A",N); - - ViewTypeA a = Kokkos::subview(b_a,Kokkos::ALL(),0); - - typename BaseTypeA::HostMirror h_b_a = Kokkos::create_mirror_view(b_a); - - typename ViewTypeA::HostMirror h_a = Kokkos::subview(h_b_a,Kokkos::ALL(),0); + typename ViewTypeA::HostMirror h_a = Kokkos::create_mirror_view(a); Kokkos::Random_XorShift64_Pool rand_pool(13718); ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); + Kokkos::fill_random(a, rand_pool, randStart, randEnd); - Kokkos::deep_copy(h_b_a,b_a); + Kokkos::deep_copy(h_a, a); typename ViewTypeA::const_type c_a = a; double eps = std::is_same::value?2*1e-5:1e-7; From b6dd1810727923d5ffa3d080a3f382e96835122a Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 10 May 2021 15:20:11 -0600 Subject: [PATCH 27/35] Fix unified layouts with left and right enabled --- src/blas/KokkosBlas1_axpby.hpp | 27 ++++++++++++------------ src/blas/KokkosBlas1_dot.hpp | 8 +++++-- src/blas/KokkosBlas1_iamax.hpp | 11 +++++++--- src/blas/KokkosBlas1_mult.hpp | 20 ++++++++---------- src/blas/KokkosBlas1_nrm1.hpp | 9 ++++++-- src/blas/KokkosBlas1_nrm2.hpp | 21 +++++++++--------- src/blas/KokkosBlas1_nrm2_squared.hpp | 21 +++++++++--------- src/blas/KokkosBlas1_nrminf.hpp | 9 ++++++-- src/blas/KokkosBlas1_scal.hpp | 23 ++++++++++---------- src/blas/KokkosBlas1_sum.hpp | 19 ++++++++--------- src/blas/KokkosBlas2_gemv.hpp | 8 ++++--- src/blas/impl/KokkosBlas1_axpby_spec.hpp | 8 +++---- src/impl/KokkosKernels_helpers.hpp | 23 ++++++++++---------- src/sparse/KokkosSparse_spmv.hpp | 4 ++-- 14 files changed, 113 insertions(+), 98 deletions(-) diff --git a/src/blas/KokkosBlas1_axpby.hpp b/src/blas/KokkosBlas1_axpby.hpp index 44b444169e..7d57b75d66 100644 --- a/src/blas/KokkosBlas1_axpby.hpp +++ b/src/blas/KokkosBlas1_axpby.hpp @@ -85,29 +85,28 @@ axpby (const AV& a, const XMV& X, const BV& b, const YMV& Y) Kokkos::Impl::throw_runtime_exception (os.str ()); } + using UnifiedXLayout = typename + KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedYLayout = typename + KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; + // Create unmanaged versions of the input Views. XMV and YMV may be // rank 1 or rank 2. AV and BV may be either rank-1 Views, or // scalar values. - typedef typename KokkosKernels::Impl::GetUnifiedScalarViewType< - AV, XMV, true>::type AV_Internal; typedef Kokkos::View< - typename Kokkos::Impl::if_c< - XMV::Rank == 1, - typename XMV::const_value_type*, - typename XMV::const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename XMV::const_data_type, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; - typedef typename KokkosKernels::Impl::GetUnifiedScalarViewType< - BV, YMV, true>::type BV_Internal; typedef Kokkos::View< - typename Kokkos::Impl::if_c< - YMV::Rank == 1, - typename YMV::non_const_value_type*, - typename YMV::non_const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename YMV::non_const_data_type, + UnifiedYLayout, typename YMV::device_type, Kokkos::MemoryTraits > YMV_Internal; + typedef typename KokkosKernels::Impl::GetUnifiedScalarViewType< + AV, XMV_Internal, true>::type AV_Internal; + typedef typename KokkosKernels::Impl::GetUnifiedScalarViewType< + BV, YMV_Internal, true>::type BV_Internal; AV_Internal a_internal = a; XMV_Internal X_internal = X; diff --git a/src/blas/KokkosBlas1_dot.hpp b/src/blas/KokkosBlas1_dot.hpp index 520177ae05..a939706594 100644 --- a/src/blas/KokkosBlas1_dot.hpp +++ b/src/blas/KokkosBlas1_dot.hpp @@ -213,13 +213,17 @@ dot (const RV& R, const XMV& X, const YMV& Y, } // Create unmanaged versions of the input Views. + using UnifiedXLayout = typename + KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename + KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; typedef Kokkos::View< typename Kokkos::Impl::if_c< RV::rank == 0, typename RV::non_const_value_type, typename RV::non_const_value_type* >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + UnifiedRVLayout, typename RV::device_type, Kokkos::MemoryTraits > RV_Internal; typedef Kokkos::View< @@ -227,7 +231,7 @@ dot (const RV& R, const XMV& X, const YMV& Y, XMV::rank == 1, typename XMV::const_value_type*, typename XMV::const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; typedef Kokkos::View< diff --git a/src/blas/KokkosBlas1_iamax.hpp b/src/blas/KokkosBlas1_iamax.hpp index 234192348f..a2dbb4e068 100644 --- a/src/blas/KokkosBlas1_iamax.hpp +++ b/src/blas/KokkosBlas1_iamax.hpp @@ -130,14 +130,19 @@ iamax (const RV& R, const XMV& X, Kokkos::Impl::throw_runtime_exception (os.str ()); } - // Create unmanaged versions of the input Views. RV may be rank 0 or rank 2. + using UnifiedXLayout = typename + KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename + KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; + + // Create unmanaged versions of the input Views. RV may be rank 0 or rank 1. // XMV may be rank 1 or rank 2. typedef Kokkos::View< typename std::conditional< RV::rank == 0, typename RV::non_const_value_type, typename RV::non_const_value_type* >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + UnifiedRVLayout, typename std::conditional< std::is_same::value, Kokkos::HostSpace, @@ -148,7 +153,7 @@ iamax (const RV& R, const XMV& X, XMV::rank == 1, typename XMV::const_value_type*, typename XMV::const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; diff --git a/src/blas/KokkosBlas1_mult.hpp b/src/blas/KokkosBlas1_mult.hpp index 6c6a03038a..d3a6ee8836 100644 --- a/src/blas/KokkosBlas1_mult.hpp +++ b/src/blas/KokkosBlas1_mult.hpp @@ -85,26 +85,24 @@ mult (typename YMV::const_value_type& gamma, Kokkos::Impl::throw_runtime_exception (os.str ()); } + using YUnifiedLayout = typename KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using AUnifiedLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; + using XUnifiedLayout = typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; + // Create unmanaged versions of the input Views. typedef Kokkos::View< - typename Kokkos::Impl::if_c< - YMV::rank == 1, - typename YMV::non_const_value_type*, - typename YMV::non_const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename YMV::non_const_data_type, + YUnifiedLayout, typename YMV::device_type, Kokkos::MemoryTraits > YMV_Internal; typedef Kokkos::View< typename AV::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + AUnifiedLayout, typename AV::device_type, Kokkos::MemoryTraits > AV_Internal; typedef Kokkos::View< - typename Kokkos::Impl::if_c< - XMV::rank == 1, - typename XMV::const_value_type*, - typename XMV::const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename XMV::const_data_type, + XUnifiedLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; diff --git a/src/blas/KokkosBlas1_nrm1.hpp b/src/blas/KokkosBlas1_nrm1.hpp index 350818e2b1..3343118599 100644 --- a/src/blas/KokkosBlas1_nrm1.hpp +++ b/src/blas/KokkosBlas1_nrm1.hpp @@ -129,6 +129,11 @@ nrm1 (const RV& R, const XMV& X, Kokkos::Impl::throw_runtime_exception (os.str ()); } + using UnifiedXLayout = typename + KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename + KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; + // Create unmanaged versions of the input Views. RV and XMV may be // rank 1 or rank 2. typedef Kokkos::View< @@ -136,7 +141,7 @@ nrm1 (const RV& R, const XMV& X, RV::rank == 0, typename RV::non_const_value_type, typename RV::non_const_value_type* >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + UnifiedRVLayout, typename RV::device_type, Kokkos::MemoryTraits > RV_Internal; typedef Kokkos::View< @@ -144,7 +149,7 @@ nrm1 (const RV& R, const XMV& X, XMV::rank == 1, typename XMV::const_value_type*, typename XMV::const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; diff --git a/src/blas/KokkosBlas1_nrm2.hpp b/src/blas/KokkosBlas1_nrm2.hpp index bd3a4bf806..967e641a8b 100644 --- a/src/blas/KokkosBlas1_nrm2.hpp +++ b/src/blas/KokkosBlas1_nrm2.hpp @@ -64,7 +64,7 @@ nrm2 (const XVector& x) static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::nrm2: XVector must be a Kokkos::View."); static_assert (XVector::rank == 1, "KokkosBlas::nrm2: " - "Both Vector inputs must have rank 1."); + "XVector must have rank 1."); typedef typename Kokkos::Details::InnerProductSpaceTraits::mag_type mag_type; typedef Kokkos::View::array_layout; + using UnifiedRVLayout = typename + KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; + // Create unmanaged versions of the input Views. RV and XMV may be // rank 1 or rank 2. typedef Kokkos::View< - typename Kokkos::Impl::if_c< - RV::rank == 0, - typename RV::non_const_value_type, - typename RV::non_const_value_type* >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename RV::non_const_data_type, + UnifiedRVLayout, typename RV::device_type, Kokkos::MemoryTraits > RV_Internal; typedef Kokkos::View< - typename Kokkos::Impl::if_c< - XMV::rank == 1, - typename XMV::const_value_type*, - typename XMV::const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename XMV::const_data_type, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; diff --git a/src/blas/KokkosBlas1_nrm2_squared.hpp b/src/blas/KokkosBlas1_nrm2_squared.hpp index e2011064c1..a05c7e9e3a 100644 --- a/src/blas/KokkosBlas1_nrm2_squared.hpp +++ b/src/blas/KokkosBlas1_nrm2_squared.hpp @@ -74,7 +74,7 @@ nrm2_squared (const XVector& x) Kokkos::MemoryTraits > XVector_Internal; typedef Kokkos::View > RVector_Internal; @@ -129,22 +129,21 @@ nrm2_squared (const RV& R, const XMV& X, Kokkos::Impl::throw_runtime_exception (os.str ()); } + using UnifiedXLayout = typename + KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename + KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; + // Create unmanaged versions of the input Views. RV and XMV may be // rank 1 or rank 2. typedef Kokkos::View< - typename Kokkos::Impl::if_c< - RV::rank == 0, - typename RV::non_const_value_type, - typename RV::non_const_value_type* >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename RV::non_const_data_type, + UnifiedRVLayout, typename RV::device_type, Kokkos::MemoryTraits > RV_Internal; typedef Kokkos::View< - typename Kokkos::Impl::if_c< - XMV::rank == 1, - typename XMV::const_value_type*, - typename XMV::const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename XMV::const_data_type, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; diff --git a/src/blas/KokkosBlas1_nrminf.hpp b/src/blas/KokkosBlas1_nrminf.hpp index 39d97cfc1c..5133154120 100644 --- a/src/blas/KokkosBlas1_nrminf.hpp +++ b/src/blas/KokkosBlas1_nrminf.hpp @@ -129,6 +129,11 @@ nrminf (const RV& R, const XMV& X, Kokkos::Impl::throw_runtime_exception (os.str ()); } + using UnifiedXLayout = typename + KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename + KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; + // Create unmanaged versions of the input Views. RV and XMV may be // rank 1 or rank 2. typedef Kokkos::View< @@ -136,7 +141,7 @@ nrminf (const RV& R, const XMV& X, RV::rank == 0, typename RV::non_const_value_type, typename RV::non_const_value_type* >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + UnifiedRVLayout, typename RV::device_type, Kokkos::MemoryTraits > RV_Internal; typedef Kokkos::View< @@ -144,7 +149,7 @@ nrminf (const RV& R, const XMV& X, XMV::rank == 1, typename XMV::const_value_type*, typename XMV::const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; diff --git a/src/blas/KokkosBlas1_scal.hpp b/src/blas/KokkosBlas1_scal.hpp index 970da67766..b06fbd9f98 100644 --- a/src/blas/KokkosBlas1_scal.hpp +++ b/src/blas/KokkosBlas1_scal.hpp @@ -78,27 +78,26 @@ scal (const RMV& R, const AV& a, const XMV& X) Kokkos::Impl::throw_runtime_exception (os.str ()); } + using UnifiedRLayout = typename + KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedXLayout = typename + KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; + // Create unmanaged versions of the input Views. RMV and XMV may be // rank 1 or rank 2. AV may be either a rank-1 View, or a scalar // value. typedef Kokkos::View< - typename Kokkos::Impl::if_c< - RMV::rank == 1, - typename RMV::non_const_value_type*, - typename RMV::non_const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename RMV::non_const_data_type, + UnifiedRLayout, typename RMV::device_type, Kokkos::MemoryTraits > RMV_Internal; - typedef typename KokkosKernels::Impl::GetUnifiedScalarViewType< - AV, XMV, true>::type AV_Internal; typedef Kokkos::View< - typename Kokkos::Impl::if_c< - XMV::rank == 1, - typename XMV::const_value_type*, - typename XMV::const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename XMV::const_data_type, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; + typedef typename KokkosKernels::Impl::GetUnifiedScalarViewType< + AV, XMV_Internal, true>::type AV_Internal; RMV_Internal R_internal = R; AV_Internal a_internal = a; diff --git a/src/blas/KokkosBlas1_sum.hpp b/src/blas/KokkosBlas1_sum.hpp index 616e7649ab..3bfd1c0d9b 100644 --- a/src/blas/KokkosBlas1_sum.hpp +++ b/src/blas/KokkosBlas1_sum.hpp @@ -122,22 +122,21 @@ sum (const RV& R, const XMV& X, Kokkos::Impl::throw_runtime_exception (os.str ()); } + using UnifiedXLayout = typename + KokkosKernels::Impl::GetUnifiedLayout::array_layout; + using UnifiedRVLayout = typename + KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout; + // Create unmanaged versions of the input Views. RV and XMV may be // rank 1 or rank 2. typedef Kokkos::View< - typename Kokkos::Impl::if_c< - RV::rank == 0, - typename RV::non_const_value_type, - typename RV::non_const_value_type* >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename RV::non_const_data_type, + UnifiedRVLayout, typename RV::device_type, Kokkos::MemoryTraits > RV_Internal; typedef Kokkos::View< - typename Kokkos::Impl::if_c< - XMV::rank == 1, - typename XMV::const_value_type*, - typename XMV::const_value_type** >::type, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename XMV::const_data_type, + UnifiedXLayout, typename XMV::device_type, Kokkos::MemoryTraits > XMV_Internal; diff --git a/src/blas/KokkosBlas2_gemv.hpp b/src/blas/KokkosBlas2_gemv.hpp index 8ac3f7396d..08a62e6c47 100644 --- a/src/blas/KokkosBlas2_gemv.hpp +++ b/src/blas/KokkosBlas2_gemv.hpp @@ -124,19 +124,21 @@ gemv (const char trans[], Kokkos::Impl::throw_runtime_exception (os.str ()); } + using ALayout = typename AViewType::array_layout; + // Minimize the number of Impl::GEMV instantiations, by // standardizing on particular View specializations for its template // parameters. typedef Kokkos::View > AVT; typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout, typename XViewType::device_type, Kokkos::MemoryTraits > XVT; typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout, typename YViewType::device_type, Kokkos::MemoryTraits > YVT; diff --git a/src/blas/impl/KokkosBlas1_axpby_spec.hpp b/src/blas/impl/KokkosBlas1_axpby_spec.hpp index e07cdaa0ce..15040fff4e 100644 --- a/src/blas/impl/KokkosBlas1_axpby_spec.hpp +++ b/src/blas/impl/KokkosBlas1_axpby_spec.hpp @@ -446,11 +446,11 @@ extern template struct Axpby< \ Kokkos::MemoryTraits >, \ 2, false, true>; \ extern template struct Axpby< \ - Kokkos::View,\ + Kokkos::View,\ Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View,\ + Kokkos::View,\ Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -466,11 +466,11 @@ template struct Axpby< \ Kokkos::MemoryTraits >, \ 2, false, true>; \ template struct Axpby< \ - Kokkos::View,\ + Kokkos::View,\ Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View,\ + Kokkos::View,\ Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ diff --git a/src/impl/KokkosKernels_helpers.hpp b/src/impl/KokkosKernels_helpers.hpp index 2cc5db6590..d9d4ace73a 100644 --- a/src/impl/KokkosKernels_helpers.hpp +++ b/src/impl/KokkosKernels_helpers.hpp @@ -49,15 +49,16 @@ namespace KokkosKernels { namespace Impl { -// Unify Layout of a View to LayoutLeft if possible. -// Used to reduce number of code instantiations -template -struct GetUnifiedLayoutInternal { +// Unify Layout of a View to PreferredLayoutType if possible +// (either matches already, or is rank-0/rank-1 and contiguous) +// Used to reduce number of code instantiations. +template +struct GetUnifiedLayoutPreferring { typedef typename std::conditional< ((ViewType::rank == 1) && (!std::is_same::value)) || ((ViewType::rank == 0)), - UnifiedLayoutType, typename ViewType::array_layout>::type array_layout; + PreferredLayoutType, typename ViewType::array_layout>::type array_layout; }; // If LayoutLeft kernels are pre instantiated, try to unify layout to LayoutLeft @@ -65,8 +66,8 @@ struct GetUnifiedLayoutInternal { template struct GetUnifiedLayout { using array_layout = - typename GetUnifiedLayoutInternal::array_layout; + typename GetUnifiedLayoutPreferring::array_layout; }; #else // If LayoutLeft kernels are not pre instantiated, try to unify layout to @@ -75,8 +76,8 @@ struct GetUnifiedLayout { template struct GetUnifiedLayout { using array_layout = - typename GetUnifiedLayoutInternal::array_layout; + typename GetUnifiedLayoutPreferring::array_layout; }; #endif #endif @@ -91,7 +92,7 @@ template struct GetUnifiedScalarViewType { typedef Kokkos::View< typename T::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout, typename T::device_type, Kokkos::MemoryTraits > type; }; @@ -99,7 +100,7 @@ struct GetUnifiedScalarViewType { template struct GetUnifiedScalarViewType { typedef Kokkos::View::array_layout, + typename KokkosKernels::Impl::GetUnifiedLayoutPreferring::array_layout, typename T::device_type, Kokkos::MemoryTraits > type; }; diff --git a/src/sparse/KokkosSparse_spmv.hpp b/src/sparse/KokkosSparse_spmv.hpp index dd3f02086e..b7c3a26f47 100644 --- a/src/sparse/KokkosSparse_spmv.hpp +++ b/src/sparse/KokkosSparse_spmv.hpp @@ -374,11 +374,11 @@ spmv (KokkosKernels::Experimental::Controls /*controls*/, // Call single-vector version if appropriate if (x.extent(1) == 1) { typedef Kokkos::View::array_layout, typename XVector::device_type, Kokkos::MemoryTraits > XVector_SubInternal; typedef Kokkos::View::array_layout, typename YVector::device_type, Kokkos::MemoryTraits > YVector_SubInternal; From b771c51c31708afb0b0f22c8d9a8697d082a2e01 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Tue, 11 May 2021 07:37:49 -0600 Subject: [PATCH 28/35] unit_test/blas: Fix gemv test. --- unit_test/blas/Test_Blas2_gemv.hpp | 89 ++++++++++++++---------------- 1 file changed, 40 insertions(+), 49 deletions(-) diff --git a/unit_test/blas/Test_Blas2_gemv.hpp b/unit_test/blas/Test_Blas2_gemv.hpp index c9c0176124..2992825ec5 100644 --- a/unit_test/blas/Test_Blas2_gemv.hpp +++ b/unit_test/blas/Test_Blas2_gemv.hpp @@ -15,15 +15,6 @@ namespace Test { typedef Kokkos::ArithTraits KAT_Y; typedef multivector_layout_adapter vfA_type; - typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeX; - typedef Kokkos::View::value, - Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeY; - ScalarA alpha = 3; ScalarY beta = 5; @@ -31,7 +22,7 @@ namespace Test { int ldx; int ldy; - if(mode[0]=='N') { + if (mode[0] == 'N') { ldx = N; ldy = M; } else { @@ -39,80 +30,80 @@ namespace Test { ldy = N; } typename vfA_type::BaseType b_A("A", M, N); - BaseTypeX b_x("X", ldx); - BaseTypeY b_y("Y", ldy); - BaseTypeY b_org_y("Org_Y", ldy); - - ViewTypeA A = vfA_type::view(b_A); - ViewTypeX x = Kokkos::subview(b_x,Kokkos::ALL(),0); - ViewTypeY y = Kokkos::subview(b_y,Kokkos::ALL(),0); + ViewTypeX x("X", ldx); + ViewTypeY y("Y", ldy); + ViewTypeY org_y("Org_Y", ldy); + + ViewTypeA A = vfA_type::view(b_A); typename ViewTypeX::const_type c_x = x; typename ViewTypeA::const_type c_A = A; - typedef multivector_layout_adapter h_vfA_type; + typedef multivector_layout_adapter + h_vfA_type; typename h_vfA_type::BaseType h_b_A = Kokkos::create_mirror_view(b_A); - typename BaseTypeX::HostMirror h_b_x = Kokkos::create_mirror_view(b_x); - typename BaseTypeY::HostMirror h_b_y = Kokkos::create_mirror_view(b_y); typename ViewTypeA::HostMirror h_A = h_vfA_type::view(h_b_A); - typename ViewTypeX::HostMirror h_x = Kokkos::subview(h_b_x,Kokkos::ALL(),0); - typename ViewTypeY::HostMirror h_y = Kokkos::subview(h_b_y,Kokkos::ALL(),0); + typename ViewTypeX::HostMirror h_x = Kokkos::create_mirror_view(x); + typename ViewTypeY::HostMirror h_y = Kokkos::create_mirror_view(y); - Kokkos::Random_XorShift64_Pool rand_pool(13718); + Kokkos::Random_XorShift64_Pool rand_pool( + 13718); { ScalarX randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + Kokkos::fill_random(x, rand_pool, randStart, randEnd); } { ScalarY randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + Kokkos::fill_random(y, rand_pool, randStart, randEnd); } { ScalarA randStart, randEnd; Test::getRandomBounds(10.0, randStart, randEnd); - Kokkos::fill_random(b_A,rand_pool,randStart,randEnd); + Kokkos::fill_random(b_A, rand_pool, randStart, randEnd); } - Kokkos::deep_copy(b_org_y,b_y); - auto h_b_org_y = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_y); - auto h_org_y = Kokkos::subview(h_b_org_y, Kokkos::ALL(), 0); + Kokkos::deep_copy(org_y, y); + auto h_org_y = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), org_y); - Kokkos::deep_copy(h_b_x,b_x); - Kokkos::deep_copy(h_b_y,b_y); - Kokkos::deep_copy(h_b_A,b_A); + Kokkos::deep_copy(h_x, x); + Kokkos::deep_copy(h_y, y); + Kokkos::deep_copy(h_b_A, b_A); Kokkos::View expected("expected aAx+by", ldy); Kokkos::deep_copy(expected, h_org_y); vanillaGEMV(mode[0], alpha, h_A, h_x, beta, expected); KokkosBlas::gemv(mode, alpha, A, x, beta, y); - Kokkos::deep_copy(h_b_y, b_y); + Kokkos::deep_copy(h_y, y); int numErrors = 0; - for(int i = 0; i < ldy; i++) - { - if(KAT_Y::abs(expected(i) - h_y(i)) > KAT_Y::abs(eps * expected(i))) + for (int i = 0; i < ldy; i++) { + if (KAT_Y::abs(expected(i) - h_y(i)) > KAT_Y::abs(eps * expected(i))) numErrors++; } - EXPECT_EQ(numErrors, 0) << "Nonconst input, " << M << 'x' << N << ", alpha = " << alpha << ", beta = " << beta << ", mode " << mode << ": gemv incorrect"; - - Kokkos::deep_copy(b_y, b_org_y); - KokkosBlas::gemv(mode, alpha,A ,c_x, beta, y); - Kokkos::deep_copy(h_b_y, b_y); + EXPECT_EQ(numErrors, 0) + << "Nonconst input, " << M << 'x' << N << ", alpha = " << alpha + << ", beta = " << beta << ", mode " << mode << ": gemv incorrect"; + + Kokkos::deep_copy(y, org_y); + KokkosBlas::gemv(mode, alpha, A, c_x, beta, y); + Kokkos::deep_copy(h_y, y); numErrors = 0; - for(int i = 0; i < ldy; i++) - { - if(KAT_Y::abs(expected(i) - h_y(i)) > KAT_Y::abs(eps * expected(i))) + for (int i = 0; i < ldy; i++) { + if (KAT_Y::abs(expected(i) - h_y(i)) > KAT_Y::abs(eps * expected(i))) numErrors++; } - EXPECT_EQ(numErrors, 0) << "Const vector input, " << M << 'x' << N << ", alpha = " << alpha << ", beta = " << beta << ", mode " << mode << ": gemv incorrect"; + EXPECT_EQ(numErrors, 0) + << "Const vector input, " << M << 'x' << N << ", alpha = " << alpha + << ", beta = " << beta << ", mode " << mode << ": gemv incorrect"; - Kokkos::deep_copy(b_y, b_org_y); + Kokkos::deep_copy(y, org_y); KokkosBlas::gemv(mode, alpha, c_A, c_x, beta, y); - Kokkos::deep_copy(h_b_y, b_y); + Kokkos::deep_copy(h_y, y); numErrors = 0; for(int i = 0; i < ldy; i++) { @@ -125,9 +116,9 @@ namespace Test { beta = KAT_Y::zero(); //beta changed, so update the correct answer vanillaGEMV(mode[0], alpha, h_A, h_x, beta, expected); - Kokkos::deep_copy(b_y, KAT_Y::nan()); + Kokkos::deep_copy(y, KAT_Y::nan()); KokkosBlas::gemv(mode, alpha, A, x, beta, y); - Kokkos::deep_copy(h_b_y, b_y); + Kokkos::deep_copy(h_y, y); numErrors = 0; for(int i = 0; i < ldy; i++) { From a2ca37328ec8eaa2d2981ff255c84a10761a1815 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Tue, 11 May 2021 16:29:06 -0600 Subject: [PATCH 29/35] src/sparse: Fix unused ref warning on intel-18 --- .../KokkosSparse_spgemm_mkl2phase_impl.hpp | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp index 94f383b903..2f683c0d24 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp @@ -83,7 +83,7 @@ void mkl2phase_symbolic( #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL typedef typename KernelHandle::nnz_lno_t idx; - + typedef typename KernelHandle::HandlePersistentMemorySpace HandlePersistentMemorySpace; typedef typename Kokkos::View int_persistent_work_view_t; @@ -156,8 +156,8 @@ void mkl2phase_symbolic( mynullptr, mynulladj, c_xadj, &nzmax, &info); - if (verbose){ - std::cout << "Sort:" << sort << " Actual MKL2 Symbolic Time:" << timer1.seconds() << std::endl; + if (verbose){ + std::cout << "Sort:" << sort << " Actual MKL2 Symbolic Time:" << timer1.seconds() << std::endl; } if (handle->mkl_convert_to_1base){ @@ -183,7 +183,7 @@ void mkl2phase_symbolic( if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_create_csr (&A, SPARSE_INDEX_BASE_ONE, mklm, mkln, a_xadj, a_xadj + 1, a_adj, mynullptr)){ throw std::runtime_error ("CANNOT CREATE mkl_sparse_s_create_csr A matrix\n"); } - + if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_create_csr (&B, SPARSE_INDEX_BASE_ONE, n, k, b_xadj, b_xadj + 1, b_adj, mynullptr)){ throw std::runtime_error ("CANNOT CREATE mkl_sparse_s_create_csr B matrix\n"); } @@ -191,7 +191,7 @@ void mkl2phase_symbolic( if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_create_csr (&A, SPARSE_INDEX_BASE_ZERO, mklm, mkln, a_xadj, a_xadj + 1, a_adj, mynullptr)){ throw std::runtime_error ("CANNOT CREATE mkl_sparse_s_create_csr A matrix\n"); } - + if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_create_csr (&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj, b_xadj + 1, b_adj, mynullptr)){ throw std::runtime_error ("CANNOT CREATE mkl_sparse_s_create_csr B matrix\n"); } @@ -217,13 +217,13 @@ void mkl2phase_symbolic( // options: SPARSE_STAGE_FULL_MULT vs SPARSE_STAGE_NNZ_COUNT then SPARSE_STAGE_FINALIZE_MULT bool success = SPARSE_STATUS_SUCCESS != mkl_sparse_sp2m (operation, common_mtx_props, A, operation, common_mtx_props, B, SPARSE_STAGE_NNZ_COUNT, &C); // success is "true" if mkl_sparse_spmm does not return success - if (verbose){ - std::cout << "Actual DOUBLE MKL SPMM Time:" << timer1.seconds() << std::endl; + if (verbose){ + std::cout << "Actual DOUBLE MKL SPMM Time:" << timer1.seconds() << std::endl; } if (success) { throw std::runtime_error ("ERROR at SPGEMM multiplication in mkl_sparse_spmm\n"); - } + } else { // Copy sparse_matrix_t C results back to input data structure @@ -232,8 +232,8 @@ void mkl2phase_symbolic( double *values; // should return null if (SPARSE_STATUS_SUCCESS != - //mkl_sparse_s_export_csr (C, &c_indexing, &c_rows, &c_cols, &rows_start, &rows_end, &columns, &values)) - mkl_sparse_d_export_csr (C, &c_indexing, &c_rows, &c_cols, &c_xadj, &rows_end, &columns, &values)) + //mkl_sparse_s_export_csr (C, &c_indexing, &c_rows, &c_cols, &rows_start, &rows_end, &columns, &values)) + mkl_sparse_d_export_csr (C, &c_indexing, &c_rows, &c_cols, &c_xadj, &rows_end, &columns, &values)) { throw std::runtime_error ("ERROR at exporting result matrix in mkl_sparse_spmm\n"); } @@ -274,7 +274,7 @@ void mkl2phase_symbolic( throw std::runtime_error ("MKL requires local ordinals to be integer.\n"); (void) k; (void) transposeA; (void) transposeB; (void) verbose; } -#else // KOKKOSKERNELS_ENABLE_TPL_MKL +#else (void)handle; (void)m; (void)n; (void)k; (void)row_mapA; (void)row_mapB; (void)row_mapC; @@ -283,6 +283,10 @@ void mkl2phase_symbolic( (void)verbose; throw std::runtime_error ("MKL IS NOT DEFINED\n"); #endif // KOKKOSKERNELS_ENABLE_TPL_MKL + // Supress -Wunused-param in intel-18 + (void)k; + (void)transposeA; (void)transposeB; + (void)verbose; } @@ -331,7 +335,7 @@ void mkl2phase_symbolic( int *a_adj = (int *)entriesA.data(); int *b_adj = (int *)entriesB.data(); - + if (handle->mkl_convert_to_1base) { @@ -436,7 +440,7 @@ void mkl2phase_symbolic( throw std::runtime_error ("CANNOT CREATE mkl_sparse_s_create_csr A matrix\n"); } } - + if (std::is_same::value){ if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_create_csr (&B, SPARSE_INDEX_BASE_ONE, n, k, b_xadj, b_xadj + 1, b_adj, reinterpret_cast(b_ew))){ throw std::runtime_error ("CANNOT CREATE mkl_sparse_s_create_csr B matrix\n"); @@ -458,7 +462,7 @@ void mkl2phase_symbolic( throw std::runtime_error ("CANNOT CREATE mkl_sparse_s_create_csr A matrix\n"); } } - + if (std::is_same::value){ if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_create_csr (&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj, b_xadj + 1, b_adj, reinterpret_cast(b_ew))){ throw std::runtime_error ("CANNOT CREATE mkl_sparse_s_create_csr B matrix\n"); @@ -470,7 +474,7 @@ void mkl2phase_symbolic( } } } - + sparse_operation_t operation; if (transposeA && transposeB){ operation = SPARSE_OPERATION_TRANSPOSE; @@ -481,7 +485,7 @@ void mkl2phase_symbolic( else { throw std::runtime_error ("MKL either transpose both matrices, or none for SPGEMM\n"); } - + matrix_descr common_mtx_props; common_mtx_props.type = SPARSE_MATRIX_TYPE_GENERAL; common_mtx_props.mode = SPARSE_FILL_MODE_FULL; @@ -491,8 +495,8 @@ void mkl2phase_symbolic( // options: SPARSE_STAGE_FULL_MULT vs SPARSE_STAGE_NNZ_COUNT then SPARSE_STAGE_FINALIZE_MULT bool success = SPARSE_STATUS_SUCCESS != mkl_sparse_sp2m (operation, common_mtx_props, A, operation, common_mtx_props, B, SPARSE_STAGE_FINALIZE_MULT, &C); // success is "true" if mkl_sparse_spmm does not return success - if (verbose){ - std::cout << "Actual MKL SPMM Time:" << timer1.seconds() << std::endl; + if (verbose){ + std::cout << "Actual MKL SPMM Time:" << timer1.seconds() << std::endl; } if (success) { @@ -558,7 +562,6 @@ void mkl2phase_symbolic( (void)transposeA; (void)transposeB; (void)verbose; #endif // __INTEL_MKL__ == 2018 && __INTEL_MKL_UPDATE__ >= 2 - } else { (void) m; (void) n; (void) k; From 30e772eb4e83a0f4315a61c21fc75f3063877d2d Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Tue, 11 May 2021 16:32:03 -0600 Subject: [PATCH 30/35] unit_test/blas: Fix unused ref warnings on cuda10 --- unit_test/blas/Test_Blas_gesv.hpp | 42 ++++++++++++++++--------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/unit_test/blas/Test_Blas_gesv.hpp b/unit_test/blas/Test_Blas_gesv.hpp index 486590831b..12d046c35f 100644 --- a/unit_test/blas/Test_Blas_gesv.hpp +++ b/unit_test/blas/Test_Blas_gesv.hpp @@ -44,7 +44,7 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { // Create host mirrors of device views. typename ViewTypeB::HostMirror h_X0 = Kokkos::create_mirror_view(X0); typename ViewTypeB::HostMirror h_B = Kokkos::create_mirror(B); - + // Initialize data. Kokkos::fill_random(A, rand_pool,Kokkos::rand,ScalarA >::max()); Kokkos::fill_random(X0,rand_pool,Kokkos::rand,ScalarA >::max()); @@ -69,14 +69,14 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { magma_imalloc_cpu( &ipiv_raw, Nt ); } ViewTypeP ipiv(ipiv_raw, Nt); - + // Solve. KokkosBlas::gesv(A,B,ipiv); Kokkos::fence(); - + // Get the solution vector. Kokkos::deep_copy( h_B, B ); - + // Checking vs ref on CPU, this eps is about 10^-9 typedef typename ats::mag_type mag_type; const mag_type eps = 1.0e7 * ats::epsilon(); @@ -84,10 +84,10 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { for (int i=0; i eps ) { test_flag = false; - //printf( " Error %d, pivot %c, padding %c: result( %.15lf ) != solution( %.15lf ) at (%ld)\n", N, mode[0], padding[0], ats::abs(h_B(i)), ats::abs(h_X0(i)), i ); + //printf( " Error %d, pivot %c, padding %c: result( %.15lf ) != solution( %.15lf ) at (%ld)\n", N, mode[0], padding[0], ats::abs(h_B(i)), ats::abs(h_X0(i)), i ); break; } - } + } ASSERT_EQ( test_flag, true ); if(mode[0]=='Y') { @@ -100,7 +100,7 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { int Nt = 0; if(mode[0]=='Y') Nt = N; ViewTypeP ipiv("IPIV", Nt); - + // Solve. KokkosBlas::gesv(A,B,ipiv); Kokkos::fence(); @@ -115,10 +115,10 @@ void impl_test_gesv(const char* mode, const char* padding, int N) { for (int i=0; i eps ) { test_flag = false; - //printf( " Error %d, pivot %c, padding %c: result( %.15lf ) != solution( %.15lf ) at (%ld)\n", N, mode[0], padding[0], ats::abs(h_B(i)), ats::abs(h_X0(i)), i ); + //printf( " Error %d, pivot %c, padding %c: result( %.15lf ) != solution( %.15lf ) at (%ld)\n", N, mode[0], padding[0], ats::abs(h_B(i)), ats::abs(h_X0(i)), i ); break; } - } + } ASSERT_EQ( test_flag, true ); #endif @@ -133,7 +133,7 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, int nrhs) Kokkos::Random_XorShift64_Pool rand_pool(13718); int ldda, lddb; - + if(padding[0]=='Y') {//rounded up to multiple of 32 ldda = ((N+32-1)/32)*32; lddb = ldda; @@ -151,7 +151,7 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, int nrhs) // Create host mirrors of device views. typename ViewTypeB::HostMirror h_X0 = Kokkos::create_mirror_view( X0 ); typename ViewTypeB::HostMirror h_B = Kokkos::create_mirror( B ); - + // Initialize data. Kokkos::fill_random(A, rand_pool,Kokkos::rand,ScalarA >::max()); Kokkos::fill_random(X0,rand_pool,Kokkos::rand,ScalarA >::max()); @@ -176,14 +176,14 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, int nrhs) magma_imalloc_cpu( &ipiv_raw, Nt ); } ViewTypeP ipiv(ipiv_raw, Nt); - - // Solve. + + // Solve. KokkosBlas::gesv(A,B,ipiv); Kokkos::fence(); - + // Get the solution vector. Kokkos::deep_copy( h_B, B ); - + // Checking vs ref on CPU, this eps is about 10^-9 typedef typename ats::mag_type mag_type; const mag_type eps = 1.0e7 * ats::epsilon(); @@ -192,7 +192,7 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, int nrhs) for (int i=0; i eps ) { test_flag = false; - //printf( " Error %d, pivot %c, padding %c: result( %.15lf ) != solution( %.15lf ) at (%ld) at rhs %d\n", N, mode[0], padding[0], ats::abs(h_B(i,j)), ats::abs(h_X0(i,j)), i, j ); + //printf( " Error %d, pivot %c, padding %c: result( %.15lf ) != solution( %.15lf ) at (%ld) at rhs %d\n", N, mode[0], padding[0], ats::abs(h_B(i,j)), ats::abs(h_X0(i,j)), i, j ); break; } } @@ -211,7 +211,7 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, int nrhs) if(mode[0]=='Y') Nt = N; ViewTypeP ipiv("IPIV", Nt); - // Solve. + // Solve. KokkosBlas::gesv(A,B,ipiv); Kokkos::fence(); @@ -226,7 +226,7 @@ void impl_test_gesv_mrhs(const char* mode, const char* padding, int N, int nrhs) for (int i=0; i eps ) { test_flag = false; - //printf( " Error %d, pivot %c, padding %c: result( %.15lf ) != solution( %.15lf ) at (%ld) at rhs %d\n", N, mode[0], padding[0], ats::abs(h_B(i,j)), ats::abs(h_X0(i,j)), i, j ); + //printf( " Error %d, pivot %c, padding %c: result( %.15lf ) != solution( %.15lf ) at (%ld) at rhs %d\n", N, mode[0], padding[0], ats::abs(h_B(i,j)), ats::abs(h_X0(i,j)), i, j ); break; } } @@ -267,7 +267,8 @@ int test_gesv(const char* mode) { Test::impl_test_gesv(&mode[0], "Y", 179); //padding #endif */ - + // Supress unused parameters on CUDA10 + (void)mode; return 1; } @@ -299,7 +300,8 @@ int test_gesv_mrhs(const char* mode) { Test::impl_test_gesv_mrhs(&mode[0], "Y", 179, 5);//padding #endif */ - + // Supress unused parameters on CUDA10 + (void)mode; return 1; } From cffc476ed1c3e4a7ffe2ceffd31140d426279ea4 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Wed, 12 May 2021 08:11:26 -0600 Subject: [PATCH 31/35] src/sparse: Fix unused ref warning on intel-18 --- src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp index 2f683c0d24..c48cacd5e4 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp @@ -556,7 +556,10 @@ void mkl2phase_symbolic( (void)verbose; #else throw std::runtime_error ("Intel MKL versions > 18 are not yet tested/supported\n"); + // Supress -Wunused-parameter on intel-18 (void) m; (void) n; (void) k; +#endif + // Supress -Wunused-parameter on intel-18 (void)entriesC; (void)valuesA; (void)valuesB; (void)valuesC; (void)transposeA; (void)transposeB; From 15018cb76405edf76c9fb97399d2342fc751551a Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 13 May 2021 08:05:10 -0600 Subject: [PATCH 32/35] src/impl/tpls: Fix cublas gemv spec for LayoutRight - Related to #974. --- .../tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp | 74 ++++++------------- 1 file changed, 22 insertions(+), 52 deletions(-) diff --git a/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp index f362203f31..cdc7d09c7d 100644 --- a/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp @@ -244,6 +244,24 @@ KOKKOSBLAS2_CGEMV_BLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::Layout namespace KokkosBlas { namespace Impl { +#define KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA) \ + bool A_is_lr = std::is_same::value; \ + const int M = static_cast (A_is_lr ? A.extent(1) : A.extent(0)); \ + const int N = static_cast (A_is_lr ? A.extent(0) : A.extent(1)); \ + constexpr int one = 1; \ + const int LDA = A_is_lr ? A.stride(0) : A.stride(1); \ + \ + cublasOperation_t transa; \ + if ((trans[0]=='N')||(trans[0]=='n')) \ + transa = A_is_lr ? CUBLAS_OP_T : CUBLAS_OP_N; \ + else if ((trans[0]=='T')||(trans[0]=='t')) \ + transa = A_is_lr ? CUBLAS_OP_N : CUBLAS_OP_T; \ + else { \ + if (A_is_lr) \ + throw std::runtime_error("Error: cublas gemv conjugate transpose requires LayoutLeft views."); \ + transa = CUBLAS_OP_C; \ + } \ + #define KOKKOSBLAS2_DGEMV_CUBLAS( LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL ) \ template \ struct GEMV< \ @@ -271,19 +289,7 @@ struct GEMV< \ const YViewType& Y) { \ \ Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,double]"); \ - const int M = static_cast (A.extent(0)); \ - const int N = static_cast (A.extent(1)); \ - constexpr int one = 1; \ - bool A_is_lr = std::is_same::value; \ - const int AST = A_is_lr?A.stride(0):A.stride(1), LDA = AST == 0 ? 1 : AST; \ - \ - cublasOperation_t transa; \ - if ((trans[0]=='N')||(trans[0]=='n')) \ - transa = CUBLAS_OP_N; \ - else if ((trans[0]=='T')||(trans[0]=='t')) \ - transa = CUBLAS_OP_T; \ - else \ - transa = CUBLAS_OP_C; \ + KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA); \ KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ cublasDgemv(s.handle, transa, M, N, &alpha, A.data(), LDA, X.data(), one, &beta, Y.data(), one); \ Kokkos::Profiling::popRegion(); \ @@ -317,19 +323,7 @@ struct GEMV< \ const YViewType& Y) { \ \ Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,float]"); \ - const int M = static_cast (A.extent(0)); \ - const int N = static_cast (A.extent(1)); \ - constexpr int one = 1; \ - bool A_is_lr = std::is_same::value; \ - const int AST = A_is_lr?A.stride(0):A.stride(1), LDA = AST == 0 ? 1 : AST; \ - \ - cublasOperation_t transa; \ - if ((trans[0]=='N')||(trans[0]=='n')) \ - transa = CUBLAS_OP_N; \ - else if ((trans[0]=='T')||(trans[0]=='t')) \ - transa = CUBLAS_OP_T; \ - else \ - transa = CUBLAS_OP_C; \ + KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA); \ KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ cublasSgemv(s.handle, transa, M, N, &alpha, A.data(), LDA, X.data(), one, &beta, Y.data(), one); \ Kokkos::Profiling::popRegion(); \ @@ -363,19 +357,7 @@ struct GEMV< \ const YViewType& Y) { \ \ Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,complex]"); \ - const int M = static_cast (A.extent(0)); \ - const int N = static_cast (A.extent(1)); \ - constexpr int one = 1; \ - bool A_is_lr = std::is_same::value; \ - const int AST = A_is_lr?A.stride(0):A.stride(1), LDA = AST == 0 ? 1 : AST; \ - \ - cublasOperation_t transa; \ - if ((trans[0]=='N')||(trans[0]=='n')) \ - transa = CUBLAS_OP_N; \ - else if ((trans[0]=='T')||(trans[0]=='t')) \ - transa = CUBLAS_OP_T; \ - else \ - transa = CUBLAS_OP_C; \ + KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA); \ KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ cublasZgemv(s.handle, transa, M, N, reinterpret_cast(&alpha), reinterpret_cast(A.data()), LDA, reinterpret_cast(X.data()), one, reinterpret_cast(&beta), reinterpret_cast(Y.data()), one); \ Kokkos::Profiling::popRegion(); \ @@ -409,19 +391,7 @@ struct GEMV< \ const YViewType& Y) { \ \ Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,complex]"); \ - const int M = static_cast (A.extent(0)); \ - const int N = static_cast (A.extent(1)); \ - constexpr int one = 1; \ - bool A_is_lr = std::is_same::value; \ - const int AST = A_is_lr?A.stride(0):A.stride(1), LDA = AST == 0 ? 1 : AST; \ - \ - cublasOperation_t transa; \ - if ((trans[0]=='N')||(trans[0]=='n')) \ - transa = CUBLAS_OP_N; \ - else if ((trans[0]=='T')||(trans[0]=='t')) \ - transa = CUBLAS_OP_T; \ - else \ - transa = CUBLAS_OP_C; \ + KOKKOSBLAS2_GEMV_CUBLAS_DETERMINE_ARGS(LAYOUTA); \ KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ cublasCgemv(s.handle, transa, M, N, reinterpret_cast(&alpha), reinterpret_cast(A.data()), LDA, reinterpret_cast(X.data()), one, reinterpret_cast(&beta), reinterpret_cast(Y.data()), one); \ Kokkos::Profiling::popRegion(); \ From d419296b80aed08e6fb4f32a56f9ae61ace5331e Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 13 May 2021 08:07:29 -0600 Subject: [PATCH 33/35] unit_test/blas: Check cublas gemv for unsupported transpose op - Related to #974. --- unit_test/blas/Test_Blas2_gemv.hpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/unit_test/blas/Test_Blas2_gemv.hpp b/unit_test/blas/Test_Blas2_gemv.hpp index 2992825ec5..73305370c5 100644 --- a/unit_test/blas/Test_Blas2_gemv.hpp +++ b/unit_test/blas/Test_Blas2_gemv.hpp @@ -12,6 +12,7 @@ namespace Test { typedef typename ViewTypeA::value_type ScalarA; typedef typename ViewTypeX::value_type ScalarX; typedef typename ViewTypeY::value_type ScalarY; + using LayoutAType = typename ViewTypeA::array_layout; typedef Kokkos::ArithTraits KAT_Y; typedef multivector_layout_adapter vfA_type; @@ -78,7 +79,16 @@ namespace Test { Kokkos::deep_copy(expected, h_org_y); vanillaGEMV(mode[0], alpha, h_A, h_x, beta, expected); - KokkosBlas::gemv(mode, alpha, A, x, beta, y); + // Cublas does not support row-major (LayoutRight) + conjugate transpose + // We throw a runtime error in the wrapper for cublasGemv if the user attempts + // this, therefore we must test this code path via the try-catch below. + try { + KokkosBlas::gemv(mode, alpha, A, x, beta, y); + } catch (const std::runtime_error &error) { + if ((mode[0] == 'c' || mode[0] == 'C') && std::is_same::value) + return; // Pass since we caught the runtime error + FAIL(); + } Kokkos::deep_copy(h_y, y); int numErrors = 0; for (int i = 0; i < ldy; i++) { From 89484e589f648882730dd14e8a6ae01ecde17d5a Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 13 May 2021 11:21:56 -0600 Subject: [PATCH 34/35] src/sparse/impl: Fix merge conflict --- src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp index c48cacd5e4..d3a5c99d08 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl2phase_impl.hpp @@ -274,7 +274,7 @@ void mkl2phase_symbolic( throw std::runtime_error ("MKL requires local ordinals to be integer.\n"); (void) k; (void) transposeA; (void) transposeB; (void) verbose; } -#else +#else // KOKKOSKERNELS_ENABLE_TPL_MKL (void)handle; (void)m; (void)n; (void)k; (void)row_mapA; (void)row_mapB; (void)row_mapC; @@ -283,10 +283,6 @@ void mkl2phase_symbolic( (void)verbose; throw std::runtime_error ("MKL IS NOT DEFINED\n"); #endif // KOKKOSKERNELS_ENABLE_TPL_MKL - // Supress -Wunused-param in intel-18 - (void)k; - (void)transposeA; (void)transposeB; - (void)verbose; } @@ -558,8 +554,6 @@ void mkl2phase_symbolic( throw std::runtime_error ("Intel MKL versions > 18 are not yet tested/supported\n"); // Supress -Wunused-parameter on intel-18 (void) m; (void) n; (void) k; -#endif - // Supress -Wunused-parameter on intel-18 (void)entriesC; (void)valuesA; (void)valuesB; (void)valuesC; (void)transposeA; (void)transposeB; From 35b0f1fd154d90b28ffab293eb2a6320948b18d5 Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 13 May 2021 13:43:11 -0600 Subject: [PATCH 35/35] src/impl/tpls: Fix hostblas gemv spec for LayoutRight - Related to #975. --- .../tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp | 52 ++++++++++--------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp index cdc7d09c7d..b07e7dfffa 100644 --- a/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp @@ -51,6 +51,24 @@ namespace KokkosBlas { namespace Impl { +#define KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA) \ + bool A_is_lr = std::is_same::value; \ + const int M = static_cast (A_is_lr ? A.extent(1) : A.extent(0)); \ + const int N = static_cast (A_is_lr ? A.extent(0) : A.extent(1)); \ + constexpr int one = 1; \ + const int LDA = A_is_lr ? A.stride(0) : A.stride(1); \ + \ + char transa; \ + if ((trans[0]=='N')||(trans[0]=='n')) \ + transa = A_is_lr ? 'T' : 'N'; \ + else if ((trans[0]=='T')||(trans[0]=='t')) \ + transa = A_is_lr ? 'N' : 'T'; \ + else { \ + if (A_is_lr) \ + throw std::runtime_error("Error: HostBlas::gemv conjugate transpose requires LayoutLeft views."); \ + transa = 'C'; \ + } \ + #define KOKKOSBLAS2_DGEMV_BLAS( LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL ) \ template \ struct GEMV< \ @@ -78,12 +96,8 @@ struct GEMV< \ const YViewType& Y) { \ \ Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,double]"); \ - const int M = static_cast (A.extent(0)); \ - const int N = static_cast (A.extent(1)); \ - constexpr int one = 1; \ - bool A_is_lr = std::is_same::value; \ - const int AST = A_is_lr?A.stride(0):A.stride(1), LDA = AST == 0 ? 1 : AST; \ - HostBlas::gemv(trans[0],M,N,alpha,A.data(),LDA,X.data(),one,beta,Y.data(),one); \ + KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ + HostBlas::gemv(transa,M,N,alpha,A.data(),LDA,X.data(),one,beta,Y.data(),one); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -115,12 +129,8 @@ struct GEMV< \ const YViewType& Y) { \ \ Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,float]"); \ - const int M = static_cast (A.extent(0)); \ - const int N = static_cast (A.extent(1)); \ - constexpr int one = 1; \ - bool A_is_lr = std::is_same::value; \ - const int AST = A_is_lr?A.stride(0):A.stride(1), LDA = AST == 0 ? 1 : AST; \ - HostBlas::gemv(trans[0],M,N,alpha,A.data(),LDA,X.data(),one,beta,Y.data(),one); \ + KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ + HostBlas::gemv(transa,M,N,alpha,A.data(),LDA,X.data(),one,beta,Y.data(),one); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -152,14 +162,10 @@ struct GEMV< \ const YViewType& Y) { \ \ Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,complex]"); \ - const int M = static_cast (A.extent(0)); \ - const int N = static_cast (A.extent(1)); \ - constexpr int one = 1; \ - bool A_is_lr = std::is_same::value; \ - const int AST = A_is_lr?A.stride(0):A.stride(1), LDA = AST == 0 ? 1 : AST; \ - const std::complex alpha_val = alpha, beta_val = beta; \ + KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ + const std::complex alpha_val = alpha, beta_val = beta; \ HostBlas >::gemv \ - (trans[0],M,N, \ + (transa,M,N, \ alpha_val, \ reinterpret_cast*>(A.data()),LDA, \ reinterpret_cast*>(X.data()),one, \ @@ -196,14 +202,10 @@ struct GEMV< \ const YViewType& Y) { \ \ Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,complex]"); \ - const int M = static_cast (A.extent(0)); \ - const int N = static_cast (A.extent(1)); \ - constexpr int one = 1; \ - bool A_is_lr = std::is_same::value; \ - const int AST = A_is_lr?A.stride(0):A.stride(1), LDA = AST == 0 ? 1 : AST; \ + KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ const std::complex alpha_val = alpha, beta_val = beta; \ HostBlas >::gemv \ - (trans[0],M,N, \ + (transa,M,N, \ alpha_val, \ reinterpret_cast*>(A.data()),LDA, \ reinterpret_cast*>(X.data()),one, \