diff --git a/src/blas/impl/KokkosBlas1_nrm1_impl.hpp b/src/blas/impl/KokkosBlas1_nrm1_impl.hpp index 9e1393eb5a..296c424b3c 100644 --- a/src/blas/impl/KokkosBlas1_nrm1_impl.hpp +++ b/src/blas/impl/KokkosBlas1_nrm1_impl.hpp @@ -52,10 +52,10 @@ namespace KokkosBlas { namespace Impl { // -// nrm1_squared +// nrm1 // -/// \brief 2-norm (squared) functor for single vectors. +/// \brief 1-norm functor for single vectors. /// /// \tparam RV 0-D output View /// \tparam XV 1-D input View @@ -63,12 +63,12 @@ namespace Impl { template struct V_Nrm1_Functor { - typedef typename XV::execution_space execution_space; - typedef SizeType size_type; - typedef typename XV::non_const_value_type xvalue_type; - typedef Kokkos::Details::InnerProductSpaceTraits IPT; - typedef Kokkos::Details::ArithTraits AT; - typedef typename IPT::mag_type value_type; + typedef typename XV::execution_space execution_space; + typedef SizeType size_type; + typedef typename XV::non_const_value_type xvalue_type; + typedef Kokkos::ArithTraits XAT; + typedef typename XAT::mag_type value_type; + typedef Kokkos::ArithTraits MAT; typename XV::const_type m_x; @@ -94,12 +94,13 @@ struct V_Nrm1_Functor KOKKOS_INLINE_FUNCTION void operator() (const size_type& i, value_type& sum) const { - sum += IPT::norm (m_x(i)); + xvalue_type val = m_x(i); + sum += MAT::abs(XAT::real(val)) + MAT::abs(XAT::imag(val)); } KOKKOS_INLINE_FUNCTION void init (value_type& update) const { - update = AT::zero (); + update = MAT::zero (); } KOKKOS_INLINE_FUNCTION void @@ -117,7 +118,7 @@ struct V_Nrm1_Functor } }; -/// \brief Column-wise 2-norm functor for multivectors; works for +/// \brief Column-wise 1-norm functor for multivectors; works for /// any layout, but best performance with LayoutRight. /// /// \tparam RV 1-D output View @@ -126,12 +127,12 @@ struct V_Nrm1_Functor template struct MV_Nrm1_Right_FunctorVector { - typedef typename XMV::execution_space execution_space; - typedef SizeType size_type; - typedef typename XMV::non_const_value_type xvalue_type; - typedef Kokkos::Details::InnerProductSpaceTraits IPT; - typedef Kokkos::Details::ArithTraits AT; - typedef typename IPT::mag_type value_type[]; + typedef typename XMV::execution_space execution_space; + typedef SizeType size_type; + typedef typename XMV::non_const_value_type xvalue_type; + typedef Kokkos::ArithTraits XAT; + typedef Kokkos::ArithTraits MAT; + typedef typename XAT::mag_type value_type[]; size_type value_count; typename XMV::const_type m_x; @@ -166,7 +167,8 @@ struct MV_Nrm1_Right_FunctorVector #pragma vector always #endif for (size_type j = 0; j < numVecs; ++j) { - sum[j] += IPT::norm (m_x(i,j)); + xvalue_type val = m_x(i, j); + sum[j] += MAT::abs(XAT::real(val)) + MAT::abs(XAT::imag(val)); } } @@ -181,7 +183,7 @@ struct MV_Nrm1_Right_FunctorVector #pragma vector always #endif for (size_type j = 0; j < numVecs; ++j) { - update[j] = AT::zero (); + update[j] = MAT::zero (); } } diff --git a/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp b/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp index 16d22e7b02..072abff904 100644 --- a/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp +++ b/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp @@ -77,25 +77,6 @@ KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, Kokkos::LayoutL #endif -// cuBLAS -#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -// double -#define KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( SCALAR, LAYOUT, MEMSPACE ) \ -template \ -struct nrminf_tpl_spec_avail< \ -Kokkos::View::mag_type, LAYOUT, Kokkos::HostSpace, \ - Kokkos::MemoryTraits >, \ -Kokkos::View, \ - Kokkos::MemoryTraits >, \ -1> { enum : bool { value = true }; }; - -KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( double, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( float, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) -KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) - -#endif - } } #endif diff --git a/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp index 5f7a102e77..b91e81891a 100644 --- a/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp @@ -83,6 +83,7 @@ Kokkos::View, \ typedef Kokkos::View, \ Kokkos::MemoryTraits > XV; \ typedef typename XV::size_type size_type; \ + typedef Kokkos::Details::InnerProductSpaceTraits IPT; \ \ static void nrminf (RV& R, const XV& X) \ { \ @@ -94,7 +95,7 @@ Kokkos::View, \ int N = numElems; \ int one = 1; \ int idx = HostBlas::iamax(N,X.data(),one)-1; \ - R() = X(idx); \ + R() = IPT::norm(X(idx)); \ } else { \ NrmInf::nrminf(R,X); \ } \ @@ -116,6 +117,7 @@ Kokkos::View, \ typedef Kokkos::View, \ Kokkos::MemoryTraits > XV; \ typedef typename XV::size_type size_type; \ + typedef Kokkos::Details::InnerProductSpaceTraits IPT; \ \ static void nrminf (RV& R, const XV& X) \ { \ @@ -127,7 +129,7 @@ Kokkos::View, \ int N = numElems; \ int one = 1; \ int idx = HostBlas::iamax(N,X.data(),one)-1; \ - R() = X(idx); \ + R() = IPT::norm(X(idx)); \ } else { \ NrmInf::nrminf(R,X); \ } \ @@ -220,176 +222,4 @@ KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_BLAS( Kokkos::LayoutLeft, Kokkos::HostSpace, f #endif -// cuBLAS -#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -#include - -namespace KokkosBlas { -namespace Impl { - -#define KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ -template \ -struct NrmInf< \ -Kokkos::View >, \ -Kokkos::View, \ - Kokkos::MemoryTraits >, \ -1,true, ETI_SPEC_AVAIL > { \ - \ - typedef Kokkos::View > RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrminf (RV& R, const XV& X) \ - { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_CUBLAS,double]"); \ - const size_type numElems = X.extent(0); \ - if (numElems == 0) { Kokkos::deep_copy (R, 0.0); return; } \ - if (numElems < static_cast (INT_MAX)) { \ - nrminf_print_specialization(); \ - const int N = static_cast (numElems); \ - constexpr int one = 1; \ - int idx; \ - KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - cublasIdamax(s.handle, N, X.data(), one, &idx); \ - Kokkos::deep_copy(R, subview(X,idx-1)); \ - } else { \ - NrmInf::nrminf(R,X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ -}; - -#define KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ -template \ -struct NrmInf< \ -Kokkos::View >, \ -Kokkos::View, \ - Kokkos::MemoryTraits >, \ -1,true, ETI_SPEC_AVAIL > { \ - \ - typedef Kokkos::View > RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrminf (RV& R, const XV& X) \ - { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_CUBLAS,float]"); \ - const size_type numElems = X.extent(0); \ - if (numElems == 0) { Kokkos::deep_copy (R, 0.0f);; return; } \ - if (numElems < static_cast (INT_MAX)) { \ - nrminf_print_specialization(); \ - const int N = static_cast (numElems); \ - constexpr int one = 1; \ - int idx; \ - KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - cublasIsamax(s.handle, N, X.data(), one, &idx); \ - Kokkos::deep_copy(R, subview(X,idx-1)); \ - } else { \ - NrmInf::nrminf(R,X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ -}; - -#define KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ -template \ -struct NrmInf< \ -Kokkos::View >, \ -Kokkos::View*, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits >, \ -1,true, ETI_SPEC_AVAIL > { \ - \ - typedef Kokkos::View > RV; \ - typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits > XV; \ - typedef typename XV::size_type size_type; \ - typedef Kokkos::Details::InnerProductSpaceTraits> IPT; \ - \ - static void nrminf (RV& R, const XV& X) \ - { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_CUBLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems == 0) { Kokkos::deep_copy (R, 0.0); return; } \ - if (numElems < static_cast (INT_MAX)) { \ - nrminf_print_specialization(); \ - const int N = static_cast (numElems); \ - constexpr int one = 1; \ - int idx; \ - KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - cublasIzamax(s.handle, N, reinterpret_cast(X.data()), one, &idx); \ - Kokkos::complex R_cplx_val {0.0, 0.0}; \ - Kokkos::View, LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits > R_cplx (&R_cplx_val); \ - Kokkos::deep_copy(R_cplx, subview(X,idx-1)); \ - R() = IPT::norm(R_cplx()); \ - } else { \ - NrmInf::nrminf(R,X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ -}; - -#define KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ -template \ -struct NrmInf< \ -Kokkos::View >, \ -Kokkos::View*, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits >, \ -1,true, ETI_SPEC_AVAIL > { \ - \ - typedef Kokkos::View > RV; \ - typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits > XV; \ - typedef typename XV::size_type size_type; \ - typedef Kokkos::Details::InnerProductSpaceTraits> IPT; \ - \ - static void nrminf (RV& R, const XV& X) \ - { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_CUBLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems == 0) { Kokkos::deep_copy (R, 0.0f); return; } \ - if (numElems < static_cast (INT_MAX)) { \ - nrminf_print_specialization(); \ - const int N = static_cast (numElems); \ - constexpr int one = 1; \ - int idx; \ - KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - cublasIcamax(s.handle, N, reinterpret_cast(X.data()), one, &idx); \ - Kokkos::complex R_cplx_val {0.0f, 0.0f}; \ - Kokkos::View, LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits > R_cplx (&R_cplx_val); \ - Kokkos::deep_copy(R_cplx, subview(X,idx-1)); \ - R() = IPT::norm(R_cplx()); \ - } else { \ - NrmInf::nrminf(R,X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ -}; - -KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) - -KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) - -KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) - -KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) -KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) - -} -} - -#endif - #endif diff --git a/test_common/KokkosKernels_TestUtils.hpp b/test_common/KokkosKernels_TestUtils.hpp index 43f2d48460..f3a34ba123 100644 --- a/test_common/KokkosKernels_TestUtils.hpp +++ b/test_common/KokkosKernels_TestUtils.hpp @@ -46,6 +46,8 @@ #define KOKKOSKERNELS_TEST_UTILS_HPP #include "KokkosKernels_Utils.hpp" +#include "Kokkos_ArithTraits.hpp" + namespace Test { template::value> struct multivector_layout_adapter; @@ -83,16 +85,15 @@ namespace Test { template void EXPECT_NEAR_KK(Scalar1 val1, Scalar2 val2, Scalar3 tol) { typedef Kokkos::Details::ArithTraits AT1; - typedef Kokkos::Details::ArithTraits AT2; typedef Kokkos::Details::ArithTraits AT3; - EXPECT_NEAR(double(AT1::abs(val1)),double(AT2::abs(val2)),double(AT3::abs(tol))); + EXPECT_LE((double) AT1::abs(val1 - val2), (double) AT3::abs(tol)); } template void EXPECT_NEAR_KK_1DVIEW(ViewType1 v1, ViewType2 v2, Scalar tol) { size_t v1_size = v1.extent(0); size_t v2_size = v2.extent(0); - EXPECT_NEAR_KK(v1_size, v2_size, 0); + EXPECT_EQ(v1_size, v2_size); typename ViewType1::HostMirror h_v1 = Kokkos::create_mirror_view(v1); @@ -227,5 +228,29 @@ namespace Test { constexpr static double value = 0.0009765625F; }; #endif // KOKKOS_HALF_T_IS_FLOAT + + //Get the interval for Kokkos::fill_random + //For real, interval is (-mag, mag) + //For complex, both real and imaginary parts will have interval (-mag, mag) + template + inline void getRandomBounds(double mag, Scalar& start, Scalar& end) + { + start = -mag * Kokkos::ArithTraits::one(); + end = mag * Kokkos::ArithTraits::one(); + } + + template<> + inline void getRandomBounds(double mag, Kokkos::complex& start, Kokkos::complex& end) + { + start = Kokkos::complex(-mag, -mag); + end = Kokkos::complex(mag, mag); + } + + template<> + inline void getRandomBounds(double mag, Kokkos::complex& start, Kokkos::complex& end) + { + start = Kokkos::complex(-mag, -mag); + end = Kokkos::complex(mag, mag); + } } #endif diff --git a/unit_test/blas/Test_Blas1_abs.hpp b/unit_test/blas/Test_Blas1_abs.hpp index acdb167d1d..d1cb36d368 100644 --- a/unit_test/blas/Test_Blas1_abs.hpp +++ b/unit_test/blas/Test_Blas1_abs.hpp @@ -2,7 +2,6 @@ #include #include #include -#include #include namespace Test { @@ -23,7 +22,7 @@ namespace Test { Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeB; - double eps = std::is_same::value?2*1e-5:1e-7; + typename AT::mag_type eps = AT::epsilon()*10; BaseTypeA b_x("X",N); BaseTypeB b_y("Y",N); @@ -42,29 +41,38 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(1)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(1)); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_y,b_y); Kokkos::deep_copy(h_b_x,b_x); Kokkos::deep_copy(h_b_y,b_y); - ScalarA expected_result(0); - for(int i=0;i rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(1)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(1)); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_y,b_y); Kokkos::deep_copy(h_b_x,b_x); - Kokkos::deep_copy(h_b_y,b_y); typename ViewTypeA::const_type c_x = x; - ScalarA* expected_result = new ScalarA[K]; - for(int j=0;j r("Dot::Result",K); + typename AT::mag_type eps = AT::epsilon()*10; + //Test and verify non-const input KokkosBlas::abs(y,x); - KokkosBlas::dot(r,y,y); - for(int k=0;k AT; + typedef Kokkos::ArithTraits MAT; typedef Kokkos::View rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); + Kokkos::deep_copy(h_b_a,b_a); typename ViewTypeA::const_type c_a = a; @@ -36,7 +39,13 @@ namespace Test { typename AT::mag_type expected_result = 0; for(int i=0;i::imag is 0 if T is real. + expected_result += MAT::abs(AT::real(h_a(i))) + MAT::abs(AT::imag(h_a(i))); + } typename AT::mag_type nonconst_result = KokkosBlas::asum(a); EXPECT_NEAR_KK( nonconst_result, expected_result, eps*expected_result); diff --git a/unit_test/blas/Test_Blas1_axpby.hpp b/unit_test/blas/Test_Blas1_axpby.hpp index f2bc692d09..84943b1bc7 100644 --- a/unit_test/blas/Test_Blas1_axpby.hpp +++ b/unit_test/blas/Test_Blas1_axpby.hpp @@ -31,6 +31,7 @@ namespace Test { BaseTypeB b_org_y("Org_Y",N); + auto h_b_org_y = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_y); ViewTypeA x = Kokkos::subview(b_x,Kokkos::ALL(),0); ViewTypeB y = Kokkos::subview(b_y,Kokkos::ALL(),0); typename ViewTypeA::const_type c_x = x; @@ -44,26 +45,38 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(10)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(10)); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_y,b_y); + Kokkos::deep_copy(h_b_org_y, b_org_y); Kokkos::deep_copy(h_b_x,b_x); - Kokkos::deep_copy(h_b_y,b_y); - - ScalarA expected_result = 0; - for(int i=0;i @@ -93,10 +106,19 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(10)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(10)); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_y,b_y); + auto h_b_org_y = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_y); Kokkos::deep_copy(h_b_x,b_x); Kokkos::deep_copy(h_b_y,b_y); @@ -105,36 +127,32 @@ namespace Test { ScalarB b = 5; typename ViewTypeA::const_type c_x = x; - ScalarA* expected_result = new ScalarA[K]; - for(int j=0;j::value?2*1e-5:1e-7; Kokkos::View r("Dot::Result",K); - typedef Kokkos::Details::ArithTraits AT; - KokkosBlas::axpby(a,x,b,y); - KokkosBlas::dot(r,y,y); - for(int k=0;k::value, Kokkos::LayoutRight, Kokkos::LayoutLeft>::type,Device> BaseTypeB; + using MagnitudeA = typename Kokkos::ArithTraits::mag_type; ScalarA a = 3; - double eps = std::is_same::value?2*1e-5:1e-7; + double eps = std::is_same::value?2e-5:1e-7; BaseTypeA b_x("X",N); BaseTypeB b_y("Y",N); BaseTypeB b_org_y("Org_Y",N); - ViewTypeA x = Kokkos::subview(b_x,Kokkos::ALL(),0); ViewTypeB y = Kokkos::subview(b_y,Kokkos::ALL(),0); @@ -43,26 +43,40 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(10)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(10)); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(y,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_y,b_y); + auto h_b_org_y = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_y); Kokkos::deep_copy(h_b_x,b_x); - Kokkos::deep_copy(h_b_y,b_y); - ScalarA expected_result = 0; + KokkosBlas::axpy(a,x,y); + Kokkos::deep_copy(h_b_y, b_y); + for(int i=0;i @@ -92,10 +106,19 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(10)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(10)); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_y,b_y); + auto h_b_org_y = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_y); Kokkos::deep_copy(h_b_x,b_x); Kokkos::deep_copy(h_b_y,b_y); @@ -103,33 +126,28 @@ namespace Test { ScalarA a = 3; typename ViewTypeA::const_type c_x = x; - ScalarA* expected_result = new ScalarA[K]; - for(int j=0;j::value?2*1e-5:1e-7; - Kokkos::View r("Dot::Result",K); - KokkosBlas::axpy(a,x,y); - KokkosBlas::dot(r,y,y); - for(int k=0;k rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); - Kokkos::fill_random(b_b,rand_pool,ScalarB(10)); - - Kokkos::fence(); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_b,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(h_b_a,b_a); Kokkos::deep_copy(h_b_b,b_b); @@ -92,10 +98,16 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); - Kokkos::fill_random(b_b,rand_pool,ScalarB(10)); - - Kokkos::fence(); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_b,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(h_b_a,b_a); Kokkos::deep_copy(h_b_b,b_b); diff --git a/unit_test/blas/Test_Blas1_iamax.hpp b/unit_test/blas/Test_Blas1_iamax.hpp index 166c25c1a8..5e98912553 100644 --- a/unit_test/blas/Test_Blas1_iamax.hpp +++ b/unit_test/blas/Test_Blas1_iamax.hpp @@ -29,9 +29,9 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); - - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); Kokkos::deep_copy(h_b_a,b_a); @@ -115,9 +115,9 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); - - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); Kokkos::deep_copy(h_b_a,b_a); diff --git a/unit_test/blas/Test_Blas1_mult.hpp b/unit_test/blas/Test_Blas1_mult.hpp index fcab767dcc..1f6856a934 100644 --- a/unit_test/blas/Test_Blas1_mult.hpp +++ b/unit_test/blas/Test_Blas1_mult.hpp @@ -29,7 +29,7 @@ namespace Test { ScalarA a = 3; ScalarB b = 5; - double eps = std::is_same::value?2*1e-5:1e-7; + double eps = std::is_same::value?1e-4:1e-7; BaseTypeA b_x("X",N); BaseTypeB b_y("Y",N); @@ -53,33 +53,52 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(10)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(10)); - Kokkos::fill_random(b_z,rand_pool,ScalarC(10)); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } + { + ScalarC randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_z,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_z,b_z); + auto h_b_org_z = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_z); Kokkos::deep_copy(h_b_x,b_x); Kokkos::deep_copy(h_b_y,b_y); - Kokkos::deep_copy(h_b_z,b_z); - ScalarA expected_result = 0; - for(int i=0;i @@ -118,11 +137,24 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(10)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(10)); - Kokkos::fill_random(b_z,rand_pool,ScalarC(10)); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } + { + ScalarC randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_z,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_z,b_z); + auto h_b_org_z = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_z); Kokkos::deep_copy(h_b_x,b_x); Kokkos::deep_copy(h_b_y,b_y); @@ -133,33 +165,28 @@ namespace Test { typename ViewTypeA::const_type c_x = x; typename ViewTypeB::const_type c_y = y; - ScalarC* expected_result = new ScalarC[K]; - for(int j=0;j::value?2*1e-5:1e-7; - - Kokkos::View r("Dot::Result",K); + double eps = std::is_same::value?1e-4:1e-7; KokkosBlas::mult(b,z,a,x,y); - KokkosBlas::dot(r,z,z); - for(int k=0;k AT; + typedef Kokkos::ArithTraits AT; + typedef typename AT::mag_type mag_type; + typedef Kokkos::ArithTraits MAT; typedef Kokkos::View rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); - - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); Kokkos::deep_copy(h_b_a,b_a); typename ViewTypeA::const_type c_a = a; - double eps = std::is_same::value?2*1e-5:1e-7; + double eps = (std::is_same::mag_type, float>::value ? 1e-4 : 1e-7); - typename AT::mag_type expected_result = 0; + mag_type expected_result = 0; for(int i=0;i::imag is 0 if T is real. + expected_result += MAT::abs(AT::real(h_a(i))) + MAT::abs(AT::imag(h_a(i))); + } - typename AT::mag_type const_result = KokkosBlas::nrm1(c_a); - EXPECT_NEAR_KK( const_result, expected_result, eps*expected_result); + mag_type nonconst_result = KokkosBlas::nrm1(a); + EXPECT_NEAR_KK( nonconst_result, expected_result, eps * expected_result ); + mag_type const_result = KokkosBlas::nrm1(c_a); + EXPECT_NEAR_KK( const_result, expected_result, eps * expected_result ); } template @@ -53,6 +61,8 @@ namespace Test { typedef typename ViewTypeA::value_type ScalarA; typedef Kokkos::Details::ArithTraits AT; + typedef typename AT::mag_type mag_type; + typedef Kokkos::ArithTraits MAT; typedef multivector_layout_adapter vfA_type; @@ -68,38 +78,36 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); - - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); Kokkos::deep_copy(h_b_a,b_a); typename ViewTypeA::const_type c_a = a; - typename AT::mag_type* expected_result = new typename AT::mag_type[K]; - for(int j=0;j::mag_type, float>::value ? 1e-4 : 1e-7); + + Kokkos::View expected_result("Expected Nrm1", K); + for(int k = 0; k < K; k++) + { + expected_result(k) = MAT::zero(); for(int i=0;i::value?2*1e-5:1e-7; - - Kokkos::View r("Dot::Result",K); + Kokkos::View r("Nrm1::Result",K); + Kokkos::View c_r("Nrm1::ConstResult",K); - KokkosBlas::nrm1(r,a); - for(int k=0;k rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(1)); - - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); Kokkos::deep_copy(h_b_a,b_a); @@ -69,9 +69,9 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(1)); - - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); Kokkos::deep_copy(h_b_a,b_a); diff --git a/unit_test/blas/Test_Blas1_nrm2_squared.hpp b/unit_test/blas/Test_Blas1_nrm2_squared.hpp index ac116b8987..aef2e2e95e 100644 --- a/unit_test/blas/Test_Blas1_nrm2_squared.hpp +++ b/unit_test/blas/Test_Blas1_nrm2_squared.hpp @@ -27,9 +27,9 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(1)); - - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); Kokkos::deep_copy(h_b_a,b_a); @@ -68,9 +68,9 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(1)); - - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); Kokkos::deep_copy(h_b_a,b_a); diff --git a/unit_test/blas/Test_Blas1_nrminf.hpp b/unit_test/blas/Test_Blas1_nrminf.hpp index f328a720b7..0893045dee 100644 --- a/unit_test/blas/Test_Blas1_nrminf.hpp +++ b/unit_test/blas/Test_Blas1_nrminf.hpp @@ -27,9 +27,9 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); - - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); Kokkos::deep_copy(h_b_a,b_a); @@ -70,9 +70,9 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); - - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); Kokkos::deep_copy(h_b_a,b_a); @@ -98,13 +98,12 @@ namespace Test { EXPECT_NEAR_KK( nonconst_result, exp_result, eps*exp_result); } - /* KokkosBlas::nrminf(r,c_a); + KokkosBlas::nrminf(r,c_a); for(int k=0;k rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(1)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(1)); - - Kokkos::fence(); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_y,b_y); @@ -99,10 +105,16 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(1)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(1)); - - Kokkos::fence(); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_y,b_y); diff --git a/unit_test/blas/Test_Blas1_scal.hpp b/unit_test/blas/Test_Blas1_scal.hpp index f59b8d49ea..254850f1ae 100644 --- a/unit_test/blas/Test_Blas1_scal.hpp +++ b/unit_test/blas/Test_Blas1_scal.hpp @@ -25,13 +25,10 @@ namespace Test { ScalarA a(3); typename AT::mag_type eps = AT::epsilon()*1000; - typename AT::mag_type zero = AT::abs( AT::zero() ); - typename AT::mag_type one = AT::abs( AT::one() ); BaseTypeA b_x("X",N); BaseTypeB b_y("Y",N); BaseTypeB b_org_y("Org_Y",N); - ViewTypeA x = Kokkos::subview(b_x,Kokkos::ALL(),0); ViewTypeB y = Kokkos::subview(b_y,Kokkos::ALL(),0); @@ -46,35 +43,35 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(1)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(1)); - - Kokkos::fence(); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_y,b_y); Kokkos::deep_copy(h_b_x,b_x); Kokkos::deep_copy(h_b_y,b_y); - ScalarA expected_result(0); - for(int i=0;i rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(1)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(1)); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(1.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } Kokkos::fence(); Kokkos::deep_copy(b_org_y,b_y); Kokkos::deep_copy(h_b_x,b_x); - Kokkos::deep_copy(h_b_y,b_y); ScalarA a(3.0); typename ViewTypeA::const_type c_x = x; - ScalarA* expected_result = new ScalarA[K]; - for(int j=0;j r("Dot::Result",K); KokkosBlas::scal(y,a,x); - KokkosBlas::dot(r,y,y); - for(int k=0;k params("Params",K); for(int j=0; j rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); - - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); Kokkos::deep_copy(h_b_a,b_a); @@ -51,7 +51,6 @@ namespace Test { void impl_test_sum_mv(int N, int K) { typedef typename ViewTypeA::value_type ScalarA; - typedef Kokkos::Details::ArithTraits AT; typedef multivector_layout_adapter vfA_type; @@ -67,9 +66,9 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); - - Kokkos::fence(); + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_a,rand_pool,randStart,randEnd); Kokkos::deep_copy(h_b_a,b_a); @@ -79,7 +78,7 @@ namespace Test { for(int j=0;j::value?2*1e-5:1e-7; diff --git a/unit_test/blas/Test_Blas1_team_dot.hpp b/unit_test/blas/Test_Blas1_team_dot.hpp index 158dcf5733..f3c819da3b 100644 --- a/unit_test/blas/Test_Blas1_team_dot.hpp +++ b/unit_test/blas/Test_Blas1_team_dot.hpp @@ -46,8 +46,6 @@ namespace Test { Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); Kokkos::fill_random(b_b,rand_pool,ScalarB(10)); - Kokkos::fence(); - Kokkos::deep_copy(h_b_a,b_a); Kokkos::deep_copy(h_b_b,b_b); @@ -150,8 +148,6 @@ namespace Test { Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); Kokkos::fill_random(b_b,rand_pool,ScalarB(10)); - Kokkos::fence(); - Kokkos::deep_copy(h_b_a,b_a); Kokkos::deep_copy(h_b_b,b_b); diff --git a/unit_test/blas/Test_Blas1_team_nrm2.hpp b/unit_test/blas/Test_Blas1_team_nrm2.hpp index 4c654c7eae..99147053ed 100644 --- a/unit_test/blas/Test_Blas1_team_nrm2.hpp +++ b/unit_test/blas/Test_Blas1_team_nrm2.hpp @@ -33,8 +33,6 @@ namespace Test { Kokkos::fill_random(b_a,rand_pool,ScalarA(10)); - Kokkos::fence(); - Kokkos::deep_copy(h_b_a,b_a); typename ViewTypeA::const_type c_a = a; diff --git a/unit_test/blas/Test_Blas1_team_scal.hpp b/unit_test/blas/Test_Blas1_team_scal.hpp index 6b33caa262..fb6ef4487d 100644 --- a/unit_test/blas/Test_Blas1_team_scal.hpp +++ b/unit_test/blas/Test_Blas1_team_scal.hpp @@ -57,8 +57,6 @@ namespace Test { Kokkos::fill_random(b_x,rand_pool,ScalarA(1)); Kokkos::fill_random(b_y,rand_pool,ScalarB(1)); - Kokkos::fence(); - Kokkos::deep_copy(b_org_y,b_y); Kokkos::deep_copy(h_b_x,b_x); @@ -132,8 +130,6 @@ namespace Test { Kokkos::fill_random(b_x,rand_pool,ScalarA(1)); Kokkos::fill_random(b_y,rand_pool,ScalarB(1)); - Kokkos::fence(); - Kokkos::deep_copy(b_org_y,b_y); Kokkos::deep_copy(h_b_x,b_x); diff --git a/unit_test/blas/Test_Blas1_team_update.hpp b/unit_test/blas/Test_Blas1_team_update.hpp index dcc9d1e486..5298a6798d 100644 --- a/unit_test/blas/Test_Blas1_team_update.hpp +++ b/unit_test/blas/Test_Blas1_team_update.hpp @@ -66,8 +66,6 @@ namespace Test { Kokkos::fill_random(b_y,rand_pool,ScalarB(10)); Kokkos::fill_random(b_z,rand_pool,ScalarC(10)); - Kokkos::fence(); - Kokkos::deep_copy(b_org_z,b_z); Kokkos::deep_copy(h_b_x,b_x); @@ -149,8 +147,6 @@ namespace Test { Kokkos::fill_random(b_y,rand_pool,ScalarB(10)); Kokkos::fill_random(b_z,rand_pool,ScalarC(10)); - Kokkos::fence(); - Kokkos::deep_copy(b_org_z,b_z); Kokkos::deep_copy(h_b_x,b_x); diff --git a/unit_test/blas/Test_Blas1_update.hpp b/unit_test/blas/Test_Blas1_update.hpp index 8bfcdbe5cc..0ece3ae74c 100644 --- a/unit_test/blas/Test_Blas1_update.hpp +++ b/unit_test/blas/Test_Blas1_update.hpp @@ -54,35 +54,52 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(10)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(10)); - Kokkos::fill_random(b_z,rand_pool,ScalarC(10)); - - Kokkos::fence(); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } + { + ScalarC randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_z,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_z,b_z); + auto h_b_org_z = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_z); + auto h_org_z = Kokkos::subview(h_b_org_z, Kokkos::ALL(), 0); Kokkos::deep_copy(h_b_x,b_x); Kokkos::deep_copy(h_b_y,b_y); Kokkos::deep_copy(h_b_z,b_z); - ScalarA expected_result = 0; - for(int i=0;i @@ -119,13 +136,24 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarA(10)); - Kokkos::fill_random(b_y,rand_pool,ScalarB(10)); - Kokkos::fill_random(b_z,rand_pool,ScalarC(10)); - - Kokkos::fence(); + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarB randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } + { + ScalarC randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_z,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_z,b_z); + auto h_b_org_z = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_z); Kokkos::deep_copy(h_b_x,b_x); Kokkos::deep_copy(h_b_y,b_y); @@ -137,33 +165,28 @@ namespace Test { typename ViewTypeA::const_type c_x = x; typename ViewTypeB::const_type c_y = y; - ScalarC* expected_result = new ScalarC[K]; - for(int j=0;j::value?2*1e-5:1e-7; - Kokkos::View r("Dot::Result",K); - KokkosBlas::update(a,x,b,y,c,z); - KokkosBlas::dot(r,z,z); - for(int k=0;k::value ? 2*1e-5 : 1e-7); + double eps = (std::is_same::mag_type, float>::value ? 1e-3 : 1e-10); int ldx; int ldy; @@ -61,59 +61,80 @@ namespace Test { Kokkos::Random_XorShift64_Pool rand_pool(13718); - Kokkos::fill_random(b_x,rand_pool,ScalarX(10)); - Kokkos::fill_random(b_y,rand_pool,ScalarY(10)); - Kokkos::fill_random(b_A,rand_pool,ScalarA(10)); - - Kokkos::fence(); + { + ScalarX randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_x,rand_pool,randStart,randEnd); + } + { + ScalarY randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_y,rand_pool,randStart,randEnd); + } + { + ScalarA randStart, randEnd; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(b_A,rand_pool,randStart,randEnd); + } Kokkos::deep_copy(b_org_y,b_y); + auto h_b_org_y = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), b_org_y); + auto h_org_y = Kokkos::subview(h_b_org_y, Kokkos::ALL(), 0); Kokkos::deep_copy(h_b_x,b_x); Kokkos::deep_copy(h_b_y,b_y); Kokkos::deep_copy(h_b_A,b_A); typedef Kokkos::Details::ArithTraits KAT; - ScalarY expected_result = KAT:: zero(); + Kokkos::View expected("expected aAx+by", ldy); if(mode[0] == 'N') { for(int i = 0; i < M; i++) { - ScalarY y_i = KAT::zero (); + ScalarY y_i = beta * h_org_y(i); for(int j = 0; j < N; j++) { - y_i += h_A(i,j) * h_x(j); + y_i += alpha * h_A(i,j) * h_x(j); } - expected_result += (beta * h_y(i) + alpha * y_i) * (beta * h_y(i) + alpha * y_i) ; + expected(i) = y_i; } } else if(mode[0] == 'T') { for(int j = 0; j < N; j++) { - ScalarY y_j = KAT::zero (); + ScalarY y_j = beta * h_org_y(j); for(int i = 0; i < M; i++) { - y_j += h_A(i,j) * h_x(i); + y_j += alpha * h_A(i,j) * h_x(i); } - expected_result += (beta * h_y(j) + alpha * y_j) * (beta * h_y(j) + alpha * y_j) ; + expected(j) = y_j; } } else if(mode[0] == 'C') { for(int j = 0; j < N; j++) { - ScalarY y_j = KAT::zero (); + ScalarY y_j = beta * h_org_y(j); for(int i = 0; i < M; i++) { - y_j += KAT::conj (h_A(i,j)) * h_x(i); + y_j += alpha * KAT::conj (h_A(i,j)) * h_x(i); } - expected_result += (beta * h_y(j) + alpha * y_j) * (beta * h_y(j) + alpha * y_j) ; + expected(j) = y_j; } } KokkosBlas::gemv(mode, alpha, A, x, beta, y); - ScalarY nonconst_nonconst_result = KokkosBlas::dot(y, y); - EXPECT_NEAR_KK( nonconst_nonconst_result, expected_result, eps*expected_result); + Kokkos::deep_copy(h_b_y, b_y); + for(int i = 0; i < ldy; i++) + { + EXPECT_NEAR_KK(expected(i), h_y(i), eps * expected(i)); + } Kokkos::deep_copy(b_y, b_org_y); KokkosBlas::gemv(mode, alpha,A ,c_x, beta, y); - ScalarY const_nonconst_result = KokkosBlas::dot(y, y); - EXPECT_NEAR_KK( const_nonconst_result, expected_result, eps*expected_result); + Kokkos::deep_copy(h_b_y, b_y); + for(int i = 0; i < ldy; i++) + { + EXPECT_NEAR_KK(expected(i), h_y(i), eps); + } Kokkos::deep_copy(b_y, b_org_y); KokkosBlas::gemv(mode, alpha, c_A, c_x, beta, y); - ScalarY const_const_result = KokkosBlas::dot(y, y); - EXPECT_NEAR_KK( const_const_result, expected_result, eps*expected_result); + Kokkos::deep_copy(h_b_y, b_y); + for(int i = 0; i < ldy; i++) + { + EXPECT_NEAR_KK(expected(i), h_y(i), eps); + } } } @@ -203,7 +224,7 @@ TEST_F( TestCategory, gemv_complex_double ) { Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion("KokkosBlas::Test::gemv_conj_complex_double"); - test_gemv,Kokkos::complex,Kokkos::complex,TestExecSpace> ("T"); + test_gemv,Kokkos::complex,Kokkos::complex,TestExecSpace> ("C"); Kokkos::Profiling::popRegion(); } #endif diff --git a/unit_test/blas/Test_Blas2_team_gemv.hpp b/unit_test/blas/Test_Blas2_team_gemv.hpp index 124941bfd8..f8a7f7c1be 100644 --- a/unit_test/blas/Test_Blas2_team_gemv.hpp +++ b/unit_test/blas/Test_Blas2_team_gemv.hpp @@ -64,8 +64,6 @@ namespace Test { Kokkos::fill_random(b_y,rand_pool,ScalarY(10)); Kokkos::fill_random(b_A,rand_pool,ScalarA(10)); - Kokkos::fence(); - Kokkos::deep_copy(b_org_y,b_y); Kokkos::deep_copy(h_b_x,b_x); diff --git a/unit_test/blas/Test_Blas3_gemm.hpp b/unit_test/blas/Test_Blas3_gemm.hpp index 451b7fedac..580de25397 100644 --- a/unit_test/blas/Test_Blas3_gemm.hpp +++ b/unit_test/blas/Test_Blas3_gemm.hpp @@ -115,8 +115,6 @@ namespace Test { Kokkos::deep_copy(C2,C); - Kokkos::fence(); - struct VanillaGEMM vgemm; vgemm.A_t = A_t; vgemm.B_t = B_t; vgemm.A_c = A_c; vgemm.B_c = B_c; @@ -130,8 +128,6 @@ namespace Test { KokkosBlas::gemm(TA,TB,alpha,A,B,beta,C); - Kokkos::fence(); - mag_type diff_C = 0; struct DiffGEMM diffgemm; diffgemm.N = N; diff --git a/unit_test/blas/Test_Blas3_trmm.hpp b/unit_test/blas/Test_Blas3_trmm.hpp index 9f72bd5e63..4c8d154c15 100644 --- a/unit_test/blas/Test_Blas3_trmm.hpp +++ b/unit_test/blas/Test_Blas3_trmm.hpp @@ -121,7 +121,6 @@ namespace Test { Kokkos::parallel_for("KokkosBlas::Test::NonUnitDiagTRMM", Kokkos::RangePolicy(0,K), nudtrmm); } Kokkos::fill_random(B, rand_pool, Kokkos::rand, ScalarA>::max()); - Kokkos::fence(); Kokkos::deep_copy(host_A, A); // Make host_A a lower triangle @@ -162,11 +161,9 @@ namespace Test { vgemm.beta = beta; Kokkos::parallel_for("KokkosBlas::Test::VanillaGEMM", Kokkos::TeamPolicy(M,Kokkos::AUTO,16), vgemm); } - Kokkos::fence(); Kokkos::deep_copy(host_B_expected, B_expected); KokkosBlas::trmm(side, uplo, trans, diag, alpha, A, B); - Kokkos::fence(); Kokkos::deep_copy(host_B_actual, B); bool test_flag = true; diff --git a/unit_test/blas/Test_Blas3_trsm.hpp b/unit_test/blas/Test_Blas3_trsm.hpp index 8fec44b637..ca9c40ae7e 100644 --- a/unit_test/blas/Test_Blas3_trsm.hpp +++ b/unit_test/blas/Test_Blas3_trsm.hpp @@ -127,8 +127,6 @@ namespace Test { ScalarA alpha_trmm = ScalarA(1)/alpha; ScalarA beta = ScalarA(0); - Kokkos::fence(); - if ((uplo[0]=='L')||(uplo[0]=='l')) { for (int i = 0; i < K-1; i++) for (int j = i+1; j < K; j++)