From 886ffb158b41421396c57f3486f95881f63136e2 Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Tue, 24 Jul 2018 10:19:52 -0600 Subject: [PATCH 1/7] Add test --- test | 1 + 1 file changed, 1 insertion(+) create mode 100644 test diff --git a/test b/test new file mode 100644 index 0000000000..718f4d2ff5 --- /dev/null +++ b/test @@ -0,0 +1 @@ +t From a9d8e0cd95f9fd55245fb1b3514ec2ca918a893b Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Tue, 7 Aug 2018 12:51:47 -0600 Subject: [PATCH 2/7] Add CUBLAS support for nrm1, nrminf, scal, gemv --- .../tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp | 35 ++ .../tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp | 144 ++++++ .../KokkosBlas1_nrminf_tpl_spec_avail.hpp | 34 ++ .../tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp | 162 +++++++ .../tpls/KokkosBlas1_scal_tpl_spec_avail.hpp | 36 ++ .../tpls/KokkosBlas1_scal_tpl_spec_decl.hpp | 152 ++++++ .../tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp | 100 ++++ .../tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp | 447 ++++++++++++++++++ 8 files changed, 1110 insertions(+) diff --git a/src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp b/src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp index 836aa68843..5b8a0567a2 100644 --- a/src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp +++ b/src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp @@ -69,14 +69,49 @@ Kokkos::View, \ Kokkos::MemoryTraits >, \ 1> { enum : bool { value = true }; }; +#if defined (KOKKOSKERNELS_INST_DOUBLE) KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS( double, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_FLOAT) KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS( float, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif + +#endif + +// cuBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +// double +#define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS( SCALAR, LAYOUT, MEMSPACE ) \ +template \ +struct nrm1_tpl_spec_avail< \ +Kokkos::View::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits >, \ +Kokkos::View, \ + Kokkos::MemoryTraits >, \ +1> { enum : bool { value = true }; }; +#if defined (KOKKOSKERNELS_INST_DOUBLE) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS( double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_FLOAT) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS( float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) +KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif #endif + } } #endif diff --git a/src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp index f55a79a7c0..4044a4d677 100644 --- a/src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp @@ -198,4 +198,148 @@ KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_BLAS( Kokkos::LayoutLeft, Kokkos::HostSpace, fal #endif +// cuBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +#include + +namespace KokkosBlas { +namespace Impl { + +#define KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ +template \ +struct Nrm1< \ +Kokkos::View >, \ +Kokkos::View, \ + Kokkos::MemoryTraits >, \ +1,true, ETI_SPEC_AVAIL > { \ + \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrm1 (RV& R, const XV& X) \ + { \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast (INT_MAX)) { \ + nrm1_print_specialization(); \ + const int N = static_cast (numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasDasum(s.handle, N, X.data(), one, &R()); \ + } else { \ + Nrm1::nrm1(R,X); \ + } \ + } \ +}; + +#define KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ +template \ +struct Nrm1< \ +Kokkos::View >, \ +Kokkos::View, \ + Kokkos::MemoryTraits >, \ +1,true, ETI_SPEC_AVAIL > { \ + \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrm1 (RV& R, const XV& X) \ + { \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast (INT_MAX)) { \ + nrm1_print_specialization(); \ + const int N = static_cast (numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasSasum(s.handle, N, X.data(), one, &R()); \ + } else { \ + Nrm1::nrm1(R,X); \ + } \ + } \ +}; + +#define KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ +template \ +struct Nrm1< \ +Kokkos::View >, \ +Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ +1,true, ETI_SPEC_AVAIL > { \ + \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrm1 (RV& R, const XV& X) \ + { \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast (INT_MAX)) { \ + nrm1_print_specialization(); \ + const int N = static_cast (numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasDzasum(s.handle, N, reinterpret_cast(X.data()), one, &R()); \ + } else { \ + Nrm1::nrm1(R,X); \ + } \ + } \ +}; + +#define KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ +template \ +struct Nrm1< \ +Kokkos::View >, \ +Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ +1,true, ETI_SPEC_AVAIL > { \ + \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrm1 (RV& R, const XV& X) \ + { \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast (INT_MAX)) { \ + nrm1_print_specialization(); \ + const int N = static_cast (numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasScasum(s.handle, N, reinterpret_cast(X.data()), one, &R()); \ + } else { \ + Nrm1::nrm1(R,X); \ + } \ + } \ +}; + +KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +} +} + +#endif + #endif diff --git a/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp b/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp index 23363aa98a..24d61dfd7e 100644 --- a/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp +++ b/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp @@ -69,11 +69,45 @@ Kokkos::View, \ Kokkos::MemoryTraits >, \ 1> { enum : bool { value = true }; }; +#if defined (KOKKOSKERNELS_INST_DOUBLE) KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS( double, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_FLOAT) KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS( float, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif + +#endif +// cuBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +// double +#define KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( SCALAR, LAYOUT, MEMSPACE ) \ +template \ +struct nrminf_tpl_spec_avail< \ +Kokkos::View::mag_type, LAYOUT, Kokkos::HostSpace, \ + Kokkos::MemoryTraits >, \ +Kokkos::View, \ + Kokkos::MemoryTraits >, \ +1> { enum : bool { value = true }; }; + +#if defined (KOKKOSKERNELS_INST_DOUBLE) +KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_FLOAT) +KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) +KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) +KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif #endif diff --git a/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp index 0b355ee83d..f8c0de56dd 100644 --- a/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp @@ -208,4 +208,166 @@ KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_BLAS( Kokkos::LayoutLeft, Kokkos::HostSpace, f #endif +// cuBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +#include + +namespace KokkosBlas { +namespace Impl { + +#define KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ +template \ +struct NrmInf< \ +Kokkos::View >, \ +Kokkos::View, \ + Kokkos::MemoryTraits >, \ +1,true, ETI_SPEC_AVAIL > { \ + \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrminf (RV& R, const XV& X) \ + { \ + const size_type numElems = X.extent(0); \ + if (numElems == 0) { R() = 0.0; return; } \ + if (numElems < static_cast (INT_MAX)) { \ + nrminf_print_specialization(); \ + const int N = static_cast (numElems); \ + constexpr int one = 1; \ + int idx; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasIdamax(s.handle, N, X.data(), one, &idx); \ + Kokkos::deep_copy(R, subview(X,idx-1)); \ + } else { \ + NrmInf::nrminf(R,X); \ + } \ + } \ +}; + +#define KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ +template \ +struct NrmInf< \ +Kokkos::View >, \ +Kokkos::View, \ + Kokkos::MemoryTraits >, \ +1,true, ETI_SPEC_AVAIL > { \ + \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrminf (RV& R, const XV& X) \ + { \ + const size_type numElems = X.extent(0); \ + if (numElems == 0) { R() = 0.0f; return; } \ + if (numElems < static_cast (INT_MAX)) { \ + nrminf_print_specialization(); \ + const int N = static_cast (numElems); \ + constexpr int one = 1; \ + int idx; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasIsamax(s.handle, N, X.data(), one, &idx); \ + Kokkos::deep_copy(R, subview(X,idx-1)); \ + } else { \ + NrmInf::nrminf(R,X); \ + } \ + } \ +}; + +#define KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ +template \ +struct NrmInf< \ +Kokkos::View >, \ +Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ +1,true, ETI_SPEC_AVAIL > { \ + \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > XV; \ + typedef typename XV::size_type size_type; \ + typedef Kokkos::Details::InnerProductSpaceTraits> IPT; \ + \ + static void nrminf (RV& R, const XV& X) \ + { \ + const size_type numElems = X.extent(0); \ + if (numElems == 0) { R() = 0.0; return; } \ + if (numElems < static_cast (INT_MAX)) { \ + nrminf_print_specialization(); \ + const int N = static_cast (numElems); \ + constexpr int one = 1; \ + int idx; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasIzamax(s.handle, N, reinterpret_cast(X.data()), one, &idx); \ + Kokkos::View, LAYOUT, Kokkos::HostSpace > R_cplx("R_cplx",1); \ + Kokkos::deep_copy(R_cplx, subview(X,idx-1)); \ + R() = IPT::norm(R_cplx()); \ + } else { \ + NrmInf::nrminf(R,X); \ + } \ + } \ +}; + +#define KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ +template \ +struct NrmInf< \ +Kokkos::View >, \ +Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ +1,true, ETI_SPEC_AVAIL > { \ + \ + typedef Kokkos::View > RV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > XV; \ + typedef typename XV::size_type size_type; \ + typedef Kokkos::Details::InnerProductSpaceTraits> IPT; \ + \ + static void nrminf (RV& R, const XV& X) \ + { \ + const size_type numElems = X.extent(0); \ + if (numElems == 0) { R() = 0.0f; return; } \ + if (numElems < static_cast (INT_MAX)) { \ + nrminf_print_specialization(); \ + const int N = static_cast (numElems); \ + constexpr int one = 1; \ + int idx; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasIcamax(s.handle, N, reinterpret_cast(X.data()), one, &idx); \ + Kokkos::View, LAYOUT, Kokkos::HostSpace > R_cplx("R_cplx",1);; \ + Kokkos::deep_copy(R_cplx, subview(X,idx-1)); \ + R() = IPT::norm(R_cplx()); \ + } else { \ + NrmInf::nrminf(R,X); \ + } \ + } \ +}; + +KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +} +} + +#endif + #endif diff --git a/src/impl/tpls/KokkosBlas1_scal_tpl_spec_avail.hpp b/src/impl/tpls/KokkosBlas1_scal_tpl_spec_avail.hpp index 66743fb98f..4cd8593dc4 100644 --- a/src/impl/tpls/KokkosBlas1_scal_tpl_spec_avail.hpp +++ b/src/impl/tpls/KokkosBlas1_scal_tpl_spec_avail.hpp @@ -70,13 +70,49 @@ Kokkos::View, \ Kokkos::MemoryTraits >, \ 1> { enum : bool { value = true }; }; +#if defined (KOKKOSKERNELS_INST_DOUBLE) KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS( double, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_FLOAT) KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS( float, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#endif +// cuBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +// double +#define KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS( SCALAR, LAYOUT, MEMSPACE ) \ +template \ +struct scal_tpl_spec_avail< \ +Kokkos::View, \ + Kokkos::MemoryTraits >, \ +SCALAR, \ +Kokkos::View, \ + Kokkos::MemoryTraits >, \ +1> { enum : bool { value = true }; }; + +#if defined (KOKKOSKERNELS_INST_DOUBLE) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS( double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_FLOAT) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS( float, Kokkos::LayoutLeft, Kokkos::CudaSpace) #endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) +KOKKOSBLAS1_SCAL_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif + +#endif + } } diff --git a/src/impl/tpls/KokkosBlas1_scal_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas1_scal_tpl_spec_decl.hpp index 2a919b85d2..4536255381 100644 --- a/src/impl/tpls/KokkosBlas1_scal_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas1_scal_tpl_spec_decl.hpp @@ -210,4 +210,156 @@ KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_BLAS( Kokkos::LayoutLeft, Kokkos::HostSpace, fal #endif +// cuBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +#include + +namespace KokkosBlas { +namespace Impl { + +#define KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ +template \ +struct Scal< \ +Kokkos::View, \ + Kokkos::MemoryTraits >, \ +double, \ +Kokkos::View, \ + Kokkos::MemoryTraits >, \ +1,true, ETI_SPEC_AVAIL > { \ + \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > RV; \ + typedef double AV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XV; \ + typedef typename XV::size_type size_type; \ + \ + static void scal (const RV& R, const double& alpha, const XV& X) \ + { \ + const size_type numElems = X.extent(0); \ + if ((numElems < static_cast (INT_MAX)) && (R.data() == X.data())) { \ + scal_print_specialization(); \ + const int N = static_cast (numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasDscal(s.handle, N, &alpha, R.data(), one); \ + } else { \ + Scal::scal(R,alpha,X); \ + } \ + } \ +}; + +#define KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ +template \ +struct Scal< \ +Kokkos::View, \ + Kokkos::MemoryTraits >, \ +float, \ +Kokkos::View, \ + Kokkos::MemoryTraits >, \ +1,true, ETI_SPEC_AVAIL > { \ + \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > RV; \ + typedef float AV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XV; \ + typedef typename XV::size_type size_type; \ + \ + static void scal (const RV& R, const float& alpha, const XV& X) \ + { \ + const size_type numElems = X.extent(0); \ + if ((numElems < static_cast (INT_MAX)) && (R.data() == X.data())) { \ + scal_print_specialization(); \ + const int N = static_cast (numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasSscal(s.handle, N, &alpha, R.data(), one); \ + } else { \ + Scal::scal(R,alpha,X); \ + } \ + } \ +}; + +#define KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ +template \ +struct Scal< \ +Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ +Kokkos::complex, \ +Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ +1,true, ETI_SPEC_AVAIL > { \ + \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > RV; \ + typedef Kokkos::complex AV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > XV; \ + typedef typename XV::size_type size_type; \ + \ + static void scal (const RV& R, const Kokkos::complex& alpha, const XV& X) \ + { \ + const size_type numElems = X.extent(0); \ + if ((numElems < static_cast (INT_MAX)) && (R.data() == X.data())) { \ + scal_print_specialization(); \ + const int N = static_cast (numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasZscal(s.handle, N, reinterpret_cast(&alpha), reinterpret_cast(R.data()), one); \ + } else { \ + Scal::scal(R,alpha,X); \ + } \ + } \ +}; + +#define KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \ +template \ +struct Scal< \ +Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ +Kokkos::complex, \ +Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ +1,true, ETI_SPEC_AVAIL > { \ + \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > RV; \ + typedef Kokkos::complex AV; \ + typedef Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits > XV; \ + typedef typename XV::size_type size_type; \ + \ + static void scal (const RV& R, const Kokkos::complex& alpha, const XV& X) \ + { \ + const size_type numElems = X.extent(0); \ + if ((numElems < static_cast (INT_MAX)) && (R.data() == X.data())) { \ + scal_print_specialization(); \ + const int N = static_cast (numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasCscal(s.handle, N, reinterpret_cast(&alpha), reinterpret_cast(R.data()), one); \ + } else { \ + Scal::scal(R,alpha,X); \ + } \ + } \ +}; + +KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_DSCAL_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_SSCAL_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_ZSCAL_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS1_CSCAL_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +} +} + +#endif + #endif diff --git a/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp b/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp index 6e70ba0bbb..3a3f0072ab 100644 --- a/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp +++ b/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp @@ -51,6 +51,106 @@ template struct gemv_tpl_spec_avail { enum : bool { value = false }; }; + +// Generic Host side BLAS (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS + +#define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS( SCALAR, LAYOUTA, LAYOUTX, LAYOUTY, MEMSPACE ) \ +template \ +struct gemv_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > \ + > { enum : bool { value = true }; }; + +#if defined (KOKKOSKERNELS_INST_DOUBLE) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS( double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_FLOAT) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS( float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif + +#if defined (KOKKOSKERNELS_INST_DOUBLE) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS( double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_FLOAT) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS( float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS( Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) +#endif + +#endif + +// cuBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS + +#define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS( SCALAR, LAYOUTA, LAYOUTX, LAYOUTY, MEMSPACE ) \ +template \ +struct gemv_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > \ + > { enum : bool { value = true }; }; + +#if defined (KOKKOSKERNELS_INST_DOUBLE) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS( double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_FLOAT) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS( float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) +#endif + +#if defined (KOKKOSKERNELS_INST_DOUBLE) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS( double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_FLOAT) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS( float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) +#endif +#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) + KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace) +#endif + +#endif } } diff --git a/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp index ad0cf6f432..2d5f346d91 100644 --- a/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp @@ -44,9 +44,456 @@ #ifndef KOKKOSBLAS2_GEMV_TPL_SPEC_DECL_HPP_ #define KOKKOSBLAS2_GEMV_TPL_SPEC_DECL_HPP_ +#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS +extern "C" void dgemv_( const char* trans, + const int* M, const int* N, + const double* alpha, + const double* A, const int* LDA, + const double* X, const int* INCX, + const double* beta, + double* Y, const int* INCY); +extern "C" void sgemv_( const char* trans, + const int* M, const int* N, + const float* alpha, + const float* A, const int* LDA, + const float* X, const int* INCX, + const float* beta, + float* Y, const int* INCY); +extern "C" void zgemv_( const char* trans, + const int* M, const int* N, + const std::complex* alpha, + const std::complex* A, const int* LDA, + const std::complex* X, const int* INCX, + const std::complex* beta, + std::complex* Y, const int* INCY); +extern "C" void cgemv_( const char* trans, + const int* M, const int* N, + const std::complex* alpha, + const std::complex* A, const int* LDA, + const std::complex* X, const int* INCX, + const std::complex* beta, + std::complex* Y, const int* INCY); + namespace KokkosBlas { namespace Impl { + +#define KOKKOSBLAS2_DGEMV_BLAS( LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL ) \ +template \ +struct GEMV< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > YViewType; \ + \ + static void \ + gemv (const char trans[], \ + typename AViewType::const_value_type& alpha, \ + const AViewType& A, \ + const XViewType& X, \ + typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,double]"); \ + const int M = static_cast (A.extent(0)); \ + const int N = static_cast (A.extent(1)); \ + constexpr int one = 1; \ + int strides[2]; \ + bool A_is_lr = std::is_same::value; \ + A.stride(strides); \ + const int LDA = strides[A_is_lr?0:1]; \ + \ + dgemv_(trans,&M,&N,&alpha,A.data(),&LDA,X.data(),&one,&beta,Y.data(),&one); \ + Kokkos::Profiling::popRegion(); \ + } \ +}; + +#define KOKKOSBLAS2_SGEMV_BLAS( LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL ) \ +template \ +struct GEMV< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > YViewType; \ + \ + static void \ + gemv (const char trans[], \ + typename AViewType::const_value_type& alpha, \ + const AViewType& A, \ + const XViewType& X, \ + typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,float]"); \ + const int M = static_cast (A.extent(0)); \ + const int N = static_cast (A.extent(1)); \ + constexpr int one = 1; \ + int strides[2]; \ + bool A_is_lr = std::is_same::value; \ + A.stride(strides); \ + const int LDA = strides[A_is_lr?0:1]; \ + \ + sgemv_(trans,&M,&N,&alpha,A.data(),&LDA,X.data(),&one,&beta,Y.data(),&one); \ + Kokkos::Profiling::popRegion(); \ + } \ +}; + +#define KOKKOSBLAS2_ZGEMV_BLAS( LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL ) \ +template \ +struct GEMV< \ + Kokkos::View**, LAYOUTA, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTX, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTY, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > YViewType; \ + \ + static void \ + gemv (const char trans[], \ + typename AViewType::const_value_type& alpha, \ + const AViewType& A, \ + const XViewType& X, \ + typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,complex]"); \ + const int M = static_cast (A.extent(0)); \ + const int N = static_cast (A.extent(1)); \ + constexpr int one = 1; \ + int strides[2]; \ + \ + bool A_is_lr = std::is_same::value; \ + A.stride(strides); \ + const int LDA = strides[A_is_lr?0:1]; \ + \ + zgemv_(trans,&M,&N, \ + reinterpret_cast*>(&alpha),reinterpret_cast*>(A.data()),&LDA, \ + reinterpret_cast*>(X.data()),&one, \ + reinterpret_cast*>(&beta),reinterpret_cast*>(Y.data()),&one); \ + Kokkos::Profiling::popRegion(); \ + } \ +}; \ + +#define KOKKOSBLAS2_CGEMV_BLAS( LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL ) \ +template \ +struct GEMV< \ + Kokkos::View**, LAYOUTA, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTX, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTY, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > YViewType; \ + \ + static void \ + gemv (const char trans[], \ + typename AViewType::const_value_type& alpha, \ + const AViewType& A, \ + const XViewType& X, \ + typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,complex]"); \ + const int M = static_cast (A.extent(0)); \ + const int N = static_cast (A.extent(1)); \ + constexpr int one = 1; \ + int strides[2]; \ + \ + bool A_is_lr = std::is_same::value; \ + A.stride(strides); \ + const int LDA = strides[A_is_lr?0:1]; \ + \ + cgemv_(trans,&M,&N, \ + reinterpret_cast*>(&alpha),reinterpret_cast*>(A.data()),&LDA, \ + reinterpret_cast*>(X.data()),&one, \ + reinterpret_cast*>(&beta),reinterpret_cast*>(Y.data()),&one); \ + Kokkos::Profiling::popRegion(); \ + } \ +}; + +KOKKOSBLAS2_DGEMV_BLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS2_DGEMV_BLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS2_DGEMV_BLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS2_DGEMV_BLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS2_SGEMV_BLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS2_SGEMV_BLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS2_SGEMV_BLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS2_SGEMV_BLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS2_ZGEMV_BLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZGEMV_BLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS2_ZGEMV_BLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS2_ZGEMV_BLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS2_CGEMV_BLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS2_CGEMV_BLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS2_CGEMV_BLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS2_CGEMV_BLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace, false) + +} +} +#endif // KOKKOSKERNELS_ENABLE_TPL_BLAS + +// cuBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +#include + +namespace KokkosBlas { +namespace Impl { + +#define KOKKOSBLAS2_DGEMV_CUBLAS( LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL ) \ +template \ +struct GEMV< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > YViewType; \ + \ + static void \ + gemv (const char trans[], \ + typename AViewType::const_value_type& alpha, \ + const AViewType& A, \ + const XViewType& X, \ + typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,double]"); \ + const int M = static_cast (A.extent(0)); \ + const int N = static_cast (A.extent(1)); \ + constexpr int one = 1; \ + int strides[2]; \ + \ + bool A_is_lr = std::is_same::value; \ + A.stride(strides); \ + const int LDA = strides[A_is_lr?0:1]; \ + \ + cublasOperation_t transa; \ + if ((trans[0]=='N')||(trans[0]=='n')) \ + transa = CUBLAS_OP_N; \ + else if ((trans[0]=='T')||(trans[0]=='t')) \ + transa = CUBLAS_OP_T; \ + else \ + transa = CUBLAS_OP_C; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasDgemv(s.handle, transa, M, N, &alpha, A.data(), LDA, X.data(), one, &beta, Y.data(), one); \ + Kokkos::Profiling::popRegion(); \ + } \ +}; + +#define KOKKOSBLAS2_SGEMV_CUBLAS( LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL ) \ +template \ +struct GEMV< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > YViewType; \ + \ + static void \ + gemv (const char trans[], \ + typename AViewType::const_value_type& alpha, \ + const AViewType& A, \ + const XViewType& X, \ + typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,float]"); \ + const int M = static_cast (A.extent(0)); \ + const int N = static_cast (A.extent(1)); \ + constexpr int one = 1; \ + int strides[2]; \ + \ + bool A_is_lr = std::is_same::value; \ + A.stride(strides); \ + const int LDA = strides[A_is_lr?0:1]; \ + \ + cublasOperation_t transa; \ + if ((trans[0]=='N')||(trans[0]=='n')) \ + transa = CUBLAS_OP_N; \ + else if ((trans[0]=='T')||(trans[0]=='t')) \ + transa = CUBLAS_OP_T; \ + else \ + transa = CUBLAS_OP_C; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasSgemv(s.handle, transa, M, N, &alpha, A.data(), LDA, X.data(), one, &beta, Y.data(), one); \ + Kokkos::Profiling::popRegion(); \ + } \ +}; + +#define KOKKOSBLAS2_ZGEMV_CUBLAS( LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL ) \ +template \ +struct GEMV< \ + Kokkos::View**, LAYOUTA, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTX, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTY, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > YViewType; \ + \ + static void \ + gemv (const char trans[], \ + typename AViewType::const_value_type& alpha, \ + const AViewType& A, \ + const XViewType& X, \ + typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,complex]"); \ + const int M = static_cast (A.extent(0)); \ + const int N = static_cast (A.extent(1)); \ + constexpr int one = 1; \ + int strides[2]; \ + \ + bool A_is_lr = std::is_same::value; \ + A.stride(strides); \ + const int LDA = strides[A_is_lr?0:1]; \ + \ + cublasOperation_t transa; \ + if ((trans[0]=='N')||(trans[0]=='n')) \ + transa = CUBLAS_OP_N; \ + else if ((trans[0]=='T')||(trans[0]=='t')) \ + transa = CUBLAS_OP_T; \ + else \ + transa = CUBLAS_OP_C; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasZgemv(s.handle, transa, M, N, reinterpret_cast(&alpha), reinterpret_cast(A.data()), LDA, reinterpret_cast(X.data()), one, reinterpret_cast(&beta), reinterpret_cast(Y.data()), one); \ + Kokkos::Profiling::popRegion(); \ + } \ +}; \ + +#define KOKKOSBLAS2_CGEMV_CUBLAS( LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, ETI_SPEC_AVAIL ) \ +template \ +struct GEMV< \ + Kokkos::View**, LAYOUTA, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTX, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTY, Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > YViewType; \ + \ + static void \ + gemv (const char trans[], \ + typename AViewType::const_value_type& alpha, \ + const AViewType& A, \ + const XViewType& X, \ + typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_CUBLAS,complex]"); \ + const int M = static_cast (A.extent(0)); \ + const int N = static_cast (A.extent(1)); \ + constexpr int one = 1; \ + int strides[2]; \ + \ + bool A_is_lr = std::is_same::value; \ + A.stride(strides); \ + const int LDA = strides[A_is_lr?0:1]; \ + \ + cublasOperation_t transa; \ + if ((trans[0]=='N')||(trans[0]=='n')) \ + transa = CUBLAS_OP_N; \ + else if ((trans[0]=='T')||(trans[0]=='t')) \ + transa = CUBLAS_OP_T; \ + else \ + transa = CUBLAS_OP_C; \ + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasCgemv(s.handle, transa, M, N, reinterpret_cast(&alpha), reinterpret_cast(A.data()), LDA, reinterpret_cast(X.data()), one, reinterpret_cast(&beta), reinterpret_cast(Y.data()), one); \ + Kokkos::Profiling::popRegion(); \ + } \ +}; + +KOKKOSBLAS2_DGEMV_CUBLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS2_DGEMV_CUBLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS2_DGEMV_CUBLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS2_DGEMV_CUBLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS2_SGEMV_CUBLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS2_SGEMV_CUBLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS2_SGEMV_CUBLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS2_SGEMV_CUBLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS2_ZGEMV_CUBLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS2_ZGEMV_CUBLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS2_ZGEMV_CUBLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS2_ZGEMV_CUBLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + +KOKKOSBLAS2_CGEMV_CUBLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS2_CGEMV_CUBLAS( Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +KOKKOSBLAS2_CGEMV_CUBLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, true) +KOKKOSBLAS2_CGEMV_CUBLAS( Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::CudaSpace, false) + } } +#endif // KOKKOSKERNELS_ENABLE_TPL_CUBLAS #endif From 8c138c700c43ee908c1aef5fc988ed7e9a7cd26c Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Tue, 7 Aug 2018 12:52:55 -0600 Subject: [PATCH 3/7] Delete test --- test | 1 - 1 file changed, 1 deletion(-) delete mode 100644 test diff --git a/test b/test deleted file mode 100644 index 718f4d2ff5..0000000000 --- a/test +++ /dev/null @@ -1 +0,0 @@ -t From 0917678c3ce1b7cbc3cac3546bbaf5392e38ab17 Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Tue, 14 Aug 2018 09:09:02 -0600 Subject: [PATCH 4/7] Use R.data() instead () in CUBLAS asum() --- src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp index 4044a4d677..c791454dcb 100644 --- a/src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp @@ -228,7 +228,7 @@ Kokkos::View, \ const int N = static_cast (numElems); \ constexpr int one = 1; \ KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - cublasDasum(s.handle, N, X.data(), one, &R()); \ + cublasDasum(s.handle, N, X.data(), one, R.data()); \ } else { \ Nrm1::nrm1(R,X); \ } \ @@ -258,7 +258,7 @@ Kokkos::View, \ const int N = static_cast (numElems); \ constexpr int one = 1; \ KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - cublasSasum(s.handle, N, X.data(), one, &R()); \ + cublasSasum(s.handle, N, X.data(), one, R.data()); \ } else { \ Nrm1::nrm1(R,X); \ } \ @@ -288,7 +288,7 @@ Kokkos::View*, LAYOUT, Kokkos::Device (numElems); \ constexpr int one = 1; \ KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - cublasDzasum(s.handle, N, reinterpret_cast(X.data()), one, &R()); \ + cublasDzasum(s.handle, N, reinterpret_cast(X.data()), one, R.data()); \ } else { \ Nrm1::nrm1(R,X); \ } \ @@ -318,7 +318,7 @@ Kokkos::View*, LAYOUT, Kokkos::Device (numElems); \ constexpr int one = 1; \ KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - cublasScasum(s.handle, N, reinterpret_cast(X.data()), one, &R()); \ + cublasScasum(s.handle, N, reinterpret_cast(X.data()), one, R.data()); \ } else { \ Nrm1::nrm1(R,X); \ } \ From a14f7a8f5e87d495c5b67f3e2fd135ba5a049e01 Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Tue, 14 Aug 2018 09:13:30 -0600 Subject: [PATCH 5/7] Use unmanaged rank-0 View for the retrieved result and deep_copy for zero assignment --- src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp index f8c0de56dd..8ee9d5f19f 100644 --- a/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp @@ -233,7 +233,7 @@ Kokkos::View, \ static void nrminf (RV& R, const XV& X) \ { \ const size_type numElems = X.extent(0); \ - if (numElems == 0) { R() = 0.0; return; } \ + if (numElems == 0) { Kokkos::deep_copy (R, 0.0); return; } \ if (numElems < static_cast (INT_MAX)) { \ nrminf_print_specialization(); \ const int N = static_cast (numElems); \ @@ -266,7 +266,7 @@ Kokkos::View, \ static void nrminf (RV& R, const XV& X) \ { \ const size_type numElems = X.extent(0); \ - if (numElems == 0) { R() = 0.0f; return; } \ + if (numElems == 0) { Kokkos::deep_copy (R, 0.0f);; return; } \ if (numElems < static_cast (INT_MAX)) { \ nrminf_print_specialization(); \ const int N = static_cast (numElems); \ @@ -300,7 +300,7 @@ Kokkos::View*, LAYOUT, Kokkos::Device (INT_MAX)) { \ nrminf_print_specialization(); \ const int N = static_cast (numElems); \ @@ -308,7 +308,8 @@ Kokkos::View*, LAYOUT, Kokkos::Device(X.data()), one, &idx); \ - Kokkos::View, LAYOUT, Kokkos::HostSpace > R_cplx("R_cplx",1); \ + Kokkos::complex R_cplx_val {0.0, 0.0}; \ + Kokkos::View, LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits > R_cplx (&R_cplx_val); \ Kokkos::deep_copy(R_cplx, subview(X,idx-1)); \ R() = IPT::norm(R_cplx()); \ } else { \ @@ -336,7 +337,7 @@ Kokkos::View*, LAYOUT, Kokkos::Device (INT_MAX)) { \ nrminf_print_specialization(); \ const int N = static_cast (numElems); \ @@ -344,7 +345,8 @@ Kokkos::View*, LAYOUT, Kokkos::Device(X.data()), one, &idx); \ - Kokkos::View, LAYOUT, Kokkos::HostSpace > R_cplx("R_cplx",1);; \ + Kokkos::complex R_cplx_val {0.0f, 0.0f}; \ + Kokkos::View, LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits > R_cplx (&R_cplx_val); \ Kokkos::deep_copy(R_cplx, subview(X,idx-1)); \ R() = IPT::norm(R_cplx()); \ } else { \ From 067a9da172eb7ad0f2789e6e68bd21f8a4ce9014 Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Tue, 14 Aug 2018 09:14:38 -0600 Subject: [PATCH 6/7] Handle LDA equal to 0 --- src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp index 2d5f346d91..ad58157d02 100644 --- a/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp @@ -310,7 +310,7 @@ struct GEMV< \ \ bool A_is_lr = std::is_same::value; \ A.stride(strides); \ - const int LDA = strides[A_is_lr?0:1]; \ + const int LDA = (strides[A_is_lr?0:1] == 0)? int(1): strides[A_is_lr?0:1]; \ \ cublasOperation_t transa; \ if ((trans[0]=='N')||(trans[0]=='n')) \ @@ -359,7 +359,7 @@ struct GEMV< \ \ bool A_is_lr = std::is_same::value; \ A.stride(strides); \ - const int LDA = strides[A_is_lr?0:1]; \ + const int LDA = (strides[A_is_lr?0:1] == 0)? int(1): strides[A_is_lr?0:1]; \ \ cublasOperation_t transa; \ if ((trans[0]=='N')||(trans[0]=='n')) \ @@ -408,7 +408,7 @@ struct GEMV< \ \ bool A_is_lr = std::is_same::value; \ A.stride(strides); \ - const int LDA = strides[A_is_lr?0:1]; \ + const int LDA = (strides[A_is_lr?0:1] == 0)? int(1): strides[A_is_lr?0:1]; \ \ cublasOperation_t transa; \ if ((trans[0]=='N')||(trans[0]=='n')) \ @@ -457,7 +457,7 @@ struct GEMV< \ \ bool A_is_lr = std::is_same::value; \ A.stride(strides); \ - const int LDA = strides[A_is_lr?0:1]; \ + const int LDA = (strides[A_is_lr?0:1] == 0)? int(1): strides[A_is_lr?0:1]; \ \ cublasOperation_t transa; \ if ((trans[0]=='N')||(trans[0]=='n')) \ From b8ee388d1f1b3783b72fb0400db50fb64cbfa08f Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Tue, 14 Aug 2018 15:44:32 -0600 Subject: [PATCH 7/7] Update for BLAS library change on White --- scripts/test_all_sandia | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/scripts/test_all_sandia b/scripts/test_all_sandia index 9a49ff8ecc..2481b02141 100755 --- a/scripts/test_all_sandia +++ b/scripts/test_all_sandia @@ -245,10 +245,9 @@ elif [ "$MACHINE" = "white" ]; then BASE_MODULE_LIST="/" IBM_MODULE_LIST="/xl/" - CUDA_MODULE_LIST="/,gcc/5.4.0" - CUDA_MODULE_LIST2="/,gcc/6.3.0,ibm/xl/13.1.6" + CUDA_MODULE_LIST="/,gcc/6.4.0,ibm/xl/16.1.0" - module load netlib + module load netlib/3.8.0/gcc export BLAS_LIBRARIES="${BLAS_ROOT}/lib/libblas.a" # Don't do pthread on white. @@ -256,9 +255,9 @@ elif [ "$MACHINE" = "white" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "ibm/13.1.6 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" - "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" - "cuda/9.0.103 $CUDA_MODULE_LIST2 $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" + "cuda/9.0.103 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) if [ -z "$ARCH_FLAG" ]; then