Skip to content

Commit

Permalink
Merge pull request #274 from kokkos/issue-247
Browse files Browse the repository at this point in the history
TPL Support for BLAS functions (nrm1, nrminf, scal, gemv) using CuBLAS (Issue #247)
  • Loading branch information
srajama1 authored Aug 15, 2018
2 parents ff6b93f + b8ee388 commit 9a9845e
Show file tree
Hide file tree
Showing 9 changed files with 1,113 additions and 1 deletion.
2 changes: 1 addition & 1 deletion scripts/test_all_sandia
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ elif [ "$MACHINE" = "white" ]; then
IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.4.0,ibm/xl/16.1.0"

module load netlib
module load netlib/3.8.0/gcc
export BLAS_LIBRARIES="${BLAS_ROOT}/lib/libblas.a"

# Don't do pthread on white.
Expand Down
35 changes: 35 additions & 0 deletions src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_avail.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,49 @@ Kokkos::View<const SCALAR*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1> { enum : bool { value = true }; };

#if defined (KOKKOSKERNELS_INST_DOUBLE)
KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS( double, Kokkos::LayoutLeft, Kokkos::HostSpace)
#endif
#if defined (KOKKOSKERNELS_INST_FLOAT)
KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS( float, Kokkos::LayoutLeft, Kokkos::HostSpace)
#endif
#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_)
KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS( Kokkos::complex<double>, Kokkos::LayoutLeft, Kokkos::HostSpace)
#endif
#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_)
KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_BLAS( Kokkos::complex<float>, Kokkos::LayoutLeft, Kokkos::HostSpace)
#endif

#endif

// cuBLAS
#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS
// double
#define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS( SCALAR, LAYOUT, MEMSPACE ) \
template<class ExecSpace> \
struct nrm1_tpl_spec_avail< \
Kokkos::View<typename Kokkos::Details::InnerProductSpaceTraits<SCALAR>::mag_type, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const SCALAR*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1> { enum : bool { value = true }; };

#if defined (KOKKOSKERNELS_INST_DOUBLE)
KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS( double, Kokkos::LayoutLeft, Kokkos::CudaSpace)
#endif
#if defined (KOKKOSKERNELS_INST_FLOAT)
KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS( float, Kokkos::LayoutLeft, Kokkos::CudaSpace)
#endif
#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_)
KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex<double>, Kokkos::LayoutLeft, Kokkos::CudaSpace)
#endif
#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_)
KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex<float>, Kokkos::LayoutLeft, Kokkos::CudaSpace)
#endif

#endif


}
}
#endif
144 changes: 144 additions & 0 deletions src/impl/tpls/KokkosBlas1_nrm1_tpl_spec_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,4 +198,148 @@ KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_BLAS( Kokkos::LayoutLeft, Kokkos::HostSpace, fal

#endif

// cuBLAS
#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS
#include<KokkosBlas_tpl_spec.hpp>

namespace KokkosBlas {
namespace Impl {

#define KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \
template<class ExecSpace> \
struct Nrm1< \
Kokkos::View<double, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const double*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1,true, ETI_SPEC_AVAIL > { \
\
typedef Kokkos::View<double, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > RV; \
typedef Kokkos::View<const double*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > XV; \
typedef typename XV::size_type size_type; \
\
static void nrm1 (RV& R, const XV& X) \
{ \
const size_type numElems = X.extent(0); \
if (numElems < static_cast<size_type> (INT_MAX)) { \
nrm1_print_specialization<RV,XV>(); \
const int N = static_cast<int> (numElems); \
constexpr int one = 1; \
KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \
cublasDasum(s.handle, N, X.data(), one, R.data()); \
} else { \
Nrm1<RV,XV,1,false,ETI_SPEC_AVAIL>::nrm1(R,X); \
} \
} \
};

#define KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \
template<class ExecSpace> \
struct Nrm1< \
Kokkos::View<float, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const float*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1,true, ETI_SPEC_AVAIL > { \
\
typedef Kokkos::View<float, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > RV; \
typedef Kokkos::View<const float*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > XV; \
typedef typename XV::size_type size_type; \
\
static void nrm1 (RV& R, const XV& X) \
{ \
const size_type numElems = X.extent(0); \
if (numElems < static_cast<size_type> (INT_MAX)) { \
nrm1_print_specialization<RV,XV>(); \
const int N = static_cast<int> (numElems); \
constexpr int one = 1; \
KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \
cublasSasum(s.handle, N, X.data(), one, R.data()); \
} else { \
Nrm1<RV,XV,1,false,ETI_SPEC_AVAIL>::nrm1(R,X); \
} \
} \
};

#define KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \
template<class ExecSpace> \
struct Nrm1< \
Kokkos::View<double, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const Kokkos::complex<double>*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1,true, ETI_SPEC_AVAIL > { \
\
typedef Kokkos::View<double, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > RV; \
typedef Kokkos::View<const Kokkos::complex<double>*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > XV; \
typedef typename XV::size_type size_type; \
\
static void nrm1 (RV& R, const XV& X) \
{ \
const size_type numElems = X.extent(0); \
if (numElems < static_cast<size_type> (INT_MAX)) { \
nrm1_print_specialization<RV,XV>(); \
const int N = static_cast<int> (numElems); \
constexpr int one = 1; \
KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \
cublasDzasum(s.handle, N, reinterpret_cast<const cuDoubleComplex*>(X.data()), one, R.data()); \
} else { \
Nrm1<RV,XV,1,false,ETI_SPEC_AVAIL>::nrm1(R,X); \
} \
} \
};

#define KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \
template<class ExecSpace> \
struct Nrm1< \
Kokkos::View<float, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const Kokkos::complex<float>*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1,true, ETI_SPEC_AVAIL > { \
\
typedef Kokkos::View<float, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > RV; \
typedef Kokkos::View<const Kokkos::complex<float>*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > XV; \
typedef typename XV::size_type size_type; \
\
static void nrm1 (RV& R, const XV& X) \
{ \
const size_type numElems = X.extent(0); \
if (numElems < static_cast<size_type> (INT_MAX)) { \
nrm1_print_specialization<RV,XV>(); \
const int N = static_cast<int> (numElems); \
constexpr int one = 1; \
KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \
cublasScasum(s.handle, N, reinterpret_cast<const cuComplex*>(X.data()), one, R.data()); \
} else { \
Nrm1<RV,XV,1,false,ETI_SPEC_AVAIL>::nrm1(R,X); \
} \
} \
};

KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true)
KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false)

KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true)
KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false)

KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true)
KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false)

KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true)
KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false)

}
}

#endif

#endif
34 changes: 34 additions & 0 deletions src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,45 @@ Kokkos::View<const SCALAR*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1> { enum : bool { value = true }; };

#if defined (KOKKOSKERNELS_INST_DOUBLE)
KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS( double, Kokkos::LayoutLeft, Kokkos::HostSpace)
#endif
#if defined (KOKKOSKERNELS_INST_FLOAT)
KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS( float, Kokkos::LayoutLeft, Kokkos::HostSpace)
#endif
#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_)
KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS( Kokkos::complex<double>, Kokkos::LayoutLeft, Kokkos::HostSpace)
#endif
#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_)
KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS( Kokkos::complex<float>, Kokkos::LayoutLeft, Kokkos::HostSpace)
#endif

#endif

// cuBLAS
#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS
// double
#define KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( SCALAR, LAYOUT, MEMSPACE ) \
template<class ExecSpace> \
struct nrminf_tpl_spec_avail< \
Kokkos::View<typename Kokkos::Details::InnerProductSpaceTraits<SCALAR>::mag_type, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const SCALAR*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1> { enum : bool { value = true }; };

#if defined (KOKKOSKERNELS_INST_DOUBLE)
KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( double, Kokkos::LayoutLeft, Kokkos::CudaSpace)
#endif
#if defined (KOKKOSKERNELS_INST_FLOAT)
KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( float, Kokkos::LayoutLeft, Kokkos::CudaSpace)
#endif
#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_)
KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex<double>, Kokkos::LayoutLeft, Kokkos::CudaSpace)
#endif
#if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_)
KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex<float>, Kokkos::LayoutLeft, Kokkos::CudaSpace)
#endif

#endif

Expand Down
Loading

0 comments on commit 9a9845e

Please sign in to comment.