Skip to content

Commit

Permalink
Fixed nrm1 (#914), improved blas tests
Browse files Browse the repository at this point in the history
- Made nrm1 compute the sum of all absolute real and imaginary parts
  to match BLAS/MKL/CUBLAS behavior, rather than sum of magnitudes.
- Improved unit test coverage
  - verify each output element, not just dotprod of output with itself
  - for complex, create randomized inputs with nonzero imaginary parts
  - enable conj-trans mode testing for gemv
  • Loading branch information
brian-kelley committed Mar 19, 2021
1 parent d7fc6d8 commit 5f45262
Show file tree
Hide file tree
Showing 28 changed files with 556 additions and 602 deletions.
40 changes: 21 additions & 19 deletions src/blas/impl/KokkosBlas1_nrm1_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,23 +52,23 @@ namespace KokkosBlas {
namespace Impl {

//
// nrm1_squared
// nrm1
//

/// \brief 2-norm (squared) functor for single vectors.
/// \brief 1-norm functor for single vectors.
///
/// \tparam RV 0-D output View
/// \tparam XV 1-D input View
/// \tparam SizeType Index type. Use int (32 bits) if possible.
template<class RV, class XV, class SizeType = typename XV::size_type>
struct V_Nrm1_Functor
{
typedef typename XV::execution_space execution_space;
typedef SizeType size_type;
typedef typename XV::non_const_value_type xvalue_type;
typedef Kokkos::Details::InnerProductSpaceTraits<xvalue_type> IPT;
typedef Kokkos::Details::ArithTraits<typename IPT::mag_type> AT;
typedef typename IPT::mag_type value_type;
typedef typename XV::execution_space execution_space;
typedef SizeType size_type;
typedef typename XV::non_const_value_type xvalue_type;
typedef Kokkos::ArithTraits<xvalue_type> XAT;
typedef typename XAT::mag_type value_type;
typedef Kokkos::ArithTraits<value_type> MAT;

typename XV::const_type m_x;

Expand All @@ -94,12 +94,13 @@ struct V_Nrm1_Functor
KOKKOS_INLINE_FUNCTION
void operator() (const size_type& i, value_type& sum) const
{
sum += IPT::norm (m_x(i));
xvalue_type val = m_x(i);
sum += MAT::abs(XAT::real(val)) + MAT::abs(XAT::imag(val));
}

KOKKOS_INLINE_FUNCTION void init (value_type& update) const
{
update = AT::zero ();
update = MAT::zero ();
}

KOKKOS_INLINE_FUNCTION void
Expand All @@ -117,7 +118,7 @@ struct V_Nrm1_Functor
}
};

/// \brief Column-wise 2-norm functor for multivectors; works for
/// \brief Column-wise 1-norm functor for multivectors; works for
/// any layout, but best performance with LayoutRight.
///
/// \tparam RV 1-D output View
Expand All @@ -126,12 +127,12 @@ struct V_Nrm1_Functor
template<class RV, class XMV, class SizeType = typename XMV::size_type>
struct MV_Nrm1_Right_FunctorVector
{
typedef typename XMV::execution_space execution_space;
typedef SizeType size_type;
typedef typename XMV::non_const_value_type xvalue_type;
typedef Kokkos::Details::InnerProductSpaceTraits<xvalue_type> IPT;
typedef Kokkos::Details::ArithTraits<typename IPT::mag_type> AT;
typedef typename IPT::mag_type value_type[];
typedef typename XMV::execution_space execution_space;
typedef SizeType size_type;
typedef typename XMV::non_const_value_type xvalue_type;
typedef Kokkos::ArithTraits<xvalue_type> XAT;
typedef Kokkos::ArithTraits<typename XAT::mag_type> MAT;
typedef typename XAT::mag_type value_type[];

size_type value_count;
typename XMV::const_type m_x;
Expand Down Expand Up @@ -166,7 +167,8 @@ struct MV_Nrm1_Right_FunctorVector
#pragma vector always
#endif
for (size_type j = 0; j < numVecs; ++j) {
sum[j] += IPT::norm (m_x(i,j));
xvalue_type val = m_x(i, j);
sum[j] += MAT::abs(XAT::real(val)) + MAT::abs(XAT::imag(val));
}
}

Expand All @@ -181,7 +183,7 @@ struct MV_Nrm1_Right_FunctorVector
#pragma vector always
#endif
for (size_type j = 0; j < numVecs; ++j) {
update[j] = AT::zero ();
update[j] = MAT::zero ();
}
}

Expand Down
19 changes: 0 additions & 19 deletions src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_avail.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,25 +77,6 @@ KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_BLAS( Kokkos::complex<float>, Kokkos::LayoutL

#endif

// cuBLAS
#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS
// double
#define KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( SCALAR, LAYOUT, MEMSPACE ) \
template<class ExecSpace> \
struct nrminf_tpl_spec_avail< \
Kokkos::View<typename Kokkos::Details::InnerProductSpaceTraits<SCALAR>::mag_type, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const SCALAR*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1> { enum : bool { value = true }; };

KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( double, Kokkos::LayoutLeft, Kokkos::CudaSpace)
KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( float, Kokkos::LayoutLeft, Kokkos::CudaSpace)
KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex<double>, Kokkos::LayoutLeft, Kokkos::CudaSpace)
KOKKOSBLAS1_NRMINF_TPL_SPEC_AVAIL_CUBLAS( Kokkos::complex<float>, Kokkos::LayoutLeft, Kokkos::CudaSpace)

#endif

}
}
#endif
178 changes: 4 additions & 174 deletions src/impl/tpls/KokkosBlas1_nrminf_tpl_spec_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ Kokkos::View<const double*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
typedef Kokkos::View<const double*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > XV; \
typedef typename XV::size_type size_type; \
typedef Kokkos::Details::InnerProductSpaceTraits<double> IPT; \
\
static void nrminf (RV& R, const XV& X) \
{ \
Expand All @@ -94,7 +95,7 @@ Kokkos::View<const double*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
int N = numElems; \
int one = 1; \
int idx = HostBlas<double>::iamax(N,X.data(),one)-1; \
R() = X(idx); \
R() = IPT::norm(X(idx)); \
} else { \
NrmInf<RV,XV,1,false,ETI_SPEC_AVAIL>::nrminf(R,X); \
} \
Expand All @@ -116,6 +117,7 @@ Kokkos::View<const float*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
typedef Kokkos::View<const float*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > XV; \
typedef typename XV::size_type size_type; \
typedef Kokkos::Details::InnerProductSpaceTraits<float> IPT; \
\
static void nrminf (RV& R, const XV& X) \
{ \
Expand All @@ -127,7 +129,7 @@ Kokkos::View<const float*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
int N = numElems; \
int one = 1; \
int idx = HostBlas<float>::iamax(N,X.data(),one)-1; \
R() = X(idx); \
R() = IPT::norm(X(idx)); \
} else { \
NrmInf<RV,XV,1,false,ETI_SPEC_AVAIL>::nrminf(R,X); \
} \
Expand Down Expand Up @@ -220,176 +222,4 @@ KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_BLAS( Kokkos::LayoutLeft, Kokkos::HostSpace, f

#endif

// cuBLAS
#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS
#include<KokkosBlas_tpl_spec.hpp>

namespace KokkosBlas {
namespace Impl {

#define KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \
template<class ExecSpace> \
struct NrmInf< \
Kokkos::View<double, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const double*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1,true, ETI_SPEC_AVAIL > { \
\
typedef Kokkos::View<double, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > RV; \
typedef Kokkos::View<const double*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > XV; \
typedef typename XV::size_type size_type; \
\
static void nrminf (RV& R, const XV& X) \
{ \
Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_CUBLAS,double]"); \
const size_type numElems = X.extent(0); \
if (numElems == 0) { Kokkos::deep_copy (R, 0.0); return; } \
if (numElems < static_cast<size_type> (INT_MAX)) { \
nrminf_print_specialization<RV,XV>(); \
const int N = static_cast<int> (numElems); \
constexpr int one = 1; \
int idx; \
KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \
cublasIdamax(s.handle, N, X.data(), one, &idx); \
Kokkos::deep_copy(R, subview(X,idx-1)); \
} else { \
NrmInf<RV,XV,1,false,ETI_SPEC_AVAIL>::nrminf(R,X); \
} \
Kokkos::Profiling::popRegion(); \
} \
};

#define KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \
template<class ExecSpace> \
struct NrmInf< \
Kokkos::View<float, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const float*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1,true, ETI_SPEC_AVAIL > { \
\
typedef Kokkos::View<float, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > RV; \
typedef Kokkos::View<const float*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > XV; \
typedef typename XV::size_type size_type; \
\
static void nrminf (RV& R, const XV& X) \
{ \
Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_CUBLAS,float]"); \
const size_type numElems = X.extent(0); \
if (numElems == 0) { Kokkos::deep_copy (R, 0.0f);; return; } \
if (numElems < static_cast<size_type> (INT_MAX)) { \
nrminf_print_specialization<RV,XV>(); \
const int N = static_cast<int> (numElems); \
constexpr int one = 1; \
int idx; \
KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \
cublasIsamax(s.handle, N, X.data(), one, &idx); \
Kokkos::deep_copy(R, subview(X,idx-1)); \
} else { \
NrmInf<RV,XV,1,false,ETI_SPEC_AVAIL>::nrminf(R,X); \
} \
Kokkos::Profiling::popRegion(); \
} \
};

#define KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \
template<class ExecSpace> \
struct NrmInf< \
Kokkos::View<double, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const Kokkos::complex<double>*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1,true, ETI_SPEC_AVAIL > { \
\
typedef Kokkos::View<double, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > RV; \
typedef Kokkos::View<const Kokkos::complex<double>*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > XV; \
typedef typename XV::size_type size_type; \
typedef Kokkos::Details::InnerProductSpaceTraits<Kokkos::complex<double>> IPT; \
\
static void nrminf (RV& R, const XV& X) \
{ \
Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_CUBLAS,complex<double>]"); \
const size_type numElems = X.extent(0); \
if (numElems == 0) { Kokkos::deep_copy (R, 0.0); return; } \
if (numElems < static_cast<size_type> (INT_MAX)) { \
nrminf_print_specialization<RV,XV>(); \
const int N = static_cast<int> (numElems); \
constexpr int one = 1; \
int idx; \
KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \
cublasIzamax(s.handle, N, reinterpret_cast<const cuDoubleComplex*>(X.data()), one, &idx); \
Kokkos::complex<double> R_cplx_val {0.0, 0.0}; \
Kokkos::View<Kokkos::complex<double>, LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > R_cplx (&R_cplx_val); \
Kokkos::deep_copy(R_cplx, subview(X,idx-1)); \
R() = IPT::norm(R_cplx()); \
} else { \
NrmInf<RV,XV,1,false,ETI_SPEC_AVAIL>::nrminf(R,X); \
} \
Kokkos::Profiling::popRegion(); \
} \
};

#define KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_CUBLAS( LAYOUT, MEMSPACE, ETI_SPEC_AVAIL ) \
template<class ExecSpace> \
struct NrmInf< \
Kokkos::View<float, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
Kokkos::View<const Kokkos::complex<float>*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> >, \
1,true, ETI_SPEC_AVAIL > { \
\
typedef Kokkos::View<float, LAYOUT, Kokkos::HostSpace, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > RV; \
typedef Kokkos::View<const Kokkos::complex<float>*, LAYOUT, Kokkos::Device<ExecSpace, MEMSPACE>, \
Kokkos::MemoryTraits<Kokkos::Unmanaged> > XV; \
typedef typename XV::size_type size_type; \
typedef Kokkos::Details::InnerProductSpaceTraits<Kokkos::complex<float>> IPT; \
\
static void nrminf (RV& R, const XV& X) \
{ \
Kokkos::Profiling::pushRegion("KokkosBlas::nrminf[TPL_CUBLAS,complex<float>]"); \
const size_type numElems = X.extent(0); \
if (numElems == 0) { Kokkos::deep_copy (R, 0.0f); return; } \
if (numElems < static_cast<size_type> (INT_MAX)) { \
nrminf_print_specialization<RV,XV>(); \
const int N = static_cast<int> (numElems); \
constexpr int one = 1; \
int idx; \
KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \
cublasIcamax(s.handle, N, reinterpret_cast<const cuComplex*>(X.data()), one, &idx); \
Kokkos::complex<float> R_cplx_val {0.0f, 0.0f}; \
Kokkos::View<Kokkos::complex<float>, LAYOUT, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > R_cplx (&R_cplx_val); \
Kokkos::deep_copy(R_cplx, subview(X,idx-1)); \
R() = IPT::norm(R_cplx()); \
} else { \
NrmInf<RV,XV,1,false,ETI_SPEC_AVAIL>::nrminf(R,X); \
} \
Kokkos::Profiling::popRegion(); \
} \
};

KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true)
KOKKOSBLAS1_DNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false)

KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true)
KOKKOSBLAS1_SNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false)

KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true)
KOKKOSBLAS1_ZNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false)

KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, true)
KOKKOSBLAS1_CNRMINF_TPL_SPEC_DECL_CUBLAS( Kokkos::LayoutLeft, Kokkos::CudaSpace, false)

}
}

#endif

#endif
31 changes: 28 additions & 3 deletions test_common/KokkosKernels_TestUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
#define KOKKOSKERNELS_TEST_UTILS_HPP

#include "KokkosKernels_Utils.hpp"
#include "Kokkos_ArithTraits.hpp"

namespace Test {
template<class ViewType, bool strided = std::is_same<typename ViewType::array_layout, Kokkos::LayoutStride>::value>
struct multivector_layout_adapter;
Expand Down Expand Up @@ -83,16 +85,15 @@ namespace Test {
template<class Scalar1, class Scalar2, class Scalar3>
void EXPECT_NEAR_KK(Scalar1 val1, Scalar2 val2, Scalar3 tol) {
typedef Kokkos::Details::ArithTraits<Scalar1> AT1;
typedef Kokkos::Details::ArithTraits<Scalar2> AT2;
typedef Kokkos::Details::ArithTraits<Scalar3> AT3;
EXPECT_NEAR(double(AT1::abs(val1)),double(AT2::abs(val2)),double(AT3::abs(tol)));
EXPECT_LE((double) AT1::abs(val1 - val2), (double) AT3::abs(tol));
}

template<class ViewType1, class ViewType2, class Scalar>
void EXPECT_NEAR_KK_1DVIEW(ViewType1 v1, ViewType2 v2, Scalar tol) {
size_t v1_size = v1.extent(0);
size_t v2_size = v2.extent(0);
EXPECT_NEAR_KK(v1_size, v2_size, 0);
EXPECT_EQ(v1_size, v2_size);


typename ViewType1::HostMirror h_v1 = Kokkos::create_mirror_view(v1);
Expand Down Expand Up @@ -227,5 +228,29 @@ namespace Test {
constexpr static double value = 0.0009765625F;
};
#endif // KOKKOS_HALF_T_IS_FLOAT

//Get the interval for Kokkos::fill_random
//For real, interval is (-mag, mag)
//For complex, both real and imaginary parts will have interval (-mag, mag)
template<typename Scalar>
inline void getRandomBounds(double mag, Scalar& start, Scalar& end)
{
start = -mag * Kokkos::ArithTraits<Scalar>::one();
end = mag * Kokkos::ArithTraits<Scalar>::one();
}

template<>
inline void getRandomBounds(double mag, Kokkos::complex<float>& start, Kokkos::complex<float>& end)
{
start = Kokkos::complex<float>(-mag, -mag);
end = Kokkos::complex<float>(mag, mag);
}

template<>
inline void getRandomBounds(double mag, Kokkos::complex<double>& start, Kokkos::complex<double>& end)
{
start = Kokkos::complex<double>(-mag, -mag);
end = Kokkos::complex<double>(mag, mag);
}
}
#endif
Loading

0 comments on commit 5f45262

Please sign in to comment.