From e48d61b84a7fe2ff4ff638927c96690a5f710079 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Tue, 14 Apr 2020 11:59:13 -0700 Subject: [PATCH 1/9] Controls: first cut that implements a path for cusparse spmv merge alg --- cmake/kokkoskernels_tpls.cmake | 8 -- src/CMakeLists.txt | 1 + src/common/KokkosKernels_Controls.hpp | 130 +++++++++++++++++ src/impl/tpls/KokkosKernels_tpl_handles.cpp | 133 ++++++++++++++++++ .../tpls/KokkosKernels_tpl_handles_decl.hpp | 66 +++++++++ .../tpls/KokkosKernels_tpl_handles_def.hpp | 72 ++++++++++ .../tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 26 ++-- src/sparse/KokkosSparse_spmv.hpp | 41 ++++-- src/sparse/impl/KokkosSparse_spmv_impl.hpp | 38 ++--- src/sparse/impl/KokkosSparse_spmv_spec.hpp | 35 ++--- unit_test/sparse/Test_Sparse_spmv.hpp | 79 +++++++++++ 11 files changed, 566 insertions(+), 63 deletions(-) create mode 100644 src/common/KokkosKernels_Controls.hpp create mode 100644 src/impl/tpls/KokkosKernels_tpl_handles.cpp create mode 100644 src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp create mode 100644 src/impl/tpls/KokkosKernels_tpl_handles_def.hpp diff --git a/cmake/kokkoskernels_tpls.cmake b/cmake/kokkoskernels_tpls.cmake index 835fcf9a33..08230dd987 100644 --- a/cmake/kokkoskernels_tpls.cmake +++ b/cmake/kokkoskernels_tpls.cmake @@ -489,11 +489,3 @@ STRING(REPLACE ";" "\n" KOKKOSKERNELS_TPL_EXPORT_TEMP "${KOKKOSKERNELS_TPL_EXPOR UNSET(KOKKOSKERNELS_TPL_EXPORTS CACHE) SET(KOKKOSKERNELS_TPL_EXPORTS ${KOKKOSKERNELS_TPL_EXPORT_TEMP}) -IF (KOKKOSKERNELS_ENABLE_TPL_CUSPARSE AND KOKKOSKERNELS_ENABLE_TESTS) - #The tests use CUDA lambdas, make sure Kokkos was built - #with CUDA lambda support - KOKKOS_CHECK(OPTIONS CUDA_LAMBDA RETURN_VALUE HAVE_CUDA_LAMBDA) - IF (NOT HAVE_CUDA_LAMBDA) - MESSAGE(FATAL_ERROR "CUSPARSE tests require Kokkos to be built with CUDA lambda. Please reinstall Kokkos using -DKokkos_ENABLE_CUDA_LAMBDA=ON") - ENDIF() -ENDIF() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 11c4bd65ad..79e7a9e707 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -344,6 +344,7 @@ LIST(APPEND SOURCES batched/KokkosBatched_Util.cpp impl/tpls/KokkosBlas_Host_tpl.cpp impl/tpls/KokkosBlas_Cuda_tpl.cpp + impl/tpls/KokkosKernels_tpl_handles.cpp ) # For now, don't add the ETI headers to complete list of headers diff --git a/src/common/KokkosKernels_Controls.hpp b/src/common/KokkosKernels_Controls.hpp new file mode 100644 index 0000000000..6d9f6e29e1 --- /dev/null +++ b/src/common/KokkosKernels_Controls.hpp @@ -0,0 +1,130 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + + +#ifndef _KOKKOSKERNEL_CONTROLS_HPP +#define _KOKKOSKERNEL_CONTROLS_HPP +/// \file KokkosKernels_Controls.hpp +/// \brief Mechanism to control internal behavior of kernels +/// \author Luc Berger-Vergiat (lberge@sandia.gov) + +#include +#include "KokkosKernels_tpl_handles_decl.hpp" + +// TPLS headers +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +#include "cublas_v2.h" +#endif + +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE +#include "cusparse.h" +#endif + +namespace KokkosKernels{ +namespace Experimental{ + + // Declaration of Controls class + class Controls { + + public: + Controls() = default; + + void setParameter(const std::string& name, const std::string& value) { + kernel_parameters[name] = value; + } + + std::string getParameter(const std::string& name) { + auto search = kernel_parameters.find(name); + std::string value; + if(search == kernel_parameters.end()) { + std::cout << "Parameter " << name << " was not found in the list of parameters!" << std::endl; + value = ""; + } else { + value = search->second; + } + return value; + } + +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS + cublasHandle_t cublasHandle = 0; + + cublasHandle_t getCublasHandle() { + if(cublasHandle == 0) { + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlas::singleton(); + cublasHandle = s.cublasHandle; + } + return cublasHandle; + } + + void setCublasHandle(const cublasHandle_t userCublasHandle) { + cublasHandle = userCublasHandle; + } +#endif + +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + cusparseHandle_t cusparseHandle = 0; + + cusparseHandle_t getCusparseHandle() { + if(cusparseHandle == 0) { + KokkosKernels::Impl::CusparseSingleton & s = + KokkosKernels::Impl::CusparseSingleton::singleton(); + cusparseHandle = s.cusparseHandle; + } + return cusparseHandle; + } + + void setCusparseHandle(const cusparseHandle_t userCusparseHandle) { + cusparseHandle = userCusparseHandle; + } +#endif + + private: + std::unordered_map kernel_parameters; + }; + +} // namespace Experimental +} // namespace KokkosKernels + +#endif // _KOKKOSKERNEL_CONTROLS_HPP diff --git a/src/impl/tpls/KokkosKernels_tpl_handles.cpp b/src/impl/tpls/KokkosKernels_tpl_handles.cpp new file mode 100644 index 0000000000..64cdb26bbf --- /dev/null +++ b/src/impl/tpls/KokkosKernels_tpl_handles.cpp @@ -0,0 +1,133 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include +#include "KokkosKernels_config.h" +#include "KokkosKernels_tpl_handles_def.hpp" diff --git a/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp b/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp new file mode 100644 index 0000000000..8ecf0cea27 --- /dev/null +++ b/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp @@ -0,0 +1,66 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSKERNELS_TPL_HANDLES_DECL_HPP_ +#define KOKKOSKERNELS_TPL_HANDLES_DECL_HPP_ + +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE +#include "KokkosKernels_SparseUtils_cusparse.hpp" + +namespace KokkosKernels{ +namespace Impl{ + +struct CusparseSingleton { + cusparseHandle_t cusparseHandle; + + CusparseSingleton(); + + static CusparseSingleton & singleton(); +}; + +} // namespace Impl +} // namespace KokkosKernels +#endif + +#endif // KOKKOSKERNELS_TPL_HANDLES_DECL_HPP_ diff --git a/src/impl/tpls/KokkosKernels_tpl_handles_def.hpp b/src/impl/tpls/KokkosKernels_tpl_handles_def.hpp new file mode 100644 index 0000000000..dc8c199550 --- /dev/null +++ b/src/impl/tpls/KokkosKernels_tpl_handles_def.hpp @@ -0,0 +1,72 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSKERNELS_TPL_HANDLES_DEF_HPP_ +#define KOKKOSKERNELS_TPL_HANDLES_DEF_HPP_ + +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE +#include "cusparse.h" +#include "KokkosKernels_tpl_handles_decl.hpp" + +namespace KokkosKernels{ +namespace Impl{ + +CusparseSingleton::CusparseSingleton() { + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle)); + + Kokkos::push_finalize_hook ([&] () { + cusparseDestroy(cusparseHandle); + }); +} + +CusparseSingleton & CusparseSingleton::singleton() { + static CusparseSingleton s ; + return s ; +} + +} // namespace Impl +} // namespace KokkosKernels +#endif + +#endif // KOKKOSKERNELS_TPL_HANDLES_DEF_HPP_ diff --git a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index ea87ab0c5a..f2f951684d 100644 --- a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -49,12 +49,14 @@ #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #include "cusparse.h" #include "KokkosKernels_SparseUtils_cusparse.hpp" +#include "KokkosKernels_Controls.hpp" namespace KokkosSparse { namespace Impl { template - void spmv_cusparse(const char mode[], + void spmv_cusparse(KokkosKernels::Experimental::Controls& controls, + const char mode[], typename YVector::non_const_value_type const & alpha, const AMatrix& A, const XVector& x, @@ -64,7 +66,7 @@ namespace Impl { using value_type = typename AMatrix::non_const_value_type; /* initialize cusparse library */ - cusparseHandle_t cusparseHandle = 0; + cusparseHandle_t cusparseHandle = 0;// controls.getCusparseHandle(); KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle)); /* Set the operation mode */ @@ -97,11 +99,13 @@ namespace Impl { KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecX, x.extent_int(0), const_cast(x.data()), myCudaDataType)); KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecY, y.extent_int(0), const_cast(y.data()), myCudaDataType)); - size_t bufferSize = 0; - void* dBuffer = NULL; + size_t bufferSize = 0; + void* dBuffer = NULL; + cusparseSpMVAlg_t alg = CUSPARSE_CSRMV_ALG1; + if(controls.getParameter("algorithm") == "merge") {alg = CUSPARSE_CSRMV_ALG2;} KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV_bufferSize(cusparseHandle, myCusparseOperation, &alpha, A_cusparse, vecX, &beta, vecY, myCudaDataType, - CUSPARSE_CSRMV_ALG1, &bufferSize)); + alg, &bufferSize)); CUDA_SAFE_CALL(cudaMalloc(&dBuffer, bufferSize)); /* perform SpMV */ @@ -164,13 +168,15 @@ namespace Impl { true, COMPILE_LIBRARY> { \ using device_type = Kokkos::Device; \ using memory_trait_type = Kokkos::MemoryTraits; \ - using AMatrix = CrsMatrix; \ - using XVector = Kokkos::View>; \ - using YVector = Kokkos::View; \ + using AMatrix = CrsMatrix; \ + using XVector = Kokkos::View>; \ + using YVector = Kokkos::View; \ + using Controls = KokkosKernels::Experimental::Controls; \ \ using coefficient_type = typename YVector::non_const_value_type; \ \ - static void spmv (const char mode[], \ + static void spmv (Controls& controls, \ + const char mode[], \ const coefficient_type& alpha, \ const AMatrix& A, \ const XVector& x, \ @@ -178,7 +184,7 @@ namespace Impl { const YVector& y) { \ std::string label = "KokkosSparse::spmv[TPL_CUSPARSE," + Kokkos::ArithTraits::name() + "]"; \ Kokkos::Profiling::pushRegion(label); \ - spmv_cusparse(mode, alpha, A, x, beta, y); \ + spmv_cusparse(controls, mode, alpha, A, x, beta, y); \ Kokkos::Profiling::popRegion(); \ } \ }; diff --git a/src/sparse/KokkosSparse_spmv.hpp b/src/sparse/KokkosSparse_spmv.hpp index 1005caee28..7d9b015fdd 100644 --- a/src/sparse/KokkosSparse_spmv.hpp +++ b/src/sparse/KokkosSparse_spmv.hpp @@ -49,6 +49,7 @@ #define KOKKOSSPARSE_SPMV_HPP_ #include "KokkosKernels_helpers.hpp" +#include "KokkosKernels_Controls.hpp" #include "KokkosSparse_spmv_spec.hpp" #include "KokkosSparse_spmv_struct_spec.hpp" #include @@ -64,7 +65,8 @@ namespace { template void -spmv (const char mode[], +spmv (KokkosKernels::Experimental::Controls controls, + const char mode[], const AlphaType& alpha, const AMatrix& A, const XVector& x, @@ -149,7 +151,7 @@ spmv (const char mode[], typename YVector_Internal::value_type*, typename YVector_Internal::array_layout, typename YVector_Internal::device_type, - typename YVector_Internal::memory_traits>::spmv (mode, alpha, A_i, x_i, beta, y_i); + typename YVector_Internal::memory_traits>::spmv (controls, mode, alpha, A_i, x_i, beta, y_i); } @@ -222,7 +224,8 @@ struct SPMV2D1D void -spmv (const char mode[], +spmv (KokkosKernels::Experimental::Controls controls, + const char mode[], const AlphaType& alpha, const AMatrix& A, const XVector& x, @@ -337,6 +340,7 @@ spmv (const char mode[], /// by \c mode. If beta == 0, ignore and overwrite the initial /// entries of y; if alpha == 0, ignore the entries of A and x. /// +/// \param controls [in] kokkos-kernels control structure /// \param mode [in] "N" for no transpose, "T" for transpose, or "C" /// for conjugate transpose. /// \param alpha [in] Scalar multiplier for the matrix A. @@ -348,17 +352,32 @@ spmv (const char mode[], /// multivector (rank-2 Kokkos::View). It must have the same number /// of columns as x. template -void -spmv(const char mode[], - const AlphaType& alpha, - const AMatrix& A, - const XVector& x, - const BetaType& beta, - const YVector& y) { +void spmv(KokkosKernels::Experimental::Controls controls, + const char mode[], + const AlphaType& alpha, + const AMatrix& A, + const XVector& x, + const BetaType& beta, + const YVector& y) { using RANK_SPECIALISE = typename std::conditional (XVector::rank) == 2, RANK_TWO, RANK_ONE>::type; - spmv (mode, alpha, A, x, beta, y, RANK_SPECIALISE ()); + spmv (controls, mode, alpha, A, x, beta, y, RANK_SPECIALISE ()); +} + +// Overload for backward compatibility and also just simpler +// interface for users that are happy with the kernel default settings +template +void spmv(const char mode[], + const AlphaType& alpha, + const AMatrix& A, + const XVector& x, + const BetaType& beta, + const YVector& y) { + + KokkosKernels::Experimental::Controls controls; + spmv(controls, mode, alpha, A, x, beta, y); + } namespace Experimental { diff --git a/src/sparse/impl/KokkosSparse_spmv_impl.hpp b/src/sparse/impl/KokkosSparse_spmv_impl.hpp index eba99fb8ee..939e88a1f2 100644 --- a/src/sparse/impl/KokkosSparse_spmv_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spmv_impl.hpp @@ -45,6 +45,7 @@ #ifndef KOKKOSSPARSE_IMPL_SPMV_DEF_HPP_ #define KOKKOSSPARSE_IMPL_SPMV_DEF_HPP_ +#include "KokkosKernels_Controls.hpp" #include "Kokkos_InnerProductSpaceTraits.hpp" #include "KokkosBlas1_scal.hpp" #include "KokkosSparse_CrsMatrix.hpp" @@ -274,11 +275,12 @@ template static void -spmv_beta_no_transpose (typename YVector::const_value_type& alpha, - const AMatrix& A, - const XVector& x, - typename YVector::const_value_type& beta, - const YVector& y) +spmv_beta_no_transpose (const KokkosKernels::Experimental::Controls& controls, + typename YVector::const_value_type& alpha, + const AMatrix& A, + const XVector& x, + typename YVector::const_value_type& beta, + const YVector& y) { typedef typename AMatrix::ordinal_type ordinal_type; typedef typename AMatrix::execution_space execution_space; @@ -303,14 +305,13 @@ spmv_beta_no_transpose (typename YVector::const_value_type& alpha, int vector_length = -1; int64_t rows_per_thread = -1; + // Note on 03/24/20, lbv: We can use the controls + // here to allow the user to pass in some tunning + // parameters. + int64_t rows_per_team = spmv_launch_parameters(A.numRows(),A.nnz(),rows_per_thread,team_size,vector_length); int64_t worksets = (y.extent(0)+rows_per_team-1)/rows_per_team; - // std::cout << "worksets=" << worksets - // << ", rows_per_team=" << rows_per_team - // << ", team_size=" << team_size - // << ", vector_length=" << vector_length << std::endl; - SPMV_Functor func (alpha,A,x,beta,y,rows_per_team); if(A.nnz()>10000000) { @@ -388,20 +389,21 @@ template static void -spmv_beta (const char mode[], - typename YVector::const_value_type& alpha, - const AMatrix& A, - const XVector& x, - typename YVector::const_value_type& beta, - const YVector& y) +spmv_beta (const KokkosKernels::Experimental::Controls& controls, + const char mode[], + typename YVector::const_value_type& alpha, + const AMatrix& A, + const XVector& x, + typename YVector::const_value_type& beta, + const YVector& y) { if (mode[0] == NoTranspose[0]) { spmv_beta_no_transpose - (alpha,A,x,beta,y); + (controls,alpha,A,x,beta,y); } else if (mode[0] == Conjugate[0]) { spmv_beta_no_transpose - (alpha,A,x,beta,y); + (controls,alpha,A,x,beta,y); } else if (mode[0]==Transpose[0]) { spmv_beta_transpose diff --git a/src/sparse/impl/KokkosSparse_spmv_spec.hpp b/src/sparse/impl/KokkosSparse_spmv_spec.hpp index 98d3389418..d36dfb0ee0 100644 --- a/src/sparse/impl/KokkosSparse_spmv_spec.hpp +++ b/src/sparse/impl/KokkosSparse_spmv_spec.hpp @@ -49,6 +49,7 @@ #include #include "KokkosSparse_CrsMatrix.hpp" +#include "KokkosKernels_Controls.hpp" // Include the actual functors #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY #include @@ -155,12 +156,13 @@ struct SPMV{ typedef typename YVector::non_const_value_type coefficient_type; - static void spmv (const char mode[], - const coefficient_type& alpha, - const AMatrix& A, - const XVector& x, - const coefficient_type& beta, - const YVector& y); + static void spmv (const KokkosKernels::Experimental::Controls& controls, + const char mode[], + const coefficient_type& alpha, + const AMatrix& A, + const XVector& x, + const coefficient_type& beta, + const YVector& y); }; // Unification layer @@ -245,12 +247,13 @@ struct SPMV < AT, AO, AD, AM, AS, typedef typename YVector::non_const_value_type coefficient_type; static void - spmv (const char mode[], - const coefficient_type& alpha, - const AMatrix& A, - const XVector& x, - const coefficient_type& beta, - const YVector& y) + spmv (const KokkosKernels::Experimental::Controls& controls, + const char mode[], + const coefficient_type& alpha, + const AMatrix& A, + const XVector& x, + const coefficient_type& beta, + const YVector& y) { typedef Kokkos::Details::ArithTraits KAT; @@ -264,16 +267,16 @@ struct SPMV < AT, AO, AD, AM, AS, } if (beta == KAT::zero ()) { - spmv_beta (mode, alpha, A, x, beta, y); + spmv_beta (controls, mode, alpha, A, x, beta, y); } else if (beta == KAT::one ()) { - spmv_beta (mode, alpha, A, x, beta, y); + spmv_beta (controls, mode, alpha, A, x, beta, y); } else if (beta == -KAT::one ()) { - spmv_beta (mode, alpha, A, x, beta, y); + spmv_beta (controls, mode, alpha, A, x, beta, y); } else { - spmv_beta (mode, alpha, A, x, beta, y); + spmv_beta (controls, mode, alpha, A, x, beta, y); } } }; diff --git a/unit_test/sparse/Test_Sparse_spmv.hpp b/unit_test/sparse/Test_Sparse_spmv.hpp index 350d75b512..a7b42fa697 100644 --- a/unit_test/sparse/Test_Sparse_spmv.hpp +++ b/unit_test/sparse/Test_Sparse_spmv.hpp @@ -8,6 +8,8 @@ #include #include +#include "KokkosKernels_Controls.hpp" + #ifndef kokkos_complex_double #define kokkos_complex_double Kokkos::complex #define kokkos_complex_float Kokkos::complex @@ -262,8 +264,49 @@ void check_spmv_mv_struct(const crsMat_t input_mat, } } // check_spmv_mv_struct +template +void check_spmv_controls(KokkosKernels::Experimental::Controls controls, + crsMat_t input_mat, x_vector_type x, y_vector_type y, + typename y_vector_type::non_const_value_type alpha, + typename y_vector_type::non_const_value_type beta) { + //typedef typename crsMat_t::StaticCrsGraphType graph_t; + using ExecSpace = typename crsMat_t::execution_space; + using my_exec_space = Kokkos::RangePolicy; + using y_value_type = typename y_vector_type::non_const_value_type; + using y_value_trait = Kokkos::ArithTraits; + using y_value_mag_type = typename y_value_trait::mag_type; + + // y is the quantity being tested here, + // so let us use y_value_type to determine + // the appropriate tolerance precision. + const y_value_mag_type eps = std::is_same::value ? 2*1e-3 : 1e-7; + const size_t nr = input_mat.numRows(); + y_vector_type expected_y("expected", nr); + Kokkos::deep_copy(expected_y, y); + Kokkos::fence(); + + sequential_spmv(input_mat, x, expected_y, alpha, beta); + +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + controls.setParameter("algorithm", "merge"); + printf("requested merge based algorithm\n"); +#endif + + KokkosSparse::spmv(controls, "N", alpha, input_mat, x, beta, y); + int num_errors = 0; + Kokkos::parallel_reduce("KokkosSparse::Test::spmv", + my_exec_space(0, y.extent(0)), + fSPMV(expected_y, y, eps), + num_errors); + if(num_errors>0) printf("KokkosSparse::Test::spmv: %i errors of %i with params: %lf %lf\n", + num_errors, y.extent_int(0), + y_value_trait::abs(alpha), y_value_trait::abs(beta)); + EXPECT_TRUE(num_errors==0); +} // check_spmv_controls + } // namespace Test + template void test_spmv(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_variance){ @@ -495,6 +538,41 @@ void test_spmv_mv_struct_1D(lno_t nx, int numMV) { Test::check_spmv_mv_struct(input_mat, 1, structure, input_x, output_y, output_y_copy, 1.0, 1.0, numMV); } +// check that the controls are flowing down correctly in the spmv kernel +template +void test_spmv_controls(lno_t numRows,size_type nnz, lno_t bandwidth, lno_t row_size_variance) { + + using crsMat_t = typename KokkosSparse::CrsMatrix; + using scalar_view_t = typename crsMat_t::values_type::non_const_type; + using x_vector_type = scalar_view_t; + using y_vector_type = scalar_view_t; + using Controls = KokkosKernels::Experimental::Controls; + + + lno_t numCols = numRows; + + crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix(numRows,numCols,nnz,row_size_variance, bandwidth); + lno_t nr = input_mat.numRows(); + lno_t nc = input_mat.numCols(); + + x_vector_type input_x ("x", nc); + y_vector_type output_y ("y", nr); + + Kokkos::Random_XorShift64_Pool rand_pool(13718); + + using ScalarX = typename x_vector_type::value_type; + using ScalarY = typename y_vector_type::value_type; + + Kokkos::fill_random(input_x,rand_pool,ScalarX(10)); + Kokkos::fill_random(output_y,rand_pool,ScalarY(10)); + + Controls controls; + + Test::check_spmv_controls(controls, input_mat, input_x, output_y, 1.0, 0.0); + Test::check_spmv_controls(controls, input_mat, input_x, output_y, 0.0, 1.0); + Test::check_spmv_controls(controls, input_mat, input_x, output_y, 1.0, 1.0); +} // test_spmv_controls + //call it if ordinal int and, scalar float and double are instantiated. template void test_github_issue_101 () @@ -650,6 +728,7 @@ TEST_F( TestCategory,sparse ## _ ## spmv ## _ ## SCALAR ## _ ## ORDINAL ## _ ## test_spmv (50000, 50000 * 30, 200, 10); \ test_spmv (50000, 50000 * 30, 100, 10); \ test_spmv (10000, 10000 * 20, 100, 5); \ + test_spmv_controls (10000, 10000 * 20, 100, 5); \ } #define EXECUTE_TEST_MV(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ From 77cda8cd34c7c61de5671ab3e8a0e46e50bfc48d Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Wed, 29 Apr 2020 13:05:42 -0700 Subject: [PATCH 2/9] SpMV merge: started adding infrastructure for test. Something bugs out in the cmake logic. Could be a Lassen issue too. --- perf_test/sparse/CMakeLists.txt | 5 +++++ src/common/KokkosKernels_Controls.hpp | 3 +++ src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 6 +----- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/perf_test/sparse/CMakeLists.txt b/perf_test/sparse/CMakeLists.txt index 8ca70ba8ad..fc847d820b 100644 --- a/perf_test/sparse/CMakeLists.txt +++ b/perf_test/sparse/CMakeLists.txt @@ -43,6 +43,11 @@ KOKKOSKERNELS_ADD_EXECUTABLE( SOURCES KokkosSparse_spmv.cpp ) +KOKKOSKERNELS_ADD_EXECUTABLE( + sparse_spmv_merge + SOURCES KokkosSparse_spmv_merge.cpp + ) + KOKKOSKERNELS_ADD_EXECUTABLE( sparse_sptrsv SOURCES KokkosSparse_sptrsv.cpp diff --git a/src/common/KokkosKernels_Controls.hpp b/src/common/KokkosKernels_Controls.hpp index 6d9f6e29e1..49e2249897 100644 --- a/src/common/KokkosKernels_Controls.hpp +++ b/src/common/KokkosKernels_Controls.hpp @@ -51,6 +51,7 @@ /// \author Luc Berger-Vergiat (lberge@sandia.gov) #include +#include "KokkosKernels_config.h" #include "KokkosKernels_tpl_handles_decl.hpp" // TPLS headers @@ -60,6 +61,8 @@ #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #include "cusparse.h" +#else +blahblah #endif namespace KokkosKernels{ diff --git a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index f2f951684d..6e53586e5e 100644 --- a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -66,8 +66,7 @@ namespace Impl { using value_type = typename AMatrix::non_const_value_type; /* initialize cusparse library */ - cusparseHandle_t cusparseHandle = 0;// controls.getCusparseHandle(); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle)); + cusparseHandle_t cusparseHandle = controls.getCusparseHandle(); /* Set the operation mode */ cusparseOperation_t myCusparseOperation = CUSPARSE_OPERATION_NON_TRANSPOSE; @@ -155,9 +154,6 @@ namespace Impl { KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(descrA)); #endif // CUSPARSE_VERSION - - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroy(cusparseHandle)); - cusparseHandle = 0; } #define KOKKOSSPARSE_SPMV_CUSPARSE(SCALAR, OFFSET, LAYOUT, COMPILE_LIBRARY) \ From 23726096bc44c99e6e245ea573b0cd7d593b6485 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Wed, 29 Apr 2020 13:26:32 -0700 Subject: [PATCH 3/9] SpMV merge: adding begining of unit-test --- perf_test/sparse/KokkosSparse_spmv_merge.cpp | 113 +++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 perf_test/sparse/KokkosSparse_spmv_merge.cpp diff --git a/perf_test/sparse/KokkosSparse_spmv_merge.cpp b/perf_test/sparse/KokkosSparse_spmv_merge.cpp new file mode 100644 index 0000000000..ebf3e00d78 --- /dev/null +++ b/perf_test/sparse/KokkosSparse_spmv_merge.cpp @@ -0,0 +1,113 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include "KokkosKernels_default_types.hpp" + +using Scalar = default_scalar; +using lno_t = default_lno_t; +using size_type = default_size_type; + +void print_help() { + printf("SPMV merge benchmark code written by Luc Berger-Vergiat.\n"); + printf("The goal is to test cusSPARSE's merge algorithm on imbalanced matrices."); + printf("Options:\n"); + printf(" --compare : Compare the performance of the merge algo with the default algo.\n"); + printf(" -l [LOOP] : How many spmv to run to aggregate average time. \n"); + printf(" -numRows : Number of rows the matrix will contain.\n"); + printf(" -numCols : Number of columns the matrix will contain (allow rectangular matrix).\n"); + printf(" -numEntries : Number of entries per row.\n"); + printf(" -numLongRows : Number of rows that will contain more entries than the average.\n"); + printf(" -numLongEntries : Number of entries per row in the unbalanced rows.\n"); +} + +int main(int argc, char** argv) { + + bool compare = false; + int loop = 100; + int numRows = 10000; + int numCols = 0; + int numEntries = 7; + int numLongRows = 10; + int numLongEntries = 200; + + if(argc == 1) { + print_help(); + return 0; + } + + for(int i = 0; i < argc; i++) { + if((strcmp(argv[i],"--compare" )==0)) {compare=true; continue;} + if((strcmp(argv[i],"-l" )==0)) {loop=atoi(argv[++i]); continue;} + if((strcmp(argv[i],"-numRows" )==0)) {numRows=atoi(argv[++i]); continue;} + if((strcmp(argv[i],"-numCols" )==0)) {numCols=atoi(argv[++i]); continue;} + if((strcmp(argv[i],"-numEntries" )==0)) {numEntries=atoi(argv[++i]); continue;} + if((strcmp(argv[i],"-numLongRows" )==0)) {numLongRows=atoi(argv[++i]); continue;} + if((strcmp(argv[i],"-numLongEntries" )==0)) {numLongEntries=atoi(argv[++i]); continue;} + if((strcmp(argv[i],"--help")==0) || (strcmp(argv[i],"-h")==0)) { + print_help(); + return 0; + } + } + + // If numCols was not set, assume the matrix is square. + if(numCols == 0) {numCols = numRows;} + + Kokkos::initialize(argc, argv); + + Kokkos::finalize(); +} // main From 3251464d62fb1d848a8b13eff5be95a6b7bc9d0f Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Wed, 20 May 2020 21:32:39 -0700 Subject: [PATCH 4/9] Tested implementation of Controls to allow spmv merge algorithm This allows the cusparse specialization layer to use either the default or the merge algorithm depending on input parameters from the user. --- perf_test/sparse/KokkosSparse_spmv_merge.cpp | 205 +++++++++++++++++- src/common/KokkosKernels_IOUtils.hpp | 2 +- .../tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 102 ++++++--- 3 files changed, 268 insertions(+), 41 deletions(-) diff --git a/perf_test/sparse/KokkosSparse_spmv_merge.cpp b/perf_test/sparse/KokkosSparse_spmv_merge.cpp index ebf3e00d78..64927c0a5a 100644 --- a/perf_test/sparse/KokkosSparse_spmv_merge.cpp +++ b/perf_test/sparse/KokkosSparse_spmv_merge.cpp @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -57,11 +58,106 @@ #include #include #include "KokkosKernels_default_types.hpp" +#include "KokkosKernels_IOUtils.hpp" using Scalar = default_scalar; using lno_t = default_lno_t; using size_type = default_size_type; +template +matrix_type generate_unbalanced_matrix(const lno_t numRows, const lno_t numEntries, + const lno_t numLongRows, const lno_t numLongEntries) { + using row_map_type = typename matrix_type::row_map_type::non_const_type; + using entries_type = typename matrix_type::index_type::non_const_type; + using values_type = typename matrix_type::values_type::non_const_type; + + // Structure of the matrix: + // the last numLongRows will contain the highly connected rows + // the rest of the matrix will have a more classic sparse structure + // with numEntries per row. + + // Randomly pick the length of the rows using a normal distribution + std::mt19937 rand_generator(42); // Seed with 42 for reproducibility + std::normal_distribution row_dist{static_cast(numEntries), static_cast(std::sqrt(numEntries))}; + + std::vector permutation(numRows - numLongRows); + std::vector row_map_vec(numRows + 1); + row_map_vec[0] = 0; + for(lno_t rowIdx = 0; rowIdx < numRows - numLongRows; ++rowIdx) { + row_map_vec[rowIdx + 1] = row_map_vec[rowIdx] + static_cast(row_dist(rand_generator)); + + // also filling the permutation vector that will be used to construct long rows + permutation[rowIdx] = rowIdx; + } + + std::normal_distribution long_row_dist{static_cast(numLongEntries), static_cast(numLongEntries/2)}; + lno_t rand_number; + for(lno_t rowIdx = numRows - numLongRows; rowIdx < numRows; ++rowIdx) { + rand_number = static_cast(long_row_dist(rand_generator)); + row_map_vec[rowIdx + 1] = row_map_vec[rowIdx] + rand_number; + } + const lno_t numNNZ = row_map_vec[numRows]; + + std::vector colind_vec(row_map_vec[numRows]); + // We loop over the first part of the matrix and assume that the bandwidth is 0.01*numRows + // i.e. highly concentrated around digaonal + std::normal_distribution entry_dist{static_cast(0.0), static_cast(numRows/100)}; + for(lno_t rowIdx = 0; rowIdx < numRows - numLongRows; ++rowIdx) { + const lno_t rowLength = row_map_vec[rowIdx + 1] - row_map_vec[rowIdx]; + // Making the stencil symmetric because it looks a bit more like a regular discretization + for(lno_t entryIdx = 0; entryIdx < (rowLength / 2); ++entryIdx) { + colind_vec[row_map_vec[rowIdx] + entryIdx] = rowIdx - static_cast(entry_dist(rand_generator)); + colind_vec[row_map_vec[rowIdx + 1] - entryIdx - 1] = rowIdx + static_cast(entry_dist(rand_generator)); + } + // Add diagonal entry if row length is an odd number + if((rowLength % 2) == 1) { + colind_vec[row_map_vec[rowIdx] + rowLength / 2] = rowIdx; + } + } + + for(lno_t rowIdx = numRows - numLongRows; rowIdx < numRows; ++rowIdx) { + // Generate a random permutation + std::shuffle(permutation.begin(), permutation.end(), rand_generator); + + lno_t rowLength = row_map_vec[rowIdx + 1] - row_map_vec[rowIdx]; + for(lno_t entryIdx = 0; entryIdx < rowLength; ++entryIdx) { + colind_vec[row_map_vec[rowIdx] + entryIdx] = permutation[entryIdx]; + } + } + + row_map_type row_map("row map", numRows + 1); + entries_type entries("entries", numNNZ); + values_type values ("values", numNNZ); + + // Copy row map values to view + typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map); + row_map_h(0) = 0; + for(lno_t rowIdx = 0; rowIdx < numRows; ++rowIdx) { + row_map_h(rowIdx + 1) = row_map_vec[rowIdx + 1]; + } + Kokkos::deep_copy(row_map, row_map_h); + + // Copy column indices to view + typename row_map_type::HostMirror entries_h = Kokkos::create_mirror_view(entries); + entries_h(0) = 0; + for(lno_t entryIdx = 0; entryIdx < numNNZ; ++entryIdx) { + entries_h(entryIdx) = colind_vec[entryIdx]; + } + Kokkos::deep_copy(entries, entries_h); + + // Fill the values view with 1.0 + Kokkos::deep_copy(values, 1.0); + + matrix_type unbalanced_matrix("unbalanced matrix", numRows, numRows, numNNZ, + values, row_map, entries); + + std::cout << std::endl; + std::cout << "Matrix statistics:" << std::endl + << " - average nnz per row: " << row_map_vec[numRows - numLongRows] / (numRows - numLongRows) << std::endl; + + return unbalanced_matrix; +} + void print_help() { printf("SPMV merge benchmark code written by Luc Berger-Vergiat.\n"); printf("The goal is to test cusSPARSE's merge algorithm on imbalanced matrices."); @@ -69,7 +165,6 @@ void print_help() { printf(" --compare : Compare the performance of the merge algo with the default algo.\n"); printf(" -l [LOOP] : How many spmv to run to aggregate average time. \n"); printf(" -numRows : Number of rows the matrix will contain.\n"); - printf(" -numCols : Number of columns the matrix will contain (allow rectangular matrix).\n"); printf(" -numEntries : Number of entries per row.\n"); printf(" -numLongRows : Number of rows that will contain more entries than the average.\n"); printf(" -numLongEntries : Number of entries per row in the unbalanced rows.\n"); @@ -77,13 +172,12 @@ void print_help() { int main(int argc, char** argv) { - bool compare = false; - int loop = 100; - int numRows = 10000; - int numCols = 0; - int numEntries = 7; - int numLongRows = 10; - int numLongEntries = 200; + bool compare = false; + lno_t loop = 100; + lno_t numRows = 175000; + lno_t numEntries = 15; + lno_t numLongRows = 4; + lno_t numLongEntries = 30000; if(argc == 1) { print_help(); @@ -94,7 +188,6 @@ int main(int argc, char** argv) { if((strcmp(argv[i],"--compare" )==0)) {compare=true; continue;} if((strcmp(argv[i],"-l" )==0)) {loop=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-numRows" )==0)) {numRows=atoi(argv[++i]); continue;} - if((strcmp(argv[i],"-numCols" )==0)) {numCols=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-numEntries" )==0)) {numEntries=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-numLongRows" )==0)) {numLongRows=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-numLongEntries" )==0)) {numLongEntries=atoi(argv[++i]); continue;} @@ -104,10 +197,100 @@ int main(int argc, char** argv) { } } - // If numCols was not set, assume the matrix is square. - if(numCols == 0) {numCols = numRows;} + // We want an odd number of entries in all rows to generate a symmetric matrix + if((numEntries / 2) == 0) {++numEntries;} + if((numLongEntries / 2) == 0) {++numLongEntries;} + + std::cout << "Test parameters:" << std::endl + << " - loop: " << loop << std::endl + << " - compare: " << compare << std::endl + << " - numRows: " << numRows << std::endl + << " - numEntries: " << numEntries << std::endl + << " - numLongRows: " << numLongRows << std::endl + << " - numLongEntries: " << numLongEntries << std::endl; Kokkos::initialize(argc, argv); + { + if(std::is_same::value) { + // Note that we template the matrix with entries=lno_t and offsets=lno_t to make sure + // it verifies the cusparse requirements + using matrix_type = KokkosSparse::CrsMatrix; + using values_type = typename matrix_type::values_type::non_const_type; + const Scalar SC_ONE = Kokkos::ArithTraits::one(); + const Scalar alpha = SC_ONE + SC_ONE; + const Scalar beta = alpha + SC_ONE; + + matrix_type test_matrix = generate_unbalanced_matrix(numRows, numEntries, numLongRows, numLongEntries); + + values_type y("right hand side", test_matrix.numRows()); + values_type x("left hand side", test_matrix.numCols()); + Kokkos::deep_copy(x, SC_ONE); + Kokkos::deep_copy(y, SC_ONE); + + KokkosKernels::Experimental::Controls controls; + controls.setParameter("algorithm", "merge"); + + // Perform a so called "warm-up" run + KokkosSparse::spmv(controls, "N", alpha, test_matrix, x, beta, y); + + double min_time = 1.0e32, max_time = 0.0, avg_time = 0.0; + for(int iterIdx = 0; iterIdx < loop; ++iterIdx) { + Kokkos::Timer timer; + KokkosSparse::spmv(controls, "N", alpha, test_matrix, x, beta, y); + Kokkos::fence(); + double time = timer.seconds(); + avg_time += time; + if(time>max_time) max_time = time; + if(timemax_time) max_time = time; + if(time(controls, "N", alpha, test_matrix, x, beta, y); + Kokkos::fence(); + double time = timer.seconds(); + avg_time += time; + if(time>max_time) max_time = time; + if(time + void spmv_native(KokkosKernels::Experimental::Controls& controls, + const char mode[], + typename YVector::non_const_value_type const & alpha, + const AMatrix& A, + const XVector& x, + typename YVector::non_const_value_type const & beta, + const YVector& y) { + using KAT = Kokkos::Details::ArithTraits; + + std::cout << "It is currently not possible to use the native SpMV implementation" + " when cuSPARSE is enabled" << std::endl; + } + template void spmv_cusparse(KokkosKernels::Experimental::Controls& controls, const char mode[], @@ -62,8 +76,9 @@ namespace Impl { const XVector& x, typename YVector::non_const_value_type const & beta, const YVector& y) { - using offset_type = typename AMatrix::non_const_size_type; - using value_type = typename AMatrix::non_const_value_type; + using offset_type = typename AMatrix::non_const_size_type; + using entry_type = typename AMatrix::non_const_ordinal_type; + using value_type = typename AMatrix::non_const_value_type; /* initialize cusparse library */ cusparseHandle_t cusparseHandle = controls.getCusparseHandle(); @@ -75,21 +90,24 @@ namespace Impl { #if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION) /* Check that cusparse can handle the types of the input Kokkos::CrsMatrix */ - cusparseIndexType_t myCusparseIndexType; - if(std::is_same::value) {myCusparseIndexType = CUSPARSE_INDEX_32I;} - if(std::is_same::value) {myCusparseIndexType = CUSPARSE_INDEX_64I;} + cusparseIndexType_t myCusparseOffsetType; + if(std::is_same::value) {myCusparseOffsetType = CUSPARSE_INDEX_32I;} + if(std::is_same::value) {myCusparseOffsetType = CUSPARSE_INDEX_64I;} + cusparseIndexType_t myCusparseEntryType; + if(std::is_same::value) {myCusparseEntryType = CUSPARSE_INDEX_32I;} + if(std::is_same::value) {myCusparseEntryType = CUSPARSE_INDEX_64I;} cudaDataType myCudaDataType; - if(std::is_same::value) {myCudaDataType = CUDA_R_32F;} - if(std::is_same::value) {myCudaDataType = CUDA_R_64F;} + if(std::is_same::value) {myCudaDataType = CUDA_R_32F;} + if(std::is_same::value) {myCudaDataType = CUDA_R_64F;} /* create matrix */ cusparseSpMatDescr_t A_cusparse; KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&A_cusparse, A.numRows(), A.numCols(), A.nnz(), const_cast(A.graph.row_map.data()), - const_cast(A.graph.entries.data()), + const_cast(A.graph.entries.data()), const_cast(A.values.data()), - myCusparseIndexType, - myCusparseIndexType, + myCusparseOffsetType, + myCusparseEntryType, CUSPARSE_INDEX_BASE_ZERO, myCudaDataType)); @@ -101,7 +119,8 @@ namespace Impl { size_t bufferSize = 0; void* dBuffer = NULL; cusparseSpMVAlg_t alg = CUSPARSE_CSRMV_ALG1; - if(controls.getParameter("algorithm") == "merge") {alg = CUSPARSE_CSRMV_ALG2;} + if(controls.getParameter("algorithm") == "default") {alg = CUSPARSE_CSRMV_ALG1;} + if(controls.getParameter("algorithm") == "merge") {alg = CUSPARSE_CSRMV_ALG2;} KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV_bufferSize(cusparseHandle, myCusparseOperation, &alpha, A_cusparse, vecX, &beta, vecY, myCudaDataType, alg, &bufferSize)); @@ -156,15 +175,15 @@ namespace Impl { #endif // CUSPARSE_VERSION } -#define KOKKOSSPARSE_SPMV_CUSPARSE(SCALAR, OFFSET, LAYOUT, COMPILE_LIBRARY) \ +#define KOKKOSSPARSE_SPMV_CUSPARSE(SCALAR, ORDINAL, OFFSET, LAYOUT, COMPILE_LIBRARY) \ template<> \ - struct SPMV, Kokkos::MemoryTraits, OFFSET const, \ - SCALAR const*, LAYOUT, Kokkos::Device, Kokkos::MemoryTraits, \ - SCALAR*, LAYOUT, Kokkos::Device, Kokkos::MemoryTraits, \ - true, COMPILE_LIBRARY> { \ + struct SPMV, Kokkos::MemoryTraits, OFFSET const, \ + SCALAR const*, LAYOUT, Kokkos::Device, Kokkos::MemoryTraits, \ + SCALAR*, LAYOUT, Kokkos::Device, Kokkos::MemoryTraits, \ + true, true> { \ using device_type = Kokkos::Device; \ using memory_trait_type = Kokkos::MemoryTraits; \ - using AMatrix = CrsMatrix; \ + using AMatrix = CrsMatrix; \ using XVector = Kokkos::View>; \ using YVector = Kokkos::View; \ using Controls = KokkosKernels::Experimental::Controls; \ @@ -178,21 +197,46 @@ namespace Impl { const XVector& x, \ const coefficient_type& beta, \ const YVector& y) { \ - std::string label = "KokkosSparse::spmv[TPL_CUSPARSE," + Kokkos::ArithTraits::name() + "]"; \ - Kokkos::Profiling::pushRegion(label); \ - spmv_cusparse(controls, mode, alpha, A, x, beta, y); \ - Kokkos::Profiling::popRegion(); \ + if(controls.getParameter("algorithm") == "native") { \ + std::string label = "KokkosSparse::spmv[TPL_CUSPARSE," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spmv_native(controls, mode, alpha, A, x, beta, y); \ + Kokkos::Profiling::popRegion(); \ + } else { \ + std::string label = "KokkosSparse::spmv[TPL_CUSPARSE," + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spmv_cusparse(controls, mode, alpha, A, x, beta, y); \ + Kokkos::Profiling::popRegion(); \ + } \ } \ }; - KOKKOSSPARSE_SPMV_CUSPARSE(double, int, Kokkos::LayoutLeft, true) - KOKKOSSPARSE_SPMV_CUSPARSE(double, int, Kokkos::LayoutLeft, false) - KOKKOSSPARSE_SPMV_CUSPARSE(double, int, Kokkos::LayoutRight, true) - KOKKOSSPARSE_SPMV_CUSPARSE(double, int, Kokkos::LayoutRight, false) - KOKKOSSPARSE_SPMV_CUSPARSE(float, int, Kokkos::LayoutLeft, true) - KOKKOSSPARSE_SPMV_CUSPARSE(float, int, Kokkos::LayoutLeft, false) - KOKKOSSPARSE_SPMV_CUSPARSE(float, int, Kokkos::LayoutRight, true) - KOKKOSSPARSE_SPMV_CUSPARSE(float, int, Kokkos::LayoutRight, false) + KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, true) + // KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutLeft, false) + KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutRight, true) + // KOKKOSSPARSE_SPMV_CUSPARSE(double, int, int, Kokkos::LayoutRight, false) + KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, true) + // KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutLeft, false) + KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutRight, true) + // KOKKOSSPARSE_SPMV_CUSPARSE(float, int, int, Kokkos::LayoutRight, false) +#if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION) + KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, int, Kokkos::LayoutLeft, true) + // KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, int, Kokkos::LayoutLeft, false) + KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, int, Kokkos::LayoutRight, true) + // KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, int, Kokkos::LayoutRight, false) + KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, int, Kokkos::LayoutLeft, true) + // KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, int, Kokkos::LayoutLeft, false) + KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, int, Kokkos::LayoutRight, true) + // KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, int, Kokkos::LayoutRight, false) + KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, int64_t, Kokkos::LayoutLeft, true) + // KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, int64_t, Kokkos::LayoutLeft, false) + KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, int64_t, Kokkos::LayoutRight, true) + // KOKKOSSPARSE_SPMV_CUSPARSE(double, int64_t, int64_t, Kokkos::LayoutRight, false) + KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, int64_t, Kokkos::LayoutLeft, true) + // KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, int64_t, Kokkos::LayoutLeft, false) + KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, int64_t, Kokkos::LayoutRight, true) + // KOKKOSSPARSE_SPMV_CUSPARSE(float, int64_t, int64_t, Kokkos::LayoutRight, false) +#endif #undef KOKKOSSPARSE_SPMV_CUSPARSE From 049d9439dbbbe2224fb5894068518da0bbe1eb55 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Thu, 21 May 2020 07:57:21 -0700 Subject: [PATCH 5/9] Remove unfortunate debugging left-over... --- src/common/KokkosKernels_Controls.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/common/KokkosKernels_Controls.hpp b/src/common/KokkosKernels_Controls.hpp index 49e2249897..bf1924bed4 100644 --- a/src/common/KokkosKernels_Controls.hpp +++ b/src/common/KokkosKernels_Controls.hpp @@ -61,8 +61,6 @@ #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #include "cusparse.h" -#else -blahblah #endif namespace KokkosKernels{ From 3d87887f153953f3135aec8892235b10ac5a3b88 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Thu, 21 May 2020 08:43:35 -0700 Subject: [PATCH 6/9] Adding CMake guard for spmv merge performance test This test needs to be guarded as the SpMV merge algorithm is only available in cuSPARSE, not natively in Kokkos-Kernels at this point. --- perf_test/sparse/CMakeLists.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/perf_test/sparse/CMakeLists.txt b/perf_test/sparse/CMakeLists.txt index fc847d820b..da22993cda 100644 --- a/perf_test/sparse/CMakeLists.txt +++ b/perf_test/sparse/CMakeLists.txt @@ -43,10 +43,12 @@ KOKKOSKERNELS_ADD_EXECUTABLE( SOURCES KokkosSparse_spmv.cpp ) -KOKKOSKERNELS_ADD_EXECUTABLE( - sparse_spmv_merge - SOURCES KokkosSparse_spmv_merge.cpp - ) +IF(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) + KOKKOSKERNELS_ADD_EXECUTABLE( + sparse_spmv_merge + SOURCES KokkosSparse_spmv_merge.cpp + ) +ENDIF() KOKKOSKERNELS_ADD_EXECUTABLE( sparse_sptrsv From d857850ef3f8bd366f2edf51f3b85d5dd410206c Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Thu, 21 May 2020 16:25:23 -0700 Subject: [PATCH 7/9] Fixing some small issues and expanding a bit capabilities in SpMV Fixing a name clashing in sptrsv_cuSPARSE: error_t which is usually used as a type not a variable. Adding isParamter method to Controls. Listing more parameters as controled parameters in SpMV --- perf_test/sparse/KokkosSparse_spmv_merge.cpp | 18 ++++++++++++------ src/common/KokkosKernels_Controls.hpp | 16 +++++++++++++++- src/sparse/impl/KokkosSparse_spmv_impl.hpp | 15 ++++++++++++++- .../impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp | 16 ++++++++-------- 4 files changed, 49 insertions(+), 16 deletions(-) diff --git a/perf_test/sparse/KokkosSparse_spmv_merge.cpp b/perf_test/sparse/KokkosSparse_spmv_merge.cpp index 64927c0a5a..ec819bd704 100644 --- a/perf_test/sparse/KokkosSparse_spmv_merge.cpp +++ b/perf_test/sparse/KokkosSparse_spmv_merge.cpp @@ -60,13 +60,15 @@ #include "KokkosKernels_default_types.hpp" #include "KokkosKernels_IOUtils.hpp" -using Scalar = default_scalar; -using lno_t = default_lno_t; -using size_type = default_size_type; - template -matrix_type generate_unbalanced_matrix(const lno_t numRows, const lno_t numEntries, - const lno_t numLongRows, const lno_t numLongEntries) { +matrix_type generate_unbalanced_matrix(const typename matrix_type::ordinal_type numRows, + const typename matrix_type::ordinal_type numEntries, + const typename matrix_type::ordinal_type numLongRows, + const typename matrix_type::ordinal_type numLongEntries) { + + using Scalar = typename matrix_type::value_type; + using lno_t = typename matrix_type::ordinal_type; + using row_map_type = typename matrix_type::row_map_type::non_const_type; using entries_type = typename matrix_type::index_type::non_const_type; using values_type = typename matrix_type::values_type::non_const_type; @@ -172,6 +174,10 @@ void print_help() { int main(int argc, char** argv) { + using Scalar = default_scalar; + using lno_t = default_lno_t; + using size_type = default_size_type; + bool compare = false; lno_t loop = 100; lno_t numRows = 175000; diff --git a/src/common/KokkosKernels_Controls.hpp b/src/common/KokkosKernels_Controls.hpp index bf1924bed4..0b317858c9 100644 --- a/src/common/KokkosKernels_Controls.hpp +++ b/src/common/KokkosKernels_Controls.hpp @@ -70,13 +70,26 @@ namespace Experimental{ class Controls { public: + // Constructor Controls() = default; + // set a new parameter void setParameter(const std::string& name, const std::string& value) { kernel_parameters[name] = value; } - std::string getParameter(const std::string& name) { + // check if a parameter is already set + bool isParameter(const std::string& name) const { + bool return_value = false; + + auto search = kernel_parameters.find(name); + if(search != kernel_parameters.end()) { return_value = true; } + + return return_value; + } + + // retrieve the value associated with a parameter if it is already set + std::string getParameter(const std::string& name) const { auto search = kernel_parameters.find(name); std::string value; if(search == kernel_parameters.end()) { @@ -122,6 +135,7 @@ namespace Experimental{ #endif private: + // storage for kernel parameters std::unordered_map kernel_parameters; }; diff --git a/src/sparse/impl/KokkosSparse_spmv_impl.hpp b/src/sparse/impl/KokkosSparse_spmv_impl.hpp index 939e88a1f2..3ea7d150b6 100644 --- a/src/sparse/impl/KokkosSparse_spmv_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spmv_impl.hpp @@ -308,13 +308,26 @@ spmv_beta_no_transpose (const KokkosKernels::Experimental::Controls& controls, // Note on 03/24/20, lbv: We can use the controls // here to allow the user to pass in some tunning // parameters. + if(controls.isParameter("team size")) {team_size = std::stoi(controls.getParameter("team size"));} + if(controls.isParameter("vector length")) {vector_length = std::stoi(controls.getParameter("vector length"));} + if(controls.isParameter("rows per thread")) {rows_per_thread = std::stoll(controls.getParameter("rows per thread"));} + + bool use_dynamic_schedule = false; // Forces the use of a dynamic schedule + bool use_static_schedule = false; // Forces the use of a static schedule + if(controls.isParameter("schedule")) { + if(controls.getParameter("schedule") == "dynamic") { + use_dynamic_schedule = true; + } else if(controls.getParameter("schedule") == "static") { + use_static_schedule = true; + } + } int64_t rows_per_team = spmv_launch_parameters(A.numRows(),A.nnz(),rows_per_thread,team_size,vector_length); int64_t worksets = (y.extent(0)+rows_per_team-1)/rows_per_team; SPMV_Functor func (alpha,A,x,beta,y,rows_per_team); - if(A.nnz()>10000000) { + if(((A.nnz()>10000000) || use_dynamic_schedule) && !use_static_schedule) { Kokkos::TeamPolicy > policy(1,1); if(team_size<0) policy = Kokkos::TeamPolicy >(worksets,Kokkos::AUTO,vector_length); diff --git a/src/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp b/src/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp index 7c903b738c..bd3ecc53d4 100644 --- a/src/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp +++ b/src/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp @@ -111,11 +111,11 @@ namespace Impl{ // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes. - cudaError_t error_t; - error_t = cudaMalloc((void**)&(h->pBuffer), pBufferSize); + cudaError_t my_error; + my_error = cudaMalloc((void**)&(h->pBuffer), pBufferSize); - if (cudaSuccess != error_t) - std::cout << "cudmalloc pBuffer error_t error name " << cudaGetErrorString(error_t) << std::endl; + if (cudaSuccess != my_error) + std::cout << "cudmalloc pBuffer error_t error name " << cudaGetErrorString(my_error) << std::endl; status = cusparseDcsrsv2_analysis( h->handle, @@ -147,11 +147,11 @@ namespace Impl{ &pBufferSize); // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes. - cudaError_t error_t; - error_t = cudaMalloc((void**)&(h->pBuffer), pBufferSize); + cudaError_t my_error; + my_error = cudaMalloc((void**)&(h->pBuffer), pBufferSize); - if (cudaSuccess != error_t) - std::cout << "cudmalloc pBuffer error_t error name " << cudaGetErrorString(error_t) << std::endl; + if (cudaSuccess != my_error) + std::cout << "cudmalloc pBuffer error_t error name " << cudaGetErrorString(my_error) << std::endl; status = cusparseZcsrsv2_analysis( h->handle, From bca2ddaf18890ee31ab9e4cd6cb5974a5ff32c88 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Fri, 22 May 2020 16:42:30 -0600 Subject: [PATCH 8/9] Fixing a header and some tpl management. --- src/common/KokkosKernels_Controls.hpp | 4 ++-- src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/common/KokkosKernels_Controls.hpp b/src/common/KokkosKernels_Controls.hpp index 0b317858c9..6562e17e4d 100644 --- a/src/common/KokkosKernels_Controls.hpp +++ b/src/common/KokkosKernels_Controls.hpp @@ -106,8 +106,8 @@ namespace Experimental{ cublasHandle_t getCublasHandle() { if(cublasHandle == 0) { - KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlas::singleton(); - cublasHandle = s.cublasHandle; + KokkosBlas::Impl::CudaBlasSingleton & s = KokkosBlas::Impl::CudaBlasSingleton::singleton(); + cublasHandle = s.handle; } return cublasHandle; } diff --git a/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp b/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp index 8ecf0cea27..cee904ce9c 100644 --- a/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp +++ b/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp @@ -45,6 +45,8 @@ #ifndef KOKKOSKERNELS_TPL_HANDLES_DECL_HPP_ #define KOKKOSKERNELS_TPL_HANDLES_DECL_HPP_ +#include "KokkosBlas_tpl_spec.hpp" + #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #include "KokkosKernels_SparseUtils_cusparse.hpp" From f7d9bf0e635fda677a25f14ba49eb0cf38d69c50 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Fri, 22 May 2020 17:26:20 -0600 Subject: [PATCH 9/9] For some reasons the header was not fixed in the previous commit... --- src/impl/tpls/KokkosKernels_tpl_handles.cpp | 87 --------------------- 1 file changed, 87 deletions(-) diff --git a/src/impl/tpls/KokkosKernels_tpl_handles.cpp b/src/impl/tpls/KokkosKernels_tpl_handles.cpp index 64cdb26bbf..a856c1cdf3 100644 --- a/src/impl/tpls/KokkosKernels_tpl_handles.cpp +++ b/src/impl/tpls/KokkosKernels_tpl_handles.cpp @@ -41,93 +41,6 @@ // ************************************************************************ //@HEADER */ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ #include #include "KokkosKernels_config.h" #include "KokkosKernels_tpl_handles_def.hpp"