From 47f3a0b26dfd7c1a0238abd6ae03886ddc27bf11 Mon Sep 17 00:00:00 2001 From: Carl William Pearson Date: Tue, 5 Jul 2022 14:50:49 -0600 Subject: [PATCH] remove BlockCrsMatrix --- .../KokkosBatched_Test_BlockCrs_Cuda.cpp | 133 -- .../KokkosBatched_Test_BlockCrs_Host.cpp | 120 -- perf_test/batched/scripts/test-bcrs.sh | 43 - perf_test/sparse/CMakeLists.txt | 4 - perf_test/sparse/KokkosSparse_block_pcg.cpp | 4 +- .../sparse/KokkosSparse_spmv_blockcrs.cpp | 526 ------ scripts/analysis/batched/pd.py | 8 - src/CMakeLists.txt | 14 - .../KokkosBatched_Test_BlockCrs_Util.hpp | 942 ----------- ...e_spmv_blockcrsmatrix_eti_spec_inst.cpp.in | 56 - ...pmv_mv_blockcrsmatrix_eti_spec_inst.cpp.in | 56 - ..._spmv_blockcrsmatrix_eti_spec_avail.hpp.in | 56 - ...e_spmv_blockcrsmatrix_eti_spec_decl.hpp.in | 56 - ...mv_mv_blockcrsmatrix_eti_spec_avail.hpp.in | 56 - ...pmv_mv_blockcrsmatrix_eti_spec_decl.hpp.in | 56 - ...rse_spmv_blockcrsmatrix_tpl_spec_avail.hpp | 70 - ...arse_spmv_blockcrsmatrix_tpl_spec_decl.hpp | 48 - src/sparse/KokkosSparse_BlockCrsMatrix.hpp | 1006 ----------- src/sparse/KokkosSparse_Utils.hpp | 176 +- src/sparse/KokkosSparse_gauss_seidel.hpp | 8 +- src/sparse/KokkosSparse_spmv.hpp | 264 +-- .../impl/KokkosSparse_gauss_seidel_spec.hpp | 57 - .../KokkosSparse_spmv_blockcrsmatrix_impl.hpp | 1178 ------------- .../KokkosSparse_spmv_blockcrsmatrix_spec.hpp | 284 ---- test_common/KokkosBatched_Test_BlockCrs.hpp | 1467 ----------------- unit_test/sparse/Test_Sparse.hpp | 2 - .../sparse/Test_Sparse_BlockCrsMatrix.hpp | 384 ----- .../sparse/Test_Sparse_block_gauss_seidel.hpp | 20 +- .../sparse/Test_Sparse_spmv_blockcrs.hpp | 527 ------ 29 files changed, 92 insertions(+), 7529 deletions(-) delete mode 100644 perf_test/batched/KokkosBatched_Test_BlockCrs_Cuda.cpp delete mode 100644 perf_test/batched/KokkosBatched_Test_BlockCrs_Host.cpp delete mode 100755 perf_test/batched/scripts/test-bcrs.sh delete mode 100644 perf_test/sparse/KokkosSparse_spmv_blockcrs.cpp delete mode 
100644 src/batched/dense/KokkosBatched_Test_BlockCrs_Util.hpp delete mode 100644 src/impl/generated_specializations_cpp/spmv/KokkosSparse_spmv_blockcrsmatrix_eti_spec_inst.cpp.in delete mode 100644 src/impl/generated_specializations_cpp/spmv/KokkosSparse_spmv_mv_blockcrsmatrix_eti_spec_inst.cpp.in delete mode 100644 src/impl/generated_specializations_hpp/KokkosSparse_spmv_blockcrsmatrix_eti_spec_avail.hpp.in delete mode 100644 src/impl/generated_specializations_hpp/KokkosSparse_spmv_blockcrsmatrix_eti_spec_decl.hpp.in delete mode 100644 src/impl/generated_specializations_hpp/KokkosSparse_spmv_mv_blockcrsmatrix_eti_spec_avail.hpp.in delete mode 100644 src/impl/generated_specializations_hpp/KokkosSparse_spmv_mv_blockcrsmatrix_eti_spec_decl.hpp.in delete mode 100644 src/impl/tpls/KokkosSparse_spmv_blockcrsmatrix_tpl_spec_avail.hpp delete mode 100644 src/impl/tpls/KokkosSparse_spmv_blockcrsmatrix_tpl_spec_decl.hpp delete mode 100644 src/sparse/KokkosSparse_BlockCrsMatrix.hpp delete mode 100644 src/sparse/impl/KokkosSparse_spmv_blockcrsmatrix_impl.hpp delete mode 100644 src/sparse/impl/KokkosSparse_spmv_blockcrsmatrix_spec.hpp delete mode 100644 test_common/KokkosBatched_Test_BlockCrs.hpp delete mode 100644 unit_test/sparse/Test_Sparse_BlockCrsMatrix.hpp delete mode 100644 unit_test/sparse/Test_Sparse_spmv_blockcrs.hpp diff --git a/perf_test/batched/KokkosBatched_Test_BlockCrs_Cuda.cpp b/perf_test/batched/KokkosBatched_Test_BlockCrs_Cuda.cpp deleted file mode 100644 index 50f15cf719..0000000000 --- a/perf_test/batched/KokkosBatched_Test_BlockCrs_Cuda.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* Implementation for testing KokkosKernels on BCRS operations - - block-tridiagonal factorization - - block-tridiagonal solve - - bcrs matvec - - StructuredBlock represents a 3D mesh having ni, nj, nk cells in each - dimension. Variable ordering is such that the k index is the fastest and the - i index is slowest. Smoothing lines are built in the k direction. 
- BlockCrsMatrix is a simple block CRS data structure. - BlockTridiagMatrices holds the block tridiagonal matrices. - - An example run is - ./driver -ni 32 -nj 32 -nk 128 -bs 5 -c - - This runs a sequence of unit tests, then runs a problem having a 32x32x128 - structured block with the lines oriented along the third dimension (line - length = 128). The block size is 5. -c adds a somewhat expensive check of the - answer. It's good to run with -c once in a while, but the cheap unit tests - that always run before the big problem already provide good coverage. -*/ - -#include "Kokkos_Core.hpp" -#include "Kokkos_Timer.hpp" - -#if defined(KOKKOS_ENABLE_CUDA) -#define __KOKKOSBATCHED_TEST_ENABLE_CUDA__ - -#include "KokkosBatched_Util.hpp" - -#define KOKKOSBATCHED_USE_UNBLOCKED_ALGO 1 -//#define KOKKOSBATCHED_USE_BLOCKED_ALGO 1 - -#if defined(KOKKOSBATCHED_USE_UNBLOCKED_ALGO) -typedef KokkosBatched::Algo::LU::Unblocked AlgoLU; -typedef KokkosBatched::Algo::Trsm::Unblocked AlgoTrsm; -typedef KokkosBatched::Algo::Gemm::Unblocked AlgoGemm; - -typedef KokkosBatched::Algo::Trsv::Unblocked AlgoTrsv; -typedef KokkosBatched::Algo::Gemv::Unblocked AlgoGemv; -#endif -#if defined(KOKKOSBATCHED_USE_BLOCKED_ALGO) -typedef KokkosBatched::Algo::LU::Blocked AlgoLU; -typedef KokkosBatched::Algo::Trsm::Blocked AlgoTrsm; -typedef KokkosBatched::Algo::Gemm::Blocked AlgoGemm; - -typedef KokkosBatched::Algo::Trsv::Blocked AlgoTrsv; -typedef KokkosBatched::Algo::Gemv::Blocked AlgoGemv; -#endif - -#include "KokkosBatched_Test_BlockCrs.hpp" - -using namespace KokkosBatched; - -int main(int argc, char* argv[]) { - Kokkos::initialize(argc, argv); - - typedef Kokkos::DefaultExecutionSpace DeviceSpaceType; - - const bool detail = false; - - Kokkos::print_configuration(std::cout, detail); - - enum : int { - VectorLength = - DefaultVectorLength::value, - RangeTagOper = 0, - TeamTagOper = 1 - }; - - // Unit tests - bool profile = false; - for (int i = 1; i < argc; ++i) { - const std::string& token = 
argv[i]; - if (strncmp(token.c_str(), "-profile", 8) == 0) profile = true; - } - - if (!profile) { - // std::cout << " Unit Test::Range :: Begin\n"; - // { - // Test::run( - // 3, 4, 2, 25, 2); - // Test::run(44, - // 63, 15, 4, 1); - // Test::run( - // 2, 2, 15, 3, 3); - // Test::run( - // 1, 1, 2, 63, 8); - - // for (int nrhs=1;nrhs<=33;++nrhs) - // Test::run(2, - // 2, 15, 3, nrhs); - // } - // std::cout << " Unit Test::Range :: End\n"; - - std::cout << " Unit Test::Team :: Begin\n"; - { - Test::run( - 3, 4, 2, 25, 2); - Test::run( - 44, 63, 15, 4, 1); - Test::run( - 2, 2, 15, 3, 3); - Test::run( - 1, 1, 2, 63, 8); - - for (int nrhs = 1; nrhs <= 33; ++nrhs) - Test::run(2, 2, 15, 3, nrhs); - } - std::cout << " Unit Test::Team :: End\n"; - } - - // Performance tests - std::cout << " Perf Test:: Begin\n"; - { - const Test::Input input(argc, argv); - Test::run(input); - } - std::cout << " Perf Test:: End\n"; - - Kokkos::finalize(); - - return 0; -} -#else - -int main(int argc, char *argv[]) { - std::cout << "Kokkos::Cuda is not enabled\n"; - return -1; -} - -#endif diff --git a/perf_test/batched/KokkosBatched_Test_BlockCrs_Host.cpp b/perf_test/batched/KokkosBatched_Test_BlockCrs_Host.cpp deleted file mode 100644 index 1319fa03db..0000000000 --- a/perf_test/batched/KokkosBatched_Test_BlockCrs_Host.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* Implementation for testing KokkosKernels on BCRS operations - - block-tridiagonal factorization - - block-tridiagonal solve - - bcrs matvec - - StructuredBlock represents a 3D mesh having ni, nj, nk cells in each - dimension. Variable ordering is such that the k index is the fastest and the - i index is slowest. Smoothing lines are built in the k direction. - BlockCrsMatrix is a simple block CRS data structure. - BlockTridiagMatrices holds the block tridiagonal matrices. 
- - An example run is - ./driver -ni 32 -nj 32 -nk 128 -bs 5 -c - - This runs a sequence of unit tests, then runs a problem having a 32x32x128 - structured block with the lines oriented along the third dimension (line - length = 128). The block size is 5. -c adds a somewhat expensive check of the - answer. It's good to run with -c once in a while, but the cheap unit tests - that always run before the big problem already provide good coverage. -*/ - -#include "Kokkos_Core.hpp" -#include "Kokkos_Timer.hpp" - -#include "KokkosBatched_Util.hpp" - -//#define KOKKOSBATCHED_USE_UNBLOCKED_ALGO 1 -#define KOKKOSBATCHED_USE_BLOCKED_ALGO 1 - -#if defined(KOKKOSBATCHED_USE_UNBLOCKED_ALGO) -typedef KokkosBatched::Algo::LU::Unblocked AlgoLU; -typedef KokkosBatched::Algo::Trsm::Unblocked AlgoTrsm; -typedef KokkosBatched::Algo::Gemm::Unblocked AlgoGemm; - -typedef KokkosBatched::Algo::Trsv::Unblocked AlgoTrsv; -typedef KokkosBatched::Algo::Gemv::Unblocked AlgoGemv; -#endif -#if defined(KOKKOSBATCHED_USE_BLOCKED_ALGO) -typedef KokkosBatched::Algo::LU::Blocked AlgoLU; -typedef KokkosBatched::Algo::Trsm::Blocked AlgoTrsm; -typedef KokkosBatched::Algo::Gemm::Blocked AlgoGemm; - -typedef KokkosBatched::Algo::Trsv::Blocked AlgoTrsv; -typedef KokkosBatched::Algo::Gemv::Blocked AlgoGemv; -#endif - -#include "KokkosBatched_Test_BlockCrs.hpp" - -using namespace KokkosBatched; - -int main(int argc, char* argv[]) { - Kokkos::initialize(argc, argv); - -#if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) - typedef Kokkos::DefaultHostExecutionSpace HostSpaceType; - const bool detail = false; - - Kokkos::print_configuration(std::cout, detail); - - enum : int { - VectorLength = - DefaultVectorLength::value, - RangeTagOper = 0 - }; - - // vector type - typedef Vector, VectorLength> VectorType; - - // Unit tests - bool profile = false; - for (int i = 1; i < argc; ++i) { - const std::string& token = argv[i]; - if (strncmp(token.c_str(), "-profile", 8) == 0) profile = true; - } - - if 
(!profile) { - // including compact layer, it is not possible to test - // scalar and vector in the same code without templating - std::cout << " Unit Test::Range::Vector :: Begin\n"; - { - Test::run(3, 4, 2, - 25, 2); - Test::run( - 44, 63, 15, 4, 1); - Test::run(2, 2, 15, - 3, 3); - - for (int nrhs = 1; nrhs <= 33; ++nrhs) - Test::run( - 2, 2, 15, 3, nrhs); - } - - std::cout << " Unit Test::Range::Vector :: End\n"; - } - - // MKL -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) - std::cout << " Perf Test::CompactMKL Begin\n"; - { - const bool test_mkl = true; - const Test::Input input(argc, argv); - Test::run(input, test_mkl); - } - std::cout << " Perf Test::CompactMKL End\n"; -#endif - - // Performance tests - std::cout << " Perf Test::Vector Begin\n"; - { - const Test::Input input(argc, argv); - Test::run(input); - } - std::cout << " Perf Test::Vector End\n"; - -#endif - Kokkos::finalize(); - - return 0; -} diff --git a/perf_test/batched/scripts/test-bcrs.sh b/perf_test/batched/scripts/test-bcrs.sh deleted file mode 100755 index 639fa8bb6c..0000000000 --- a/perf_test/batched/scripts/test-bcrs.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -# ./testboth.sh > output.txt -# grep ">>>\|>> \|Timer:" output.txt - -#numacmd="KMP_AFFINITY=balanced numactl --membind 1" - -# 128x128x128 domain is too large to use hbm only -numacmd="KMP_AFFINITY=balanced" -#sz="-ni 128 -nj 128 -nk 128" - -# 4 5 8 9 A < 10 GB -for bsz in 3 5; do - sz="-ni 128 -nj 128 -nk 128" - echo ">>> bsz $bsz" - for nth in 4 8 16 34 68 136 272; do - echo ">> nthread $nth" - echo "> kk" - cmd="$numacmd ./KokkosKernels_Test_BlockCrs --kokkos-threads=$nth $sz -bs $bsz" - echo $cmd - eval $cmd - echo "> sparc" - cmd="$numacmd ./bcrs --kokkos-threads=$nth $sz -bs $bsz" - echo $cmd - eval $cmd - done -done - -for bsz in 10 15; do - sz="-ni 64 -nj 64 -nk 128" - echo ">>> bsz $bsz" - for nth in 4 8 16 34 68 136 272; do - echo ">> nthread $nth" - echo "> kk" - cmd="$numacmd 
./KokkosKernels_Test_BlockCrs --kokkos-threads=$nth $sz -bs $bsz" - echo $cmd - eval $cmd - echo "> sparc" - cmd="$numacmd ./bcrs --kokkos-threads=$nth $sz -bs $bsz" - echo $cmd - eval $cmd - done -done diff --git a/perf_test/sparse/CMakeLists.txt b/perf_test/sparse/CMakeLists.txt index fe2b7a094e..0a8538cef4 100644 --- a/perf_test/sparse/CMakeLists.txt +++ b/perf_test/sparse/CMakeLists.txt @@ -65,10 +65,6 @@ IF (KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) ) ENDIF () -KOKKOSKERNELS_ADD_EXECUTABLE( - sparse_spmv_blockcrs - SOURCES KokkosSparse_spmv_blockcrs.cpp -) KOKKOSKERNELS_ADD_EXECUTABLE( sparse_spmv_bsr diff --git a/perf_test/sparse/KokkosSparse_block_pcg.cpp b/perf_test/sparse/KokkosSparse_block_pcg.cpp index 25d7a65fdd..0e910ad9c9 100644 --- a/perf_test/sparse/KokkosSparse_block_pcg.cpp +++ b/perf_test/sparse/KokkosSparse_block_pcg.cpp @@ -322,7 +322,7 @@ void run_experiment( // typedef typename lno_nnz_view_t::value_type lno_t; // typedef typename lno_view_t::value_type size_type; // typedef typename scalar_view_t::value_type scalar_t; - KokkosKernels::Impl::kk_create_blockcrs_formated_point_crsmatrix( + KokkosKernels::Impl::kk_create_bsr_formated_point_crsmatrix( block_size, crsmat.numRows(), crsmat.numCols(), crsmat.graph.row_map, crsmat.graph.entries, crsmat.values, out_r, out_c, pf_rm, pf_e, pf_v); @@ -349,7 +349,7 @@ void run_experiment( scalar_view_t bf_v; size_t but_r, but_c; - KokkosKernels::Impl::kk_create_blockcrs_from_blockcrs_formatted_point_crs( + KokkosKernels::Impl::kk_create_bsr_from_bsr_formatted_point_crs( block_size, out_r, out_c, pf_rm, pf_e, pf_v, but_r, but_c, bf_rm, bf_e, bf_v); diff --git a/perf_test/sparse/KokkosSparse_spmv_blockcrs.cpp b/perf_test/sparse/KokkosSparse_spmv_blockcrs.cpp deleted file mode 100644 index 1eb7f0b8da..0000000000 --- a/perf_test/sparse/KokkosSparse_spmv_blockcrs.cpp +++ /dev/null @@ -1,526 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? 
Contact Luc Berger-Vergiat (lberge@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include "KokkosKernels_default_types.hpp" -#include - -namespace details { - -enum class Implementation : int { KokkosKernels = 0, Cuda = 1, MKL = 2 }; - -/// -/// Define default types -/// -typedef double Scalar; -typedef int Ordinal; -/// -////////////////////////// - -/// Random generator -template -inline scalar_t random() { - auto const max = static_cast(RAND_MAX) + static_cast(1); - return static_cast(std::rand()) / max; -} - -template -inline void set_random_value(scalar_t &v) { - v = random(); -} - -template -inline void set_random_value(Kokkos::complex &v) { - Scalar vre = random(); - Scalar vim = random(); - v = Kokkos::complex(vre, vim); -} - -template -inline void set_random_value(std::complex &v) { - scalar_t vre = random(); - scalar_t vim = random(); - v = std::complex(vre, vim); -} - -template -void make_block_entries( - const KokkosSparse::CrsMatrix &mat_b1, - int blockSize, std::vector &mat_rowmap, - std::vector &mat_colidx, std::vector &mat_val) { - Ordinal nRow = blockSize * mat_b1.numRows(); - size_t nnz = static_cast(blockSize) * static_cast(blockSize) * - mat_b1.nnz(); - - mat_val.resize(nnz); - for (size_t ii = 0; ii < nnz; ++ii) set_random_value(mat_val[ii]); - - // - // Create graph for CrsMatrix - // - - mat_rowmap.assign(nRow + 1, 0); - mat_colidx.assign(nnz, 0); - - for (Ordinal ir = 0; ir < mat_b1.numRows(); ++ir) { - const auto jbeg = mat_b1.graph.row_map(ir); - const auto jend = mat_b1.graph.row_map(ir + 1); - for (Ordinal ib = 0; ib < blockSize; ++ib) { - const Ordinal my_row = ir * blockSize + ib; - mat_rowmap[my_row + 1] = mat_rowmap[my_row] + (jend - jbeg) * blockSize; - for (auto ijk = jbeg; ijk < jend; ++ijk) { - const auto col0 = mat_b1.graph.entries(ijk); - 
for (Ordinal jb = 0; jb < blockSize; ++jb) { - mat_colidx[mat_rowmap[my_row] + (ijk - jbeg) * blockSize + jb] = - col0 * blockSize + jb; - } - } - } - } // for (Ordinal ir = 0; ir < mat_b1.numRows(); ++ir) -} - -template -int test_blockcrs_matrix_single_vec( - const char fOp[], - KokkosSparse::CrsMatrix - mat_b1, - int test, int loop, const scalar_t alpha, const scalar_t beta, - const int bMax) { - typedef typename KokkosSparse::CrsMatrix< - scalar_t, Ordinal, Kokkos::DefaultExecutionSpace, void, size_t> - crsMat_type; - - typedef typename crsMat_type::values_type::non_const_type scalar_view_t; - typedef scalar_view_t x_vector_type; - typedef scalar_view_t y_vector_type; - - srand(17312837); - - int num_errors = 0; - const auto bMax_o = static_cast(bMax); - for (Ordinal blockSize = 1; blockSize <= bMax_o; ++blockSize) { - Ordinal nRow = blockSize * mat_b1.numRows(); - Ordinal nCol = nRow; - std::vector mat_rowmap; - std::vector mat_colidx; - std::vector mat_val; - - // Create the entries - make_block_entries(mat_b1, blockSize, mat_rowmap, mat_colidx, - mat_val); - - // Create the CrsMatrix for the reference computation - crsMat_type Acrs("new_crs_matr", nRow, nCol, mat_val.size(), &mat_val[0], - &mat_rowmap[0], &mat_colidx[0]); - - x_vector_type xref("new_right_hand_side", nRow); - auto h_xref = Kokkos::create_mirror_view(xref); - for (Ordinal ir = 0; ir < nRow; ++ir) { - set_random_value(h_xref(ir)); - } - Kokkos::deep_copy(xref, h_xref); - - y_vector_type y0("y_init", nRow); - auto h_y0 = Kokkos::create_mirror_view(y0); - for (Ordinal ir = 0; ir < nRow; ++ir) set_random_value(h_y0(ir)); - Kokkos::deep_copy(y0, h_y0); - - y_vector_type ycrs("crs_product_result", nRow); - auto h_ycrs = Kokkos::create_mirror_view(ycrs); - - // Time a series of multiplications with the CrsMatrix - double time_crs = 0.0; - for (int jr = 0; jr < loop; ++jr) { - for (Ordinal ir = 0; ir < nRow; ++ir) h_ycrs(ir) = h_y0(ir); - Kokkos::deep_copy(ycrs, h_ycrs); - Kokkos::Timer timer; - 
KokkosSparse::spmv(fOp, alpha, Acrs, xref, beta, ycrs); - time_crs += timer.seconds(); - } - - // Create the output vector - y_vector_type yblockcrs("product_result", nRow); - auto h_yblockcrs = Kokkos::create_mirror_view(yblockcrs); - - double time_blockcrs = 0.0; - // Create the BlockCrsMatrix - KokkosSparse::Experimental::BlockCrsMatrix< - scalar_t, Ordinal, Kokkos::DefaultExecutionSpace, void, size_t> - Ablockcrs(Acrs, blockSize); - - switch (static_cast(test)) { - default: - case Implementation::KokkosKernels: { - // Time a series of multiplications with the BlockCrsMatrix - for (int jr = 0; jr < loop; ++jr) { - for (Ordinal ir = 0; ir < nRow; ++ir) h_yblockcrs(ir) = h_y0(ir); - Kokkos::deep_copy(yblockcrs, h_yblockcrs); - Kokkos::Timer timer; - KokkosSparse::spmv(fOp, alpha, Ablockcrs, xref, beta, yblockcrs); - time_blockcrs += timer.seconds(); - } - } break; - } - - // Check that the numerical result is matching - Kokkos::deep_copy(h_ycrs, ycrs); - Kokkos::deep_copy(h_yblockcrs, yblockcrs); - double error = 0.0, maxNorm = 0.0; - for (size_t ir = 0; ir < h_ycrs.extent(0); ++ir) { - maxNorm = std::max(maxNorm, Kokkos::ArithTraits::abs(h_ycrs(ir))); - error = std::max(error, Kokkos::ArithTraits::abs( - h_ycrs(ir) - h_yblockcrs(ir))); - } - - double tol = - (mat_val.size() / nRow) * std::numeric_limits::epsilon(); - if (error > tol * maxNorm) { - num_errors += 1; - std::cout << static_cast(test) << " "; - std::cout << fOp << ", " << blockSize << " : " - << " error " << error << " maxNorm " << maxNorm << " tol " - << tol << " tol * maxNorm " << tol * maxNorm << "\n"; - } - - //-- Print the number of Gflops for both products - if (blockSize == 1) { - printf("Op, blockSize: AvgGFlop(CrsMatrix) AvgGFlop(BlockCrsMatrix) \n"); - } - double num_flops = mat_val.size() * 2 * loop; - double crs_flop = (num_flops / time_crs) * 1.0e-09; - double blockcrs_flop = (num_flops / time_blockcrs) * 1.0e-09; - std::cout << fOp << ", " << blockSize << " : "; - if (crs_flop < 
blockcrs_flop) { - std::cout << crs_flop << " <" << blockcrs_flop << ">"; - } else { - std::cout << "<" << crs_flop << "> " << blockcrs_flop; - } - std::cout << std::endl; - - } // for (Ordinal blockSize = 1; blockSize < bMax; ++blockSize) - - return int(num_errors); -} - -template -int test_blockcrs_matrix_vec( - const char fOp[], - KokkosSparse::CrsMatrix - mat_b1, - int nvec, int test, int loop, const scalar_t alpha, const scalar_t beta, - const int bMax) { - typedef typename KokkosSparse::CrsMatrix< - scalar_t, Ordinal, Kokkos::DefaultExecutionSpace, void, size_t> - crsMat_type; - - typedef Kokkos::View - block_vector_t; - - srand(17312837); - - int num_errors = 0; - const auto bMax_o = static_cast(bMax); - for (Ordinal blockSize = 1; blockSize <= bMax_o; ++blockSize) { - Ordinal nRow = blockSize * mat_b1.numRows(); - Ordinal nCol = nRow; - std::vector mat_rowmap; - std::vector mat_colidx; - std::vector mat_val; - - make_block_entries(mat_b1, blockSize, mat_rowmap, mat_colidx, - mat_val); - - // Create the CrsMatrix for the reference computation - crsMat_type Acrs("new_crs_matr", nRow, nCol, mat_val.size(), &mat_val[0], - &mat_rowmap[0], &mat_colidx[0]); - - block_vector_t xref("new_right_hand_side", nRow, nvec); - auto h_xref = Kokkos::create_mirror_view(xref); - for (Ordinal jc = 0; jc < nvec; ++jc) { - for (Ordinal ir = 0; ir < nRow; ++ir) { - set_random_value(h_xref(ir, jc)); - } - } - Kokkos::deep_copy(xref, h_xref); - - block_vector_t y0("y_init", nRow, nvec); - auto h_y0 = Kokkos::create_mirror_view(y0); - for (Ordinal jc = 0; jc < nvec; ++jc) - for (Ordinal ir = 0; ir < nRow; ++ir) set_random_value(h_y0(ir, jc)); - Kokkos::deep_copy(y0, h_y0); - - block_vector_t ycrs("crs_product_result", nRow, nvec); - auto h_ycrs = Kokkos::create_mirror_view(ycrs); - - // Time a series of multiplications with the CrsMatrix format - double time_crs = 0.0; - for (int jr = 0; jr < loop; ++jr) { - for (Ordinal jc = 0; jc < nvec; ++jc) - for (Ordinal ir = 0; ir < nRow; 
++ir) h_ycrs(ir, jc) = h_y0(ir, jc); - Kokkos::deep_copy(ycrs, h_ycrs); - Kokkos::Timer timer; - KokkosSparse::spmv(fOp, alpha, Acrs, xref, beta, ycrs); - time_crs += timer.seconds(); - } - - // Create the BlockCrsMatrix variable - KokkosSparse::Experimental::BlockCrsMatrix< - scalar_t, Ordinal, Kokkos::DefaultExecutionSpace, void, size_t> - Ablockcrs(Acrs, blockSize); - - block_vector_t yblockcrs("blockcrs_product_result", nRow, nvec); - auto h_yblockcrs = Kokkos::create_mirror_view(yblockcrs); - - // Time a series of multiplications with the BlockCrsMatrix - double time_blockcrs = 0.0; - switch (static_cast(test)) { - default: - case Implementation::KokkosKernels: { - // Time a series of multiplications with the BlockCrsMatrix - for (int jr = 0; jr < loop; ++jr) { - for (Ordinal jc = 0; jc < nvec; ++jc) { - for (Ordinal ir = 0; ir < nRow; ++ir) - h_yblockcrs(ir, jc) = h_y0(ir, jc); - } - Kokkos::deep_copy(yblockcrs, h_yblockcrs); - Kokkos::Timer timer; - KokkosSparse::spmv(fOp, alpha, Ablockcrs, xref, beta, yblockcrs); - time_blockcrs += timer.seconds(); - } - } break; - } - - // Check that the result is matching - Kokkos::deep_copy(h_ycrs, ycrs); - Kokkos::deep_copy(h_yblockcrs, yblockcrs); - double tol = - (mat_val.size() / nRow) * std::numeric_limits::epsilon(); - for (int jc = 0; jc < nvec; ++jc) { - double error = 0.0, maxNorm = 0.0; - for (size_t ir = 0; ir < h_ycrs.extent(0); ++ir) { - maxNorm = - std::max(maxNorm, Kokkos::ArithTraits::abs(h_ycrs(ir, jc))); - error = std::max(error, Kokkos::ArithTraits::abs( - h_ycrs(ir, jc) - h_yblockcrs(ir, jc))); - } - if (error > tol * maxNorm) { - num_errors += 1; - std::cout << fOp << ", " << blockSize << " : rhs " << jc << " error " - << error << " maxNorm " << maxNorm << " tol " << tol - << " tol * maxNorm " << tol * maxNorm << "\n"; - } - } - - // Print the number of Gflops - if (blockSize == 1) { - printf("Op, blockSize: AvgGFlop(CrsMatrix) AvgGFlop(BlockCrsMatrix) \n"); - } - double num_flops = mat_val.size() * 
2 * loop * nvec; - double crs_flop = (num_flops / time_crs) * 1.0e-09; - double blockcrs_flop = (num_flops / time_blockcrs) * 1.0e-09; - std::cout << fOp << ", " << blockSize << " "; - if (crs_flop < blockcrs_flop) { - // std::cout << crs_flop << " <" << blockcrs_flop << ">"; - std::cout << crs_flop << " " << blockcrs_flop << " "; - } else { - // std::cout << "<" << crs_flop << "> " << blockcrs_flop; - std::cout << " " << crs_flop << " " << blockcrs_flop; - } - std::cout << std::endl; - } - - return int(num_errors); -} - -void print_help() { - printf("BlockCrsMatrix SPMV benchmark code \n"); - printf("Options:\n"); - printf( - " -bs : Maximum blocksize for the sparse matrix (default " - "= " - "16). \n"); - printf(" -h : Help. \n"); - printf( - " -l [LOOP] : How many spmv to run to aggregate average time " - "(default = 512). \n"); - printf( - " -nx : Number of points in the x-direction (default = " - "32).\n"); - printf( - " The matrix will be of dimension nx (nx - 1) (nx + " - "1).\n"); - printf( - " -nv : Number of vectors to multiply with (default = 1). " - "\n"); - printf(" --op : Use different operation \n"); - printf(" Options: \n"); - printf(" N = normal (default) y <- alpha A x + beta y\n"); - printf( - " C = conjugate y <- alpha conj(A) x + beta " - "y\n"); - printf( - " T = transpose y <- alpha A^T x + beta " - "y\n"); - printf( - " H = hermitian y <- alpha A^H x + beta " - "y\n"); -} -} // namespace details - -int main(int argc, char **argv) { - int loop = 512; - int bMax = 16; - int nvec = 1; - int nx = 32; - - char fOp[] = "N"; - - int test = static_cast(details::Implementation::KokkosKernels); - - for (int i = 0; i < argc; i++) { - if ((strcmp(argv[i], "-bs") == 0)) { - int tmp = atoi(argv[++i]); - bMax = (tmp > 0) ? 
tmp : bMax; - continue; - } - - if ((strcmp(argv[i], "--tpl") == 0)) { - i++; - if ((strcmp(argv[i], "cuda") == 0)) - test = static_cast(details::Implementation::Cuda); - if ((strcmp(argv[i], "mkl") == 0)) - test = static_cast(details::Implementation::MKL); - continue; - } - - if ((strcmp(argv[i], "--help") == 0) || (strcmp(argv[i], "-h") == 0)) { - details::print_help(); - return 0; - } - - if ((strcmp(argv[i], "-l") == 0)) { - int tmp = atoi(argv[++i]); - loop = (tmp > 0) ? tmp : loop; - continue; - } - - if ((strcmp(argv[i], "-nx") == 0)) { - int tmp = atoi(argv[++i]); - nx = (tmp > 0) ? tmp : nx; - continue; - } - - if ((strcmp(argv[i], "-nv") == 0)) { - int tmp = atoi(argv[++i]); - nvec = (tmp > 0) ? tmp : nvec; - continue; - } - - if ((strcmp(argv[i], "--op") == 0)) { - i++; - if ((strcmp(argv[i], "N") == 0)) strcpy(fOp, "N"); - if ((strcmp(argv[i], "C") == 0)) strcpy(fOp, "C"); - if ((strcmp(argv[i], "T") == 0)) strcpy(fOp, "T"); - if ((strcmp(argv[i], "H") == 0)) strcpy(fOp, "H"); - continue; - } - } - - Kokkos::initialize(argc, argv); - { - // The mat_structure view is used to generate a matrix using - // finite difference (FD) or finite element (FE) discretization - // on a cartesian grid. 
- Kokkos::View mat_structure( - "Matrix Structure", 3); - mat_structure(0, 0) = nx; // Request 8 grid point in 'x' direction - mat_structure(0, 1) = 0; // Add BC to the left - mat_structure(0, 2) = 0; // Add BC to the right - mat_structure(1, 0) = nx - 1; // Request 7 grid point in 'y' direction - mat_structure(1, 1) = 0; // Add BC to the bottom - mat_structure(1, 2) = 0; // Add BC to the top - mat_structure(2, 0) = nx + 1; // Request 9 grid point in 'z' direction - mat_structure(2, 1) = 0; // Add BC to the bottom - mat_structure(2, 2) = 0; // Add BC to the top - - typedef typename KokkosSparse::CrsMatrix - h_crsMat_type; - - h_crsMat_type mat_b1 = - Test::generate_structured_matrix3D("FD", mat_structure); - - int total_errors = 0; - - if (nvec == 1) - total_errors = details::test_blockcrs_matrix_single_vec( - fOp, mat_b1, test, loop, details::Scalar(3.1), details::Scalar(-2.4), - bMax); - else - total_errors = details::test_blockcrs_matrix_vec( - fOp, mat_b1, nvec, test, loop, details::Scalar(3.1), - details::Scalar(-2.4), bMax); - - if (total_errors != 0) { - printf("Kokkos::BlockCrsMatrix SpMV Test: Failed\n"); - } - } - Kokkos::finalize(); -} diff --git a/scripts/analysis/batched/pd.py b/scripts/analysis/batched/pd.py index 5b7ac37768..13e18757a5 100644 --- a/scripts/analysis/batched/pd.py +++ b/scripts/analysis/batched/pd.py @@ -23,9 +23,6 @@ Right now, this is probably the best format: ./pd.py -t --kkt-parse --kkt-plot-vs-nthreads-linlog -s foo -f KokkosKernels_Test_Gemm.txt --per-thread - Parse KokkosKernels_Test_BlockCrs_* example line: - ./pd.py --bcrs-parse --bcrs-plot-vs-nthreads-linlog --gflops -f KokkosKernels_Test_BlockCrs_SPARC.txt -s foo - Plots for workset size: ./pd.py --kkt-parse --kkt-plot-workset -f KokkosKernels_Test_Gemm.workset.txt -s foo To show speedup w.r.t. 
OpenMP MKL, add --speedup: @@ -624,11 +621,6 @@ def get_optparser(): p.add_option('--kkt-plot-vs-nthreads-linlog', dest='kkt_plot_vs_nthreads_linlog', action='store_true', default=False) p.add_option('--kkt-plot-workset', dest='kkt_plot_workset', action='store_true', default=False) - p.add_option('--bcrs-parse', dest='bcrs_parse', action='store_true', default=False, help='Parse KokkosKernels_Test_BlockCrs_* files.') - p.add_option('--bcrs-plot-vs-nthreads-loglog', dest='bcrs_plot_vs_nthreads_loglog', action='store_true', default=False) - p.add_option('--bcrs-plot-vs-nthreads-linlin', dest='bcrs_plot_vs_nthreads_linlin', action='store_true', default=False) - p.add_option('--bcrs-plot-vs-nthreads-linlog', dest='bcrs_plot_vs_nthreads_linlog', action='store_true', default=False) - p.add_option('--per-thread', dest='perthread', action='store_true', default=False) p.add_option('--gflops', dest='gflops', action='store_true', default=False) p.add_option('--speedup', dest='speedup', action='store_true', default=False) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8fd0bc21b8..93d653733f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -304,20 +304,6 @@ KOKKOSKERNELS_GENERATE_ETI(Sparse_spmv_mv_struct spmv TYPE_LISTS FLOATS ORDINALS OFFSETS LAYOUTS DEVICES ) -KOKKOSKERNELS_GENERATE_ETI(Sparse_spmv_blockcrsmatrix spmv - COMPONENTS sparse - HEADER_LIST ETI_HEADERS - SOURCE_LIST SOURCES - TYPE_LISTS FLOATS ORDINALS OFFSETS LAYOUTS DEVICES -) - -KOKKOSKERNELS_GENERATE_ETI(Sparse_spmv_mv_blockcrsmatrix spmv - COMPONENTS sparse - HEADER_LIST ETI_HEADERS - SOURCE_LIST SOURCES - TYPE_LISTS FLOATS ORDINALS OFFSETS LAYOUTS DEVICES -) - KOKKOSKERNELS_GENERATE_ETI(Sparse_spmv_bsrmatrix spmv COMPONENTS sparse HEADER_LIST ETI_HEADERS diff --git a/src/batched/dense/KokkosBatched_Test_BlockCrs_Util.hpp b/src/batched/dense/KokkosBatched_Test_BlockCrs_Util.hpp deleted file mode 100644 index 77fa690908..0000000000 --- 
a/src/batched/dense/KokkosBatched_Test_BlockCrs_Util.hpp +++ /dev/null @@ -1,942 +0,0 @@ -#include -#include -#include - -#include - -#include "Kokkos_Core.hpp" - -#include "KokkosBatched_Util.hpp" - -#define TEST_ASSERT(m, success) \ - if (!(m)) { \ - success = false; \ - printf("FAILED: %s, at %d, %s\n", #m, __LINE__, __FILE__); \ - } - -namespace KokkosBatched { - -namespace Test { - -typedef int ordinal_type; -typedef int size_type; -typedef double scalar_type; -#define BLOCKCRS_MAX_BLOCKSIZE 32 -#define FLOP_MUL 1.0 -#define FLOP_ADD 1.0 - -double LU_FlopCount(int mm, int nn) { - double m = (double)mm; - double n = (double)nn; - if (m > n) - return (FLOP_MUL * (0.5 * m * n * n - (1.0 / 6.0) * n * n * n + - 0.5 * m * n - 0.5 * n * n + (2.0 / 3.0) * n) + - FLOP_ADD * (0.5 * m * n * n - (1.0 / 6.0) * n * n * n - - 0.5 * m * n + (1.0 / 6.0) * n)); - else - return (FLOP_MUL * (0.5 * n * m * m - (1.0 / 6.0) * m * m * m + - 0.5 * n * m - 0.5 * m * m + (2.0 / 3.0) * m) + - FLOP_ADD * (0.5 * n * m * m - (1.0 / 6.0) * m * m * m - - 0.5 * n * m + (1.0 / 6.0) * m)); -} - -double Trsm_Lower_FlopCountLower(int mm, int nn) { - double m = (double)mm; - double n = (double)nn; - return (FLOP_MUL * (0.5 * m * n * (n + 1.0)) + - FLOP_ADD * (0.5 * m * n * (n - 1.0))); -} - -double Trsm_Upper_FlopCountUpper(int mm, int nn) { - double m = (double)mm; - double n = (double)nn; - return (FLOP_MUL * (0.5 * m * n * (n + 1.0)) + - FLOP_ADD * (0.5 * m * n * (n - 1.0))); -} - -double Gemm_FlopCount(int mm, int nn, int kk) { - double m = (double)mm; - double n = (double)nn; - double k = (double)kk; - return (FLOP_MUL * (m * n * k) + FLOP_ADD * (m * n * k)); -} - -template -double compute_relative_diff(const aViewType a, const bViewType b) { - // Bring the vectors to the host. This is just a correctness checker. 
- auto aa = Kokkos::create_mirror_view(a); - Kokkos::deep_copy(aa, a); - auto bb = Kokkos::create_mirror_view(b); - Kokkos::deep_copy(bb, b); - - double diff2 = 0, norm2 = 0; - for (ordinal_type i = 0, iend = aa.extent(0); i < iend; ++i) - for (ordinal_type j = 0, jend = aa.extent(1); j < jend; ++j) - for (ordinal_type k = 0, kend = aa.extent(2); k < kend; ++k) - for (ordinal_type l = 0, lend = aa.extent(3); l < lend; ++l) { - const double val = aa.access(i, j, k, l), - diff = aa.access(i, j, k, l) - bb.access(i, j, k, l); - diff2 += diff * diff; - norm2 += val * val; - } - - return std::sqrt(diff2 / norm2); -} - -// Representation of a structured block mesh. The fastest index is k. -struct StencilShape { - enum Enum { cross }; -}; - -struct StructuredBlock { - const ordinal_type ni, nj, nk; - - StructuredBlock(const ordinal_type ni_, const ordinal_type nj_, - const ordinal_type nk_) - : ni(ni_), nj(nj_), nk(nk_), _njnk(nj_ * nk_) {} - - KOKKOS_INLINE_FUNCTION - size_type size() const { return ni * nj * nk; } - - KOKKOS_INLINE_FUNCTION - size_type ij2id(const ordinal_type i, const ordinal_type j) const { - return i * nj + j; - } - - KOKKOS_INLINE_FUNCTION - void id2ij(const size_type id, ordinal_type &i, ordinal_type &j) const { - i = id / nj; - j = id % nj; - } - - KOKKOS_INLINE_FUNCTION - size_type ijk2id(const ordinal_type i, const ordinal_type j, - const ordinal_type k) const { - return (i * nj + j) * nk + k; - } - - KOKKOS_INLINE_FUNCTION - void id2ijk(const size_type id, ordinal_type &i, ordinal_type &j, - ordinal_type &k) const { - i = id / _njnk; - k = id % _njnk; - j = k / nk; - k = k % nk; - } - - private: - const ordinal_type _njnk; -}; - -template -struct CrsGraph { - typedef ExecSpace exec_space; - typedef ArrayLayout array_layout; - - typedef Kokkos::View row_ptr_type; - typedef Kokkos::View row_idx_type; - typedef Kokkos::View col_idx_type; - - row_ptr_type rowptr; - row_idx_type rowidx; - col_idx_type colidx; - - CrsGraph() : rowptr("rowptr", 1), 
rowidx("rowidx", 0), colidx("colidx", 0) {} - - KOKKOS_INLINE_FUNCTION - bool isEmpty() const { - return (rowptr.extent(0) <= 1 || colidx.extent(0) == 0 || - rowidx.extent(0) == 0); - } - - KOKKOS_INLINE_FUNCTION - ordinal_type NumRows() const { - return (isEmpty() ? 0 : static_cast(rowptr.extent(0)) - 1); - } - - KOKKOS_INLINE_FUNCTION - size_type NumNonZeros() const { - return (isEmpty() ? 0 : static_cast(colidx.extent(0))); - } -}; - -template -inline CrsGraph create_mirror( - const CrsGraph src) { - CrsGraph dst; - - dst.rowptr = - Kokkos::create_mirror_view(typename DstSpace::memory_space(), src.rowptr); - dst.rowidx = - Kokkos::create_mirror_view(typename DstSpace::memory_space(), src.rowidx); - dst.colidx = - Kokkos::create_mirror_view(typename DstSpace::memory_space(), src.colidx); - - return dst; -} - -template -inline void deep_copy(const CrsGraph dst, - const CrsGraph src) { - Kokkos::deep_copy(dst.rowptr, src.rowptr); - Kokkos::deep_copy(dst.rowidx, src.rowidx); - Kokkos::deep_copy(dst.colidx, src.colidx); -} - -// Given a structured block and a stencil (at present, just a 3D 1-hop cross), -// construct a corresponding CRS graph. 
-template -CrsGraph -create_graph_host_for_structured_block(const StructuredBlock mesh, - const StencilShape::Enum shape) { - CrsGraph graph; - - Kokkos::resize(graph.rowptr, mesh.size() + 1); - graph.rowptr[0] = 0; - - std::vector colidx, rowidx; - switch (shape) { - case StencilShape::cross: - for (ordinal_type c = 0; c < mesh.size(); ++c) { - ordinal_type i, j, k, n = 0; - - mesh.id2ijk(c, i, j, k); - - rowidx.push_back(c); - colidx.push_back(c); - ++n; - if (i > 0) { - rowidx.push_back(c); - colidx.push_back(mesh.ijk2id(i - 1, j, k)); - ++n; - } - if (i + 1 < mesh.ni) { - rowidx.push_back(c); - colidx.push_back(mesh.ijk2id(i + 1, j, k)); - ++n; - } - if (j > 0) { - rowidx.push_back(c); - colidx.push_back(mesh.ijk2id(i, j - 1, k)); - ++n; - } - if (j + 1 < mesh.nj) { - rowidx.push_back(c); - colidx.push_back(mesh.ijk2id(i, j + 1, k)); - ++n; - } - if (k > 0) { - rowidx.push_back(c); - colidx.push_back(mesh.ijk2id(i, j, k - 1)); - ++n; - } - if (k + 1 < mesh.nk) { - rowidx.push_back(c); - colidx.push_back(mesh.ijk2id(i, j, k + 1)); - ++n; - } - graph.rowptr[c + 1] = graph.rowptr[c] + n; - } - break; - } - assert(graph.rowptr[mesh.size()] == static_cast(colidx.size())); - assert(graph.rowptr[mesh.size()] == static_cast(rowidx.size())); - - for (ordinal_type c = 0; c < mesh.size(); ++c) - std::sort(colidx.begin() + graph.rowptr[c], - colidx.begin() + graph.rowptr[c + 1]); - - const ordinal_type nnz = graph.rowptr[mesh.size()]; - Kokkos::resize(graph.colidx, nnz); - Kokkos::resize(graph.rowidx, nnz); - for (ordinal_type c = 0; c < nnz; ++c) { - graph.colidx[c] = colidx[c]; - graph.rowidx[c] = rowidx[c]; - } - return graph; -} - -template -class BlockCrsMatrix { - public: - typedef ExeSpace exec_space; - typedef ArrayLayout array_layout; - typedef Test::CrsGraph crs_graph_type; - - typedef scalar_type value_type; - typedef Kokkos::View - value_array_type; - - private: - crs_graph_type _graph; - ordinal_type _blocksize; - value_array_type _values; - - public: - 
BlockCrsMatrix() : _graph(), _blocksize(), _values() {} - - BlockCrsMatrix(const BlockCrsMatrix &b) - : _graph(b._graph), _blocksize(b._blocksize), _values(b._values) {} - - BlockCrsMatrix(const crs_graph_type graph, const ordinal_type blocksize) - : _graph(graph), - _blocksize(blocksize), - _values("BlockCrsMatrix::_values", _graph.NumNonZeros(), _blocksize, - _blocksize) {} - - BlockCrsMatrix(const crs_graph_type graph, const ordinal_type blocksize, - const value_array_type values) - : _graph(graph), _blocksize(blocksize), _values(values) {} - - ordinal_type BlockSize() const { return _blocksize; } - crs_graph_type CrsGraph() const { return _graph; } - value_array_type Values() const { return _values; } -}; - -template -inline BlockCrsMatrix create_mirror( - const BlockCrsMatrix src) { - const auto graph = create_mirror(src.CrsGraph()); - const auto blocksize = src.BlockSize(); - const auto values = Kokkos::create_mirror_view( - typename DstSpace::memory_space(), src.Values()); - return BlockCrsMatrix(graph, blocksize, values); -} - -template -inline void deep_copy(const BlockCrsMatrix dst, - const BlockCrsMatrix src) { - deep_copy(dst.CrsGraph(), src.CrsGraph()); - Kokkos::deep_copy(dst.Values(), src.Values()); -} - -template -void fill_block_crs_matrix_host( - BlockCrsMatrix A) { - // extract graph and blocksizes - const auto graph = A.CrsGraph(); - const auto values = A.Values(); - const ordinal_type blocksize = A.BlockSize(); - - scalar_type tmp[BLOCKCRS_MAX_BLOCKSIZE * - BLOCKCRS_MAX_BLOCKSIZE], //[blocksize*blocksize], - diag_block[BLOCKCRS_MAX_BLOCKSIZE] - [BLOCKCRS_MAX_BLOCKSIZE], //[blocksize][blocksize], - offdiag_block[BLOCKCRS_MAX_BLOCKSIZE] - [BLOCKCRS_MAX_BLOCKSIZE]; //[blocksize][blocksize]; - - Random random; - - // for diagonal block, make spd - { - const ordinal_type iend = blocksize * blocksize; - for (ordinal_type i = 0; i < iend; ++i) tmp[i] = 2 * (random.value() - 0.5); - - for (ordinal_type i = 0; i < blocksize; ++i) - for (ordinal_type j = 
i; j < blocksize; ++j) { - diag_block[i][j] = 0; - for (ordinal_type k = 0; k < blocksize; ++k) - diag_block[i][j] += tmp[i * blocksize + k] * tmp[j * blocksize + k]; - if (i != j) - diag_block[j][i] = diag_block[i][j]; // symmetrize - else - diag_block[i][j] *= 0.5 * blocksize; // improve condition - } - } - - { - // for off diagonal; down-weight off-diag blocks to improve conditioning. - for (ordinal_type i = 0; i < blocksize; ++i) - for (ordinal_type j = 0; j < blocksize; ++j) - offdiag_block[i][j] = 0.1 * 2 * (random.value() - 0.5); - } - - for (ordinal_type r = 0; r < graph.NumRows(); ++r) { - // random number generator (-1, 1) - const ordinal_type cbegin = graph.rowptr(r), cend = graph.rowptr(r + 1); - for (ordinal_type c = cbegin; c < cend; ++c) { - auto block = Kokkos::subview(values, c, Kokkos::ALL(), Kokkos::ALL()); - - if (graph.colidx(c) == r) { - for (ordinal_type i = 0; i < blocksize; ++i) - for (ordinal_type j = i; j < blocksize; ++j) - block(i, j) = diag_block[i][j]; - } else { - // for off diagonal; down-weight off-diag blocks to improve - // conditioning. 
- for (ordinal_type i = 0; i < blocksize; ++i) - for (ordinal_type j = 0; j < blocksize; ++j) - block(i, j) = offdiag_block[i][j]; - } - } - } -} - -// nrhs should go after blocksize to match matrix dimensions consistently -template -class BlockMultiVector { - public: - typedef ExeSpace exec_space; - typedef ArrayLayout array_layout; - - typedef scalar_type value_type; - typedef Kokkos::View - value_array_type; - - private: - value_array_type _values; - - public: - BlockMultiVector(const ordinal_type nvecs, const ordinal_type nrows, - const ordinal_type blocksize) - : _values("BlockMultiVector::_values", nvecs, nrows, blocksize) {} - - BlockMultiVector(const value_array_type values) : _values(values) {} - - ordinal_type NumVectors() const { return _values.extent(0); } - ordinal_type NumRows() const { return _values.extent(1); } - ordinal_type BlockSize() const { return _values.extent(2); } - - value_array_type Values() const { return _values; } -}; - -template -inline BlockMultiVector create_mirror( - const BlockMultiVector src) { - return BlockMultiVector(Kokkos::create_mirror_view( - typename DstSpace::memory_space(), src.Values())); -} - -template -inline void deep_copy(const BlockMultiVector dst, - const BlockMultiVector src) { - Kokkos::deep_copy(dst.Values(), src.Values()); -} - -template -void fill_block_multi_vector_host( - BlockMultiVector B) { - const ordinal_type jend = B.NumVectors(), iend = B.NumRows(), - kend = B.BlockSize(); - - auto B_val = B.Values(); - - for (ordinal_type j = 0; j < jend; ++j) - for (ordinal_type i = 0; i < iend; ++i) - for (ordinal_type k = 0; k < kend; ++k) - B_val(j, i, k) = static_cast((i + j + k) % 7) - 3; -} - -template -class BlockTridiagMatrices { - public: - typedef ExecSpace exec_space; - typedef ValueType value_type; - typedef ArrayLayout array_layout; - - typedef Kokkos::View - value_array_type; - - private: - const ordinal_type _ntridiags, _nrows, _blocksize; - // A B - // C - value_array_type _A, _B, _C; - - public: 
- BlockTridiagMatrices(const ordinal_type ntridiags, const ordinal_type nrows, - const ordinal_type blocksize) - : _ntridiags(ntridiags), - _nrows(nrows), - _blocksize(blocksize), - _A("BlockTridiagMatrix::_A", _ntridiags, _nrows, _blocksize, - _blocksize), - _B("BlockTridiagMatrix::_B", _ntridiags, _nrows - 1, _blocksize, - _blocksize), - _C("BlockTridiagMatrix::_C", _ntridiags, _nrows - 1, _blocksize, - _blocksize) {} - - BlockTridiagMatrices(const ordinal_type ntridiags, const ordinal_type nrows, - const ordinal_type blocksize, const value_array_type A_, - const value_array_type B_, const value_array_type C_) - : _ntridiags(ntridiags), - _nrows(nrows), - _blocksize(blocksize), - _A(A_), - _B(B_), - _C(C_) {} - - value_array_type A() const { return _A; } - value_array_type B() const { return _B; } - value_array_type C() const { return _C; } - - ordinal_type BlockSize() const { return _blocksize; } - ordinal_type NumRows() const { return _nrows; } - ordinal_type NumTridiagMatrices() const { return _ntridiags; } -}; - -template -BlockTridiagMatrices -create_block_tridiag_matrices(const ordinal_type ntridiags, - const ordinal_type nrows, - const ordinal_type blocksize) { - return BlockTridiagMatrices( - adjustDimension(ntridiags), nrows, blocksize); -} - -template -inline BlockTridiagMatrices create_mirror( - const BlockTridiagMatrices src) { - return BlockTridiagMatrices( - src.NumTridiagMatrices(), src.NumRows(), src.BlockSize(), - Kokkos::create_mirror_view(typename DstSpace::memory_space(), src.A()), - Kokkos::create_mirror_view(typename DstSpace::memory_space(), src.B()), - Kokkos::create_mirror_view(typename DstSpace::memory_space(), src.C())); -} - -template -inline void deep_copy( - const BlockTridiagMatrices dst, - const BlockTridiagMatrices src) { - Kokkos::deep_copy(dst.A(), src.A()); - Kokkos::deep_copy(dst.B(), src.B()); - Kokkos::deep_copy(dst.C(), src.C()); -} - -template -KOKKOS_INLINE_FUNCTION typename std::enable_if< - std::is_same::value, - 
scalar_type &>::type -tdiag_val(const ViewType &A, const ordinal_type &t, const ordinal_type &i, - const ordinal_type &ii, const ordinal_type &jj) { - return A(t, i, ii, jj); -} - -template -KOKKOS_INLINE_FUNCTION typename std::enable_if< - !std::is_same::value, - scalar_type &>::type -tdiag_val(const ViewType &A, const ordinal_type &t, const ordinal_type &i, - const ordinal_type &ii, const ordinal_type &jj) { - typedef typename ViewType::value_type value_type; - return A(t / value_type::vector_length, i, ii, - jj)[t % value_type::vector_length]; -} - -template -class PartitionedBlockMultiVector { - public: - typedef ExeSpace exec_space; - typedef ValueType value_type; - typedef ArrayLayout array_layout; - - typedef Kokkos::View - value_array_type; - - private: - value_array_type _values; - - public: - PartitionedBlockMultiVector(const ordinal_type nparts, - const ordinal_type nvectors, - const ordinal_type nrows, - const ordinal_type blocksize) - : _values("BlockMultiVector::_values", nparts, nvectors, nrows, - blocksize) {} - - PartitionedBlockMultiVector(const value_array_type values) - : _values(values) {} - - ordinal_type NumPartitions() const { return _values.extent(0); } - ordinal_type NumVectors() const { return _values.extent(1); } - ordinal_type NumRows() const { return _values.extent(2); } - ordinal_type BlockSize() const { return _values.extent(3); } - - value_array_type Values() const { return _values; } -}; - -template -PartitionedBlockMultiVector -create_partitioned_block_multi_vector(const ordinal_type nparts, - const ordinal_type nvectors, - const ordinal_type nrows, - const ordinal_type blocksize) { - return PartitionedBlockMultiVector( - adjustDimension(nparts), nvectors, nrows, blocksize); -} - -template -inline PartitionedBlockMultiVector -create_mirror( - const PartitionedBlockMultiVector src) { - return PartitionedBlockMultiVector( - Kokkos::create_mirror_view(typename DstSpace::memory_space(), - src.Values())); -} - -template -inline void 
deep_copy( - const PartitionedBlockMultiVector dst, - const PartitionedBlockMultiVector src) { - Kokkos::deep_copy(dst.Values(), src.Values()); -} - -template -void fill_partitioned_block_multi_vector_host( - PartitionedBlockMultiVector - B, - const ordinal_type ninj) { - const ordinal_type iend = ninj, // B.NumPartitions(), - jend = B.NumVectors(), kend = B.NumRows(), lend = B.BlockSize(); - - auto B_val = B.Values(); - for (ordinal_type i = 0; i < iend; ++i) - for (ordinal_type j = 0; j < jend; ++j) - for (ordinal_type k = 0; k < kend; ++k) - for (ordinal_type l = 0; l < lend; ++l) - tdiag_val(B_val, i, j, k, l) = - static_cast((i + j + k + l) % 7) - 3; -} - -template -class BlockCrsMatrixVectorProductByRow { - public: - typedef BlockCrsMatrix block_crs_matrix_type; - typedef typename block_crs_matrix_type::crs_graph_type crs_graph_type; - typedef BlockMultiVector block_multi_vector_type; - - private: - ConstUnmanagedViewType _rowptr; - ConstUnmanagedViewType _colidx; - - ConstUnmanagedViewType _A; - ConstUnmanagedViewType _x; - /**/ UnmanagedViewType _y; - - ordinal_type _blocksize; - - public: - // A thread maps to a point row of the matrix. 
- // loop = blksize*m - KOKKOS_INLINE_FUNCTION - void operator()(const ordinal_type idx) const { - // index of blockrow and row in a block - const ordinal_type i = idx / _blocksize; - const ordinal_type ii = idx % _blocksize; - - // loop over multivectors - const ordinal_type jend = _y.extent(0); - for (ordinal_type j = 0; j < jend; ++j) { - scalar_type tmp = 0; - - // block row - const ordinal_type cbegin = _rowptr(i), cend = _rowptr(i + 1); - - for (ordinal_type c = cbegin; c < cend; ++c) { - const ordinal_type col = _colidx(c); - for (ordinal_type jj = 0; jj < _blocksize; ++jj) - tmp += _A(col, ii, jj) * _x(j, col, jj); - } - _y(j, i, ii) = tmp; - } - } - - void run(const block_crs_matrix_type A, const block_multi_vector_type x, - const block_multi_vector_type y) { - _rowptr = A.CrsGraph().rowptr; - _colidx = A.CrsGraph().colidx; - - _blocksize = A.BlockSize(); - - _A = A.Values(); - _x = x.Values(); - _y = y.Values(); - - Kokkos::RangePolicy policy(0, _x.extent(1) * _blocksize); - Kokkos::parallel_for("BlockCrsMatrixVectorProductByRow::run", policy, - *this); - } -}; - -template -class BlockCrsMatrixVectorProductByBlockRow { - public: - typedef BlockCrsMatrix block_crs_matrix_type; - typedef typename block_crs_matrix_type::crs_graph_type crs_graph_type; - typedef BlockMultiVector block_multi_vector_type; - - private: - ConstUnmanagedViewType _rowptr; - ConstUnmanagedViewType _colidx; - - ConstUnmanagedViewType _A; - ConstUnmanagedViewType _x; - /**/ UnmanagedViewType _y; - - ordinal_type _blocksize; - - public: - // A thread maps to a row block of the matrix. 
- // loop = m - KOKKOS_INLINE_FUNCTION - void operator()(const ordinal_type i) const { - // loop over multivector colums - const ordinal_type jend = _y.extent(0); - for (ordinal_type j = 0; j < jend; ++j) { - // set zero - for (ordinal_type ii = 0; ii < _blocksize; ++ii) _y(j, i, ii) = 0; - - // block row - const ordinal_type cbegin = _rowptr(i), cend = _rowptr(i + 1); - - for (ordinal_type c = cbegin; c < cend; ++c) { - const ordinal_type col = _colidx(c); - for (ordinal_type ii = 0; ii < _blocksize; ++ii) { - scalar_type tmp = 0; - for (ordinal_type jj = 0; jj < _blocksize; ++jj) - tmp += _A(col, ii, jj) * _x(j, col, jj); - _y(j, i, ii) += tmp; - } - } - } - } - - void run(const block_crs_matrix_type A, const block_multi_vector_type x, - const block_multi_vector_type y) { - _rowptr = A.CrsGraph().rowptr; - _colidx = A.CrsGraph().colidx; - - _blocksize = A.BlockSize(); - - _A = A.Values(); - _x = x.Values(); - _y = y.Values(); - - Kokkos::RangePolicy policy(0, _x.extent(1)); - Kokkos::parallel_for(policy, *this); - } -}; - -template -class ExtractBlockTridiagMatrices { - public: - typedef ExecSpace exec_space; - typedef ValueType value_type; - typedef ArrayLayout array_layout; - - typedef StructuredBlock structured_block_mesh_type; - typedef BlockCrsMatrix block_crs_matrix_type; - typedef typename block_crs_matrix_type::crs_graph_type crs_graph_type; - typedef BlockTridiagMatrices - block_tridiag_matrices_type; - - private: - structured_block_mesh_type _mesh; - ordinal_type _blocksize; - - ConstUnmanagedViewType _rowptr; - ConstUnmanagedViewType _rowidx; - ConstUnmanagedViewType _colidx; - - ConstUnmanagedViewType _A; - /**/ UnmanagedViewType - _TA, _TB, _TC; - - public: - ExtractBlockTridiagMatrices(const structured_block_mesh_type mesh) - : _mesh(mesh) {} - - template - KOKKOS_INLINE_FUNCTION void elementwise_copy( - const TViewType &T, const AViewType &A, const ordinal_type ij, - const ordinal_type k, const ordinal_type c, - const ordinal_type blocksize) const 
{ - for (ordinal_type ii = 0; ii < blocksize; ++ii) - for (ordinal_type jj = 0; jj < blocksize; ++jj) - tdiag_val(T, ij, k, ii, jj) = A(c, ii, jj); - } - - // A thread maps nonzero blocks - KOKKOS_INLINE_FUNCTION - void operator()(const ordinal_type c) const { - const ordinal_type row = _rowidx[c], col = _colidx[c]; - - ordinal_type ri, rj, rk, ci, cj, ck; - _mesh.id2ijk(row, ri, rj, rk); - _mesh.id2ijk(col, ci, cj, ck); - - if (ri == ci && rj == cj) { - const ordinal_type ij = _mesh.ij2id(ri, rj); - // consider connectivity to k-direction - switch (rk - ck) { - case 1: elementwise_copy(_TC, _A, ij, ck, c, _blocksize); break; - case 0: elementwise_copy(_TA, _A, ij, rk, c, _blocksize); break; - case -1: elementwise_copy(_TB, _A, ij, rk, c, _blocksize); break; - } - } - } - - void run(const block_crs_matrix_type A, const block_tridiag_matrices_type T) { - _rowptr = A.CrsGraph().rowptr; - _rowidx = A.CrsGraph().rowidx; - _colidx = A.CrsGraph().colidx; - - _A = A.Values(); - - _TA = T.A(); - _TB = T.B(); - _TC = T.C(); - - _blocksize = A.BlockSize(); - Kokkos::RangePolicy policy(0, _A.extent(0)); - Kokkos::parallel_for(policy, *this); - } - - template - bool elementwise_check(const TViewType &T, const AViewType &A, - const ordinal_type ij, const ordinal_type k, - const ordinal_type c, - const ordinal_type blocksize) const { - const auto eps = 1e2 * std::numeric_limits::epsilon(); - for (ordinal_type ii = 0; ii < blocksize; ++ii) - for (ordinal_type jj = 0; jj < blocksize; ++jj) - if (Kokkos::ArithTraits::abs(tdiag_val(T, ij, k, ii, jj) - - A(c, ii, jj)) >= eps) - return false; - return true; - } - - bool check() const { - auto rowptr = Kokkos::create_mirror_view(_rowptr); - Kokkos::deep_copy(rowptr, _rowptr); - auto colidx = Kokkos::create_mirror_view(_colidx); - Kokkos::deep_copy(colidx, _colidx); - auto TA = Kokkos::create_mirror_view(_TA); - Kokkos::deep_copy(TA, _TA); - auto TB = Kokkos::create_mirror_view(_TB); - Kokkos::deep_copy(TB, _TB); - auto TC = 
Kokkos::create_mirror_view(_TC); - Kokkos::deep_copy(TC, _TC); - auto A = Kokkos::create_mirror_view(_A); - Kokkos::deep_copy(A, _A); - - const ordinal_type ijend = adjustDimension(_mesh.ni * _mesh.nj), - kend = _mesh.nk; - - assert(ijend == ordinal_type(TA.extent(0))); - assert((kend - 0) == ordinal_type(TA.extent(1))); - assert(ijend == ordinal_type(TB.extent(0))); - assert((kend - 1) == ordinal_type(TB.extent(1))); - assert(ijend == ordinal_type(TC.extent(0))); - assert((kend - 1) == ordinal_type(TC.extent(1))); - - for (ordinal_type ij = 0; ij < ijend; ++ij) { - ordinal_type i, j; - _mesh.id2ij(ij, i, j); - - for (ordinal_type k = 0; k < kend; ++k) { - const ordinal_type row = _mesh.ijk2id(i, j, k), idx_begin = rowptr[row], - idx_end = rowptr[row + 1]; - - // check - bool found[3] = {}, same[3] = {}; - for (ordinal_type idx = idx_begin; idx < idx_end; ++idx) { - switch (row - colidx[idx]) { - case 1: - same[2] = elementwise_check(TC, A, ij, k - 1, idx, _blocksize); - found[2] = true; - break; - case 0: - same[0] = elementwise_check(TA, A, ij, k, idx, _blocksize); - found[0] = true; - break; - case -1: - same[1] = elementwise_check(TB, A, ij, k, idx, _blocksize); - found[1] = true; - break; - } - } - if (k == 0) - assert(found[0] & same[0] && found[1] & same[1]); - else if (k == (kend - 1)) - assert(found[0] & same[0] && found[2] & same[2]); - else - assert(found[0] & same[0] && found[1] & same[1] && - found[2] & same[2]); - } - } - return true; - } -}; - -inline bool eq(const std::string &a, const char *const b1, - const char *const b2 = 0) { - return (a == std::string(b1) || (b2 && a == std::string(b2)) || - a == std::string("-") + std::string(b1)); -} - -// Command-line argument parser and holder. 
-template -struct Input { - bool quiet, check; - ordinal_type ni, nj, nk; - ordinal_type bs; // block size - ordinal_type nrhs; // #vectors in multivector - ordinal_type opf, ops; - StencilShape::Enum stencil_shape; - - Input(int argc, char **argv) { - quiet = false; - check = false; - ni = nj = nk = 10; - bs = 5; - nrhs = 1; - if (std::is_same::value) { - opf = 0; - ops = 0; // range policy default - } else { - opf = 1; - ops = 1; // team is default - } - stencil_shape = StencilShape::cross; - - for (ordinal_type i = 1; i < argc; ++i) { - const std::string &token = argv[i]; - if (eq(token, "-nijk")) - ni = nj = nk = std::atoi(argv[++i]); - else if (eq(token, "-ni")) - ni = std::atoi(argv[++i]); - else if (eq(token, "-nj")) - nj = std::atoi(argv[++i]); - else if (eq(token, "-nk")) - nk = std::atoi(argv[++i]); - else if (eq(token, "-bs")) - bs = std::atoi(argv[++i]); - else if (eq(token, "-nrhs")) - nrhs = std::atoi(argv[++i]); - else if (eq(token, "-opf")) - opf = std::atoi(argv[++i]); - else if (eq(token, "-ops")) - ops = std::atoi(argv[++i]); - else if (eq(token, "-c", "-check")) - check = true; - } - if (nk <= 1) throw std::runtime_error("k dimension is <= 1; must be >= 2."); - if (!quiet) print(std::cout); - } - - void print(std::ostream &os) const { - os << " ni " << ni << " nj " << nj << " nk " << nk << " bs " << bs - << " nrhs " << nrhs << " opf " << opf << " ops " << ops << " sc " - << stencil_shape << "\n"; - } -}; - -} // namespace Test -} // namespace KokkosBatched diff --git a/src/impl/generated_specializations_cpp/spmv/KokkosSparse_spmv_blockcrsmatrix_eti_spec_inst.cpp.in b/src/impl/generated_specializations_cpp/spmv/KokkosSparse_spmv_blockcrsmatrix_eti_spec_inst.cpp.in deleted file mode 100644 index 1bb85d6067..0000000000 --- a/src/impl/generated_specializations_cpp/spmv/KokkosSparse_spmv_blockcrsmatrix_eti_spec_inst.cpp.in +++ /dev/null @@ -1,56 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// 
-// KokkosKernels 0.9: Linear Algebra and Graph Kernels -// Copyright 2017 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true -#include "KokkosKernels_config.h" -#include "KokkosSparse_spmv_blockcrsmatrix_spec.hpp" - -namespace KokkosSparse { -namespace Experimental { -namespace Impl { -// clang-format off -@SPARSE_SPMV_BLOCKCRSMATRIX_ETI_INST_BLOCK@ -// clang-format on -} // namespace Impl -} // namespace Experimental -} // namespace KokkosSparse diff --git a/src/impl/generated_specializations_cpp/spmv/KokkosSparse_spmv_mv_blockcrsmatrix_eti_spec_inst.cpp.in b/src/impl/generated_specializations_cpp/spmv/KokkosSparse_spmv_mv_blockcrsmatrix_eti_spec_inst.cpp.in deleted file mode 100644 index ae672bc04a..0000000000 --- a/src/impl/generated_specializations_cpp/spmv/KokkosSparse_spmv_mv_blockcrsmatrix_eti_spec_inst.cpp.in +++ /dev/null @@ -1,56 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// KokkosKernels 0.9: Linear Algebra and Graph Kernels -// Copyright 2017 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true -#include "KokkosKernels_config.h" -#include "KokkosSparse_spmv_blockcrsmatrix_spec.hpp" - -namespace KokkosSparse { -namespace Experimental { -namespace Impl { -// clang-format off -@SPARSE_SPMV_MV_BLOCKCRSMATRIX_ETI_INST_BLOCK@ -// clang-format on -} // namespace Impl -} // namespace Experimental -} // namespace KokkosSparse diff --git a/src/impl/generated_specializations_hpp/KokkosSparse_spmv_blockcrsmatrix_eti_spec_avail.hpp.in b/src/impl/generated_specializations_hpp/KokkosSparse_spmv_blockcrsmatrix_eti_spec_avail.hpp.in deleted file mode 100644 index 1ce97a5795..0000000000 --- a/src/impl/generated_specializations_hpp/KokkosSparse_spmv_blockcrsmatrix_eti_spec_avail.hpp.in +++ /dev/null @@ -1,56 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// KokkosKernels 0.9: Linear Algebra and Graph Kernels -// Copyright 2017 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. 
Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_ETI_SPEC_AVAIL_HPP_ -#define KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_ETI_SPEC_AVAIL_HPP_ - -namespace KokkosSparse { -namespace Experimental { -namespace Impl { -// clang-format off -@SPARSE_SPMV_BLOCKCRSMATRIX_ETI_AVAIL_BLOCK@ -// clang-format on -} // namespace Impl -} // namespace Experimental -} // namespace KokkosSparse -#endif diff --git a/src/impl/generated_specializations_hpp/KokkosSparse_spmv_blockcrsmatrix_eti_spec_decl.hpp.in b/src/impl/generated_specializations_hpp/KokkosSparse_spmv_blockcrsmatrix_eti_spec_decl.hpp.in deleted file mode 100644 index 9ad333ccfd..0000000000 --- a/src/impl/generated_specializations_hpp/KokkosSparse_spmv_blockcrsmatrix_eti_spec_decl.hpp.in +++ /dev/null @@ -1,56 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// KokkosKernels 0.9: Linear Algebra and Graph Kernels -// Copyright 2017 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_ETI_SPEC_DECL_HPP_ -#define KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_ETI_SPEC_DECL_HPP_ - -namespace KokkosSparse { -namespace Experimental { -namespace Impl { -// clang-format off -@SPARSE_SPMV_BLOCKCRSMATRIX_ETI_DECL_BLOCK@ -// clang-format on -} // namespace Impl -} // namespace Experimental -} // namespace KokkosSparse -#endif diff --git a/src/impl/generated_specializations_hpp/KokkosSparse_spmv_mv_blockcrsmatrix_eti_spec_avail.hpp.in b/src/impl/generated_specializations_hpp/KokkosSparse_spmv_mv_blockcrsmatrix_eti_spec_avail.hpp.in deleted file mode 100644 index 85b72e3b7b..0000000000 --- a/src/impl/generated_specializations_hpp/KokkosSparse_spmv_mv_blockcrsmatrix_eti_spec_avail.hpp.in +++ /dev/null @@ -1,56 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// KokkosKernels 0.9: Linear Algebra and Graph Kernels -// Copyright 2017 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. 
Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOSSPARSE_SPMV_MV_BLOCKCRSMATRIX_ETI_SPEC_AVAIL_HPP_ -#define KOKKOSSPARSE_SPMV_MV_BLOCKCRSMATRIX_ETI_SPEC_AVAIL_HPP_ - -namespace KokkosSparse { -namespace Experimental { -namespace Impl { -// clang-format off -@SPARSE_SPMV_MV_BLOCKCRSMATRIX_ETI_AVAIL_BLOCK@ -// clang-format on -} // namespace Impl -} // namespace Experimental -} // namespace KokkosSparse -#endif diff --git a/src/impl/generated_specializations_hpp/KokkosSparse_spmv_mv_blockcrsmatrix_eti_spec_decl.hpp.in b/src/impl/generated_specializations_hpp/KokkosSparse_spmv_mv_blockcrsmatrix_eti_spec_decl.hpp.in deleted file mode 100644 index c0b77c54f2..0000000000 --- a/src/impl/generated_specializations_hpp/KokkosSparse_spmv_mv_blockcrsmatrix_eti_spec_decl.hpp.in +++ /dev/null @@ -1,56 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// KokkosKernels 0.9: Linear Algebra and Graph Kernels -// Copyright 2017 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOSSPARSE_SPMV_MV_BLOCKCRSMATRIX_ETI_SPEC_DECL_HPP_ -#define KOKKOSSPARSE_SPMV_MV_BLOCKCRSMATRIX_ETI_SPEC_DECL_HPP_ - -namespace KokkosSparse { -namespace Experimental { -namespace Impl { -// clang-format off -@SPARSE_SPMV_MV_BLOCKCRSMATRIX_ETI_DECL_BLOCK@ -// clang-format on -} // namespace Impl -} // namespace Experimental -} // namespace KokkosSparse -#endif diff --git a/src/impl/tpls/KokkosSparse_spmv_blockcrsmatrix_tpl_spec_avail.hpp b/src/impl/tpls/KokkosSparse_spmv_blockcrsmatrix_tpl_spec_avail.hpp deleted file mode 100644 index e7ac862f22..0000000000 --- a/src/impl/tpls/KokkosSparse_spmv_blockcrsmatrix_tpl_spec_avail.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOSPARSE_SPMV_BLOCKCRSMATRIX_TPL_SPEC_AVAIL_HPP_ -#define KOKKOSPARSE_SPMV_BLOCKCRSMATRIX_TPL_SPEC_AVAIL_HPP_ - -namespace KokkosSparse { -namespace Experimental { -namespace Impl { - -// Specialization struct which defines whether a specialization exists -template -struct spmv_blockcrsmatrix_tpl_spec_avail { - enum : bool { value = false }; -}; - -// Specialization struct which defines whether a specialization exists -template -struct spmv_mv_blockcrsmatrix_tpl_spec_avail { - enum : bool { value = false }; -}; - -} // namespace Impl -} // namespace Experimental -} // namespace KokkosSparse - -#endif // KOKKOSPARSE_SPMV_BLOCKCRSMATRIX_TPL_SPEC_AVAIL_HPP_ diff --git a/src/impl/tpls/KokkosSparse_spmv_blockcrsmatrix_tpl_spec_decl.hpp b/src/impl/tpls/KokkosSparse_spmv_blockcrsmatrix_tpl_spec_decl.hpp deleted file mode 100644 index d5e9aad5be..0000000000 --- a/src/impl/tpls/KokkosSparse_spmv_blockcrsmatrix_tpl_spec_decl.hpp +++ /dev/null @@ -1,48 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOSKERNELS_KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_TPL_SPEC_DECL_HPP -#define KOKKOSKERNELS_KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_TPL_SPEC_DECL_HPP - -#endif // KOKKOSKERNELS_KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_TPL_SPEC_DECL_HPP diff --git a/src/sparse/KokkosSparse_BlockCrsMatrix.hpp b/src/sparse/KokkosSparse_BlockCrsMatrix.hpp deleted file mode 100644 index 4e52c0f693..0000000000 --- a/src/sparse/KokkosSparse_BlockCrsMatrix.hpp +++ /dev/null @@ -1,1006 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -/// \file Kokkos_Sparse_BlockCrsMatrix.hpp -/// \brief Local sparse matrix interface -/// -/// This file provides KokkosSparse::BlockCrsMatrix. This implements a -/// local (no MPI) sparse matrix stored in block compressed row sparse -/// ("BlockCrs") format. 
- -#ifndef KOKKOS_SPARSE_BLOCKCRSMATRIX_HPP_ -#define KOKKOS_SPARSE_BLOCKCRSMATRIX_HPP_ - -#include "Kokkos_Core.hpp" -#include "Kokkos_StaticCrsGraph.hpp" -#include "Kokkos_ArithTraits.hpp" -#include -#include -#include -#include "KokkosSparse_CrsMatrix.hpp" - -namespace KokkosSparse { - -namespace Experimental { - -/// \class SparseBlockRowView -/// \brief View of a block-row of a sparse matrix. -/// \tparam MatrixType BlockCrsMatrix Sparse matrix type -/// -/// This class provides a generic view of a block-row of a sparse matrix. -/// -/// Whether the view is const or not, depends on whether -/// MatrixType is a const or nonconst view of the matrix. If -/// you always want a const view, use SparseBlockRowViewConst (see below). -/// -/// MatrixType must provide the \c value_type and \c ordinal_type -/// typedefs. In addition, it must make sense to use SparseBlockRowView to -/// view a block-row of MatrixType. -template -struct SparseBlockRowView { - //! The type of the values in the row. - typedef typename MatrixType::value_type value_type; - //! The type of the column indices in the row. - typedef typename MatrixType::ordinal_type ordinal_type; - //! The type for returned block of values. - typedef Kokkos::View - block_values_type; - - private: - //! Array of values in the row. - value_type* values_; - //! Array of (local) column indices in the row. - ordinal_type* colidx_; - /// \brief Stride between successive rows in a block. - /// - /// For block compressed sparse row (BlockCSR) storage with row-major layout - /// by full row, (i.e. consecutive rows within a block are NOT contiguous), - /// this will be the stride between rows within a block-row - const ordinal_type blockDim_; - - public: - /// \brief Constructor - /// - /// \param values [in] Array of the row's values. - /// \param colidx [in] Array of the row's column indices. - /// \param blockDim [in] (Constant) stride between block rows - /// within a block-row in the above arrays. 
- /// \param count [in] Number of blocks in the desired block-row. - // - // Assumes values and colidx__ already offset to the correct location - KOKKOS_INLINE_FUNCTION - SparseBlockRowView(value_type* const values, ordinal_type* const colidx__, - const ordinal_type& blockDim, const ordinal_type& count) - : values_(values), - colidx_(colidx__), - blockDim_(blockDim), - length(count) {} - - /// \brief Constructor with offset into \c colidx array - /// - /// \param values [in] Array of the row's values. - /// \param colidx [in] Array of the row's column indices. - /// \param blockDim [in] (Constant) stride between rows in - /// within a block in the above arrays. - /// \param count [in] Number of blocks in the desired block-row - /// \param start [in] Offset into values and colidx of the desired block-row - /// start. - /// Note: The offset into the values array for a block-row equals - /// num_blocks_prior_to_block-row*blockDim*blockDim - /// - /// \tparam OffsetType The type of \c start (see above). Must be a - /// built-in integer type. This may differ from ordinal_type. - /// For example, the matrix may have dimensions that fit in int, - /// but a number of entries that does not fit in int. - template - KOKKOS_INLINE_FUNCTION SparseBlockRowView( - const typename MatrixType::values_type& values, - const typename MatrixType::index_type& colidx__, - const ordinal_type& blockDim, const ordinal_type& count, - const OffsetType& start, - const typename std::enable_if::value, - int>::type& = 0) - : values_(&values(start * blockDim * blockDim)), - colidx_(&colidx__(start)), - blockDim_(blockDim), - length(count) {} - - /// \brief Number of entries (i.e. blocks) in the row. - /// - /// This is a public const field rather than a public const method, - /// in order to avoid possible overhead of a method call if the - /// compiler is unable to inline that method call. 
- /// - /// We assume that rows contain no duplicate entries (i.e., entries - /// with the same column index). Thus, a row may have up to - /// A.numCols() entries. This means that the correct type of - /// 'length' is ordinal_type. - /// Here, length refers to the number of blocks in a block-row - const ordinal_type length; - - /// \brief Return a pointer offset to full-row i of values_ array; - /// user responsible for indexing into this pointer correctly - /// \param i [in] must be the LOCAL row index offset within this block-row - /// - /// Output: pointer to values_ array at start of full row with local index i - /// - /// Pointer interfaces are NOT guaranteed for backward compatibility - /// This interface is intended for performant kernels, not common usage - KOKKOS_INLINE_FUNCTION - value_type* full_row_in_block_row(const ordinal_type& i) const { - return values_ + (i * length * blockDim_); - } - - /// /brief Return a pointer offset to local row i of block K of values_ array; - /// user responsible for indexing into this pointer correctly - /// \param K [in] must be the LOCAL block index within this block-row - /// \param i [in] must be the LOCAL row index offset within this block-row - /// - /// Output: pointer to values_ array at start of local row within block K - /// - /// Pointer interfaces are NOT guaranteed for backward compatibility - /// This interface is intended for performant kernels, not common usage - KOKKOS_INLINE_FUNCTION - value_type* local_row_in_block(const ordinal_type& K, - const ordinal_type& i) const { - return (values_ + (K * blockDim_ + i * length * blockDim_)); - } - - /// \brief Return the value at a specified block K of block-row - /// with local row and col offset (i,j) - /// \param K [in] must be the LOCAL block index within this block-row - /// \param i [in] must be the LOCAL row index offset within this block-row - /// \param j [in] must be the LOCAL col index offset within this block-row - /// - /// Output: reference to 
value_type at the given (K, i, j) offset into values_ - KOKKOS_INLINE_FUNCTION - value_type& local_block_value(const ordinal_type& K, const ordinal_type& i, - const ordinal_type& j) const { - return values_[K * blockDim_ + i * length * blockDim_ + j]; - } - - /// \brief Return the block column index for a specified block K - /// - /// \param K [in] must be the LOCAL block index within this block-row - /// \return Block column index for "uncompressed" block row - KOKKOS_INLINE_FUNCTION - ordinal_type block_colidx(const ordinal_type K) const { return colidx_[K]; } - - /// \brief Return unmanaged 2D strided View wrapping local block K from this - /// block-row \param K [in] must be the LOCAL block index within this - /// block-row - KOKKOS_INLINE_FUNCTION - block_values_type block(const ordinal_type& K) const { - return block_values_type( - &(values_[K * blockDim_]), - Kokkos::LayoutStride(blockDim_, length * blockDim_, blockDim_, 1)); - } - - /// \brief Return offset into colidx_ for the requested block idx - /// If none found, return Kokkos::Details::ArithTraits::max - /// \param idx_to_match [in] local block idx within block-row - /// \param is_sorted [in] defaulted to false; no usage at this time - KOKKOS_INLINE_FUNCTION - ordinal_type findRelBlockOffset(const ordinal_type idx_to_match, - bool /*is_sorted*/ = false) const { - ordinal_type offset = Kokkos::Details::ArithTraits::max(); - for (ordinal_type blk_offset = 0; blk_offset < length; ++blk_offset) { - ordinal_type idx = colidx_[blk_offset]; - if (idx == idx_to_match) { - offset = blk_offset; - break; - } // return relative offset - } - return offset; - } -}; - -/// \class SparseBlockRowViewConst -/// \brief Const view of a row of a sparse matrix. -/// \tparam MatrixType Sparse matrix type, such as BlockCrsMatrix. -/// -/// This class is like SparseBlockRowView, except that it provides a const -/// view. This class exists in order to let users get a const view of -/// a row of a nonconst matrix. 
-template -struct SparseBlockRowViewConst { - //! The type of the values in the row. - typedef const typename MatrixType::non_const_value_type value_type; - //! The type of the column indices in the row. - typedef const typename MatrixType::non_const_ordinal_type ordinal_type; - //! The type for returned block of values. - typedef Kokkos::View - block_values_type; - - private: - //! Array of values in the row. - value_type* values_; - //! Array of (local) column indices in the row. - ordinal_type* colidx_; - /// \brief Stride between successive rows in a block-row - /// - /// For block compressed sparse row (BlockCSR) storage with row-major layout, - /// (i.e. consecutive rows within a block are NOT contiguous), this will be - /// the stride between rows within a block-row - const ordinal_type blockDim_; - - public: - /// \brief Constructor - /// - /// \param values [in] Array of the row's values. - /// \param colidx [in] Array of the row's column indices. - /// \param blockDim [in] (Constant) stride between block rows - /// within a block-row in the above arrays. - /// \param count [in] Number of entries in the row. - // - // Assumes values and colidx__ already offset to the correct location - KOKKOS_INLINE_FUNCTION - SparseBlockRowViewConst(value_type* const values, - ordinal_type* const colidx__, - const ordinal_type& blockDim, - const ordinal_type& count) - : values_(values), - colidx_(colidx__), - blockDim_(blockDim), - length(count) {} - - /// \brief Constructor with offset into \c colidx array - /// - /// \param values [in] Array of the row's values. - /// \param colidx [in] Array of the row's column indices. - /// \param count [in] Number of entries in the row. - /// \param start [in] Offset into values and colidx of the desired block-row - /// start. - /// Note: The offset into the values array for a block-row equals - /// num_blocks_prior_to_block-row*blockDim*blockDim - /// - /// \tparam OffsetType The type of \c start (see above). 
Must be a - /// built-in integer type. This may differ from ordinal_type. - /// For example, the matrix may have dimensions that fit in int, - /// but a number of entries that does not fit in int. - template - KOKKOS_INLINE_FUNCTION SparseBlockRowViewConst( - const typename MatrixType::values_type& values, - const typename MatrixType::index_type& colidx__, - const ordinal_type& blockDim, const ordinal_type& count, - const OffsetType& start, - const typename std::enable_if::value, - int>::type& = 0) - : values_(&values(start * blockDim * blockDim)), - colidx_(&colidx__(start)), - blockDim_(blockDim), - length(count) {} - - /// \brief Number of entries (i.e. blocks) in the row. - /// - /// This is a public const field rather than a public const method, - /// in order to avoid possible overhead of a method call if the - /// compiler is unable to inline that method call. - /// - /// We assume that rows contain no duplicate entries (i.e., entries - /// with the same column index). Thus, a row may have up to - /// A.numCols() entries. This means that the correct type of - /// 'length' is ordinal_type. 
- const ordinal_type length; - - /// \brief Return a pointer offset to full-row i of values_ array; - /// user responsible for indexing into this pointer correctly - /// \param i [in] must be the LOCAL row index offset within this block-row - /// - /// Output: pointer to values_ array at start of full row with local index i - /// - /// Pointer interfaces are NOT guaranteed for backward compatibility - /// This interface is intended for performant kernels, not common usage - KOKKOS_INLINE_FUNCTION - value_type* full_row_in_block_row(const ordinal_type& i) const { - return values_ + (i * length * blockDim_); - } - - /// /brief Return a pointer offset to local row i of block K of values_ array; - /// user responsible for indexing into this pointer correctly - /// \param K [in] must be the LOCAL block index within this block-row - /// \param i [in] must be the LOCAL row index offset within this block-row - /// - /// Output: pointer to values_ array at start of local row within block K - /// - /// Pointer interfaces are NOT guaranteed for backward compatibility - /// This interface is intended for performant kernels, not common usage - KOKKOS_INLINE_FUNCTION - value_type* local_row_in_block(const ordinal_type& K, - const ordinal_type& i) const { - return (values_ + (K * blockDim_ + i * length * blockDim_)); - } - - /// \brief Return the value at a specified block K with local row and col ids - /// (i,j) \param K [in] must be the LOCAL block index within this block-row - /// \param i [in] must be the LOCAL row index offset within this block-row - /// \param j [in] must be the LOCAL col index offset within this block-row - /// - /// Output: reference to value_type at the given (K, i, j) offset into values_ - KOKKOS_INLINE_FUNCTION - value_type& local_block_value(const ordinal_type& K, const ordinal_type& i, - const ordinal_type& j) const { - return values_[K * blockDim_ + i * length * blockDim_ + j]; - } - - /// \brief Return the block column index for a specified block K 
- /// - /// \param K [in] must be the LOCAL block index within this block-row - /// \return Block column index for "uncompressed" block row - KOKKOS_INLINE_FUNCTION - ordinal_type block_colidx(const ordinal_type K) const { return colidx_[K]; } - - /// \brief Return unmanaged 2D strided View wrapping local block K from this - /// block-row \param K [in] must be the LOCAL block index within this - /// block-row - KOKKOS_INLINE_FUNCTION - block_values_type block(const ordinal_type& K) const { - return block_values_type( - &(values_[K * blockDim_]), - Kokkos::LayoutStride(blockDim_, length * blockDim_, blockDim_, 1)); - } - - /// \brief Return offset into colidx_ for the requested block idx - /// If none found, return Kokkos::Details::ArithTraits::max - /// \param idx_to_match [in] local block idx within block-row - /// \param is_sorted [in] defaulted to false; no usage at this time - KOKKOS_INLINE_FUNCTION - ordinal_type findRelBlockOffset(const ordinal_type& idx_to_match, - bool /*is_sorted*/ = false) const { - typedef typename std::remove_cv::type non_const_ordinal_type; - non_const_ordinal_type offset = - Kokkos::Details::ArithTraits::max(); - for (non_const_ordinal_type blk_offset = 0; blk_offset < length; - ++blk_offset) { - ordinal_type idx = colidx_[blk_offset]; - if (idx == idx_to_match) { - offset = blk_offset; - break; - } // return relative offset - } - return offset; - } -}; - -/// \class BlockCrsMatrix -/// \brief Compressed sparse row implementation of a sparse matrix. -/// \tparam ScalarType The type of entries in the sparse matrix. -/// \tparam OrdinalType The type of column indices in the sparse matrix. -/// \tparam Device The Kokkos Device type. -/// \tparam MemoryTraits Traits describing how Kokkos manages and -/// accesses data. The default parameter suffices for most users. -/// -/// "Crs" stands for "compressed row sparse." 
This is the phrase -/// Trilinos traditionally uses to describe compressed sparse row -/// storage for sparse matrices, as described, for example, in Saad -/// (2nd ed.). -template ::size_type> -class BlockCrsMatrix { - static_assert( - std::is_signed::value, - "BlockCrsMatrix requires that OrdinalType is a signed integer type."); - - private: - typedef - typename Kokkos::ViewTraits::host_mirror_space host_mirror_space; - - public: - //! Type of the matrix's execution space. - typedef typename Device::execution_space execution_space; - //! Type of the matrix's memory space. - typedef typename Device::memory_space memory_space; - //! Type of the matrix's device type. - typedef Kokkos::Device device_type; - - //! Type of each value in the matrix. - typedef ScalarType value_type; - //! Type of each (column) index in the matrix. - typedef OrdinalType ordinal_type; - typedef MemoryTraits memory_traits; - /// \brief Type of each entry of the "row map." - /// - /// The "row map" corresponds to the \c ptr array of row offsets in - /// compressed sparse row (CSR) storage. - typedef SizeType size_type; - - //! Type of a host-memory mirror of the sparse matrix. - typedef BlockCrsMatrix - HostMirror; - //! Type of the graph structure of the sparse matrix. - typedef Kokkos::StaticCrsGraph - StaticCrsGraphType; - //! Type of the graph structure of the sparse matrix - consistent with Kokkos. - typedef Kokkos::StaticCrsGraph - staticcrsgraph_type; - //! Type of column indices in the sparse matrix. - typedef typename staticcrsgraph_type::entries_type index_type; - //! Const version of the type of column indices in the sparse matrix. - typedef typename index_type::const_value_type const_ordinal_type; - //! Nonconst version of the type of column indices in the sparse matrix. - typedef typename index_type::non_const_value_type non_const_ordinal_type; - //! Type of the "row map" (which contains the offset for each row's data). 
- typedef typename staticcrsgraph_type::row_map_type row_map_type; - //! Const version of the type of row offsets in the sparse matrix. - typedef typename row_map_type::const_value_type const_size_type; - //! Nonconst version of the type of row offsets in the sparse matrix. - typedef typename row_map_type::non_const_value_type non_const_size_type; - //! Kokkos Array type of the entries (values) in the sparse matrix. - typedef Kokkos::View - values_type; - //! Const version of the type of the entries in the sparse matrix. - typedef typename values_type::const_value_type const_value_type; - //! Nonconst version of the type of the entries in the sparse matrix. - typedef typename values_type::non_const_value_type non_const_value_type; - - /// \name Storage of the actual sparsity structure and values. - /// - /// BlockCrsMatrix uses the compressed sparse row (CSR) storage format to - /// store the sparse matrix. CSR is also called "compressed row - /// storage"; hence the name, which it inherits from Tpetra and from - /// Epetra before it. - //@{ - //! The graph (sparsity structure) of the sparse matrix. - staticcrsgraph_type graph; - //! The 1-D array of values of the sparse matrix. - values_type values; - //@} - - /// \brief Launch configuration that can be used by - /// overloads/specializations of MV_multiply(). - /// - /// This is a hack and needs to be replaced by a general - /// state mechanism. - DeviceConfig dev_config; - - /// \brief Default constructor; constructs an empty sparse matrix. - /// - /// mfh: numCols and nnz should be properties of the graph, not the matrix. - /// Then BlockCrsMatrix needs methods to get these from the graph. - BlockCrsMatrix() : numCols_(0), blockDim_(0) {} - - //! Copy constructor (shallow copy). 
- template - BlockCrsMatrix(const BlockCrsMatrix& B) - : graph(B.graph.entries, B.graph.row_map), - values(B.values), - dev_config(B.dev_config), - numCols_(B.numCols()), - blockDim_(B.blockDim()) { - graph.row_block_offsets = B.graph.row_block_offsets; - // MD: Changed the copy constructor of graph - // as the constructor of StaticCrsGraph does not allow copy from non const - // version. - } - - /// \brief Construct with a graph that will be shared. - /// - /// \param[in] arg_label The sparse matrix's label. - /// \param[in] arg_graph The graph between the blocks. - /// \param[in] blockDimIn The block size. - /// - /// Allocate the values array for subsequent fill. - BlockCrsMatrix(const std::string& arg_label, - const staticcrsgraph_type& arg_graph, - const OrdinalType& blockDimIn) - : graph(arg_graph), - values(arg_label, - arg_graph.entries.extent(0) * blockDimIn * blockDimIn), - numCols_(maximum_entry(arg_graph) + 1), - blockDim_(blockDimIn) {} - - /// \brief Constructor that copies raw arrays of host data in - /// coordinate format. - /// - /// On input, each entry of the sparse matrix is stored in val[k], - /// with row index rows[k] and column index cols[k]. We assume that - /// the entries are sorted in increasing order by row index. - /// - /// This constructor is mainly useful for benchmarking or for - /// reading the sparse matrix's data from a file. - /// - /// \param label [in] The sparse matrix's label. - /// \param nrows [in] The number of rows. - /// \param ncols [in] The number of columns. - /// \param annz [in] The number of entries. - /// \param val [in] The entries. - /// \param rows [in] The row indices. rows[k] is the row index of - /// val[k]. - /// \param cols [in] The column indices. cols[k] is the column - /// index of val[k]. - /// \param pad [in] If true, pad the sparse matrix's storage with - /// zeros in order to improve cache alignment and / or - /// vectorization. - /// - /// The \c pad argument is currently not used. 
- BlockCrsMatrix(const std::string& label, OrdinalType nrows, OrdinalType ncols, - size_type annz, ScalarType* val, OrdinalType* rows, - OrdinalType* cols, OrdinalType blockdim, bool pad = false) { - (void)pad; - ctor_impl(label, nrows, ncols, annz, val, rows, cols, blockdim); - } - - /// \brief Constructor that accepts a row map, column indices, and - /// values. - /// - /// The matrix will store and use the row map, indices, and values - /// directly (by view, not by deep copy). - /// - /// \param label [in] The sparse matrix's label. - /// \param nrows [in] The number of rows. - /// \param ncols [in] The number of columns. - /// \param annz [in] The number of entries. - /// \param vals [in/out] The entries. - /// \param rows [in/out] The row map (containing the offsets to the - /// data in each row). - /// \param cols [in/out] The column indices. - BlockCrsMatrix(const std::string& /*label*/, const OrdinalType nrows, - const OrdinalType ncols, const size_type /*annz*/, - const values_type& vals, const row_map_type& rows, - const index_type& cols, const OrdinalType blockDimIn) - : graph(cols, rows), - values(vals), - numCols_(ncols), - blockDim_(blockDimIn) { - const ordinal_type actualNumRows = - (rows.extent(0) != 0) ? static_cast( - rows.extent(0) - static_cast(1)) - : static_cast(0); - if (nrows != actualNumRows) { - std::ostringstream os; - os << "Input argument nrows = " << nrows - << " != the actual number of " - "rows " - << actualNumRows << " according to the 'rows' input argument."; - throw std::invalid_argument(os.str()); - } - // nnz returns graph.entries.extent(0) i.e. ptr[ nrows + 1 ] nnz entry - // input annz is nnz of values, not comparable with block ptr 'nnz' i.e. - // numBlocks - if (blockDim_ <= 0) { - std::ostringstream os; - os << "Input argument blockDim = " << blockDim_ - << " is not larger than 0."; - throw std::invalid_argument(os.str()); - } - } - - /// \brief Constructor that accepts a a static graph, and values. 
- /// - /// The matrix will store and use the row map, indices, and values - /// directly (by view, not by deep copy). - /// - /// \param[in] label The sparse matrix's label. - /// \param[in] ncols The number of columns. - /// \param[in] vals The entries. - /// \param[in] graph_ The graph between the blocks. - /// \param[in] blockDimIn The block size. - BlockCrsMatrix(const std::string& /*label*/, const OrdinalType& ncols, - const values_type& vals, const staticcrsgraph_type& graph_, - const OrdinalType& blockDimIn) - : graph(graph_), values(vals), numCols_(ncols), blockDim_(blockDimIn) {} - - /// \brief Constructor that accepts a CrsMatrix and block dimension, - /// assuming the provided CrsMatrix has appropriate block structure. - template - BlockCrsMatrix(const KokkosSparse::CrsMatrix& crs_mtx, - const OrdinalType blockDimIn) { - typedef typename KokkosSparse::CrsMatrix - crs_matrix_type; - typedef typename crs_matrix_type::staticcrsgraph_type crs_graph_type; - typedef typename crs_graph_type::entries_type crs_graph_entries_type; - typedef typename crs_graph_type::row_map_type crs_graph_row_map_type; - - blockDim_ = blockDimIn; - numCols_ = crs_mtx.numCols() / blockDim_; - values = crs_mtx.values; - - OrdinalType nbrows = - crs_mtx.numRows() / - blockDim_; // actual number of block rows; add 1 for ptr length - - // block_rows will accumulate the number of blocks per row - this is NOT the - // row_map with cum sum!! - std::vector block_rows(nbrows, 0); - - typename crs_graph_row_map_type::HostMirror h_crs_row_map = - Kokkos::create_mirror_view(crs_mtx.graph.row_map); - Kokkos::deep_copy(h_crs_row_map, crs_mtx.graph.row_map); - typename crs_graph_entries_type::HostMirror h_crs_entries = - Kokkos::create_mirror_view(crs_mtx.graph.entries); - Kokkos::deep_copy(h_crs_entries, crs_mtx.graph.entries); - - // determine size of block cols indices == number of blocks, i.e. 
nnz for - // the block CRS graph - OrdinalType numBlocks = 0; - for (OrdinalType i = 0; i < crs_mtx.numRows(); i += blockDim_) { - numBlocks += - (h_crs_row_map(i + 1) - h_crs_row_map(i)) / blockDim_; // cum sum - block_rows[i / blockDim_] = (h_crs_row_map(i + 1) - h_crs_row_map(i)) / - blockDim_; // frequency counts - } - - // create_staticcrsgraph takes the frequency of blocks per row - // and returns the cum sum pointer row_map with nbrows+1 size, and total - // numBlocks in the final entry - graph = Kokkos::create_staticcrsgraph("blockgraph", - block_rows); - typename index_type::HostMirror h_entries = - Kokkos::create_mirror_view(graph.entries); - typename row_map_type::HostMirror h_rowmap = - Kokkos::create_mirror_view(graph.row_map); - - Kokkos::deep_copy(h_rowmap, graph.row_map); - - for (OrdinalType i = 0; i < nbrows; ++i) { - OrdinalType blks_in_row = block_rows[i]; - - OrdinalType offset_into_blkcolidx_start = h_rowmap(i); - OrdinalType offset_into_colidx_start = - offset_into_blkcolidx_start * blockDim_ * blockDim_; - - for (OrdinalType lidx = 0; lidx < blks_in_row; ++lidx) { - h_entries(offset_into_blkcolidx_start + lidx) = - h_crs_entries(offset_into_colidx_start + blockDim_ * lidx) / - blockDim_; - } - } - - Kokkos::deep_copy(graph.entries, h_entries); - } - - /// Declaration for ctor_impl - this member function is not inlined - void ctor_impl(const std::string& label, const OrdinalType nrows, - const OrdinalType ncols, const size_type annz, ScalarType* val, - OrdinalType* rows, OrdinalType* cols, - const OrdinalType blockDimIn); - - /// \brief Given an array of blocks, sum the values into corresponding - /// block in BlockCrsMatrix - /// \param rowi [in] is a block-row index - /// \param ncol [in] is number of blocks referenced in cols[] array - /// \param cols[] [in] are block colidxs within the block-row to be summed - /// into - /// ncol entries - /// \param vals[] [in] array containing 'block' of values - /// ncol*block_size*block_size entries - 
/// assume vals block is provided in 'LayoutRight' or 'Row Major' - /// format, that is e.g. 2x2 block [ a b ; c d ] provided as flattened - /// 1d array as [a b c d] Assume that each block is stored contiguously - /// in vals: [a b; c d] [e f; g h] -> [a b c d e f g h] If so, then i - /// in [0, ncols) for cols[] maps to i*block_size*block_size in vals[] - KOKKOS_INLINE_FUNCTION - OrdinalType sumIntoValues(const OrdinalType rowi, const OrdinalType cols[], - const OrdinalType ncol, const ScalarType vals[], - const bool is_sorted = false, - const bool force_atomic = false) const { - SparseBlockRowView row_view = this->block_row(rowi); - const ordinal_type block_size = this->blockDim(); - - ordinal_type numValid = 0; // number of valid local column indices - - for (ordinal_type i = 0; i < ncol; ++i) { - // Find offset into values for block-row rowi and colidx cols[i] - // cols[i] is the index to match - // blk_offset is the offset for block colidx from bptr[rowi] to bptr[rowi - // + 1] (not global offset) colidx_ and values_ are already offset to the - // beginning of blockrow rowi - auto blk_offset = row_view.findRelBlockOffset(cols[i], is_sorted); - if (blk_offset != Kokkos::Details::ArithTraits::max()) { - ordinal_type offset_into_vals = - i * block_size * - block_size; // stride == 1 assumed between elements - for (ordinal_type lrow = 0; lrow < block_size; ++lrow) { - auto local_row_values = row_view.local_row_in_block( - blk_offset, lrow); // pointer to start of specified local row - // within this block - for (ordinal_type lcol = 0; lcol < block_size; ++lcol) { - if (force_atomic) { - Kokkos::atomic_add( - &(local_row_values[lcol]), - vals[offset_into_vals + lrow * block_size + lcol]); - } else { - local_row_values[lcol] += - vals[offset_into_vals + lrow * block_size + lcol]; - } - } - } - ++numValid; - } - } // end for ncol - return numValid; - } - - /// \brief Given an array of blocks, replace the values of corresponding - /// blocks in BlockCrsMatrix - /// 
\param rowi [in] is a block-row index - /// \param ncol [in] is number of blocks referenced in cols[] array - /// \param cols[] [in] are block colidxs within the block-row to be summed - /// into - /// ncol entries - /// \param vals[] [in] array containing 'block' of values - // ncol*block_size*block_size entries - // assume vals block is provided in 'LayoutRight' or 'Row Major' - // format, that is e.g. 2x2 block [ a b ; c d ] provided as flattened - // 1d array as [a b c d] Assume that each block is stored contiguously - // in vals: [a b; c d] [e f; g h] -> [a b c d e f g h] If so, then i in - // [0, ncols) for cols[] maps to i*block_size*block_size in vals[] - KOKKOS_INLINE_FUNCTION - OrdinalType replaceValues(const OrdinalType rowi, const OrdinalType cols[], - const OrdinalType ncol, const ScalarType vals[], - const bool is_sorted = false, - const bool force_atomic = false) const { - SparseBlockRowView row_view = this->block_row(rowi); - const ordinal_type block_size = this->blockDim(); - - ordinal_type numValid = 0; // number of valid local column indices - - for (ordinal_type i = 0; i < ncol; ++i) { - // Find offset into values for block-row rowi and colidx cols[i] - // cols[i] is the index to match - // blk_offset is the offset for block colidx from bptr[rowi] to bptr[rowi - // + 1] (not global offset) colidx_ and values_ are already offset to the - // beginning of blockrow rowi - auto blk_offset = row_view.findRelBlockOffset(cols[i], is_sorted); - if (blk_offset != Kokkos::Details::ArithTraits::max()) { - ordinal_type offset_into_vals = - i * block_size * - block_size; // stride == 1 assumed between elements - for (ordinal_type lrow = 0; lrow < block_size; ++lrow) { - auto local_row_values = row_view.local_row_in_block( - blk_offset, lrow); // pointer to start of specified local row - // within this block - for (ordinal_type lcol = 0; lcol < block_size; ++lcol) { - if (force_atomic) { - Kokkos::atomic_assign( - &(local_row_values[lcol]), - 
vals[offset_into_vals + lrow * block_size + lcol]); - } else { - local_row_values[lcol] = - vals[offset_into_vals + lrow * block_size + lcol]; - } - } - } - ++numValid; - } - } // end for ncol - return numValid; - } - - //! Attempt to assign the input matrix to \c *this. - // Are the CUDA sparse handles needed to be copied here?? - template - BlockCrsMatrix& operator=( - const BlockCrsMatrix& mtx) { - numCols_ = mtx.numCols(); - blockDim_ = mtx.blockDim(); - graph = mtx.graph; - values = mtx.values; - dev_config = mtx.dev_config; - return *this; - } - - //! The number of rows in the sparse matrix. - KOKKOS_INLINE_FUNCTION ordinal_type numRows() const { - return graph.numRows(); - } - - //! The number of columns in the sparse matrix. - KOKKOS_INLINE_FUNCTION ordinal_type numCols() const { return numCols_; } - - //! The block dimension in the sparse block matrix. - KOKKOS_INLINE_FUNCTION ordinal_type blockDim() const { return blockDim_; } - - //! The number of "point" (non-block) rows in the matrix. - // This is the dimension of the range of this matrix as a linear operator. - KOKKOS_INLINE_FUNCTION ordinal_type numPointRows() const { - return numRows() * blockDim(); - } - - //! The number of "point" (non-block) columns in the matrix. - // This is the dimension of the domain of this matrix as a linear operator. - KOKKOS_INLINE_FUNCTION ordinal_type numPointCols() const { - return numCols() * blockDim(); - } - - //! The number of stored entries in the sparse matrix. - KOKKOS_INLINE_FUNCTION size_type nnz() const { - return graph.entries.extent(0); - } - - friend struct SparseBlockRowView; - - /// \brief Return a SparseBlockRowView of block-row i of the matrix. - /// - /// If row i does not belong to the matrix, return an empty view. - /// - /// The returned object \c view implements the following interface: - ///
    - ///
  • \c view.length is the number of entries (i.e. blocks) - /// in the block row
  • - ///
  • \c view.local_row_in_block_row(K, i) returns a nonconst pointer - /// to the values of the ith local row in the k-th block of the block-row - ///
  • - ///
  • \c view.full_row_in_block_row(i) returns a nonconst pointer - /// to the values of the ith local row of the block-row
  • - ///
  • \c view.local_block_value(K, i, j) returns a nonconst reference - /// to the value in the ith local row and jth local col - /// of the k-th block of the block-row
  • - ///
  • \c view.block(K) returns an unmanaged 2D strided Kokkos::View - /// of the values of the k-th block of the block-row
  • - ///
- /// - /// Users should not rely on the return type of this method. They - /// should instead assign to 'auto'. - /// - KOKKOS_INLINE_FUNCTION - SparseBlockRowView block_row(const ordinal_type i) const { - const size_type start = - graph.row_map(i); // total num blocks prior to this block-row - const ordinal_type count = static_cast( - graph.row_map(i + 1) - start); // num blocks in this row - - if (count == 0) { - return SparseBlockRowView(nullptr, nullptr, 1, 0); - } else { - return SparseBlockRowView(values, graph.entries, - blockDim(), count, start); - } - } - - /// \brief Return a SparseBlockRowViewConst of block-row i of the matrix. - /// - /// If row i does not belong to the matrix, return an empty view. - /// - /// The returned object \c view implements the following interface: - ///
    - ///
  • \c view.length is the number of entries (i.e. blocks) - /// in the block row
  • - ///
  • \c view.local_row_in_block_row(K, i) returns a nonconst pointer - /// to the values of the ith local row in the k-th block of the block-row - ///
  • - ///
  • \c view.full_row_in_block_row(i) returns a nonconst pointer - /// to the values of the ith local row of the block-row
  • - ///
  • \c view.local_block_value(K, i, j) returns a nonconst reference - /// to the value in the ith local row and jth local col - /// of the k-th block of the block-row
  • - ///
  • \c view.block(K) returns an unmanaged 2D strided Kokkos::View - /// of the values of the k-th block of the block-row
  • - ///
- /// - /// Users should not rely on the return type of this method. They - /// should instead assign to 'auto'. - /// - KOKKOS_INLINE_FUNCTION - SparseBlockRowViewConst block_row_Const( - const ordinal_type i) const { - const size_type start = - graph.row_map(i); // total num blocks prior to this block-row - const ordinal_type count = static_cast( - graph.row_map(i + 1) - start); // num blocks in this row - - if (count == 0) { - return SparseBlockRowViewConst(nullptr, nullptr, 1, 0); - } else { - return SparseBlockRowViewConst(values, graph.entries, - blockDim(), count, start); - } - } - - private: - ordinal_type numCols_; - ordinal_type blockDim_; // TODO Assuming square blocks for now - add - // blockRowDim, blockColDim -}; - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -// Input assumptions: -// rows is pointer rep for the row_map member View of the BlockCrsMatrix graph -// (i.e. 
cum sum of number of blocks per block-row) cols is pointer rep for -// the entries member View of the BlockCrsMatrix graph (colidx for block-row -// blocks) annz is the total number of non-zeros in the CrsMatrix (equal to -// blockDim*blockDim*numBlocks) -template -void BlockCrsMatrix::ctor_impl(const std::string& /*label*/, - const OrdinalType nrows, - const OrdinalType ncols, - const size_type annz, ScalarType* val, - OrdinalType* rows, OrdinalType* cols, - const OrdinalType blockDimIn) { - numCols_ = ncols; - blockDim_ = blockDimIn; - - // Wrap the raw pointers in unmanaged host Views - typename values_type::HostMirror unman_val(val, annz); - typename row_map_type::HostMirror unman_rows(rows, nrows + 1); - typename index_type::HostMirror unman_cols(cols, ncols); - - // Create temporary Views for row_map and entries because the StaticCrsGraph - // ctor requires View inputs - values_type tmp_row_map("tmp_row_map", nrows + 1); - values_type tmp_entries("tmp_entries", ncols); - - Kokkos::deep_copy(val, unman_val); - Kokkos::deep_copy(tmp_row_map, unman_rows); - Kokkos::deep_copy(tmp_entries, unman_cols); - - // Initialize graph using the temp entries and row_map Views - graph = staticcrsgraph_type(tmp_entries, tmp_row_map); -} - -/// \class is_block_crs_matrix -/// \brief is_block_crs_matrix::value is true if T is a BlockCrsMatrix<...>, -/// false oterhwise -template -struct is_block_crs_matrix : public std::false_type {}; -template -struct is_block_crs_matrix> : public std::true_type {}; -template -struct is_block_crs_matrix> : public std::true_type { -}; - -} // namespace Experimental -} // namespace KokkosSparse - -#endif diff --git a/src/sparse/KokkosSparse_Utils.hpp b/src/sparse/KokkosSparse_Utils.hpp index 323ae7846f..d35611ca0c 100644 --- a/src/sparse/KokkosSparse_Utils.hpp +++ b/src/sparse/KokkosSparse_Utils.hpp @@ -50,7 +50,6 @@ #include #include "KokkosKernels_PrintUtils.hpp" #include "KokkosSparse_CrsMatrix.hpp" -#include "KokkosSparse_BlockCrsMatrix.hpp" 
#include "KokkosSparse_BsrMatrix.hpp" #ifdef KOKKOSKERNELS_HAVE_PARALLEL_GNUSORT @@ -60,22 +59,20 @@ namespace KokkosKernels { enum SparseMatrixFormat { - BlockCRS, BSR, - CRS = BlockCRS, // convenience alias: for block_size=1 or no-blocks there is - // no difference in value ordering (so the format tag becomes - // irrelevant) + CRS, }; namespace Impl { +/* create a block-sparse version of a CrsMatrix +*/ template -void kk_create_blockcrs_formated_point_crsmatrix( +void kk_create_bsr_formated_point_crsmatrix( int block_size, size_t num_rows, size_t num_cols, in_row_view_t in_xadj, in_nnz_view_t in_adj, in_val_view_t in_vals, - size_t &out_num_rows, size_t &out_num_cols, out_row_view_t &out_xadj, out_nnz_view_t &out_adj, out_val_view_t &out_vals) { typedef typename in_nnz_view_t::non_const_value_type lno_t; @@ -103,13 +100,13 @@ void kk_create_blockcrs_formated_point_crsmatrix( std::vector block_accumulators(out_num_cols, 0); std::vector block_flags(out_num_cols, false); + // loop over first rows of each block-row for (lno_t i = 0; i < lno_t(num_rows); i += block_size) { - // std::cout << "row:" << i << std::endl; lno_t outputrowsize = 0; + // loop over rows in block for (lno_t block_ind = 0; block_ind < block_size; ++block_ind) { - lno_t row_ind = block_ind + i; - // std::cout << "\nrow_ind:" << row_ind << std::endl; + const lno_t row_ind = block_ind + i; if (row_ind < lno_t(num_rows)) { size_type adj_begin = hr(row_ind); size_type adj_end = hr(row_ind + 1); @@ -216,73 +213,47 @@ void kk_create_blockcrs_formated_point_crsmatrix( Kokkos::deep_copy(out_vals, hov); } +/* Create output row pointer, col index, and value arrays + for BSR-format data from CRS data consistent with BSR format + +*/ template -void kk_create_blockcrs_from_blockcrs_formatted_point_crs( - int block_size, size_t num_rows, size_t num_cols, in_row_view_t in_xadj, - in_nnz_view_t in_adj, in_val_view_t in_vals, - - size_t &out_num_rows, size_t &out_num_cols, out_row_view_t &out_xadj, - 
out_nnz_view_t &out_adj, out_val_view_t &out_vals) { - typename in_row_view_t::HostMirror hr = Kokkos::create_mirror_view(in_xadj); - Kokkos::deep_copy(hr, in_xadj); - typename in_nnz_view_t::HostMirror he = Kokkos::create_mirror_view(in_adj); - Kokkos::deep_copy(he, in_adj); - typename in_val_view_t::HostMirror hv = Kokkos::create_mirror_view(in_vals); - Kokkos::deep_copy(hv, in_vals); - - out_num_rows = num_rows / block_size; - out_num_cols = num_cols / block_size; - - out_xadj = out_row_view_t("BlockedCRS XADJ", out_num_rows + 1); - out_adj = out_nnz_view_t("BlockedCRS ADJ", - in_adj.extent(0) / (block_size * block_size)); - out_vals = out_val_view_t("BlockedCRS VALS", in_vals.extent(0)); - - typename out_row_view_t::HostMirror hor = - Kokkos::create_mirror_view(out_xadj); - typename out_nnz_view_t::HostMirror hoe = Kokkos::create_mirror_view(out_adj); - typename out_val_view_t::HostMirror hov = - Kokkos::create_mirror_view(out_vals); - - typedef typename in_nnz_view_t::non_const_value_type lno_t; - typedef typename in_row_view_t::non_const_value_type size_type; - // typedef typename in_val_view_t::non_const_value_type scalar_t; - - for (lno_t i = 0; i < lno_t(out_num_rows); ++i) { - hor(i) = hr(i * block_size) / (block_size * block_size); - - size_type ib = hr(i * block_size); - size_type ie = hr(i * block_size + 1); - - lno_t is = ie - ib; - - size_type ob = hor(i); - // size_type oe = hr(i * block_size + 1) / block_size; - lno_t os = (ie - ib) / block_size; - lno_t write_index = 0; - for (lno_t j = 0; j < is; ++j) { - lno_t e = he(ib + j); - if (e % block_size == 0) { - hoe(ob + write_index++) = e / block_size; - } - } - if (write_index != os) { - std::cerr << "row:" << i << " expected size:" << os - << " written size:" << write_index << std::endl; - exit(1); - } - } - hor(out_num_rows) = hr(out_num_rows * block_size) / (block_size * block_size); - Kokkos::deep_copy(out_xadj, hor); - Kokkos::deep_copy(out_adj, hoe); - - size_type ne = in_adj.extent(0); - for 
(size_type i = 0; i < ne; ++i) { - hov(i) = hv(i); - } - Kokkos::deep_copy(out_vals, hov); +void kk_create_bsr_from_bsr_formatted_point_crs( + int block_size, size_t num_rows, size_t num_cols, + in_row_view_t in_xadj, // row pointer (CrsMatrix::graph.row_map) + in_nnz_view_t in_adj, // col index (CrsMatrix::graph.entries) + in_val_view_t in_vals, // values CrsMatrix::values + size_t &out_num_rows, // rows of blocks in output + size_t &out_num_cols, // cols of blocks in output + out_row_view_t &out_xadj, + out_nnz_view_t &out_adj, + out_val_view_t &out_vals + ) { + + + // reconstruct CrsMatrix + typedef typename in_nnz_view_t::non_const_value_type in_ordinal_type; + typedef typename in_val_view_t::non_const_value_type in_scalar_type; + typedef typename in_nnz_view_t::device_type in_device_type; + typedef KokkosSparse::CrsMatrix InMatrix; + InMatrix in("", num_rows, num_cols, in_vals.size(), in_vals, in_xadj, in_adj); + + // convert to BsrMatrix + typedef typename out_nnz_view_t::non_const_value_type out_ordinal_type; + typedef typename out_val_view_t::non_const_value_type out_scalar_type; + typedef typename out_nnz_view_t::device_type out_device_type; + typedef KokkosSparse::Experimental::BsrMatrix< + out_scalar_type, + out_ordinal_type, + out_device_type + > OutMatrix; + OutMatrix out(in, block_size); + + out_xadj = out.graph.row_map; + out_adj = out.graph.entries; + out_vals = out.values; } template class RowIndexBase { @@ -2007,31 +1978,47 @@ class RowIndexBase { lno_t row_size; }; + +/* The only use of this is in Sparse Gauss Seidel, which is only implemented + for BSR and CRS, which are identical when block size is 1 + +*/ template class MatrixRowIndex; +/* SGS expects this interface that accepts a block size even when CrsMatrix + is used, so... 
+*/ template -class MatrixRowIndex +class MatrixRowIndex : public RowIndexBase { public: using Base = RowIndexBase; + // CrsMatrix "block size" is 1 KOKKOS_INLINE_FUNCTION - MatrixRowIndex(const lno_t block_size_, const lno_t row_begin_, + MatrixRowIndex(const lno_t /*block_size_*/, const lno_t row_begin_, const lno_t row_end_) - : Base(block_size_, row_begin_, row_end_) {} + : Base(1, row_begin_, row_end_) { + } + // which block a col_idx is in (= col_idx for CRS) KOKKOS_INLINE_FUNCTION size_type block(const lno_t col_idx) { - return Base::row_offset() + col_idx * Base::block_size; + return col_idx; } + // stride between values in a block (1 for CRS, though there is only 1 value...) KOKKOS_INLINE_FUNCTION - size_type block_stride() { return Base::size() * Base::block_size; } + size_type block_stride() { return 1; } KOKKOS_INLINE_FUNCTION - size_type value(const lno_t col_idx, const lno_t block_row, - const lno_t block_col) { + size_type value(const lno_t col_idx, + const lno_t /*block_row*/, // row within block (must be zero for CRS) + const lno_t /*block_col*/ // col within block (must be zero for CRS) + ) { + constexpr lno_t block_row = 0; + constexpr lno_t block_col = 0; return block(col_idx) + block_row * block_stride() + block_col; } }; @@ -2072,13 +2059,6 @@ struct MatrixTraits< static constexpr auto format = KokkosKernels::CRS; }; -template -struct MatrixTraits> { - static constexpr auto format = KokkosKernels::BlockCRS; -}; - template struct MatrixTraits struct MatrixConverter; -template <> -struct MatrixConverter { - template < - typename scalar_t, typename lno_t, typename device, typename size_type, - typename crsMat_t = - KokkosSparse::CrsMatrix, - typename blockCrsMat_t = KokkosSparse::Experimental::BlockCrsMatrix< - scalar_t, lno_t, device, void, size_type>> - static blockCrsMat_t from_blockcrs_formated_point_crsmatrix( - const KokkosSparse::CrsMatrix - &mtx, - lno_t block_size) { - return blockCrsMat_t(mtx, block_size); - } -}; - template <> struct
MatrixConverter { template > - static bsrMtx_t from_blockcrs_formated_point_crsmatrix( + static bsrMtx_t from_bsr_formated_point_crsmatrix( const KokkosSparse::CrsMatrix &mtx, lno_t block_size) { diff --git a/src/sparse/KokkosSparse_gauss_seidel.hpp b/src/sparse/KokkosSparse_gauss_seidel.hpp index efe70dd1c5..3aaba443eb 100644 --- a/src/sparse/KokkosSparse_gauss_seidel.hpp +++ b/src/sparse/KokkosSparse_gauss_seidel.hpp @@ -286,7 +286,7 @@ void gauss_seidel_numeric(KernelHandle *handle, is_graph_symmetric); } -template void block_gauss_seidel_numeric( @@ -437,7 +437,7 @@ void symmetric_gauss_seidel_apply( update_y_vector, omega, numIter, true, true); } -template @@ -603,7 +603,7 @@ void forward_sweep_gauss_seidel_apply( update_y_vector, omega, numIter, true, false); } -template @@ -769,7 +769,7 @@ void backward_sweep_gauss_seidel_apply( update_y_vector, omega, numIter, false, true); } -template diff --git a/src/sparse/KokkosSparse_spmv.hpp b/src/sparse/KokkosSparse_spmv.hpp index 95860029f1..ded601d90e 100644 --- a/src/sparse/KokkosSparse_spmv.hpp +++ b/src/sparse/KokkosSparse_spmv.hpp @@ -53,12 +53,10 @@ #include "KokkosKernels_Controls.hpp" #include "KokkosSparse_spmv_spec.hpp" #include "KokkosSparse_spmv_struct_spec.hpp" -#include "KokkosSparse_spmv_blockcrsmatrix_spec.hpp" #include "KokkosSparse_spmv_bsrmatrix_spec.hpp" #include #include "KokkosSparse_BsrMatrix.hpp" #include "KokkosSparse_CrsMatrix.hpp" -#include "KokkosSparse_BlockCrsMatrix.hpp" #include "KokkosBlas1_scal.hpp" #include "KokkosKernels_Utils.hpp" #include "KokkosKernels_Error.hpp" @@ -74,8 +72,7 @@ struct RANK_TWO {}; /// vector /// /// -/// \tparam AMatrix A KokkosSparse::CrsMatrix, KokkosSparse::BlockCrsMatrix or -/// KokkosSparse::BsrMatrix +/// \tparam AMatrix A KokkosSparse::CrsMatrix, or KokkosSparse::BsrMatrix /// /// \param controls [in] kokkos-kernels control structure. 
/// \param mode [in] @@ -255,122 +252,6 @@ void spmv(KokkosKernels::Experimental::Controls controls, const char mode[], } } -#ifdef DOXY // hide SFINAE from documentation -template -#else -template < - class AlphaType, class AMatrix, class XVector, class BetaType, - class YVector, - typename std::enable_if::value>::type* = nullptr> -#endif -void spmv(KokkosKernels::Experimental::Controls controls, const char mode[], - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y, const RANK_ONE) { - // Make sure that x and y have the same rank. - static_assert( - static_cast(XVector::rank) == static_cast(YVector::rank), - "KokkosSparse::spmv: Vector ranks do not match."); - // Make sure that x (and therefore y) is rank 1. - static_assert(static_cast(XVector::rank) == 1, - "KokkosSparse::spmv: Both Vector inputs must have rank 1 " - "in order to call this specialization of spmv."); - // Make sure that y is non-const. - static_assert(std::is_same::value, - "KokkosSparse::spmv: Output Vector must be non-const."); - // - if (A.blockDim() == 1) { - KokkosSparse::CrsMatrix< - typename AMatrix::value_type, typename AMatrix::ordinal_type, - typename AMatrix::device_type, Kokkos::MemoryTraits, - typename AMatrix::size_type> - Acrs("bsr_to_crs", A.numCols(), A.values, A.graph); - KokkosSparse::spmv(controls, mode, alpha, Acrs, x, beta, y, RANK_ONE()); - return; - } - // Check compatibility of dimensions at run time. 
- if ((mode[0] == NoTranspose[0]) || (mode[0] == Conjugate[0])) { - if ((x.extent(1) != y.extent(1)) || - (static_cast(A.numCols() * A.blockDim()) != - static_cast(x.extent(0))) || - (static_cast(A.numRows() * A.blockDim()) != - static_cast(y.extent(0)))) { - std::ostringstream os; - os << "KokkosSparse::spmv (BlockCrsMatrix): Dimensions do not match: " - << ", A: " << A.numRows() * A.blockDim() << " x " - << A.numCols() * A.blockDim() << ", x: " << x.extent(0) << " x " - << x.extent(1) << ", y: " << y.extent(0) << " x " << y.extent(1); - - KokkosKernels::Impl::throw_runtime_exception(os.str()); - } - } else { - if ((x.extent(1) != y.extent(1)) || - (static_cast(A.numCols() * A.blockDim()) != - static_cast(y.extent(0))) || - (static_cast(A.numRows() * A.blockDim()) != - static_cast(x.extent(0)))) { - std::ostringstream os; - os << "KokkosSparse::spmv (BlockCrsMatrix): Dimensions do not match " - "(transpose): " - << ", A: " << A.numRows() * A.blockDim() << " x " - << A.numCols() * A.blockDim() << ", x: " << x.extent(0) << " x " - << x.extent(1) << ", y: " << y.extent(0) << " x " << y.extent(1); - - KokkosKernels::Impl::throw_runtime_exception(os.str()); - } - } - // - typedef KokkosSparse::Experimental::BlockCrsMatrix< - typename AMatrix::const_value_type, typename AMatrix::const_ordinal_type, - typename AMatrix::device_type, Kokkos::MemoryTraits, - typename AMatrix::const_size_type> - AMatrix_Internal; - - typedef Kokkos::View< - typename XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, - Kokkos::MemoryTraits > - XVector_Internal; - - typedef Kokkos::View< - typename YVector::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename YVector::device_type, Kokkos::MemoryTraits > - YVector_Internal; - - AMatrix_Internal A_i(A); - XVector_Internal x_i(x); - YVector_Internal y_i(y); - -#define __SPMV_TYPES__ \ - typename 
AMatrix_Internal::const_value_type, \ - typename AMatrix_Internal::const_ordinal_type, \ - typename AMatrix_Internal::device_type, \ - typename AMatrix_Internal::memory_traits, \ - typename AMatrix_Internal::const_size_type, \ - typename XVector_Internal::const_value_type*, \ - typename XVector_Internal::array_layout, \ - typename XVector_Internal::device_type, \ - typename XVector_Internal::memory_traits, \ - typename YVector_Internal::value_type*, \ - typename YVector_Internal::array_layout, \ - typename YVector_Internal::device_type, \ - typename YVector_Internal::memory_traits - - constexpr bool eti_spec_avail = - KokkosSparse::Experimental::Impl::spmv_blockcrsmatrix_eti_spec_avail< - __SPMV_TYPES__>::value; - - Experimental::Impl::SPMV_BLOCKCRSMATRIX< - __SPMV_TYPES__, eti_spec_avail>::spmv_blockcrsmatrix(controls, mode, - alpha, A_i, x_i, - beta, y_i); -#undef __SPMV_TYPES__ -} - #ifdef DOXY // hide SFINAE template @@ -640,9 +521,7 @@ struct SPMV2D1D -#else -template < - class AlphaType, class AMatrix, class XVector, class BetaType, - class YVector, - typename std::enable_if::value>::type* = nullptr> -#endif -void spmv(KokkosKernels::Experimental::Controls controls, const char mode[], - const AlphaType& alpha, const AMatrix& A, const XVector& x, - const BetaType& beta, const YVector& y, const RANK_TWO) { - // Make sure that x and y have the same rank. - static_assert( - static_cast(XVector::rank) == static_cast(YVector::rank), - "KokkosSparse::spmv: Vector ranks do not match."); - // Make sure that x (and therefore y) is rank 2. - static_assert(static_cast(XVector::rank) == 2, - "KokkosSparse::spmv: Both Vector inputs must have rank 2 " - "in order to call this specialization of spmv."); - // Make sure that y is non-const. 
- static_assert(std::is_same::value, - "KokkosSparse::spmv: Output Vector must be non-const."); - - if (A.blockDim() == 1) { - KokkosSparse::CrsMatrix< - typename AMatrix::value_type, typename AMatrix::ordinal_type, - typename AMatrix::device_type, Kokkos::MemoryTraits, - typename AMatrix::size_type> - Acrs("blockcrs_to_crs", A.numCols(), A.values, A.graph); - KokkosSparse::spmv(controls, mode, alpha, Acrs, x, beta, y, RANK_TWO()); - return; - } - // Check compatibility of dimensions at run time. - if ((mode[0] == NoTranspose[0]) || (mode[0] == Conjugate[0])) { - if ((x.extent(1) != y.extent(1)) || - (static_cast(A.numCols() * A.blockDim()) != - static_cast(x.extent(0))) || - (static_cast(A.numRows() * A.blockDim()) != - static_cast(y.extent(0)))) { - std::ostringstream os; - os << "KokkosSparse::spmv (BlockCrsMatrix): Dimensions do not match: " - << ", A: " << A.numRows() * A.blockDim() << " x " - << A.numCols() * A.blockDim() << ", x: " << x.extent(0) << " x " - << x.extent(1) << ", y: " << y.extent(0) << " x " << y.extent(1); - - KokkosKernels::Impl::throw_runtime_exception(os.str()); - } - } else { - if ((x.extent(1) != y.extent(1)) || - (static_cast(A.numCols() * A.blockDim()) != - static_cast(y.extent(0))) || - (static_cast(A.numRows() * A.blockDim()) != - static_cast(x.extent(0)))) { - std::ostringstream os; - os << "KokkosSparse::spmv (BlockCrsMatrix): Dimensions do not match " - "(transpose): " - << ", A: " << A.numRows() * A.blockDim() << " x " - << A.numCols() * A.blockDim() << ", x: " << x.extent(0) << " x " - << x.extent(1) << ", y: " << y.extent(0) << " x " << y.extent(1); - - KokkosKernels::Impl::throw_runtime_exception(os.str()); - } - } - // - typedef KokkosSparse::Experimental::BlockCrsMatrix< - typename AMatrix::const_value_type, typename AMatrix::const_ordinal_type, - typename AMatrix::device_type, Kokkos::MemoryTraits, - typename AMatrix::const_size_type> - AMatrix_Internal; - AMatrix_Internal A_i(A); - - typedef Kokkos::View< - typename 
XVector::const_value_type**, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, - Kokkos::MemoryTraits > - XVector_Internal; - XVector_Internal x_i(x); - - typedef Kokkos::View< - typename YVector::non_const_value_type**, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename YVector::device_type, Kokkos::MemoryTraits > - YVector_Internal; - YVector_Internal y_i(y); - // - // - // Call single-vector version if appropriate - // - if (x.extent(1) == 1) { - typedef Kokkos::View< - typename XVector::const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XVector::device_type, - Kokkos::MemoryTraits > - XVector_SubInternal; - typedef Kokkos::View< - typename YVector::non_const_value_type*, - typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename YVector::device_type, Kokkos::MemoryTraits > - YVector_SubInternal; - - XVector_SubInternal x_0 = Kokkos::subview(x_i, Kokkos::ALL(), 0); - YVector_SubInternal y_0 = Kokkos::subview(y_i, Kokkos::ALL(), 0); - - return spmv(controls, mode, alpha, A_i, x_0, beta, y_0, RANK_ONE()); - } - // - return Experimental::Impl::SPMV_MV_BLOCKCRSMATRIX< - typename AMatrix_Internal::const_value_type, - typename AMatrix_Internal::const_ordinal_type, - typename AMatrix_Internal::device_type, - typename AMatrix_Internal::memory_traits, - typename AMatrix_Internal::const_size_type, - typename XVector_Internal::const_value_type**, - typename XVector_Internal::array_layout, - typename XVector_Internal::device_type, - typename XVector_Internal::memory_traits, - typename YVector_Internal::value_type**, - typename YVector_Internal::array_layout, - typename YVector_Internal::device_type, - typename YVector_Internal::memory_traits>:: - spmv_mv_blockcrsmatrix(controls, mode, alpha, A_i, x_i, beta, y_i); -} /// \brief Public interface to local sparse matrix-vector multiply. 
/// @@ -1144,9 +896,7 @@ void spmv(KokkosKernels::Experimental::Controls controls, const char mode[], /// enabled for Kokkos::CrsMatrix and Kokkos::Experimental::BsrMatrix on a /// single vector, or for Kokkos::Experimental::BsrMatrix with a multivector. /// -/// \tparam AMatrix KokkosSparse::CrsMatrix, -/// KokkosSparse::Experimental::BlockCrsMatrix, or -/// KokkosSparse::Experimental::BsrMatrix +/// \tparam AMatrix KokkosSparse::CrsMatrix or KokkosSparse::Experimental::BsrMatrix /// /// \param controls [in] kokkos-kernels control structure /// \param mode [in] "N" for no transpose, "T" for transpose, or "C" @@ -1228,13 +978,12 @@ void spmv(KokkosKernels::Experimental::Controls controls, const char mode[], /// argument types /// /// This is a catch-all interfaceace that throws a compile-time error if \c -/// AMatrix is not a CrsMatrix, BsrMatrix, or BlockCrsMatrix +/// AMatrix is not a CrsMatrix, or BsrMatrix /// template < class AlphaType, class AMatrix, class XVector, class BetaType, class YVector, typename std::enable_if< - !KokkosSparse::Experimental::is_block_crs_matrix::value && !KokkosSparse::Experimental::is_bsr_matrix::value && !KokkosSparse::is_crs_matrix::value>::type* = nullptr> void spmv(KokkosKernels::Experimental::Controls /*controls*/, @@ -1245,9 +994,8 @@ void spmv(KokkosKernels::Experimental::Controls /*controls*/, // instantiation static_assert( KokkosSparse::is_crs_matrix::value || - KokkosSparse::Experimental::is_bsr_matrix::value || - KokkosSparse::Experimental::is_block_crs_matrix::value, - "SpMV: AMatrix must be CrsMatrix, BsrMatrix, or BlockCrsMatrix"); + KokkosSparse::Experimental::is_bsr_matrix::value, + "SpMV: AMatrix must be CrsMatrix or BsrMatrix"); } // Overload for backward compatibility and also just simpler diff --git a/src/sparse/impl/KokkosSparse_gauss_seidel_spec.hpp b/src/sparse/impl/KokkosSparse_gauss_seidel_spec.hpp index 182d33a2e7..4e779afcb8 100644 --- a/src/sparse/impl/KokkosSparse_gauss_seidel_spec.hpp +++ 
b/src/sparse/impl/KokkosSparse_gauss_seidel_spec.hpp @@ -397,21 +397,6 @@ struct GAUSS_SEIDEL_APPLY, \ - KokkosKernels::BlockCRS, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - false, true>; \ template struct GAUSS_SEIDEL_NUMERIC< \ KokkosKernels::Experimental::KokkosKernelsHandle< \ const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ @@ -431,27 +416,6 @@ struct GAUSS_SEIDEL_APPLY, \ - KokkosKernels::BlockCRS, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - false, true>; \ extern template struct GAUSS_SEIDEL_APPLY< \ KokkosKernels::Experimental::KokkosKernelsHandle< \ const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ @@ -477,27 +441,6 @@ struct GAUSS_SEIDEL_APPLY, \ - KokkosKernels::BlockCRS, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - false, true>; \ template struct GAUSS_SEIDEL_APPLY< \ KokkosKernels::Experimental::KokkosKernelsHandle< \ const OFFSET_TYPE, const ORDINAL_TYPE, const SCALAR_TYPE, \ diff --git a/src/sparse/impl/KokkosSparse_spmv_blockcrsmatrix_impl.hpp b/src/sparse/impl/KokkosSparse_spmv_blockcrsmatrix_impl.hpp deleted file mode 100644 index 0dc0dfceb6..0000000000 --- a/src/sparse/impl/KokkosSparse_spmv_blockcrsmatrix_impl.hpp +++ /dev/null @@ -1,1178 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. 
Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOSKERNELS_KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_IMPL_HPP -#define KOKKOSKERNELS_KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_IMPL_HPP - -#include "KokkosBlas.hpp" -#include "KokkosBatched_Gemv_Serial_Internal.hpp" -#include "KokkosBatched_Gemm_Serial_Internal.hpp" -#include "KokkosKernels_ExecSpaceUtils.hpp" -#include "KokkosSparse_spmv_impl.hpp" - -namespace KokkosSparse { -namespace Experimental { -namespace Impl { -namespace BCRS { - -template -struct BCRS_GEMV_Functor { - typedef typename AMatrix::execution_space execution_space; - typedef typename AMatrix::non_const_value_type value_type; - typedef typename Kokkos::TeamPolicy team_policy; - typedef typename team_policy::member_type team_member; - typedef Kokkos::Details::ArithTraits ATV; - - //! Nonconst version of the type of column indices in the sparse matrix. - typedef typename AMatrix::non_const_ordinal_type ordinal_type; - //! Nonconst version of the type of row offsets in the sparse matrix. 
- typedef typename AMatrix::non_const_size_type size_type; - - const value_type alpha; - AMatrix m_A; - XVector m_x; - YVector m_y; - - const ordinal_type block_dim; - const ordinal_type blocks_per_team; - - bool conjugate = false; - - BCRS_GEMV_Functor(const value_type alpha_, const AMatrix m_A_, - const XVector m_x_, const YVector m_y_, - const int blocks_per_team_, bool conj_) - : alpha(alpha_), - m_A(m_A_), - m_x(m_x_), - m_y(m_y_), - block_dim(m_A_.blockDim()), - blocks_per_team(blocks_per_team_), - conjugate(conj_) { - static_assert(static_cast(XVector::rank) == 1, - "XVector must be a rank 1 View."); - static_assert(static_cast(YVector::rank) == 1, - "YVector must be a rank 1 View."); - } - - KOKKOS_INLINE_FUNCTION - void operator()(const ordinal_type iBlock) const { - const auto ystart = iBlock * block_dim; - const auto start = m_A.graph.row_map(iBlock); - const ordinal_type count = - static_cast(m_A.graph.row_map(iBlock + 1) - start); - const auto row = m_A.block_row_Const(iBlock); - const auto beta1 = static_cast(1); - // - if (conjugate) { - for (ordinal_type ic = 0; ic < count; ++ic) { - const auto Aview = row.block(ic); - const auto xstart = row.block_colidx(ic) * block_dim; - for (ordinal_type ii = 0; ii < block_dim; ++ii) { - value_type t(0); - for (ordinal_type jj = 0; jj < block_dim; ++jj) { - const auto aval = - Kokkos::ArithTraits::conj(Aview(ii, jj)); - t += aval * m_x(xstart + jj); - } - m_y(ystart + ii) += alpha * t; - } - } - } else { - for (ordinal_type ic = 0; ic < count; ++ic) { - const auto Aview = row.block(ic); - const auto xstart = row.block_colidx(ic) * block_dim; - KokkosBatched::SerialGemvInternal:: - invoke( - block_dim, block_dim, alpha, Aview.data(), Aview.stride_0(), - Aview.stride_1(), &m_x(xstart), m_x.stride_0(), beta1, - &m_y(ystart), m_y.stride_0()); - } - } - } - - KOKKOS_INLINE_FUNCTION - void operator()(const team_member &dev) const { - using y_value_type = typename YVector::non_const_value_type; - Kokkos::parallel_for( 
- Kokkos::TeamThreadRange(dev, 0, blocks_per_team), - [&](const ordinal_type &loop) { - const ordinal_type iBlock = - static_cast(dev.league_rank()) * blocks_per_team + - loop; - if (iBlock >= m_A.numRows()) { - return; - } - const auto start = m_A.graph.row_map(iBlock); - const ordinal_type count = - static_cast(m_A.graph.row_map(iBlock + 1) - start); - const auto row = m_A.block_row_Const(iBlock); - // - auto yview = Kokkos::subview( - m_y, Kokkos::make_pair(iBlock * block_dim, - iBlock * block_dim + block_dim)); - // - for (ordinal_type ir = 0; ir < block_dim; ++ir) { - y_value_type sum = 0; - - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(dev, count), - [&](const ordinal_type &iEntry, y_value_type &lsum) { - const auto start_col = row.block_colidx(iEntry) * block_dim; - for (ordinal_type jr = 0; jr < block_dim; ++jr) { - const value_type val = - conjugate - ? ATV::conj(row.local_block_value(iEntry, ir, jr)) - : row.local_block_value(iEntry, ir, jr); - lsum += val * m_x(start_col + jr); - } - }, - sum); - - Kokkos::single(Kokkos::PerThread(dev), [&]() { - sum *= alpha; - yview(ir) += sum; - }); - } - }); - } -}; - -/* ******************* */ - -// -// spMatVec_no_transpose: version for CPU execution spaces -// (RangePolicy or trivial serial impl used) -// -template ()>::type * = nullptr> -void spMatVec_no_transpose( - const KokkosKernels::Experimental::Controls &controls, - const AlphaType &alpha, - const KokkosSparse::Experimental::BlockCrsMatrix< - AT, AO, AD, Kokkos::MemoryTraits, AS> &A, - const XVector &x, const BetaType &beta, YVector &y, bool useConjugate) { - // This is required to maintain semantics of KokkosKernels native SpMV: - // if y contains NaN but beta = 0, the result y should be filled with 0. - // For example, this is useful for passing in uninitialized y and beta=0. 
- if (beta == Kokkos::ArithTraits::zero()) - Kokkos::deep_copy(y, Kokkos::ArithTraits::zero()); - else - KokkosBlas::scal(y, beta, y); - - // - // Treat the case y <- alpha * A * x + beta * y - // - - typedef KokkosSparse::Experimental::BlockCrsMatrix< - AT, AO, AD, Kokkos::MemoryTraits, AS> - AMatrix_Internal; - - bool use_dynamic_schedule = false; // Forces the use of a dynamic schedule - bool use_static_schedule = false; // Forces the use of a static schedule - if (controls.isParameter("schedule")) { - if (controls.getParameter("schedule") == "dynamic") { - use_dynamic_schedule = true; - } else if (controls.getParameter("schedule") == "static") { - use_static_schedule = true; - } - } - - BCRS_GEMV_Functor func(alpha, A, x, y, 1, - useConjugate); - if (((A.nnz() > 10000000) || use_dynamic_schedule) && !use_static_schedule) { - Kokkos::parallel_for( - "KokkosSparse::bcrs_spmv", - Kokkos::RangePolicy< - typename AMatrix_Internal::device_type::execution_space, - Kokkos::Schedule>(0, A.numRows()), - func); - } else { - Kokkos::parallel_for( - "KokkosSparse::bcrs_spmv", - Kokkos::RangePolicy< - typename AMatrix_Internal::device_type::execution_space, - Kokkos::Schedule>(0, A.numRows()), - func); - } -} - -/* ******************* */ - -// -// spMatVec_no_transpose: version for GPU execution spaces (TeamPolicy used) -// -template ()>::type * = nullptr> -void spMatVec_no_transpose( - const KokkosKernels::Experimental::Controls &controls, - const AlphaType &alpha, - const KokkosSparse::Experimental::BlockCrsMatrix< - AT, AO, AD, Kokkos::MemoryTraits, AS> &A, - const XVector &x, const BetaType &beta, YVector &y, bool useConjugate) { - if (A.numRows() <= static_cast(0)) { - return; - } - - // We need to scale y first ("scaling" by zero just means filling - // with zeros), since the functor updates y (by adding alpha Op(A) x). 
- KokkosBlas::scal(y, beta, y); - - typedef KokkosSparse::Experimental::BlockCrsMatrix< - AT, AO, AD, Kokkos::MemoryTraits, AS> - AMatrix_Internal; - typedef typename AMatrix_Internal::execution_space execution_space; - - bool use_dynamic_schedule = false; // Forces the use of a dynamic schedule - bool use_static_schedule = false; // Forces the use of a static schedule - if (controls.isParameter("schedule")) { - if (controls.getParameter("schedule") == "dynamic") { - use_dynamic_schedule = true; - } else if (controls.getParameter("schedule") == "static") { - use_static_schedule = true; - } - } - int team_size = -1; - int vector_length = -1; - int64_t blocks_per_thread = -1; - - // - // Use the controls to allow the user to pass in some tuning parameters. - // - if (controls.isParameter("team size")) { - team_size = std::stoi(controls.getParameter("team size")); - } - if (controls.isParameter("vector length")) { - vector_length = std::stoi(controls.getParameter("vector length")); - } - if (controls.isParameter("rows per thread")) { - blocks_per_thread = std::stoll(controls.getParameter("rows per thread")); - } - - // - // Use the existing launch parameters routine from SPMV - // - int64_t blocks_per_team = - KokkosSparse::Impl::spmv_launch_parameters( - A.numRows(), A.nnz(), blocks_per_thread, team_size, vector_length); - int64_t worksets = (A.numRows() + blocks_per_team - 1) / blocks_per_team; - - AMatrix_Internal A_internal = A; - - BCRS_GEMV_Functor func( - alpha, A_internal, x, y, blocks_per_team, useConjugate); - - if (((A.nnz() > 10000000) || use_dynamic_schedule) && !use_static_schedule) { - Kokkos::TeamPolicy> - policy(1, 1); - if (team_size < 0) - policy = Kokkos::TeamPolicy>( - worksets, Kokkos::AUTO, vector_length); - else - policy = Kokkos::TeamPolicy>( - worksets, team_size, vector_length); - Kokkos::parallel_for("KokkosSparse::bcrs_spmv", policy, - func); - } else { - Kokkos::TeamPolicy> - policy(1, 1); - if (team_size < 0) - policy = - 
Kokkos::TeamPolicy>( - worksets, Kokkos::AUTO, vector_length); - else - policy = - Kokkos::TeamPolicy>( - worksets, team_size, vector_length); - Kokkos::parallel_for("KokkosSparse::bcrs_spmv", policy, - func); - } -} - -/* ******************* */ - -template -struct BCRS_GEMV_Transpose_Functor { - typedef typename AMatrix::execution_space execution_space; - typedef typename AMatrix::non_const_value_type value_type; - typedef typename Kokkos::TeamPolicy team_policy; - typedef typename team_policy::member_type team_member; - typedef Kokkos::Details::ArithTraits ATV; - - //! Nonconst version of the type of column indices in the sparse matrix. - typedef typename AMatrix::non_const_ordinal_type ordinal_type; - //! Nonconst version of the type of row offsets in the sparse matrix. - typedef typename AMatrix::non_const_size_type size_type; - - const value_type alpha; - - AMatrix m_A; - XVector m_x; - YVector m_y; - - const ordinal_type block_dim; - const ordinal_type blocks_per_team; - - bool conjugate = false; - - BCRS_GEMV_Transpose_Functor(const value_type alpha_, const AMatrix m_A_, - const XVector m_x_, const YVector m_y_, - const int blocks_per_team_, bool conj_) - : alpha(alpha_), - m_A(m_A_), - m_x(m_x_), - m_y(m_y_), - block_dim(m_A_.blockDim()), - blocks_per_team(blocks_per_team_), - conjugate(conj_) { - static_assert(static_cast(XVector::rank) == 1, - "XVector must be a rank 1 View."); - static_assert(static_cast(YVector::rank) == 1, - "YVector must be a rank 1 View."); - } - - KOKKOS_INLINE_FUNCTION - void operator()(const ordinal_type iBlock) const { - // - // Assume that alpha is not zero - // - const auto xstart = iBlock * block_dim; - const auto xview = - Kokkos::subview(m_x, Kokkos::make_pair(xstart, xstart + block_dim)); - const auto start = m_A.graph.row_map(iBlock); - const ordinal_type count = - static_cast(m_A.graph.row_map(iBlock + 1) - start); - const auto row = m_A.block_row_Const(iBlock); - const auto beta1 = static_cast(1); - const auto alpha1 = 
beta1; - if (conjugate) { - for (ordinal_type ic = 0; ic < count; ++ic) { - const auto Aview = row.block(ic); - const auto ystart = row.block_colidx(ic) * block_dim; - for (ordinal_type jj = 0; jj < block_dim; ++jj) { - value_type t(0); - for (ordinal_type ii = 0; ii < block_dim; ++ii) { - const auto aval = - Kokkos::ArithTraits::conj(Aview(ii, jj)); - t += aval * xview(ii); - } - t *= alpha; - Kokkos::atomic_add(&m_y(ystart + jj), t); - } - } - } else { - for (ordinal_type ic = 0; ic < count; ++ic) { - const auto Aview = row.block(ic); - const auto ystart = row.block_colidx(ic) * block_dim; - for (ordinal_type jj = 0; jj < block_dim; ++jj) { - value_type t(0); - KokkosBatched::SerialGemvInternal< - KokkosBatched::Algo::Gemv::Blocked>::invoke( - 1, block_dim, alpha1, Aview.data() + jj, Aview.stride_1(), - Aview.stride_0(), xview.data(), xview.stride_0(), beta1, &t, 1); - t *= alpha; - Kokkos::atomic_add(&m_y(ystart + jj), t); - } - } - } - } - - KOKKOS_INLINE_FUNCTION - void operator()(const team_member &dev) const { - using y_value_type = typename YVector::non_const_value_type; - Kokkos::parallel_for( - Kokkos::TeamThreadRange(dev, 0, blocks_per_team), - [&](const ordinal_type &loop) { - const ordinal_type iBlock = - static_cast(dev.league_rank()) * blocks_per_team + - loop; - if (iBlock >= m_A.numRows()) { - return; - } - const auto start = m_A.graph.row_map(iBlock); - const ordinal_type count = - static_cast(m_A.graph.row_map(iBlock + 1) - start); - const auto row = m_A.block_row_Const(iBlock); - // - for (ordinal_type ir = 0; ir < block_dim; ++ir) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(dev, count), - [&](const ordinal_type &iEntry) { - for (ordinal_type jr = 0; jr < block_dim; ++jr) { - const value_type val = - conjugate - ? 
ATV::conj(row.local_block_value(iEntry, jr, ir)) - : row.local_block_value(iEntry, jr, ir); - const ordinal_type ind = row.block_colidx(iEntry); - Kokkos::atomic_add( - &m_y(block_dim * ind + ir), - static_cast( - alpha * val * m_x(block_dim * iBlock + jr))); - } - }); - } - }); - } -}; - -/* ******************* */ - -/// \brief spMatVec_transpose: version for CPU execution spaces (RangePolicy or -/// trivial serial impl used) -template ()>::type * = nullptr> -void spMatVec_transpose( - const KokkosKernels::Experimental::Controls &controls, - const AlphaType &alpha, - const KokkosSparse::Experimental::BlockCrsMatrix< - AT, AO, AD, Kokkos::MemoryTraits, AS> &A, - const XVector &x, const BetaType &beta, YVector &y, bool useConjugate) { - // This is required to maintain semantics of KokkosKernels native SpMV: - // if y contains NaN but beta = 0, the result y should be filled with 0. - // For example, this is useful for passing in uninitialized y and beta=0. - if (beta == Kokkos::ArithTraits::zero()) - Kokkos::deep_copy(y, Kokkos::ArithTraits::zero()); - else - KokkosBlas::scal(y, beta, y); - - if (alpha == Kokkos::ArithTraits::zero()) return; - - // - // Treat the case y <- alpha * A^T * x + beta * y - // - - typedef KokkosSparse::Experimental::BlockCrsMatrix< - AT, AO, AD, Kokkos::MemoryTraits, AS> - AMatrix_Internal; - - AMatrix_Internal A_internal = A; - - bool use_dynamic_schedule = false; // Forces the use of a dynamic schedule - bool use_static_schedule = false; // Forces the use of a static schedule - if (controls.isParameter("schedule")) { - if (controls.getParameter("schedule") == "dynamic") { - use_dynamic_schedule = true; - } else if (controls.getParameter("schedule") == "static") { - use_static_schedule = true; - } - } - - BCRS_GEMV_Transpose_Functor func( - alpha, A_internal, x, y, 1, useConjugate); - if (((A.nnz() > 10000000) || use_dynamic_schedule) && !use_static_schedule) { - Kokkos::parallel_for( - "KokkosSparse::blockcrs_spmv", - 
Kokkos::RangePolicy< - typename AMatrix_Internal::device_type::execution_space, - Kokkos::Schedule>(0, A.numRows()), - func); - } else { - Kokkos::parallel_for( - "KokkosSparse::blockcrs_spmv", - Kokkos::RangePolicy< - typename AMatrix_Internal::device_type::execution_space, - Kokkos::Schedule>(0, A.numRows()), - func); - } -} - -// -// spMatVec_transpose: version for GPU execution spaces (TeamPolicy used) -// -template ()>::type * = nullptr> -void spMatVec_transpose(const KokkosKernels::Experimental::Controls &controls, - const AlphaType &alpha, const AMatrix &A, - const XVector &x, const BetaType &beta, YVector &y, - bool useConjugate) { - if (A.numRows() <= 0) { - return; - } - - // We need to scale y first ("scaling" by zero just means filling - // with zeros), since the functor works by atomic-adding into y. - KokkosBlas::scal(y, beta, y); - - typedef typename AMatrix::execution_space execution_space; - - bool use_dynamic_schedule = false; // Forces the use of a dynamic schedule - bool use_static_schedule = false; // Forces the use of a static schedule - if (controls.isParameter("schedule")) { - if (controls.getParameter("schedule") == "dynamic") { - use_dynamic_schedule = true; - } else if (controls.getParameter("schedule") == "static") { - use_static_schedule = true; - } - } - int team_size = -1; - int vector_length = -1; - int64_t blocks_per_thread = -1; - - // - // Use the controls to allow the user to pass in some tuning parameters. 
- // - if (controls.isParameter("team size")) { - team_size = std::stoi(controls.getParameter("team size")); - } - if (controls.isParameter("vector length")) { - vector_length = std::stoi(controls.getParameter("vector length")); - } - if (controls.isParameter("rows per thread")) { - blocks_per_thread = std::stoll(controls.getParameter("rows per thread")); - } - - // - // Use the existing launch parameters routine from SPMV - // - int64_t blocks_per_team = - KokkosSparse::Impl::spmv_launch_parameters( - A.numRows(), A.nnz(), blocks_per_thread, team_size, vector_length); - int64_t worksets = (A.numRows() + blocks_per_team - 1) / blocks_per_team; - - BCRS_GEMV_Transpose_Functor func( - alpha, A, x, y, blocks_per_team, useConjugate); - - if (((A.nnz() > 10000000) || use_dynamic_schedule) && !use_static_schedule) { - Kokkos::TeamPolicy> - policy(1, 1); - if (team_size < 0) - policy = Kokkos::TeamPolicy>( - worksets, Kokkos::AUTO, vector_length); - else - policy = Kokkos::TeamPolicy>( - worksets, team_size, vector_length); - Kokkos::parallel_for("KokkosSparse::bspmv", policy, - func); - } else { - Kokkos::TeamPolicy> - policy(1, 1); - if (team_size < 0) - policy = - Kokkos::TeamPolicy>( - worksets, Kokkos::AUTO, vector_length); - else - policy = - Kokkos::TeamPolicy>( - worksets, team_size, vector_length); - Kokkos::parallel_for("KokkosSparse::bspmv", policy, - func); - } -} - -/* ******************* */ - -template -struct BCRS_GEMM_Functor { - typedef typename AMatrix::execution_space execution_space; - typedef typename AMatrix::non_const_value_type value_type; - typedef typename Kokkos::TeamPolicy team_policy; - typedef typename team_policy::member_type team_member; - typedef Kokkos::Details::ArithTraits ATV; - - //! Nonconst version of the type of column indices in the sparse matrix. - typedef typename AMatrix::non_const_ordinal_type ordinal_type; - //! Nonconst version of the type of row offsets in the sparse matrix. 
- typedef typename AMatrix::non_const_size_type size_type; - - const value_type alpha; - AMatrix m_A; - XVector m_x; - YVector m_y; - const ordinal_type block_dim; - const ordinal_type num_rhs; - - const ordinal_type blocks_per_team; - - bool conjugate = false; - - BCRS_GEMM_Functor(const value_type alpha_, const AMatrix m_A_, - const XVector m_x_, const YVector m_y_, - const int blocks_per_team_, bool conj_) - : alpha(alpha_), - m_A(m_A_), - m_x(m_x_), - m_y(m_y_), - block_dim(m_A_.blockDim()), - num_rhs(m_x_.extent(1)), - blocks_per_team(blocks_per_team_), - conjugate(conj_) { - static_assert(static_cast(XVector::rank) == 2, - "XVector must be a rank 2 View."); - static_assert(static_cast(YVector::rank) == 2, - "YVector must be a rank 2 View."); - } - - KOKKOS_INLINE_FUNCTION - void operator()(const ordinal_type iBlock) const { - // - const auto ystart = iBlock * block_dim; - const auto start = m_A.graph.row_map(iBlock); - const ordinal_type count = - static_cast(m_A.graph.row_map(iBlock + 1) - start); - const auto row = m_A.block_row_Const(iBlock); - // - for (ordinal_type ic = 0; ic < count; ++ic) { - const auto Aview = row.block(ic); - const auto xstart = row.block_colidx(ic) * block_dim; - for (ordinal_type jr = 0; jr < num_rhs; ++jr) { - for (ordinal_type ii = 0; ii < block_dim; ++ii) { - value_type t(0); - for (ordinal_type jj = 0; jj < block_dim; ++jj) { - const auto aval = - (conjugate) - ? 
Kokkos::ArithTraits::conj(Aview(ii, jj)) - : Aview(ii, jj); - t += aval * m_x(xstart + jj, jr); - } - m_y(ystart + ii, jr) += alpha * t; - } - } - } - } - - KOKKOS_INLINE_FUNCTION - void operator()(const team_member &dev) const { - using y_value_type = typename YVector::non_const_value_type; - Kokkos::parallel_for( - Kokkos::TeamThreadRange(dev, 0, blocks_per_team), - [&](const ordinal_type &loop) { - const ordinal_type iBlock = - static_cast(dev.league_rank()) * blocks_per_team + - loop; - if (iBlock >= m_A.numRows()) { - return; - } - // - const auto start = m_A.graph.row_map(iBlock); - const ordinal_type count = - static_cast(m_A.graph.row_map(iBlock + 1) - start); - const auto row = m_A.block_row_Const(iBlock); - const auto nrhs = num_rhs; - // - for (ordinal_type ic = 0; ic < nrhs; ++ic) { - for (ordinal_type ir = 0; ir < block_dim; ++ir) { - y_value_type sum = 0; - - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(dev, count), - [&](const ordinal_type &iEntry, y_value_type &lsum) { - const auto start_col = row.block_colidx(iEntry) * block_dim; - for (ordinal_type jr = 0; jr < block_dim; ++jr) { - const value_type val = - conjugate - ? 
ATV::conj(row.local_block_value(iEntry, ir, jr)) - : row.local_block_value(iEntry, ir, jr); - lsum += val * m_x(start_col + jr, ic); - } - }, - sum); - - Kokkos::single(Kokkos::PerThread(dev), [&]() { - sum *= alpha; - m_y(iBlock * block_dim + ir, ic) += sum; - }); - } - } - // - }); - } -}; - -/* ******************* */ - -// -// spMatMultiVec_no_transpose: version for CPU execution spaces -// (RangePolicy or trivial serial impl used) -// -template ()>::type * = nullptr> -void spMatMultiVec_no_transpose( - const KokkosKernels::Experimental::Controls &controls, - const AlphaType &alpha, - const KokkosSparse::Experimental::BlockCrsMatrix< - AT, AO, AD, Kokkos::MemoryTraits, AS> &A, - const XVector &x, const BetaType &beta, YVector &y, bool useConjugate) { - // This is required to maintain semantics of KokkosKernels native SpMV: - // if y contains NaN but beta = 0, the result y should be filled with 0. - // For example, this is useful for passing in uninitialized y and beta=0. - if (beta == Kokkos::ArithTraits::zero()) - Kokkos::deep_copy(y, Kokkos::ArithTraits::zero()); - else - KokkosBlas::scal(y, beta, y); - // - // Treat the case y <- alpha * A * x + beta * y - // - typedef KokkosSparse::Experimental::BlockCrsMatrix< - AT, AO, AD, Kokkos::MemoryTraits, AS> - AMatrix_Internal; - - bool use_dynamic_schedule = false; // Forces the use of a dynamic schedule - bool use_static_schedule = false; // Forces the use of a static schedule - if (controls.isParameter("schedule")) { - if (controls.getParameter("schedule") == "dynamic") { - use_dynamic_schedule = true; - } else if (controls.getParameter("schedule") == "static") { - use_static_schedule = true; - } - } - - BCRS_GEMM_Functor func(alpha, A, x, y, 1, - useConjugate); - if (((A.nnz() > 10000000) || use_dynamic_schedule) && !use_static_schedule) { - Kokkos::parallel_for( - "KokkosSparse::bcrs_spm_mv", - Kokkos::RangePolicy< - typename AMatrix_Internal::device_type::execution_space, - Kokkos::Schedule>(0, A.numRows()), - 
func); - } else { - Kokkos::parallel_for( - "KokkosSparse::bcrs_spm_mv", - Kokkos::RangePolicy< - typename AMatrix_Internal::device_type::execution_space, - Kokkos::Schedule>(0, A.numRows()), - func); - } -} - -/* ******************* */ - -// -// spMatMultiVec_no_transpose: version for GPU execution spaces (TeamPolicy -// used) -// -template ()>::type * = nullptr> -void spMatMultiVec_no_transpose( - const KokkosKernels::Experimental::Controls &controls, - const AlphaType &alpha, - const KokkosSparse::Experimental::BlockCrsMatrix< - AT, AO, AD, Kokkos::MemoryTraits, AS> &A, - const XVector &x, const BetaType &beta, YVector &y, bool useConjugate) { - if (A.numRows() <= static_cast(0)) { - return; - } - - KokkosBlas::scal(y, beta, y); - - typedef KokkosSparse::Experimental::BlockCrsMatrix< - AT, AO, AD, Kokkos::MemoryTraits, AS> - AMatrix_Internal; - typedef typename AMatrix_Internal::execution_space execution_space; - - bool use_dynamic_schedule = false; // Forces the use of a dynamic schedule - bool use_static_schedule = false; // Forces the use of a static schedule - if (controls.isParameter("schedule")) { - if (controls.getParameter("schedule") == "dynamic") { - use_dynamic_schedule = true; - } else if (controls.getParameter("schedule") == "static") { - use_static_schedule = true; - } - } - int team_size = -1; - int vector_length = -1; - int64_t blocks_per_thread = -1; - - // - // Use the controls to allow the user to pass in some tuning parameters. 
- // - if (controls.isParameter("team size")) { - team_size = std::stoi(controls.getParameter("team size")); - } - if (controls.isParameter("vector length")) { - vector_length = std::stoi(controls.getParameter("vector length")); - } - if (controls.isParameter("rows per thread")) { - blocks_per_thread = std::stoll(controls.getParameter("rows per thread")); - } - - // - // Use the existing launch parameters routine from SPMV - // - int64_t blocks_per_team = - KokkosSparse::Impl::spmv_launch_parameters( - A.numRows(), A.nnz(), blocks_per_thread, team_size, vector_length); - int64_t worksets = (A.numRows() + blocks_per_team - 1) / blocks_per_team; - - AMatrix_Internal A_internal = A; - - BCRS_GEMM_Functor func( - alpha, A_internal, x, y, blocks_per_team, useConjugate); - - if (((A.nnz() > 10000000) || use_dynamic_schedule) && !use_static_schedule) { - Kokkos::TeamPolicy> - policy(1, 1); - if (team_size < 0) - policy = Kokkos::TeamPolicy>( - worksets, Kokkos::AUTO, vector_length); - else - policy = Kokkos::TeamPolicy>( - worksets, team_size, vector_length); - Kokkos::parallel_for("KokkosSparse::bcrs_spm_mv", - policy, func); - } else { - Kokkos::TeamPolicy> - policy(1, 1); - if (team_size < 0) - policy = - Kokkos::TeamPolicy>( - worksets, Kokkos::AUTO, vector_length); - else - policy = - Kokkos::TeamPolicy>( - worksets, team_size, vector_length); - Kokkos::parallel_for("KokkosSparse::bcrs_spm_mv", - policy, func); - } -} - -/* ******************* */ - -template -struct BCRS_GEMM_Transpose_Functor { - typedef typename AMatrix::execution_space execution_space; - typedef typename AMatrix::non_const_value_type value_type; - typedef typename Kokkos::TeamPolicy team_policy; - typedef typename team_policy::member_type team_member; - typedef Kokkos::Details::ArithTraits ATV; - - //! Nonconst version of the type of column indices in the sparse matrix. - typedef typename AMatrix::non_const_ordinal_type ordinal_type; - //! 
Nonconst version of the type of row offsets in the sparse matrix. - typedef typename AMatrix::non_const_size_type size_type; - - const value_type alpha; - AMatrix m_A; - XVector m_x; - YVector m_y; - const ordinal_type block_dim; - const ordinal_type num_rhs; - - const ordinal_type blocks_per_team; - - bool conjugate = false; - - BCRS_GEMM_Transpose_Functor(const value_type alpha_, const AMatrix m_A_, - const XVector m_x_, const YVector m_y_, - const int blocks_per_team_, bool conj_) - : alpha(alpha_), - m_A(m_A_), - m_x(m_x_), - m_y(m_y_), - block_dim(m_A_.blockDim()), - num_rhs(m_x_.extent(1)), - blocks_per_team(blocks_per_team_), - conjugate(conj_) { - static_assert(static_cast(XVector::rank) == 2, - "XVector must be a rank 2 View."); - static_assert(static_cast(YVector::rank) == 2, - "YVector must be a rank 2 View."); - } - - KOKKOS_INLINE_FUNCTION - void operator()(const ordinal_type iBlock) const { - // - const auto xstart = iBlock * block_dim; - const auto xview = Kokkos::subview( - m_x, Kokkos::make_pair(xstart, xstart + block_dim), Kokkos::ALL()); - const auto start = m_A.graph.row_map(iBlock); - const ordinal_type count = - static_cast(m_A.graph.row_map(iBlock + 1) - start); - const auto row = m_A.block_row_Const(iBlock); - const auto beta1 = static_cast(1); - const auto alpha1 = beta1; - const auto ldx = m_x.stride_1(); - // - if (conjugate) { - for (ordinal_type ic = 0; ic < count; ++ic) { - const auto Aview = row.block(ic); - const auto ystart = row.block_colidx(ic) * block_dim; - for (ordinal_type jr = 0; jr < num_rhs; ++jr) { - for (ordinal_type jj = 0; jj < block_dim; ++jj) { - value_type t(0); - for (ordinal_type ii = 0; ii < block_dim; ++ii) { - const auto aval = - Kokkos::ArithTraits::conj(Aview(ii, jj)); - t += aval * xview(ii, jr); - } - t *= alpha; - Kokkos::atomic_add(&m_y(ystart + jj, jr), t); - } - } - } - } else { - for (ordinal_type ic = 0; ic < count; ++ic) { - const auto Aview = row.block(ic); - const auto ystart = row.block_colidx(ic) 
* block_dim; - for (ordinal_type jr = 0; jr < num_rhs; ++jr) { - for (ordinal_type jj = 0; jj < block_dim; ++jj) { - value_type t(0); - KokkosBatched::SerialGemvInternal< - KokkosBatched::Algo::Gemv::Blocked>::invoke( - 1, block_dim, alpha1, Aview.data() + jj, Aview.stride_1(), - Aview.stride_0(), xview.data() + jr * ldx, xview.stride_0(), - beta1, &t, 1); - t *= alpha; - Kokkos::atomic_add(&m_y(ystart + jj, jr), t); - } - } - } - } - } - - KOKKOS_INLINE_FUNCTION - void operator()(const team_member &dev) const { - using y_value_type = typename YVector::non_const_value_type; - Kokkos::parallel_for( - Kokkos::TeamThreadRange(dev, 0, blocks_per_team), - [&](const ordinal_type &loop) { - const ordinal_type iBlock = - static_cast(dev.league_rank()) * blocks_per_team + - loop; - if (iBlock >= m_A.numRows()) { - return; - } - // - const auto start = m_A.graph.row_map(iBlock); - const ordinal_type count = - static_cast(m_A.graph.row_map(iBlock + 1) - start); - const auto row = m_A.block_row_Const(iBlock); - const auto nrhs = m_x.extent(1); - // - for (size_t ic = 0; ic < nrhs; ++ic) { - for (ordinal_type ir = 0; ir < block_dim; ++ir) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(dev, count), - [&](const ordinal_type &iEntry) { - for (ordinal_type jr = 0; jr < block_dim; ++jr) { - const value_type val = - conjugate - ? 
ATV::conj(row.local_block_value(iEntry, jr, ir)) - : row.local_block_value(iEntry, jr, ir); - const ordinal_type ind = row.block_colidx(iEntry); - Kokkos::atomic_add( - &m_y(block_dim * ind + ir, ic), - static_cast( - alpha * val * m_x(block_dim * iBlock + jr, ic))); - } - }); - } - } - // - }); - } -}; - -/* ******************* */ - -/// \brief spMatMultiVec_transpose: version for CPU execution spaces -/// (RangePolicy or trivial serial impl used) -template ()>::type * = nullptr> -void spMatMultiVec_transpose( - const KokkosKernels::Experimental::Controls &controls, - const AlphaType &alpha, - const KokkosSparse::Experimental::BlockCrsMatrix< - AT, AO, AD, Kokkos::MemoryTraits, AS> &A, - const XVector &x, const BetaType &beta, YVector &y, bool useConjugate) { - // This is required to maintain semantics of KokkosKernels native SpMV: - // if y contains NaN but beta = 0, the result y should be filled with 0. - // For example, this is useful for passing in uninitialized y and beta=0. - if (beta == Kokkos::ArithTraits::zero()) - Kokkos::deep_copy(y, Kokkos::ArithTraits::zero()); - else - KokkosBlas::scal(y, beta, y); - // - // Treat the case y <- alpha * A^T * x + beta * y - // - typedef KokkosSparse::Experimental::BlockCrsMatrix< - AT, AO, AD, Kokkos::MemoryTraits, AS> - AMatrix_Internal; - typedef typename AMatrix_Internal::execution_space execution_space; - - AMatrix_Internal A_internal = A; - - bool use_dynamic_schedule = false; // Forces the use of a dynamic schedule - bool use_static_schedule = false; // Forces the use of a static schedule - if (controls.isParameter("schedule")) { - if (controls.getParameter("schedule") == "dynamic") { - use_dynamic_schedule = true; - } else if (controls.getParameter("schedule") == "static") { - use_static_schedule = true; - } - } - - BCRS_GEMM_Transpose_Functor func( - alpha, A_internal, x, y, 1, useConjugate); - if (((A.nnz() > 10000000) || use_dynamic_schedule) && !use_static_schedule) { - Kokkos::parallel_for( - 
"KokkosSparse::blockcrs_spm_mv", - Kokkos::RangePolicy>( - 0, A.numRows()), - func); - } else { - Kokkos::parallel_for( - "KokkosSparse::blockcrs_spm_mv", - Kokkos::RangePolicy>( - 0, A.numRows()), - func); - } -} - -// -// spMatMultiVec_transpose: version for GPU execution spaces (TeamPolicy used) -// -template ()>::type * = nullptr> -void spMatMultiVec_transpose( - const KokkosKernels::Experimental::Controls &controls, - const AlphaType &alpha, const AMatrix &A, const XVector &x, - const BetaType &beta, YVector &y, bool useConjugate) { - if (A.numRows() <= 0) { - return; - } - - KokkosBlas::scal(y, beta, y); - - typedef typename AMatrix::execution_space execution_space; - - bool use_dynamic_schedule = false; // Forces the use of a dynamic schedule - bool use_static_schedule = false; // Forces the use of a static schedule - if (controls.isParameter("schedule")) { - if (controls.getParameter("schedule") == "dynamic") { - use_dynamic_schedule = true; - } else if (controls.getParameter("schedule") == "static") { - use_static_schedule = true; - } - } - int team_size = -1; - int vector_length = -1; - int64_t blocks_per_thread = -1; - - // - // Use the controls to allow the user to pass in some tuning - // parameters. 
- // - if (controls.isParameter("team size")) { - team_size = std::stoi(controls.getParameter("team size")); - } - if (controls.isParameter("vector length")) { - vector_length = std::stoi(controls.getParameter("vector length")); - } - if (controls.isParameter("rows per thread")) { - blocks_per_thread = std::stoll(controls.getParameter("rows per thread")); - } - - // - // Use the existing launch parameters routine from SPMV - // - int64_t blocks_per_team = - KokkosSparse::Impl::spmv_launch_parameters( - A.numRows(), A.nnz(), blocks_per_thread, team_size, vector_length); - int64_t worksets = (A.numRows() + blocks_per_team - 1) / blocks_per_team; - - BCRS_GEMM_Transpose_Functor func( - alpha, A, x, y, blocks_per_team, useConjugate); - - if (((A.nnz() > 10000000) || use_dynamic_schedule) && !use_static_schedule) { - Kokkos::TeamPolicy> - policy(1, 1); - if (team_size < 0) - policy = Kokkos::TeamPolicy>( - worksets, Kokkos::AUTO, vector_length); - else - policy = Kokkos::TeamPolicy>( - worksets, team_size, vector_length); - Kokkos::parallel_for("KokkosSparse::blockcrs_spm_mv", - policy, func); - } else { - Kokkos::TeamPolicy> - policy(1, 1); - if (team_size < 0) - policy = - Kokkos::TeamPolicy>( - worksets, Kokkos::AUTO, vector_length); - else - policy = - Kokkos::TeamPolicy>( - worksets, team_size, vector_length); - Kokkos::parallel_for("KokkosSparse::blockcrs_spm_mv", - policy, func); - } -} - -/* ******************* */ - -} // namespace BCRS - -} // namespace Impl -} // namespace Experimental -} // namespace KokkosSparse - -#endif // KOKKOSKERNELS_KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_IMPL_HPP diff --git a/src/sparse/impl/KokkosSparse_spmv_blockcrsmatrix_spec.hpp b/src/sparse/impl/KokkosSparse_spmv_blockcrsmatrix_spec.hpp deleted file mode 100644 index 14b75f1c39..0000000000 --- a/src/sparse/impl/KokkosSparse_spmv_blockcrsmatrix_spec.hpp +++ /dev/null @@ -1,284 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// 
Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ -#ifndef KOKKOSSPARSE_IMPL_SPMV_BLOCKCRSMATRIX_SPEC_HPP_ -#define KOKKOSSPARSE_IMPL_SPMV_BLOCKCRSMATRIX_SPEC_HPP_ - -#include -#include -#include - -#include "KokkosSparse_BlockCrsMatrix.hpp" -#include "KokkosKernels_Controls.hpp" -#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY -#include -#endif - -namespace KokkosSparse { -namespace Experimental { -namespace Impl { - -// default is no eti available -template -struct spmv_blockcrsmatrix_eti_spec_avail { - enum : bool { value = false }; -}; - -// default is no eti available -template -struct spmv_mv_blockcrsmatrix_eti_spec_avail { - enum : bool { value = false }; -}; - -} // namespace Impl -} // namespace Experimental -} // namespace KokkosSparse - -#define KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_ETI_SPEC_AVAIL( \ - SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ - MEM_SPACE_TYPE) \ - template <> \ - struct spmv_blockcrsmatrix_eti_spec_avail< \ - const SCALAR_TYPE, const ORDINAL_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, const OFFSET_TYPE, \ - SCALAR_TYPE const *, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, \ - SCALAR_TYPE *, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > { \ - enum : bool { value = true }; \ - }; - -#define KOKKOSSPARSE_SPMV_MV_BLOCKCRSMATRIX_ETI_SPEC_AVAIL( \ - SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ - MEM_SPACE_TYPE) \ - template <> \ - struct spmv_mv_blockcrsmatrix_eti_spec_avail< \ - const SCALAR_TYPE, const ORDINAL_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, const OFFSET_TYPE, \ - SCALAR_TYPE const **, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, \ - SCALAR_TYPE **, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > { \ - enum : bool { value = true }; \ - }; - -// Include which ETIs are available -#include 
-#include -#include - -namespace KokkosSparse { -namespace Experimental { -namespace Impl { - -// declaration -template ::value> -struct SPMV_BLOCKCRSMATRIX { - typedef BlockCrsMatrix AMatrix; - typedef Kokkos::View XVector; - typedef Kokkos::View YVector; - typedef typename YVector::non_const_value_type YScalar; - - static void spmv_blockcrsmatrix( - const KokkosKernels::Experimental::Controls &controls, const char mode[], - const YScalar &alpha, const AMatrix &A, const XVector &x, - const YScalar &beta, const YVector &y); -}; - -// declaration -template ::value> -struct SPMV_MV_BLOCKCRSMATRIX { - typedef BlockCrsMatrix AMatrix; - typedef Kokkos::View XVector; - typedef Kokkos::View YVector; - typedef typename YVector::non_const_value_type YScalar; - - static void spmv_mv_blockcrsmatrix( - const KokkosKernels::Experimental::Controls &controls, const char mode[], - const YScalar &alpha, const AMatrix &A, const XVector &x, - const YScalar &beta, const YVector &y); -}; - -// actual implementations to be compiled -#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY -template -struct SPMV_BLOCKCRSMATRIX { - typedef BlockCrsMatrix AMatrix; - typedef Kokkos::View XVector; - typedef Kokkos::View YVector; - typedef typename YVector::non_const_value_type YScalar; - - static void spmv_blockcrsmatrix( - const KokkosKernels::Experimental::Controls &controls, const char mode[], - const YScalar &alpha, const AMatrix &A, const XVector &X, - const YScalar &beta, const YVector &Y) { - // - if ((mode[0] == KokkosSparse::NoTranspose[0]) || - (mode[0] == KokkosSparse::Conjugate[0])) { - bool useConjugate = (mode[0] == KokkosSparse::Conjugate[0]); - return BCRS::spMatVec_no_transpose(controls, alpha, A, X, beta, Y, - useConjugate); - } else { - bool useConjugate = (mode[0] == KokkosSparse::ConjugateTranspose[0]); - return BCRS::spMatVec_transpose(controls, alpha, A, X, beta, Y, - useConjugate); - } - } -}; - -template -struct SPMV_MV_BLOCKCRSMATRIX { - typedef 
BlockCrsMatrix AMatrix; - typedef Kokkos::View XVector; - typedef Kokkos::View YVector; - typedef typename YVector::non_const_value_type YScalar; - - static void spmv_mv_blockcrsmatrix( - const KokkosKernels::Experimental::Controls &controls, const char mode[], - const YScalar &alpha, const AMatrix &A, const XVector &X, - const YScalar &beta, const YVector &Y) { - // - if ((mode[0] == KokkosSparse::NoTranspose[0]) || - (mode[0] == KokkosSparse::Conjugate[0])) { - bool useConjugate = (mode[0] == KokkosSparse::Conjugate[0]); - return BCRS::spMatMultiVec_no_transpose(controls, alpha, A, X, beta, Y, - useConjugate); - } else { - bool useConjugate = (mode[0] == KokkosSparse::ConjugateTranspose[0]); - return BCRS::spMatMultiVec_transpose(controls, alpha, A, X, beta, Y, - useConjugate); - } - } -}; - -#endif // !defined(KOKKOSKERNELS_ETI_ONLY) || -// KOKKOSKERNELS_IMPL_COMPILE_LIBRARY -} // namespace Impl -} // namespace Experimental -} // namespace KokkosSparse - -// declare / instantiate the vector version -// Instantiate with A,x,y are all the requested Scalar type (no instantiation of -// mixed-precision operands) -#define KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_ETI_SPEC_DECL( \ - SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ - MEM_SPACE_TYPE) \ - extern template struct SPMV_BLOCKCRSMATRIX< \ - const SCALAR_TYPE, const ORDINAL_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, const OFFSET_TYPE, \ - SCALAR_TYPE const *, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, \ - SCALAR_TYPE *, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, true>; - -#define KOKKOSSPARSE_SPMV_BLOCKCRSMATRIX_ETI_SPEC_INST( \ - SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ - MEM_SPACE_TYPE) \ - template struct SPMV_BLOCKCRSMATRIX< \ - const SCALAR_TYPE, const ORDINAL_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, const OFFSET_TYPE, \ - SCALAR_TYPE const *, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, \ - 
SCALAR_TYPE *, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, true>; - -// declare / instantiate the 2D MV version -// Instantiate with A,x,y are all the requested Scalar type (no instantiation of -// mixed-precision operands) -#define KOKKOSSPARSE_SPMV_MV_BLOCKCRSMATRIX_ETI_SPEC_DECL( \ - SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ - MEM_SPACE_TYPE) \ - extern template struct SPMV_MV_BLOCKCRSMATRIX< \ - const SCALAR_TYPE, const ORDINAL_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, const OFFSET_TYPE, \ - SCALAR_TYPE const **, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, \ - SCALAR_TYPE **, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, true>; - -#define KOKKOSSPARSE_SPMV_MV_BLOCKCRSMATRIX_ETI_SPEC_INST( \ - SCALAR_TYPE, ORDINAL_TYPE, OFFSET_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, \ - MEM_SPACE_TYPE) \ - template struct SPMV_MV_BLOCKCRSMATRIX< \ - const SCALAR_TYPE, const ORDINAL_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, const OFFSET_TYPE, \ - SCALAR_TYPE const **, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, \ - SCALAR_TYPE **, LAYOUT_TYPE, \ - Kokkos::Device, \ - Kokkos::MemoryTraits, true>; - -#include -#include -#include - -#endif // KOKKOSSPARSE_IMPL_SPMV_BLOCKCRSMATRIX_SPEC_HPP_ diff --git a/test_common/KokkosBatched_Test_BlockCrs.hpp b/test_common/KokkosBatched_Test_BlockCrs.hpp deleted file mode 100644 index 32734da625..0000000000 --- a/test_common/KokkosBatched_Test_BlockCrs.hpp +++ /dev/null @@ -1,1467 +0,0 @@ -#include -#include -#include - -#include "KokkosBatched_Util.hpp" -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) -#include "mkl.h" -#endif - -#if defined(__KOKKOSBATCHED_NVIDIA_CUBLAS__) -#include "cuda_runtime.h" -#include "cublas_v2.h" -#include "cublas_api.h" -#endif - -#include "Kokkos_Core.hpp" - -#include "KokkosBatched_Vector.hpp" - -#include "KokkosBatched_Copy_Decl.hpp" -#include "KokkosBatched_Copy_Impl.hpp" - -#include 
"KokkosBatched_AddRadial_Decl.hpp" -#include "KokkosBatched_AddRadial_Impl.hpp" - -#include "KokkosBatched_Gemv_Decl.hpp" -#include "KokkosBatched_Gemv_Serial_Impl.hpp" -#include "KokkosBatched_Gemv_Team_Impl.hpp" - -#include "KokkosBatched_Trsv_Decl.hpp" -#include "KokkosBatched_Trsv_Serial_Impl.hpp" -#include "KokkosBatched_Trsv_Team_Impl.hpp" - -#include "KokkosBatched_Gemm_Decl.hpp" -#include "KokkosBatched_Gemm_Serial_Impl.hpp" -#include "KokkosBatched_Gemm_Team_Impl.hpp" - -#include "KokkosBatched_Trsm_Decl.hpp" -#include "KokkosBatched_Trsm_Serial_Impl.hpp" -#include "KokkosBatched_Trsm_Team_Impl.hpp" - -#include "KokkosBatched_LU_Decl.hpp" -#include "KokkosBatched_LU_Serial_Impl.hpp" -#include "KokkosBatched_LU_Team_Impl.hpp" - -#include "KokkosBatched_Test_BlockCrs_Util.hpp" - -namespace KokkosBatched { -namespace Test { - -struct RangeTag {}; -struct TeamTag {}; -struct TeamShmemTag {}; - -template -class FactorizeBlockTridiagMatrices { - public: - typedef ExecSpace exec_space; - typedef ValueType value_type; - typedef ArrayLayout array_layout; - - typedef BlockTridiagMatrices - block_tridiag_matrices_type; - - private: - ordinal_type _ntridiag, _m, _blocksize, _shmemlvl; - - UnmanagedViewType _TA, - _TB, _TC; - typedef typename MagnitudeScalarType::type magnitude_scalar_type; - - public: - FactorizeBlockTridiagMatrices() {} - - // A thread maps nonzero blocks - KOKKOS_INLINE_FUNCTION - void operator()(const RangeTag &, const ordinal_type ij) const { - auto A = - Kokkos::subview(_TA, ij, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); - auto B = - Kokkos::subview(_TB, ij, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); - auto C = - Kokkos::subview(_TC, ij, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); - - auto AA = Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); - auto BB = Kokkos::subview(B, 0, Kokkos::ALL(), Kokkos::ALL()); - auto CC = Kokkos::subview(C, 0, Kokkos::ALL(), Kokkos::ALL()); - auto DD = AA; - - const auto tiny = - 
Kokkos::Details::ArithTraits::epsilon() * 100; - - const ordinal_type kend = _m - 1; - for (ordinal_type k = 0; k < kend; ++k) { - AA.assign_data(&A(k, 0, 0)); - BB.assign_data(&B(k, 0, 0)); - CC.assign_data(&C(k, 0, 0)); - DD.assign_data(&A(k + 1, 0, 0)); - - SerialAddRadial::invoke(tiny, AA); - SerialLU::invoke(AA); - SerialTrsm::invoke(1.0, AA, BB); - SerialTrsm::invoke(1.0, AA, CC); - SerialGemm::invoke(-1.0, CC, BB, 1.0, DD); - } - AA.assign_data(&A(kend, 0, 0)); - SerialLU::invoke(AA); - } - - template - KOKKOS_INLINE_FUNCTION void operator()(const TeamTag &, - const MemberType &member) const { - const int ijbeg = member.league_rank() * VectorLength; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &idx) { - const int ij = ijbeg + idx; - if (ij < _ntridiag) { - auto A = Kokkos::subview(_TA, ij, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL()); - auto B = Kokkos::subview(_TB, ij, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL()); - auto C = Kokkos::subview(_TC, ij, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL()); - - const auto tiny = - Kokkos::Details::ArithTraits::epsilon() * - 100; - - const ordinal_type kend = _m - 1; - { - auto AA = Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); - auto BB = Kokkos::subview(B, 0, Kokkos::ALL(), Kokkos::ALL()); - auto CC = Kokkos::subview(C, 0, Kokkos::ALL(), Kokkos::ALL()); - auto DD = AA; - - for (ordinal_type k = 0; k < kend; ++k) { - AA.assign_data(&A(k, 0, 0)); - BB.assign_data(&B(k, 0, 0)); - CC.assign_data(&C(k, 0, 0)); - DD.assign_data(&A(k + 1, 0, 0)); - - member.team_barrier(); - TeamAddRadial::invoke(member, tiny, AA); - member.team_barrier(); - TeamLU::invoke(member, AA); - member.team_barrier(); - TeamTrsm::invoke(member, 1.0, AA, BB); - TeamTrsm::invoke(member, 1.0, AA, CC); - member.team_barrier(); - TeamGemm::invoke(member, -1.0, CC, BB, 1.0, - DD); - } - { - member.team_barrier(); - AA.assign_data(&A(kend, 0, 0)); - TeamLU::invoke(member, AA); - } - } -#if 0 - { // 
0.028 vs 0.035; without subview it performs 0.028 - const int as0 = A.stride_1(), as1 = A.stride_2(); - const int bs0 = B.stride_1(), bs1 = B.stride_2(); - const int cs0 = C.stride_1(), cs1 = C.stride_2(); - - for (ordinal_type k=0;k - ::invoke(member, _blocksize, _blocksize, AA, as0, as1); - member.team_barrier(); - TeamTrsmInternalLeftLower - ::invoke(member, true, _blocksize, _blocksize, - 1.0, AA, as0, as1, BB, bs0, bs1); - TeamTrsmInternalLeftLower - ::invoke(member, false, _blocksize, _blocksize, - 1.0, AA, as1, as0, CC, cs1, cs0); - member.team_barrier(); - TeamGemmInternal:: - invoke(member, _blocksize, _blocksize, _blocksize, - -1.0, - CC, cs0, cs1, BB, bs0, bs1, - 1.0, - DD, as0, as1); - } - { - member.team_barrier(); - auto AA = &A(kend, 0,0); - TeamLU_Internal - ::invoke(member, _blocksize, _blocksize, AA, as0, as1); - } - } -#endif - } - }); - } - - template - KOKKOS_INLINE_FUNCTION void operator()(const TeamShmemTag &, - const MemberType &member) const { - typedef Kokkos::View packed_view_type; - ScratchViewType sA(member.team_scratch(_shmemlvl), - VectorLength, _blocksize, _blocksize); - - const int ijbeg = member.league_rank() * VectorLength; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &idx) { - const int ij = ijbeg + idx; - if (ij < _ntridiag) { - auto A = Kokkos::subview(_TA, ij, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL()); - auto B = Kokkos::subview(_TB, ij, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL()); - auto C = Kokkos::subview(_TC, ij, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL()); - - auto sAA = Kokkos::subview(sA, idx, Kokkos::ALL(), Kokkos::ALL()); - const ordinal_type kend = _m - 1; - for (ordinal_type k = 0; k < kend; ++k) { - auto AA = Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); - auto BB = Kokkos::subview(B, k, Kokkos::ALL(), Kokkos::ALL()); - auto CC = Kokkos::subview(C, k, Kokkos::ALL(), Kokkos::ALL()); - auto DD = Kokkos::subview(A, k + 1, Kokkos::ALL(), Kokkos::ALL()); - - 
TeamCopy::invoke(member, AA, sAA); - member.team_barrier(); - - TeamLU::invoke(member, sAA); - member.team_barrier(); - - TeamCopy::invoke(member, sAA, AA); - - TeamTrsm::invoke(member, 1.0, sAA, - BB); - TeamTrsm::invoke(member, 1.0, - sAA, CC); - member.team_barrier(); - - TeamGemm::invoke(member, -1.0, CC, BB, 1.0, DD); - } - - { - member.team_barrier(); - auto AA = Kokkos::subview(A, kend, Kokkos::ALL(), Kokkos::ALL()); - TeamLU::invoke(member, AA); - } - } - }); - } - - double FlopCount(const block_tridiag_matrices_type T) { - const int ntridiag = T.NumTridiagMatrices(), m = T.NumRows(), - blocksize = T.BlockSize(); - - return ntridiag * - ((m - 1) * (LU_FlopCount(blocksize, blocksize) + - Trsm_Lower_FlopCountLower(blocksize, blocksize) + - Trsm_Upper_FlopCountUpper(blocksize, blocksize) + - Gemm_FlopCount(blocksize, blocksize, blocksize)) + - LU_FlopCount(blocksize, blocksize)); - } - - // for batched blas check - void run(const int op, const block_tridiag_matrices_type T, - const bool fake = false) { - _ntridiag = T.NumTridiagMatrices(); - _m = T.NumRows(); - _blocksize = T.BlockSize(); - - _TA = T.A(); - _TB = T.B(); - _TC = T.C(); - - // parallel over the instances of tridiagonal matrices - if (!fake) { -#if defined(KOKKOS_ENABLE_CUDA) && defined(__KOKKOSBATCHED_TEST_ENABLE_CUDA__) - typedef FactorizeBlockTridiagMatrices< - exec_space, value_type, array_layout, VectorLength, LU_AlgoTagType, - Trsm_AlgoTagType, Gemm_AlgoTagType> - functor_type; -#endif - - switch (op) { - case 0: { - std::cout << "KokkosBatched::RangeTag::" << Gemm_AlgoTagType::name() - << "\n"; - const Kokkos::RangePolicy policy(0, _ntridiag); - Kokkos::parallel_for( - "KokkosBatched::Test::BlockCrs::FactorizeBlockTridiagMatrices::" - "Op0", - policy, *this); - break; - } -#if defined(KOKKOS_ENABLE_CUDA) && defined(__KOKKOSBATCHED_TEST_ENABLE_CUDA__) - case 1: { - typedef Kokkos::TeamPolicy policy_type; - - int team_size = 0; - - // this is what cuda allows - const int max_team_size = - 
policy_type(_ntridiag, Kokkos::AUTO, VectorLength) - .team_size_max(functor_type(), Kokkos::ParallelForTag()); - - // this is what algorithm allows - if (std::is_same::value) { - const int mb = - Algo::Gemm::Blocked::mb(); - const int mp = _blocksize % mb, mblk = (_blocksize / mb) + (mp > 0); - // - max parallelism in gemm / 2 (no idea...) - team_size = - std::min(std::max(mblk * mblk / 2, 1), max_team_size / 2); - std::cout << "KokkosBatched::TeamTag::Blocked::TeamSize:: " - << team_size << " " << (max_team_size) << "\n"; - } else { - // - max parallelism in trsm * scheduling efficiency 2 - // - max cuda team size / scheduling efficiency 2 - team_size = - std::min(std::max(_blocksize * 2, 4), max_team_size / 2); - std::cout << "KokkosBatched::TeamTag::Unblocked::TeamSize:: " - << team_size << " " << (max_team_size) << "\n"; - } - - const policy_type policy(_ntridiag, team_size, VectorLength); - Kokkos::parallel_for( - "KokkosBatched::Test::BlockCrs::FactorizeBlockTridiagMatrices::" - "Op1", - policy, *this); - break; - } - case 2: { - typedef Kokkos::View packed_view_type; - typedef Kokkos::TeamPolicy policy_type; - - const int per_team_scratch = - ScratchViewType::shmem_size( - VectorLength, _blocksize, _blocksize); - - _shmemlvl = ((per_team_scratch / 1024) < 48 ? 0 : 1); - { - int team_size = 0; - - // this is what cuda allows - const int max_team_size = - policy_type(_ntridiag, Kokkos::AUTO, VectorLength) - .set_scratch_size(_shmemlvl, - Kokkos::PerTeam(per_team_scratch)) - .team_size_max(functor_type(), Kokkos::ParallelForTag()); - - // this is what algorithm allows - if (std::is_same::value) { - const int mb = - Algo::Gemm::Blocked::mb(); - const int mp = _blocksize % mb, - mblk = (_blocksize / mb) + (mp > 0); - // - max parallelism in gemm / 2 (no idea...) 
- team_size = - std::min(std::max(mblk * mblk / 2, 1), max_team_size / 2); - std::cout << "KokkosBatched::TeamShmemTag::Blocked::TeamSize:: " - << team_size << " " << (max_team_size) << "\n"; - } else { - team_size = - std::min(std::max(_blocksize * 2, 4), max_team_size / 2); - std::cout << "KokkosBatched::TeamShmemTag::Unblocked::TeamSize:: " - << team_size << " " << (max_team_size) << "\n"; - } - - policy_type policy = - policy_type(_ntridiag, team_size, VectorLength) - .set_scratch_size(_shmemlvl, - Kokkos::PerTeam(per_team_scratch)); - Kokkos::parallel_for( - "KokkosBatched::Test::BlockCrs::FactorizeBlockTridiagMatrices::" - "Op2", - policy, *this); - } - break; - } -#endif - default: { - std::cout << "Not supported operation mode: " << op << " \n"; - break; - } - } - } - } - - template - void a_subtract_mult_l_and_u(const ordinal_type tl, const ordinal_type il, - AViewType A, const ordinal_type tr, - const ordinal_type ir, LViewType L, - UViewType U) { - for (ordinal_type ii = 0; ii < _blocksize; ++ii) - for (ordinal_type jj = 0; jj < _blocksize; ++jj) - for (ordinal_type kk = 0; kk < _blocksize; ++kk) { - const auto l = - (ii == kk ? 1 : ii > kk ? tdiag_val(L, tr, ir, ii, kk) : 0); - const auto u = (kk <= jj ? 
tdiag_val(U, tr, ir, kk, jj) : 0); - tdiag_val(A, tl, il, ii, jj) -= l * u; - } - } - - template - void a_subtract_mult_b_and_c(const ordinal_type tl, const ordinal_type il, - AViewType A, const ordinal_type tr, - const ordinal_type ir, BViewType B, - CViewType C) { - for (ordinal_type ii = 0; ii < _blocksize; ++ii) - for (ordinal_type jj = 0; jj < _blocksize; ++jj) - for (ordinal_type kk = 0; kk < _blocksize; ++kk) - tdiag_val(A, tl, il, ii, jj) -= - (tdiag_val(B, tr, ir, ii, kk) * tdiag_val(C, tr, ir, kk, jj)); - } - - template - void a_subtract_mult_l_and_b(const ordinal_type tl, const ordinal_type il, - AViewType A, const ordinal_type tr, - const ordinal_type ir, LViewType L, - BViewType B) { - for (ordinal_type ii = 0; ii < _blocksize; ++ii) - for (ordinal_type jj = 0; jj < _blocksize; ++jj) - for (ordinal_type kk = 0; kk < _blocksize; ++kk) { - const auto l = - (ii == kk ? 1.0 : ii > kk ? tdiag_val(L, tr, ir, ii, kk) : 0); - tdiag_val(A, tl, il, ii, jj) -= l * tdiag_val(B, tr, ir, kk, jj); - } - } - - template - void a_subtract_mult_b_and_u(const ordinal_type tl, const ordinal_type il, - AViewType A, const ordinal_type tr, - const ordinal_type ir, BViewType B, - UViewType U) { - for (ordinal_type ii = 0; ii < _blocksize; ++ii) - for (ordinal_type jj = 0; jj < _blocksize; ++jj) - for (ordinal_type kk = 0; kk < _blocksize; ++kk) { - const auto u = (kk <= jj ? 
tdiag_val(U, tr, ir, kk, jj) : 0); - tdiag_val(A, tl, il, ii, jj) -= tdiag_val(B, tr, ir, ii, kk) * u; - } - } - - bool check(const block_tridiag_matrices_type T) { - // factors - auto DD = Kokkos::create_mirror_view(_TA); - Kokkos::deep_copy(DD, _TA); - auto UU = Kokkos::create_mirror_view(_TB); - Kokkos::deep_copy(UU, _TB); - auto LL = Kokkos::create_mirror_view(_TC); - Kokkos::deep_copy(LL, _TC); - - // input A - auto A = Kokkos::create_mirror_view(T.A()); - Kokkos::deep_copy(A, T.A()); - auto B = Kokkos::create_mirror_view(T.B()); - Kokkos::deep_copy(B, T.B()); - auto C = Kokkos::create_mirror_view(T.C()); - Kokkos::deep_copy(C, T.C()); - - // diffs - Kokkos::View AA( - "AA", _ntridiag, _m, _blocksize, _blocksize), - BB("BB", _ntridiag, _m - 1, _blocksize, _blocksize), - CC("CC", _ntridiag, _m - 1, _blocksize, _blocksize); - - Kokkos::deep_copy(AA, A); - Kokkos::deep_copy(BB, B); - Kokkos::deep_copy(CC, C); - - // Check | A - L U | / | A | - for (ordinal_type t = 0; t < _ntridiag; ++t) { - a_subtract_mult_l_and_u(t, 0, AA, t, 0, DD, DD); - for (ordinal_type i = 1; i < _m; ++i) { - a_subtract_mult_l_and_u(t, i, AA, t, i, DD, DD); - a_subtract_mult_b_and_c(t, i, AA, t, i - 1, LL, UU); - a_subtract_mult_l_and_b(t, i - 1, BB, t, i - 1, DD, UU); - a_subtract_mult_b_and_u(t, i - 1, CC, t, i - 1, LL, DD); - } - } - - double norm = 0, diff = 0; - for (ordinal_type t = 0; t < _ntridiag; ++t) { - for (ordinal_type ii = 0; ii < _blocksize; ++ii) - for (ordinal_type jj = 0; jj < _blocksize; ++jj) { - norm += - Kokkos::ArithTraits::abs(tdiag_val(A, t, 0, ii, jj)); - diff += - Kokkos::ArithTraits::abs(tdiag_val(AA, t, 0, ii, jj)); - } - for (ordinal_type i = 1; i < _m; ++i) - for (ordinal_type ii = 0; ii < _blocksize; ++ii) - for (ordinal_type jj = 0; jj < _blocksize; ++jj) { - norm += Kokkos::ArithTraits::abs( - tdiag_val(A, t, i, ii, jj)); - diff += Kokkos::ArithTraits::abs( - tdiag_val(AA, t, i, ii, jj)); - norm += Kokkos::ArithTraits::abs( - tdiag_val(B, t, i - 1, ii, 
jj)); - diff += Kokkos::ArithTraits::abs( - tdiag_val(BB, t, i - 1, ii, jj)); - norm += Kokkos::ArithTraits::abs( - tdiag_val(C, t, i - 1, ii, jj)); - diff += Kokkos::ArithTraits::abs( - tdiag_val(CC, t, i - 1, ii, jj)); - } - } - // std::cout << "tridiag factor check norm = " << norm << " diff = " << - // diff << std::endl; - const bool r_val = - diff / norm < 1e2 * std::numeric_limits::epsilon(); - return r_val; - } -}; - -template -class SolveBlockTridiagMatrices { - public: - typedef ExecSpace exec_space; - typedef ValueType value_type; - typedef ArrayLayout array_layout; - - typedef BlockTridiagMatrices - block_tridiag_matrices_type; - typedef PartitionedBlockMultiVector - partitioned_block_multi_vector_type; - - private: - ordinal_type _ntridiag, _m, _blocksize, _nvectors, _shmemlvl; - - ConstUnmanagedViewType - _TA, _TB, _TC; - ConstUnmanagedViewType< - typename partitioned_block_multi_vector_type::value_array_type> - _b; - /**/ UnmanagedViewType< - typename partitioned_block_multi_vector_type::value_array_type> - _x; - - public: - SolveBlockTridiagMatrices() {} - - KOKKOS_INLINE_FUNCTION - void operator()(const RangeTag &, const ordinal_type ij) const { - auto A = - Kokkos::subview(_TA, ij, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); - auto B = - Kokkos::subview(_TB, ij, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); - auto C = - Kokkos::subview(_TC, ij, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); - - // subview patterns - auto A_0_all_all = Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); - auto B_0_all_all = Kokkos::subview(B, 0, Kokkos::ALL(), Kokkos::ALL()); - auto C_0_all_all = Kokkos::subview(C, 0, Kokkos::ALL(), Kokkos::ALL()); - - auto mx_0_0_all_all = - Kokkos::subview(_x, 0, 0, Kokkos::ALL(), Kokkos::ALL()); - auto mb_0_0_all_all = - Kokkos::subview(_b, 0, 0, Kokkos::ALL(), Kokkos::ALL()); - - auto x_0_all = Kokkos::subview(mx_0_0_all_all, 0, Kokkos::ALL()); - auto b_0_all = Kokkos::subview(mb_0_0_all_all, 0, Kokkos::ALL()); - - /// - /// 
loop over multivectors - /// - - auto &x = mx_0_0_all_all; - auto &b = mb_0_0_all_all; - - auto &xt = x_0_all; - auto xb = x_0_all; - - auto &bt = b_0_all; - auto bb = b_0_all; - - for (int jvec = 0; jvec < _nvectors; ++jvec) { - x.assign_data(&_x(ij, jvec, 0, 0)); - b.assign_data(&_b(ij, jvec, 0, 0)); - - /// - /// forward substitution - /// - { - auto < = A_0_all_all; - auto &LB = C_0_all_all; - - const bool is_same_x_and_b = (x.data() == b.data()); - { - if (!is_same_x_and_b) { - xt.assign_data(&x(0, 0)); - bt.assign_data(&b(0, 0)); - SerialCopy::invoke(bt, xt); - } - } - const ordinal_type kend = _m - 1; - for (ordinal_type k = 0; k < kend; ++k) { - LT.assign_data(&A(k, 0, 0)); - LB.assign_data(&C(k, 0, 0)); - - xt.assign_data(&x(k, 0)); - xb.assign_data(&x(k + 1, 0)); - - if (!is_same_x_and_b) { - bb.assign_data(&b(k + 1, 0)); - SerialCopy::invoke(bb, xb); - } - - SerialTrsv::invoke(1.0, LT, xt); - SerialGemv::invoke(-1.0, LB, xt, - 1.0, xb); - } - - LT.assign_data(&A(kend, 0, 0)); - xt.assign_data(&x(kend, 0)); - SerialTrsv::invoke(1.0, LT, xt); - } - - /// - /// backward substitution - /// - { - auto &UT = B_0_all_all; - auto &UB = A_0_all_all; - - const ordinal_type kbegin = _m - 1; - for (ordinal_type k = kbegin; k > 0; --k) { - UT.assign_data(&B(k - 1, 0, 0)); - UB.assign_data(&A(k, 0, 0)); - - xt.assign_data(&x(k - 1, 0)); - xb.assign_data(&x(k, 0)); - - SerialTrsv::invoke(1.0, UB, xb); - SerialGemv::invoke(-1.0, UT, xb, - 1.0, xt); - } - UT.assign_data(&A(0, 0, 0)); - xt.assign_data(&x(0, 0)); - SerialTrsv::invoke(1.0, UT, xt); - } - } - } - - template - KOKKOS_INLINE_FUNCTION void operator()(const TeamTag &, - const MemberType &member) const { - const int ijbeg = member.league_rank() * VectorLength; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &idx) { - const int ij = ijbeg + idx; - if (ij < _ntridiag) { - auto A = Kokkos::subview(_TA, ij, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL()); - auto B = 
Kokkos::subview(_TB, ij, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL()); - auto C = Kokkos::subview(_TC, ij, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL()); - - /// - /// loop over multivectors - /// - for (int jvec = 0; jvec < _nvectors; ++jvec) { - auto x = - Kokkos::subview(_x, ij, jvec, Kokkos::ALL(), Kokkos::ALL()); - auto b = - Kokkos::subview(_b, ij, jvec, Kokkos::ALL(), Kokkos::ALL()); - - /// - /// forward substitution - /// - { - const bool is_same_x_and_b = (x.data() == b.data()); - { - if (!is_same_x_and_b) { - auto x0 = Kokkos::subview(x, 0, Kokkos::ALL()); - auto b0 = Kokkos::subview(b, 0, Kokkos::ALL()); - TeamCopy::invoke(member, b0, - x0); - member.team_barrier(); - } - } - const ordinal_type kend = _m - 1; - for (ordinal_type k = 0; k < kend; ++k) { - auto LT = Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); - auto LB = Kokkos::subview(C, k, Kokkos::ALL(), Kokkos::ALL()); - - auto xt = Kokkos::subview(x, k, Kokkos::ALL()); - auto xb = Kokkos::subview(x, k + 1, Kokkos::ALL()); - - if (!is_same_x_and_b) { - auto bb = Kokkos::subview(b, k + 1, Kokkos::ALL()); - TeamCopy::invoke(member, bb, - xb); - } - - member.team_barrier(); - TeamTrsv::invoke(member, 1.0, - LT, xt); - - member.team_barrier(); - TeamGemv::invoke(member, -1.0, LB, xt, 1.0, - xb); - } - { - auto LL = - Kokkos::subview(A, kend, Kokkos::ALL(), Kokkos::ALL()); - auto xx = Kokkos::subview(x, kend, Kokkos::ALL()); - member.team_barrier(); - TeamTrsv::invoke(member, 1.0, - LL, xx); - } - } - - /// - /// backward substitution - /// - { - const ordinal_type kbegin = _m - 1; - for (ordinal_type k = kbegin; k > 0; --k) { - auto UT = - Kokkos::subview(B, k - 1, Kokkos::ALL(), Kokkos::ALL()); - auto UB = Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); - - auto xt = Kokkos::subview(x, k - 1, Kokkos::ALL()); - auto xb = Kokkos::subview(x, k, Kokkos::ALL()); - - member.team_barrier(); - TeamTrsv::invoke(member, 1.0, - UB, xb); - - member.team_barrier(); - TeamGemv::invoke(member, -1.0, 
UT, xb, 1.0, - xt); - } - { - auto UU = Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); - auto xx = Kokkos::subview(x, 0, Kokkos::ALL()); - - member.team_barrier(); - TeamTrsv::invoke(member, 1.0, - UU, xx); - } - } - } - } - }); - } - - template - KOKKOS_INLINE_FUNCTION void operator()(const TeamShmemTag &, - const MemberType &member) const { - typedef Kokkos::View packed_view_type; - ScratchViewType s(member.team_scratch(_shmemlvl), - VectorLength, _m, _blocksize); - - const int ijbeg = member.league_rank() * VectorLength; - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(member, VectorLength), [&](const int &idx) { - const int ij = ijbeg + idx; - if (ij < _ntridiag) { - auto A = Kokkos::subview(_TA, ij, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL()); - auto B = Kokkos::subview(_TB, ij, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL()); - auto C = Kokkos::subview(_TC, ij, Kokkos::ALL(), Kokkos::ALL(), - Kokkos::ALL()); - - auto sx = Kokkos::subview(s, idx, Kokkos::ALL(), Kokkos::ALL()); - - /// - /// loop over multivectors - /// - for (int jvec = 0; jvec < _nvectors; ++jvec) { - auto x = - Kokkos::subview(_x, ij, jvec, Kokkos::ALL(), Kokkos::ALL()); - auto b = - Kokkos::subview(_b, ij, jvec, Kokkos::ALL(), Kokkos::ALL()); - - // copy the entire vector into shared memory (if necessary it - // needs chunking) - TeamCopy::invoke(member, b, sx); - member.team_barrier(); - - /// - /// forward substitution - /// - { - const ordinal_type kend = _m - 1; - for (ordinal_type k = 0; k < kend; ++k) { - auto LT = Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); - auto LB = Kokkos::subview(C, k, Kokkos::ALL(), Kokkos::ALL()); - - auto xt = Kokkos::subview(sx, k, Kokkos::ALL()); - auto xb = Kokkos::subview(sx, k + 1, Kokkos::ALL()); - - member.team_barrier(); - TeamTrsv::invoke(member, 1.0, - LT, xt); - - member.team_barrier(); - TeamGemv::invoke(member, -1.0, LB, xt, 1.0, - xb); - } - { - auto LL = - Kokkos::subview(A, kend, Kokkos::ALL(), Kokkos::ALL()); - auto xx = 
Kokkos::subview(sx, kend, Kokkos::ALL()); - member.team_barrier(); - TeamTrsv::invoke(member, 1.0, - LL, xx); - } - } - - /// - /// backward substitution - /// - { - const ordinal_type kbegin = _m - 1; - for (ordinal_type k = kbegin; k > 0; --k) { - auto UT = - Kokkos::subview(B, k - 1, Kokkos::ALL(), Kokkos::ALL()); - auto UB = Kokkos::subview(A, k, Kokkos::ALL(), Kokkos::ALL()); - - auto xt = Kokkos::subview(sx, k - 1, Kokkos::ALL()); - auto xb = Kokkos::subview(sx, k, Kokkos::ALL()); - - member.team_barrier(); - TeamTrsv::invoke(member, 1.0, - UB, xb); - - member.team_barrier(); - TeamGemv::invoke(member, -1.0, UT, xb, 1.0, - xt); - } - { - auto UU = Kokkos::subview(A, 0, Kokkos::ALL(), Kokkos::ALL()); - auto xx = Kokkos::subview(sx, 0, Kokkos::ALL()); - - member.team_barrier(); - TeamTrsv::invoke(member, 1.0, - UU, xx); - } - } - // copy the entire vector into shared memory (if necessary it - // needs chunking) - TeamCopy::invoke(member, sx, x); - member.team_barrier(); - } - } - }); - } - - void run(const int op, const block_tridiag_matrices_type T, - const partitioned_block_multi_vector_type x, - const partitioned_block_multi_vector_type b) { - assert(T.NumTridiagMatrices() == x.NumPartitions()); - assert(T.NumRows() == x.NumRows()); - assert(T.BlockSize() == x.BlockSize()); - - _ntridiag = T.NumTridiagMatrices(); - _m = T.NumRows(); - _blocksize = T.BlockSize(); - _nvectors = x.NumVectors(); - - _TA = T.A(); - _TB = T.B(); - _TC = T.C(); - - _x = x.Values(); - _b = b.Values(); - - { -#if defined(KOKKOS_ENABLE_CUDA) && defined(__KOKKOSBATCHED_TEST_ENABLE_CUDA__) - typedef SolveBlockTridiagMatrices - functor_type; -#endif - - switch (op) { - case 0: { - const Kokkos::RangePolicy policy(0, _ntridiag); - Kokkos::parallel_for( - "KokkosBatched::Test::BlockCrs::SolveBlockTridiagMatrices::Op0", - policy, *this); - break; - } -#if defined(KOKKOS_ENABLE_CUDA) && defined(__KOKKOSBATCHED_TEST_ENABLE_CUDA__) - case 1: { - typedef Kokkos::TeamPolicy policy_type; - - int 
team_size = 0; - - // this is what cuda allows - const int max_team_size = - policy_type(_ntridiag, Kokkos::AUTO, VectorLength) - .team_size_max(functor_type(), Kokkos::ParallelForTag()); - - // this is what algorithm allows - if (std::is_same::value) { - const int mb = - Algo::Gemv::Blocked::mb(); - const int mp = _blocksize % mb, mblk = (_blocksize / mb) + (mp > 0); - team_size = std::min(std::max(mblk / 2, 1), int(max_team_size / 2)); - } else { - // in solve phase, max peak parallelism is same as blocksize (one - // iteration) better to give blocksize/2 - team_size = - std::min(std::max(_blocksize / 2, 4), int(max_team_size / 2)); - } - - const policy_type policy(_ntridiag, team_size, VectorLength); - Kokkos::parallel_for( - "KokkosBatched::Test::BlockCrs::SolveBlockTridiagMatrices::Op1", - policy, *this); - break; - } - case 2: { - typedef Kokkos::View packed_view_type; - typedef Kokkos::TeamPolicy policy_type; - - const int per_team_scratch = - ScratchViewType::shmem_size(VectorLength, _m, - _blocksize); - - _shmemlvl = ((per_team_scratch / 1024) < 48 ? 
0 : 1); - { - int team_size = 0; - - // this is what cuda allows - const int max_team_size = - policy_type(_ntridiag, Kokkos::AUTO, VectorLength) - .set_scratch_size(_shmemlvl, - Kokkos::PerTeam(per_team_scratch)) - .team_size_max(functor_type(), Kokkos::ParallelForTag()); - - // this is what algorithm allows - if (std::is_same::value) { - const int mb = - Algo::Gemv::Blocked::mb(); - const int mp = _blocksize % mb, - mblk = (_blocksize / mb) + (mp > 0); - team_size = - std::min(std::max(mblk / 2, 1), int(max_team_size / 2)); - } else { - team_size = - std::min(std::max(_blocksize / 2, 4), int(max_team_size / 2)); - } - - policy_type policy = - policy_type(_ntridiag, team_size, VectorLength) - .set_scratch_size(_shmemlvl, - Kokkos::PerTeam(per_team_scratch)); - ; - Kokkos::parallel_for( - "KokkosBatched::Test::BlockCrs::SolveBlockTridiagMatrices::Op2", - policy, *this); - } - break; - } -#endif - default: { - std::cout << "Not supported operation mode: " << op << " \n"; - break; - } - } - } - } - - template - void r_subtract_mult_a_and_x(const ordinal_type tr, const ordinal_type ir, - RViewType R, const ordinal_type ta, - const ordinal_type ia, AViewType A, - const ordinal_type tx, const ordinal_type ix, - XViewType X) { - for (ordinal_type kk = 0; kk < _nvectors; ++kk) - for (ordinal_type ii = 0; ii < _blocksize; ++ii) - for (ordinal_type jj = 0; jj < _blocksize; ++jj) - tdiag_val(R, tr, kk, ir, ii) -= - tdiag_val(A, ta, ia, ii, jj) * tdiag_val(X, tx, kk, ix, jj); - } - - bool check(const block_tridiag_matrices_type T, - const partitioned_block_multi_vector_type b) { - // input A - auto AA = Kokkos::create_mirror_view(T.A()); - Kokkos::deep_copy(AA, T.A()); - auto BB = Kokkos::create_mirror_view(T.B()); - Kokkos::deep_copy(BB, T.B()); - auto CC = Kokkos::create_mirror_view(T.C()); - Kokkos::deep_copy(CC, T.C()); - - auto bb = Kokkos::create_mirror_view(b.Values()); - Kokkos::deep_copy(bb, b.Values()); - auto xx = Kokkos::create_mirror_view(_x); - 
Kokkos::deep_copy(xx, _x); - - // diffs - Kokkos::View rr( - "rr", bb.extent(0), bb.extent(1), bb.extent(2), bb.extent(3)); - - Kokkos::deep_copy(rr, bb); - - // Check | Ax - b | / | b | - for (ordinal_type t = 0; t < _ntridiag; ++t) { - r_subtract_mult_a_and_x(t, 0, rr, t, 0, AA, t, 0, xx); - r_subtract_mult_a_and_x(t, 0, rr, t, 0, BB, t, 1, xx); - - for (ordinal_type i = 1; i < (_m - 1); ++i) { - r_subtract_mult_a_and_x(t, i, rr, t, i - 1, CC, t, i - 1, xx); - r_subtract_mult_a_and_x(t, i, rr, t, i, AA, t, i, xx); - r_subtract_mult_a_and_x(t, i, rr, t, i, BB, t, i + 1, xx); - } - r_subtract_mult_a_and_x(t, _m - 1, rr, t, _m - 2, CC, t, _m - 2, xx); - r_subtract_mult_a_and_x(t, _m - 1, rr, t, _m - 1, AA, t, _m - 1, xx); - } - - double norm = 0, diff = 0; - for (ordinal_type t = 0; t < _ntridiag; ++t) - for (ordinal_type jvec = 0; jvec < _nvectors; ++jvec) - for (ordinal_type i = 0; i < _m; ++i) - for (ordinal_type ii = 0; ii < _blocksize; ++ii) { - norm += Kokkos::ArithTraits::abs( - tdiag_val(bb, t, jvec, i, ii)); - diff += Kokkos::ArithTraits::abs( - tdiag_val(rr, t, jvec, i, ii)); - } - - // std::cout << "tridiag solve check norm = " << norm << " diff = " << - // diff << std::endl; - const bool r_val = - diff / norm < 1e2 * std::numeric_limits::epsilon(); - return r_val; - } -}; - -// unit tests -template -void run(const ordinal_type ni, const ordinal_type nj, const ordinal_type nk, - const ordinal_type blocksize, const ordinal_type nrhs, - const bool test_tpl = false) { - typedef typename DeviceSpace::array_layout DeviceArrayLayout; - typedef Kokkos::DefaultHostExecutionSpace HostSpace; - - bool success = true; - StructuredBlock mesh(ni, nj, nk); - - // Test StructuredBlock. 
- for (ordinal_type c = 0; c < mesh.size(); ++c) { - ordinal_type i, j, k; - mesh.id2ijk(c, i, j, k); - TEST_ASSERT(i >= 0 && i < mesh.ni, success); - TEST_ASSERT(j >= 0 && j < mesh.nj, success); - TEST_ASSERT(k >= 0 && k < mesh.nk, success); - TEST_ASSERT(mesh.ijk2id(i, j, k) == c, success); - } - - // Graph construction - CrsGraph graph_host = - create_graph_host_for_structured_block( - mesh, StencilShape::cross); - - // Crs matrix and multi vector construction - BlockCrsMatrix A_host(graph_host, blocksize); - fill_block_crs_matrix_host(A_host); - - // Device mirroring - auto A_device = create_mirror(A_host); - deep_copy(A_device, A_host); - - // Test Matrix Vector product - { - const ordinal_type m = graph_host.NumRows(); - - BlockMultiVector x_host(nrhs, m, blocksize); - fill_block_multi_vector_host(x_host); - - auto x_device = create_mirror(x_host); - deep_copy(x_device, x_host); - - BlockMultiVector y1_device(nrhs, m, - blocksize), - y2_device(nrhs, m, blocksize); - - { - BlockCrsMatrixVectorProductByRow matvec; - matvec.run(A_device, x_device, y1_device); - } - { - BlockCrsMatrixVectorProductByBlockRow - matvec; - matvec.run(A_device, x_device, y2_device); - } - - const double rdiff = - compute_relative_diff(y1_device.Values(), y2_device.Values()); - TEST_ASSERT(rdiff <= 1e2 * std::numeric_limits::epsilon(), - success); - } - - // Test Block TriDiag Extraction - BlockTridiagMatrices T_device = - create_block_tridiag_matrices( - mesh.ni * mesh.nj, mesh.nk, blocksize); - { - ExtractBlockTridiagMatrices - extblk(mesh); - extblk.run(A_device, T_device); - TEST_ASSERT(extblk.check(), success); - } - - BlockTridiagMatrices T_org_device = - create_block_tridiag_matrices( - mesh.ni * mesh.nj, mesh.nk, blocksize); - - deep_copy(T_org_device, T_device); - - // Test Block TriDiag Factorization - if (test_tpl) { -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ - 
defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) - FactorizeBlockTridiagMatrices< - DeviceSpace, ValueType, DeviceArrayLayout, VectorLength, - Algo::LU::CompactMKL, Algo::Trsm::CompactMKL, Algo::Gemm::CompactMKL> - factorblk; - factorblk.run(0, T_device); // range policy only now - TEST_ASSERT(factorblk.check(T_org_device), success); -#elif defined(__KOKKOSBATCHED_NVIDIA_CUBLAS__) - std::cout << "CUBLAS compact version does not exist\n"; -#else - std::cout << "TPLs (CompactMKL or CUBLAS) are not found\n"; -#endif - } else { - FactorizeBlockTridiagMatrices - factorblk; - factorblk.run(Oper, T_device); - TEST_ASSERT(factorblk.check(T_org_device), success); - } - - // Test Block TriDiag Solve - { - PartitionedBlockMultiVector - b_host = create_partitioned_block_multi_vector( - mesh.ni * mesh.nj, nrhs, mesh.nk, blocksize); - fill_partitioned_block_multi_vector_host(b_host, mesh.ni * mesh.nj); - - auto b_device = create_mirror(b_host); - deep_copy(b_device, b_host); - - PartitionedBlockMultiVector - x_device = create_partitioned_block_multi_vector( - mesh.ni * mesh.nj, nrhs, mesh.nk, blocksize); - if (test_tpl) { -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) - SolveBlockTridiagMatrices - solveblk; - - solveblk.run(0, T_device, x_device, b_device); - TEST_ASSERT(solveblk.check(T_org_device, b_device), success); -#elif defined(__KOKKOSBATCHED_NVIDIA_CUBLAS__) - std::cout << "CUBLAS compact version does not exist\n"; -#else - std::cout << "TPLs (CompactMKL or CUBLAS) are not found\n"; -#endif - } else { - SolveBlockTridiagMatrices - solveblk; - - solveblk.run(Oper, T_device, x_device, b_device); - TEST_ASSERT(solveblk.check(T_org_device, b_device), success); - } - } - - if (!success) - std::cout << "Unit Tests:: Failed:: " - << " ni = " << ni << " nj = " << nj << " nk = " << nk - << " blocksize = " << blocksize << " nrhs = " << nrhs << 
" \n"; -} - -// performance tests -template -int run(const Input &input, const bool test_tpl = false) { - typedef typename DeviceSpace::array_layout DeviceArrayLayout; - typedef Kokkos::DefaultHostExecutionSpace HostSpace; - - const ordinal_type niter = 50; - int dontopt = 0; - bool success = true; - - /// - /// construct a discrete system of equations - /// - const ordinal_type ni = input.ni, nj = input.nj, nk = input.nk, - blocksize = input.bs, nrhs = input.nrhs, opf = input.opf, - ops = input.ops; - - StructuredBlock mesh(ni, nj, nk); - - // something is not copyable ... don't know why yet... - BlockCrsMatrix A_device; - // double t_fill_block_crs_matrix = 0.0, t_fill_graph = 0.0; - { - const StencilShape::Enum stencil_shape = input.stencil_shape; - CrsGraph graph_host; - { - Timer timer("Fill Graph _______________"); - timer.reset(); - graph_host = create_graph_host_for_structured_block( - mesh, stencil_shape); - /* t_fill_graph = */ timer.seconds(); - } - BlockCrsMatrix A_host(graph_host, blocksize); - { - Timer timer("Fill Block CRS Matrix_______________"); - timer.reset(); - fill_block_crs_matrix_host(A_host); - /* t_fill_block_crs_matrix = */ timer.seconds(); - } - A_device = create_mirror(A_host); - deep_copy(A_device, A_host); - } - - // memory size - const double memsize_A = - A_device.Values().extent(0) * blocksize * blocksize * 8; - - /// - /// matrix vector multiplication test - /// - double t_matvec = 0.0; - // double t_fill_block_multi_vector = 0.0; - { - const ordinal_type m = mesh.size(); - - BlockMultiVector x_host(nrhs, m, blocksize); - { - Timer timer("Fill Block Multi Vector______________"); - timer.reset(); - fill_block_multi_vector_host(x_host); - /* t_fill_block_multi_vector = */ timer.seconds(); - } - auto x_device = create_mirror(x_host); - deep_copy(x_device, x_host); - - BlockMultiVector y_device(nrhs, m, - blocksize); - { - // BlockCrsMatrixVectorProductByRow matvec; - BlockCrsMatrixVectorProductByBlockRow - matvec; - { - Timer 
timer("50 BlockCrsMatrixVectorProduct"); - timer.reset(); - for (ordinal_type i = 0; i < niter; ++i) { - matvec.run(A_device, x_device, y_device); - dontopt += i; - } - t_matvec = timer.seconds(); - } - } - } - - /// - /// block tridiag extraction test - /// - const double memsize_T = - ni * nj * (3 * (nk - 1) * blocksize * blocksize + blocksize * blocksize) * - 8; - - double t_extract = 0.0; - BlockTridiagMatrices T_device = - create_block_tridiag_matrices( - ni * nj, nk, blocksize); - { - ExtractBlockTridiagMatrices - extblk(mesh); - { - Timer timer("ExtractBlockTridiagMatrices"); - timer.reset(); - extblk.run(A_device, T_device); - t_extract = timer.seconds(); - } - if (input.check) TEST_ASSERT(extblk.check(), success); - } - - // keep original matrix for check - BlockTridiagMatrices T_org_device = - create_block_tridiag_matrices( - ni * nj, nk, blocksize); - - deep_copy(T_org_device, T_device); - - /// - /// block tridiag factorization test - /// - double t_factorize = 0.0, f_factorize = 0.0; - if (test_tpl) { -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) - FactorizeBlockTridiagMatrices< - DeviceSpace, ValueType, DeviceArrayLayout, VectorLength, - Algo::LU::CompactMKL, Algo::Trsm::CompactMKL, Algo::Gemm::CompactMKL> - factorblk; - f_factorize = - factorblk.FlopCount(T_device) * (sizeof(ValueType) / sizeof(double)); - { - Timer timer("FactorizeBlockTridiagMatrices"); - timer.reset(); - Kokkos::fence(); - factorblk.run(0, T_device); - Kokkos::fence(); - t_factorize = timer.seconds(); - } - TEST_ASSERT(factorblk.check(T_org_device), success); -#elif defined(__KOKKOSBATCHED_NVIDIA_CUBLAS__) - std::cout << "CUBLAS compact version does not exist\n"; -#else - std::cout << "TPLs (CompactMKL or CUBLAS) are not found\n"; -#endif - } else { - FactorizeBlockTridiagMatrices - factorblk; - - f_factorize = - factorblk.FlopCount(T_device) * 
(sizeof(ValueType) / sizeof(double)); - { - Timer timer("FactorizeBlockTridiagMatrices"); - timer.reset(); - Kokkos::fence(); - factorblk.run(opf, T_device); - Kokkos::fence(); - t_factorize = timer.seconds(); - } - if (input.check) TEST_ASSERT(factorblk.check(T_org_device), success); - } - - /// - /// block tridiag solve test - /// - double t_solve = 0.0; - { - PartitionedBlockMultiVector - b_host = create_partitioned_block_multi_vector( - ni * nj, nrhs, nk, blocksize); - fill_partitioned_block_multi_vector_host(b_host, ni * nj); - - auto b_device = create_mirror(b_host); - deep_copy(b_device, b_host); - - PartitionedBlockMultiVector - x_device = create_partitioned_block_multi_vector( - ni * nj, nrhs, nk, blocksize); - if (test_tpl) { -#if defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_BATCHED__) && \ - defined(__KOKKOSBATCHED_ENABLE_INTEL_MKL_COMPACT_BATCHED__) - SolveBlockTridiagMatrices - solveblk; - { - Timer timer("50 SolveBlockTridiagMatrices"); - timer.reset(); - Kokkos::fence(); - for (ordinal_type i = 0; i < niter; ++i) { - solveblk.run(0, T_device, x_device, b_device); - dontopt += i; - } - Kokkos::fence(); - t_solve = timer.seconds(); - } - if (input.check) - TEST_ASSERT(solveblk.check(T_org_device, b_device), success); -#elif defined(__KOKKOSBATCHED_NVIDIA_CUBLAS__) - std::cout << "CUBLAS compact version does not exist\n"; -#else - std::cout << "TPLs (CompactMKL or CUBLAS) are not found\n"; -#endif - } else { - SolveBlockTridiagMatrices - solveblk; - { - Timer timer("50 SolveBlockTridiagMatrices"); - timer.reset(); - Kokkos::fence(); - for (ordinal_type i = 0; i < niter; ++i) { - solveblk.run(ops, T_device, x_device, b_device); - dontopt += i; - } - Kokkos::fence(); - t_solve = timer.seconds(); - } - if (input.check) - TEST_ASSERT(solveblk.check(T_org_device, b_device), success); - } - } - - const double t_matvec_per_iter = t_matvec / double(niter), - t_solve_per_iter = t_solve / double(niter); - std::cout 
<< " matvec = " << t_matvec_per_iter << std::endl; - std::cout << " extract = " << t_extract - << " extract/matvec = " << (t_extract / t_matvec_per_iter) - << std::endl; - // std::cout << " factor = " << t_factorize << " factor/matvec = " - // << (t_factorize/t_matvec_per_iter) << std::endl; - std::cout << " factor = " << t_factorize - << " factor/matvec = " << (t_factorize / t_matvec_per_iter) - << " flop = " << f_factorize - << " flop/s = " << (f_factorize / t_factorize) << std::endl; - std::cout << " solve = " << t_solve_per_iter - << " solve/matvec = " << (t_solve_per_iter / t_matvec_per_iter) - << std::endl; - std::cout << " memory used = " << (memsize_A + memsize_T) << std::endl; - - return dontopt + success; -} - -} // namespace Test -} // namespace KokkosBatched diff --git a/unit_test/sparse/Test_Sparse.hpp b/unit_test/sparse/Test_Sparse.hpp index e75eb1ce6a..5a240ce228 100644 --- a/unit_test/sparse/Test_Sparse.hpp +++ b/unit_test/sparse/Test_Sparse.hpp @@ -3,7 +3,6 @@ #include "Test_Sparse_block_gauss_seidel.hpp" #include "Test_Sparse_CrsMatrix.hpp" -#include "Test_Sparse_BlockCrsMatrix.hpp" #include "Test_Sparse_BsrMatrix.hpp" #include "Test_Sparse_findRelOffset.hpp" #include "Test_Sparse_gauss_seidel.hpp" @@ -16,7 +15,6 @@ #include "Test_Sparse_SortCrs.hpp" #include "Test_Sparse_spiluk.hpp" #include "Test_Sparse_spmv.hpp" -#include "Test_Sparse_spmv_blockcrs.hpp" #include "Test_Sparse_spmv_bsr.hpp" #include "Test_Sparse_sptrsv.hpp" #include "Test_Sparse_trsv.hpp" diff --git a/unit_test/sparse/Test_Sparse_BlockCrsMatrix.hpp b/unit_test/sparse/Test_Sparse_BlockCrsMatrix.hpp deleted file mode 100644 index 6eb4488c72..0000000000 --- a/unit_test/sparse/Test_Sparse_BlockCrsMatrix.hpp +++ /dev/null @@ -1,384 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). 
-// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -//#include "KokkosKernels_ETIHelperMacros.h" -#include -#include -#include -#include "KokkosSparse_BlockCrsMatrix.hpp" -#include "KokkosSparse_CrsMatrix.hpp" - -// #ifndef kokkos_complex_double -// #define kokkos_complex_double Kokkos::complex -// #define kokkos_complex_float Kokkos::complex -// #endif - -typedef Kokkos::complex kokkos_complex_double; -typedef Kokkos::complex kokkos_complex_float; - -namespace Test { // anonymous - -using std::cerr; -using std::endl; - -// Create a test sparse matrix A. -// -// Identify the matrix to create by number (whichMatrix). The -// following lists the valid options for whichMatrix: -// -// 0: A square 8 x 8 sparse CrsMatrix with implicit block structure -// 1: A square 4 x 4 sparse BlockCrsMatrix -// -// \param ptr [out] Array of row offsets, of length numRows+1. -// \param ind [out] Array of column indices, of length nnz (CrsMatrix) -// or numBlocks (BlockCrsMatrix). -// \param val [out] Array of entries (values), of length nnz. -// \param numRows [out] The number of rows in the matrix. -// \param numCols [out] The number of columns in the matrix. -// \param nnz [out] The number of stored entries in the matrix. -// \param whichMatrix [in] The index of the matrix to create. 
-template -void makeSparseMatrix( - typename sparseMat_t::StaticCrsGraphType::row_map_type::non_const_type &ptr, - typename sparseMat_t::StaticCrsGraphType::entries_type::non_const_type &ind, - typename sparseMat_t::values_type::non_const_type &val, - typename sparseMat_t::ordinal_type &numRows, - typename sparseMat_t::ordinal_type &numCols, - typename sparseMat_t::size_type &nnz, const int whichMatrix, - typename sparseMat_t::ordinal_type &blockDim) { - typedef typename sparseMat_t::StaticCrsGraphType::row_map_type::non_const_type - ptr_type; - typedef typename sparseMat_t::StaticCrsGraphType::entries_type::non_const_type - ind_type; - typedef typename sparseMat_t::values_type::non_const_type val_type; - typedef typename sparseMat_t::ordinal_type lno_t; - typedef typename sparseMat_t::size_type size_type; - typedef typename sparseMat_t::value_type scalar_t; - - using Kokkos::HostSpace; - using Kokkos::MemoryUnmanaged; - using Kokkos::View; - - if (whichMatrix == 0) { - numRows = 8; - numCols = 8; - nnz = 24; - blockDim = 1; - - const size_type ptrRaw[] = {0, 4, 8, 10, 12, 14, 16, 20, 24}; - const lno_t indRaw[] = {0, 1, 4, 5, 0, 1, 4, 5, 2, 3, 2, 3, - 4, 5, 4, 5, 2, 3, 6, 7, 2, 3, 6, 7}; - const scalar_t valRaw[] = {.1, 1, 4, 5, -.1, -1, -4, -5, 2, 3, -2, -3, - 4, 5, -4, -5, 2, 3, 6, 7, -2, -3, -6, -7}; - - // Create the output Views. - ptr = ptr_type("ptr", numRows + 1); - ind = ind_type("ind", nnz); - val = val_type("val", nnz); - - // Wrap the above three arrays in unmanaged Views, so we can use deep_copy. 
- typename ptr_type::HostMirror::const_type ptrIn(ptrRaw, numRows + 1); - typename ind_type::HostMirror::const_type indIn(indRaw, nnz); - typename val_type::HostMirror::const_type valIn(valRaw, nnz); - - Kokkos::deep_copy(ptr, ptrIn); - Kokkos::deep_copy(ind, indIn); - Kokkos::deep_copy(val, valIn); - } else if (whichMatrix == 1) { - numRows = 4; - numCols = 4; - nnz = 24; - - blockDim = 2; - const lno_t numBlocks = 6; - - const size_type ptrRaw[] = {0, 2, 3, 4, 6}; - const lno_t indRaw[] = {0, 2, 1, 2, 1, 3}; - const scalar_t valRaw[] = {.1, 1, 4, 5, -.1, -1, -4, -5, 2, 3, -2, -3, - 4, 5, -4, -5, 2, 3, 6, 7, -2, -3, -6, -7}; - - // Create the output Views. - ptr = ptr_type("ptr", numRows + 1); - ind = ind_type("ind", numBlocks); - val = val_type("val", nnz); - - // Wrap the above three arrays in unmanaged Views, so we can use deep_copy. - typename ptr_type::HostMirror::const_type ptrIn(ptrRaw, numRows + 1); - typename ind_type::HostMirror::const_type indIn(indRaw, numBlocks); - typename val_type::HostMirror::const_type valIn(valRaw, nnz); - - Kokkos::deep_copy(ptr, ptrIn); - Kokkos::deep_copy(ind, indIn); - Kokkos::deep_copy(val, valIn); - } - - else { // whichMatrix != 0 - std::ostringstream os; - os << "Invalid whichMatrix value " << whichMatrix - << ". Valid value(s) include " << 0 << "."; - throw std::invalid_argument(os.str()); - } -} - -// Return the Kokkos::CrsMatrix corresponding to makeSparseMatrix(). 
-template -crsMat_t makeCrsMatrix_BlockStructure() { - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type::non_const_type lno_view_t; - typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t; - typedef typename crsMat_t::values_type::non_const_type scalar_view_t; - typedef typename crsMat_t::ordinal_type lno_t; - typedef typename crsMat_t::size_type size_type; - - lno_view_t ptr; - lno_nnz_view_t ind; - scalar_view_t val; - lno_t numRows; - lno_t numCols; - size_type nnz; - lno_t blockDim; - - const int whichMatrix = 0; - makeSparseMatrix(ptr, ind, val, numRows, numCols, nnz, whichMatrix, - blockDim); - return crsMat_t("A", numRows, numCols, nnz, val, ptr, ind); -} - -template -blkcrsMat_t makeBlockCrsMatrix() { - typedef typename blkcrsMat_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type::non_const_type lno_view_t; - typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t; - typedef typename blkcrsMat_t::values_type::non_const_type scalar_view_t; - typedef typename blkcrsMat_t::ordinal_type lno_t; - typedef typename blkcrsMat_t::size_type size_type; - - lno_view_t ptr; - lno_nnz_view_t ind; - scalar_view_t val; - lno_t numRows; - lno_t numCols; - size_type nnz; - lno_t blockDim; - - const int whichMatrix = 1; - makeSparseMatrix(ptr, ind, val, numRows, numCols, nnz, - whichMatrix, blockDim); - return blkcrsMat_t("blkA", numRows, numCols, nnz, val, ptr, ind, blockDim); -} - -template -struct TestFunctor { - typedef typename MatrixType::value_type scalar_t; - typedef typename MatrixType::ordinal_type lno_t; - - // Members - MatrixType A; - ResultsType d_results; - - // Constructor - TestFunctor(MatrixType &A_, ResultsType &d_results_) - : A(A_), d_results(d_results_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const int /*rid*/) const { - // Test 1: Check member functions behave as expected - bool check0 = true; - bool check1 = true; - bool check2 = true; - bool 
check3 = true; - for (lno_t i = 0; i < A.numRows(); ++i) { - // Test SparseBlockRowView - { - auto iblockrow = A.block_row(i); - auto num_blocks_in_row = iblockrow.length; - for (auto blk = 0; blk < num_blocks_in_row; ++blk) { - auto view_blk = iblockrow.block(blk); - for (auto lrow = 0; lrow < A.blockDim(); ++lrow) { - auto row_ptr = iblockrow.local_row_in_block(blk, lrow); - for (auto lcol = 0; lcol < A.blockDim(); ++lcol) { - auto entry = iblockrow.local_block_value(blk, lrow, lcol); - // std::cout << "check0: " << ( entry == row_ptr[lcol] ); - // std::cout << "check1: " << ( entry == view_blk(lrow,lcol) ); - check0 = check0 && (entry == row_ptr[lcol]); - check1 = check1 && (entry == view_blk(lrow, lcol)); - } // end local col in row - } // end local row in blk - } // end blk - } - d_results(0) = check0; - d_results(1) = check1; - - // Test SparseBlockRowViewConst - { - auto iblockrow = A.block_row_Const(i); - auto num_blocks_in_row = iblockrow.length; - for (auto blk = 0; blk < num_blocks_in_row; ++blk) { - auto view_blk = iblockrow.block(blk); - for (auto lrow = 0; lrow < A.blockDim(); ++lrow) { - auto row_ptr = iblockrow.local_row_in_block(blk, lrow); - for (auto lcol = 0; lcol < A.blockDim(); ++lcol) { - auto entry = iblockrow.local_block_value(blk, lrow, lcol); - check2 = check2 && (entry == row_ptr[lcol]); - check3 = check3 && (entry == view_blk(lrow, lcol)); - } // end local col in row - } // end local row in blk - } // end blk - } - d_results(0) = check0; - d_results(1) = check1; - d_results(2) = check2; - d_results(3) = check3; - } // end for blk rows - - // Test sumIntoValues - { - check0 = true; - check1 = true; - check2 = true; - const lno_t ncols = 1; - const lno_t cols[] = {3}; - const lno_t browi = 3; - const scalar_t vals[] = { - 10, 11, 20, 22}; // represents a single block: [10 11; 20 22] - const scalar_t result[] = {16, 18, 14, 15}; - - // This block will be summed into the existing block [6 7; -6 -7] - // Expected result: [16 18; 14 15] - 
A.sumIntoValues(browi, cols, ncols, vals); - auto iblockrow = A.block_row_Const(browi); - auto relBlk = iblockrow.findRelBlockOffset(cols[0]); - auto view_blk = iblockrow.block(relBlk); - for (auto lrow = 0; lrow < A.blockDim(); ++lrow) { - auto row_ptr = iblockrow.local_row_in_block(relBlk, lrow); - for (auto lcol = 0; lcol < A.blockDim(); ++lcol) { - auto entry = iblockrow.local_block_value(relBlk, lrow, lcol); - check0 = check0 && (entry == row_ptr[lcol]); - check1 = check1 && (entry == view_blk(lrow, lcol)); - check2 = check2 && (entry == result[lrow * A.blockDim() + lcol]); - } // end local col in row - } // end local row in blk - d_results(4) = check0; - d_results(5) = check1; - d_results(6) = check2; - } - - // Test replaceValues - { - check0 = true; - check1 = true; - check2 = true; - const lno_t ncols = 1; - const lno_t cols[] = {3}; - const lno_t browi = 3; - const scalar_t valsreplace[] = { - -10, -11, -20, -22}; // represents a single block: [10 11; 20 22] - - // The existing block to be replaced was: [6 7; -6 -7] - A.replaceValues(browi, cols, ncols, valsreplace); - - auto iblockrow = A.block_row_Const(browi); - auto relBlk = iblockrow.findRelBlockOffset(cols[0]); - auto view_blk = iblockrow.block(relBlk); - for (auto lrow = 0; lrow < A.blockDim(); ++lrow) { - auto row_ptr = iblockrow.local_row_in_block(relBlk, lrow); - for (auto lcol = 0; lcol < A.blockDim(); ++lcol) { - auto entry = iblockrow.local_block_value(relBlk, lrow, lcol); - check0 = check0 && (entry == row_ptr[lcol]); - check1 = check1 && (entry == view_blk(lrow, lcol)); - check2 = check2 && (entry == valsreplace[lrow * A.blockDim() + lcol]); - } // end local col in row - } // end local row in blk - d_results(7) = check0; - d_results(8) = check1; - d_results(9) = check2; - } - - } // end operator()(i) -}; // end TestFunctor - -} // namespace Test - -// Create a CrsMatrix and BlockCrsMatrix and test member functions. 
-template -void testBlockCrsMatrix() { - using namespace Test; - - typedef KokkosSparse::CrsMatrix - crs_matrix_type; - typedef KokkosSparse::Experimental::BlockCrsMatrix - block_crs_matrix_type; - - crs_matrix_type crsA = makeCrsMatrix_BlockStructure(); - block_crs_matrix_type A = makeBlockCrsMatrix(); - - const int num_entries = 10; - typedef Kokkos::View result_view_type; - result_view_type d_results("d_results"); - auto h_results = Kokkos::create_mirror_view(d_results); - - Kokkos::parallel_for( - "KokkosSparse::Test::BlockCrsMatrix", - Kokkos::RangePolicy(0, 1), - Test::TestFunctor(A, d_results)); - - Kokkos::deep_copy(h_results, d_results); - - for (decltype(h_results.extent(0)) i = 0; i < h_results.extent(0); ++i) { - EXPECT_EQ(h_results[i], true); - } -} - -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##blkcrsmatrix##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - testBlockCrsMatrix(); \ - } - -#include - -#undef KOKKOSKERNELS_EXECUTE_TEST diff --git a/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp b/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp index 0f4c9b0d67..af04e44578 100644 --- a/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp +++ b/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp @@ -212,7 +212,7 @@ void test_block_gauss_seidel_rank1(lno_t numRows, size_type nnz, // this makes consecutive 5 rows to have same columns. // it will add scalar 0's for those entries that does not exists. // the result is still a point crs matrix. 
- KokkosKernels::Impl::kk_create_blockcrs_formated_point_crsmatrix( + KokkosKernels::Impl::kk_create_bsr_formated_point_crsmatrix( block_size, crsmat.numRows(), crsmat.numCols(), crsmat.graph.row_map, crsmat.graph.entries, crsmat.values, out_r, out_c, pf_rm, pf_e, pf_v); graph_t static_graph2(pf_e, pf_rm); @@ -220,7 +220,7 @@ void test_block_gauss_seidel_rank1(lno_t numRows, size_type nnz, // this converts the previous generated matrix to block matrix. auto input_mat = - MatrixConverter::from_blockcrs_formated_point_crsmatrix( + MatrixConverter::from_bsr_formated_point_crsmatrix( crsmat2, block_size); lno_t nv = ((crsmat2.numRows() + block_size - 1) / block_size) * block_size; @@ -300,14 +300,14 @@ void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, // this makes consecutive 5 rows to have same columns. // it will add scalar 0's for those entries that does not exists. // the result is still a point crs matrix. - KokkosKernels::Impl::kk_create_blockcrs_formated_point_crsmatrix( + KokkosKernels::Impl::kk_create_bsr_formated_point_crsmatrix( block_size, crsmat.numRows(), crsmat.numCols(), crsmat.graph.row_map, crsmat.graph.entries, crsmat.values, out_r, out_c, pf_rm, pf_e, pf_v); graph_t static_graph2(pf_e, pf_rm); crsMat_t crsmat2("CrsMatrix2", out_c, pf_v, static_graph2); auto input_mat = - MatrixConverter::from_blockcrs_formated_point_crsmatrix( + MatrixConverter::from_bsr_formated_point_crsmatrix( crsmat2, block_size); lno_t nv = ((crsmat2.numRows() + block_size - 1) / block_size) * block_size; @@ -420,20 +420,20 @@ void test_block_gauss_seidel_empty() { #define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ TEST_F( \ TestCategory, \ - sparse_blockcrs_gauss_seidel_rank1##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_block_gauss_seidel_rank1( \ + sparse_bsr_gauss_seidel_rank1##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_block_gauss_seidel_rank1( \ 500, 500 * 10, 70, 3); \ } \ TEST_F( \ TestCategory, \ - 
sparse_blockcrs_gauss_seidel_rank2_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_block_gauss_seidel_rank2( \ + sparse_bsr_gauss_seidel_rank2_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_block_gauss_seidel_rank2( \ 500, 500 * 10, 70, 3); \ } \ TEST_F( \ TestCategory, \ - sparse_blockcrs_gauss_seidel_empty_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_block_gauss_seidel_empty(); \ } \ TEST_F( \ diff --git a/unit_test/sparse/Test_Sparse_spmv_blockcrs.hpp b/unit_test/sparse/Test_Sparse_spmv_blockcrs.hpp deleted file mode 100644 index a96af6973e..0000000000 --- a/unit_test/sparse/Test_Sparse_spmv_blockcrs.hpp +++ /dev/null @@ -1,527 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include -#include -#include -#include "KokkosSparse_spmv.hpp" -#include "KokkosSparse_BlockCrsMatrix.hpp" -#include "KokkosSparse_CrsMatrix.hpp" - -#include -#include -#include -#include - -#include "KokkosKernels_Controls.hpp" -#include "KokkosKernels_default_types.hpp" - -typedef Kokkos::complex kokkos_complex_double; -typedef Kokkos::complex kokkos_complex_float; - -namespace Test_BlockCrs { - -/// Random generator -template -inline Scalar random() { - auto const max = static_cast(RAND_MAX) + static_cast(1); - return static_cast(std::rand()) / max; -} - -template -inline void set_random_value(Scalar &v) { - v = random(); -} - -template -inline void set_random_value(Kokkos::complex &v) { - Scalar vre = random(); - Scalar vim = random(); - v = Kokkos::complex(vre, vim); -} - -template -inline void set_random_value(std::complex &v) { - Scalar vre = random(); - Scalar vim = random(); - v = std::complex(vre, vim); -} - -/// \brief Driver routine for checking BlockCrsMatrix times vector -template -void check_blockcrs_times_v(const char fOp[], scalar_t alpha, scalar_t beta, - const lno_t bMax, int &num_errors) { - // The mat_structure view is used to generate a matrix using - // finite difference (FD) or finite element (FE) discretization - // on a cartesian grid. 
- Kokkos::View mat_structure("Matrix Structure", - 3); - mat_structure(0, 0) = 8; // Request 8 grid point in 'x' direction - mat_structure(0, 1) = 0; // Add BC to the left - mat_structure(0, 2) = 0; // Add BC to the right - mat_structure(1, 0) = 7; // Request 7 grid point in 'y' direction - mat_structure(1, 1) = 0; // Add BC to the bottom - mat_structure(1, 2) = 0; // Add BC to the top - mat_structure(2, 0) = 9; // Request 9 grid point in 'z' direction - mat_structure(2, 1) = 0; // Add BC to the bottom - mat_structure(2, 2) = 0; // Add BC to the top - - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; - typedef typename KokkosSparse::CrsMatrix - h_crsMat_t; - typedef typename crsMat_t::values_type::non_const_type scalar_view_t; - typedef scalar_view_t x_vector_type; - typedef scalar_view_t y_vector_type; - - h_crsMat_t mat_b1 = - Test::generate_structured_matrix3D("FD", mat_structure); - - num_errors = 0; - for (lno_t blockSize = 1; blockSize <= bMax; ++blockSize) { - lno_t nRow = blockSize * mat_b1.numRows(); - lno_t nCol = blockSize * mat_b1.numCols(); - size_type nnz = static_cast(blockSize) * - static_cast(blockSize) * mat_b1.nnz(); - - // - // Create graph for CrsMatrix - // - - Kokkos::View d_rowmap("crsmatrix", nRow + 1); - auto h_rowmap = Kokkos::create_mirror_view(d_rowmap); - - Kokkos::View d_colidx("crsmatrix", nnz); - auto h_colidx = Kokkos::create_mirror_view(d_colidx); - - Kokkos::View d_matval("crsmatrix", nnz); - auto h_matval = Kokkos::create_mirror_view(d_matval); - - for (size_type ii = 0; ii < nnz; ++ii) set_random_value(h_matval[ii]); - - for (lno_t ir = 0; ir < mat_b1.numRows(); ++ir) { - const size_type jbeg = mat_b1.graph.row_map(ir); - const size_type jend = mat_b1.graph.row_map(ir + 1); - for (lno_t ib = 0; ib < blockSize; ++ib) { - const lno_t my_row = ir * blockSize + ib; - h_rowmap[my_row + 1] = h_rowmap[my_row] + (jend - jbeg) * blockSize; - for (size_type ijk = jbeg; ijk < jend; ++ijk) { - const auto col0 = 
mat_b1.graph.entries(ijk); - for (lno_t jb = 0; jb < blockSize; ++jb) { - h_colidx[h_rowmap[my_row] + (ijk - jbeg) * blockSize + jb] = - col0 * blockSize + jb; - } - } - } - } // for (lno_t ir = 0; ir < mat_b1.numRows(); ++ir) - - Kokkos::deep_copy(d_matval, h_matval); - Kokkos::deep_copy(d_colidx, h_colidx); - Kokkos::deep_copy(d_rowmap, h_rowmap); - - // Create the CrsMatrix for the reference computation - crsMat_t Acrs("new_crs_matr", nRow, nCol, nnz, d_matval, d_rowmap, - d_colidx); - - x_vector_type xref("new_right_hand_side", nRow); - auto h_xref = Kokkos::create_mirror_view(xref); - for (lno_t ir = 0; ir < nRow; ++ir) { - set_random_value(h_xref(ir)); - } - Kokkos::deep_copy(xref, h_xref); - - y_vector_type y0("y_init", nRow); - auto h_y0 = Kokkos::create_mirror_view(y0); - for (lno_t ir = 0; ir < nRow; ++ir) set_random_value(h_y0(ir)); - Kokkos::deep_copy(y0, h_y0); - - y_vector_type ycrs("crs_product_result", nRow); - auto h_ycrs = Kokkos::create_mirror_view(ycrs); - for (lno_t ir = 0; ir < nRow; ++ir) h_ycrs(ir) = h_y0(ir); - Kokkos::deep_copy(ycrs, h_ycrs); - - // Compute the reference product - KokkosSparse::spmv(fOp, alpha, Acrs, xref, beta, ycrs); - - y_vector_type ybcrs("bcrs_product_result", nRow); - auto h_ybcrs = Kokkos::create_mirror_view(ybcrs); - for (lno_t ir = 0; ir < nRow; ++ir) h_ybcrs(ir) = h_y0(ir); - Kokkos::deep_copy(ybcrs, h_ybcrs); - - // Create the BlockCrsMatrix - KokkosSparse::Experimental::BlockCrsMatrix - Abcrs(Acrs, blockSize); - - // Compute the product with the BlockCrsMatrix format - KokkosSparse::spmv(fOp, alpha, Abcrs, xref, beta, ybcrs); - - // Compare the two products - using KATS = Kokkos::ArithTraits; - using mag_type = typename KATS::mag_type; - - const mag_type zero_mag = Kokkos::ArithTraits::zero(); - mag_type error = zero_mag, maxNorm = zero_mag; - - Kokkos::deep_copy(h_ycrs, ycrs); - Kokkos::deep_copy(h_ybcrs, ybcrs); - for (lno_t ir = 0; ir < nRow; ++ir) { - error = std::max(error, KATS::abs(h_ycrs(ir) - 
h_ybcrs(ir))); - maxNorm = std::max(maxNorm, KATS::abs(h_ycrs(ir))); - } - - mag_type tmps = KATS::abs(alpha) + KATS::abs(beta); - if ((tmps > zero_mag) && (maxNorm == zero_mag)) { - std::cout << " BlockCRSMatrix - SpMV times V >> blockSize " << blockSize - << " maxNorm " << maxNorm << " error " << error << " alpha " - << alpha << " beta " << beta << "\n"; - num_errors += 1; - } - - // - // --- Factor ((nnz / nRow) + 1) = Average number of non-zeros per row - // - const mag_type tol = ((static_cast(nnz) / nRow) + 1) * - Kokkos::ArithTraits::epsilon(); - if (error > tol * maxNorm) { - std::cout << " BlockCRSMatrix - SpMV times V >> blockSize " << blockSize - << " ratio " << error / maxNorm << " tol " << tol << " maxNorm " - << maxNorm << " alpha " << alpha << " beta " << beta << "\n"; - num_errors += 1; - } - - } // for (int blockSize = 1; blockSize <= bMax; ++blockSize) -} - -/// \brief Driver routine for checking BlockCrsMatrix times multiple vector -template -void check_blockcrs_times_mv(const char fOp[], scalar_t alpha, scalar_t beta, - const lno_t bMax, int &num_errors) { - // The mat_structure view is used to generate a matrix using - // finite difference (FD) or finite element (FE) discretization - // on a cartesian grid. 
- Kokkos::View mat_structure("Matrix Structure", - 3); - mat_structure(0, 0) = 7; // Request 7 grid point in 'x' direction - mat_structure(0, 1) = 0; // Add BC to the left - mat_structure(0, 2) = 0; // Add BC to the right - mat_structure(1, 0) = 5; // Request 11 grid point in 'y' direction - mat_structure(1, 1) = 0; // Add BC to the bottom - mat_structure(1, 2) = 0; // Add BC to the top - mat_structure(2, 0) = 9; // Request 13 grid point in 'y' direction - mat_structure(2, 1) = 0; // Add BC to the bottom - mat_structure(2, 2) = 0; // Add BC to the top - - typedef typename KokkosSparse::CrsMatrix - h_crsMat_t; - typedef - typename KokkosSparse::CrsMatrix - crsMat_t; - typedef Kokkos::View block_vector_t; - - h_crsMat_t mat_b1 = - Test::generate_structured_matrix3D("FD", mat_structure); - - num_errors = 0; - const int nrhs = 5; - - for (lno_t blockSize = 1; blockSize <= bMax; ++blockSize) { - // - // Fill blocks with random values - // - - lno_t nRow = blockSize * mat_b1.numRows(); - lno_t nCol = blockSize * mat_b1.numCols(); - size_type nnz = static_cast(blockSize) * - static_cast(blockSize) * mat_b1.nnz(); - - Kokkos::View d_rowmap("crsmatrix", nRow + 1); - auto h_rowmap = Kokkos::create_mirror_view(d_rowmap); - - Kokkos::View d_colidx("crsmatrix", nnz); - auto h_colidx = Kokkos::create_mirror_view(d_colidx); - - Kokkos::View d_matval("crsmatrix", nnz); - auto h_matval = Kokkos::create_mirror_view(d_matval); - - for (size_type ii = 0; ii < nnz; ++ii) set_random_value(h_matval[ii]); - - for (lno_t ir = 0; ir < mat_b1.numRows(); ++ir) { - const size_type jbeg = mat_b1.graph.row_map(ir); - const size_type jend = mat_b1.graph.row_map(ir + 1); - for (lno_t ib = 0; ib < blockSize; ++ib) { - const lno_t my_row = ir * blockSize + ib; - h_rowmap[my_row + 1] = h_rowmap[my_row] + (jend - jbeg) * blockSize; - for (size_type ijk = jbeg; ijk < jend; ++ijk) { - const auto col0 = mat_b1.graph.entries(ijk); - for (lno_t jb = 0; jb < blockSize; ++jb) { - h_colidx[h_rowmap[my_row] + 
(ijk - jbeg) * blockSize + jb] = - col0 * blockSize + jb; - } - } - } - } // for (lno_t ir = 0; ir < mat_b1.numRows(); ++ir) - - Kokkos::deep_copy(d_matval, h_matval); - Kokkos::deep_copy(d_colidx, h_colidx); - Kokkos::deep_copy(d_rowmap, h_rowmap); - - // Create the CrsMatrix for the reference computation - crsMat_t Acrs("new_crs_matr", nRow, nCol, nnz, d_matval, d_rowmap, - d_colidx); - - block_vector_t xref("new_right_hand_side", nRow, nrhs); - auto h_xref = Kokkos::create_mirror_view(xref); - for (int jc = 0; jc < nrhs; ++jc) - for (lno_t ir = 0; ir < nRow; ++ir) set_random_value(h_xref(ir, jc)); - Kokkos::deep_copy(xref, h_xref); - - block_vector_t y0("y_init", nRow, nrhs); - auto h_y0 = Kokkos::create_mirror_view(y0); - for (int jc = 0; jc < nrhs; ++jc) - for (lno_t ir = 0; ir < nRow; ++ir) set_random_value(h_y0(ir, jc)); - Kokkos::deep_copy(y0, h_y0); - - block_vector_t ycrs("crs_product_result", nRow, nrhs); - auto h_ycrs = Kokkos::create_mirror_view(ycrs); - for (int jc = 0; jc < nrhs; ++jc) - for (lno_t ir = 0; ir < nRow; ++ir) h_ycrs(ir, jc) = h_y0(ir, jc); - Kokkos::deep_copy(ycrs, h_ycrs); - - KokkosSparse::spmv(fOp, alpha, Acrs, xref, beta, ycrs); - - block_vector_t ybcrs("bcrs_product_result", nRow, nrhs); - auto h_ybcrs = Kokkos::create_mirror_view(ybcrs); - for (int jc = 0; jc < nrhs; ++jc) - for (lno_t ir = 0; ir < nRow; ++ir) h_ybcrs(ir, jc) = h_y0(ir, jc); - Kokkos::deep_copy(ybcrs, h_ybcrs); - - // Create the BlockCrsMatrix - KokkosSparse::Experimental::BlockCrsMatrix - Abcrs(Acrs, blockSize); - - // Compute the product for the BlockCrsMatrix format - KokkosSparse::spmv(fOp, alpha, Abcrs, xref, beta, ybcrs); - - Kokkos::deep_copy(h_ycrs, ycrs); - Kokkos::deep_copy(h_ybcrs, ybcrs); - - // Compare the two products - using KATS = Kokkos::ArithTraits; - using mag_type = typename KATS::mag_type; - - const mag_type zero_mag = Kokkos::ArithTraits::zero(); - mag_type error = zero_mag, maxNorm = zero_mag; - - for (int jc = 0; jc < nrhs; ++jc) { - for 
(int ir = 0; ir < nRow; ++ir) { - error = std::max(error, - KATS::abs(h_ycrs(ir, jc) - h_ybcrs(ir, jc))); - maxNorm = std::max(maxNorm, KATS::abs(h_ycrs(ir, jc))); - } - } - - const mag_type tmps = KATS::abs(alpha) + KATS::abs(beta); - if ((tmps > zero_mag) && (maxNorm == zero_mag)) { - std::cout << " BlockCRSMatrix - SpMV times MV >> blockSize " << blockSize - << " maxNorm " << maxNorm << " error " << error << " alpha " - << alpha << " beta " << beta << "\n"; - num_errors += 1; - } - - const mag_type tol = ((static_cast(nnz) / nRow) + 1) * - Kokkos::ArithTraits::epsilon(); - - if (error > tol * maxNorm) { - std::cout << " BlockCRSMatrix - SpMV times MV >> blockSize " << blockSize - << " ratio " << error / maxNorm << " tol " << tol << " maxNorm " - << maxNorm << " alpha " << alpha << " beta " << beta << "\n"; - num_errors += 1; - } - - } // for (int blockSize = 1; blockSize <= bMax; ++blockSize) -} - -} // namespace Test_BlockCrs - -template -void testSpMVBlockCrsMatrix() { - // - // Test for the operation y <- alpha * Op(A) * x + beta * y - // - - // Define the function Op: Op(A) = A, Op(A) = conj(A), Op(A) = A^T, Op(A) = - // A^H - std::vector modes = {'N', 'C', 'T', 'H'}; - - // Define a set of pairs (alpha, beta) - std::vector testAlphaBeta = {0.0, 0.0, -1.0, 0.0, - 0.0, 1.0, 3.1, -2.5}; - - // - // Set the largest block size for the block matrix - // The code will create matrices with block sizes 1, .., bMax - // - const lno_t bMax = 13; - - //--- Test single vector case - for (const auto mode : modes) { - int num_errors = 0; - for (size_t ii = 0; ii < testAlphaBeta.size(); ii += 2) { - auto alpha_s = static_cast(testAlphaBeta[ii]); - auto beta_s = static_cast(testAlphaBeta[ii + 1]); - num_errors = 0; - Test_BlockCrs::check_blockcrs_times_v( - &mode, alpha_s, beta_s, bMax, num_errors); - if (num_errors > 0) { - printf( - "KokkosSparse::Test::spmv_blockcrs: %i errors of %i with params: " - "%c %lf %lf\n", - num_errors, bMax, mode, 
Kokkos::ArithTraits::abs(alpha_s), - Kokkos::ArithTraits::abs(beta_s)); - } - EXPECT_TRUE(num_errors == 0); - } - } -} - -template -void testBlockCrsMatrix_SpM_MV() { - // - // Test for the operation Y <- alpha * Op(A) * X + beta * Y - // - - // Define the function Op: Op(A) = A, Op(A) = conj(A), Op(A) = A^T, Op(A) = - // A^H - std::vector modes = {'N', 'C', 'T', 'H'}; - - // Define a set of pairs (alpha, beta) - std::vector testAlphaBeta = {0.0, 0.0, -1.0, 0.0, - 0.0, 1.0, 3.1, -2.5}; - - // - // Set the largest block size for the block matrix - // The code will create matrices with block sizes 1, .., bMax - // - const lno_t bMax = 13; - - //--- Test multiple vector case - for (auto mode : modes) { - int num_errors = 0; - for (size_t ii = 0; ii < testAlphaBeta.size(); ii += 2) { - auto alpha_s = static_cast(testAlphaBeta[ii]); - auto beta_s = static_cast(testAlphaBeta[ii + 1]); - num_errors = 0; - Test_BlockCrs::check_blockcrs_times_mv(&mode, alpha_s, beta_s, - bMax, num_errors); - if (num_errors > 0) { - printf( - "KokkosSparse::Test::spm_mv_blockcrs: %i errors of %i with params: " - "%c %lf %lf\n", - num_errors, bMax, mode, Kokkos::ArithTraits::abs(alpha_s), - Kokkos::ArithTraits::abs(beta_s)); - } - EXPECT_TRUE(num_errors == 0); - } - } -} - -////////////////////////// - -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##bcrs_times_vec##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - testSpMVBlockCrsMatrix(); \ - } - -#include - -#undef KOKKOSKERNELS_EXECUTE_TEST - -////////////////////////// - -#define EXECUTE_BCRS_TIMES_MVEC_TEST(SCALAR, ORDINAL, OFFSET, LAYOUT, DEVICE) \ - TEST_F( \ - TestCategory, \ - sparse##_##bcrs_times_multivec##_##SCALAR##_##ORDINAL##_##OFFSET##_##LAYOUT##_##DEVICE) { \ - testBlockCrsMatrix_SpM_MV(); \ - } - -#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) - -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - EXECUTE_BCRS_TIMES_MVEC_TEST(SCALAR, ORDINAL, 
OFFSET, LayoutLeft, \ - TestExecSpace) - -#include - -#undef KOKKOSKERNELS_EXECUTE_TEST - -#endif // KOKKOSKERNELS_INST_LAYOUTLEFT - -#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) - -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - EXECUTE_BCRS_TIMES_MVEC_TEST(SCALAR, ORDINAL, OFFSET, LayoutRight, \ - TestExecSpace) - -#include - -#undef KOKKOSKERNELS_EXECUTE_TEST - -#endif // KOKKOSKERNELS_INST_LAYOUTRIGHT - -#undef EXECUTE_BCRS_TIMES_MVEC_TEST