From 0104ec1986d5614e1e0243232db69ab5dc9ef043 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Thu, 2 Jun 2022 16:59:54 -0600 Subject: [PATCH 1/5] common clean-up: removing sparse and graph features from common A lot of things in the common folder are actually purely sparse and/or graph related. This clean-up is necessary ahead of the change of directory structure and to allow modular compilation of the library. --- example/gmres/ex_real_A.cpp | 4 +- example/gmres/test_cmplx_A.cpp | 3 +- example/gmres/test_prec.cpp | 5 +- example/gmres/test_real_A.cpp | 3 +- .../sparse/KokkosSparse_wiki_gauss_seidel.cpp | 3 +- perf_test/graph/KokkosGraph_color.cpp | 5 +- perf_test/graph/KokkosGraph_color_d2.cpp | 3 +- perf_test/graph/KokkosGraph_mis_d2.cpp | 3 +- perf_test/sparse/KokkosSparse_gs.cpp | 5 +- perf_test/sparse/KokkosSparse_kk_spmv.cpp | 5 +- .../sparse/KokkosSparse_multimem_spgemm.hpp | 17 +- perf_test/sparse/KokkosSparse_pcg.cpp | 3 +- perf_test/sparse/KokkosSparse_run_spgemm.hpp | 6 +- .../sparse/KokkosSparse_run_spgemm_jacobi.hpp | 23 +- perf_test/sparse/KokkosSparse_spadd.cpp | 4 +- perf_test/sparse/KokkosSparse_spiluk.cpp | 5 +- perf_test/sparse/KokkosSparse_spmv.cpp | 5 +- perf_test/sparse/KokkosSparse_sptrsv.cpp | 7 +- .../sparse/KokkosSparse_sptrsv_supernode.cpp | 5 +- src/common/KokkosKernels_IOUtils.hpp | 1252 ---------------- src/common/KokkosKernels_Sorting.hpp | 577 -------- src/common/KokkosKernels_Utils.hpp | 2 +- src/graph/KokkosGraph_ExplicitCoarsening.hpp | 10 +- .../tpls/KokkosKernels_tpl_handles_decl.hpp | 2 +- .../tpls/KokkosKernels_tpl_handles_def.hpp | 2 +- ...kosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp | 4 +- .../tpls/KokkosSparse_spmv_tpl_spec_decl.hpp | 6 +- .../KokkosKernels_Controls.hpp | 0 .../KokkosKernels_Handle.hpp | 0 src/sparse/KokkosSparse_IOUtils.hpp | 1270 +++++++++++++++++ src/sparse/KokkosSparse_SortCrs.hpp | 725 ++++++++++ .../KokkosSparse_Utils.hpp} | 0 .../KokkosSparse_Utils_cusparse.hpp} | 0 
.../KokkosSparse_Utils_mkl.hpp} | 0 .../KokkosSparse_Utils_rocsparse.hpp} | 0 src/sparse/KokkosSparse_sptrsv_cholmod.hpp | 2 +- src/sparse/KokkosSparse_sptrsv_supernode.hpp | 4 +- .../impl/KokkosSparse_gauss_seidel_impl.hpp | 6 +- .../impl/KokkosSparse_spadd_symbolic_impl.hpp | 6 +- src/sparse/impl/KokkosSparse_spgemm_impl.hpp | 2 +- .../impl/KokkosSparse_spgemm_mkl_impl.hpp | 2 +- ...okkosSparse_twostage_gauss_seidel_impl.hpp | 8 +- unit_test/common/Test_Common.hpp | 1 - unit_test/common/Test_Common_Sorting.hpp | 247 ---- unit_test/graph/Test_Graph_graph_color.hpp | 6 +- .../Test_Graph_graph_color_deterministic.hpp | 2 +- .../Test_Graph_graph_color_distance2.hpp | 10 +- unit_test/graph/Test_Graph_mis2.hpp | 7 +- unit_test/sparse/Test_Sparse.hpp | 2 + unit_test/sparse/Test_Sparse_SortCrs.hpp | 311 ++++ .../Test_Sparse_Transpose.hpp} | 11 +- .../sparse/Test_Sparse_Utils_cusparse.hpp | 2 +- .../sparse/Test_Sparse_block_gauss_seidel.hpp | 7 +- unit_test/sparse/Test_Sparse_bspgemm.hpp | 13 +- unit_test/sparse/Test_Sparse_gauss_seidel.hpp | 17 +- unit_test/sparse/Test_Sparse_rocsparse.hpp | 2 +- unit_test/sparse/Test_Sparse_spgemm.hpp | 13 +- .../sparse/Test_Sparse_spgemm_jacobi.hpp | 11 +- unit_test/sparse/Test_Sparse_spiluk.hpp | 2 +- unit_test/sparse/Test_Sparse_spmv.hpp | 9 +- unit_test/sparse/Test_Sparse_sptrsv.hpp | 2 +- unit_test/sparse/Test_Sparse_trsv.hpp | 5 +- 62 files changed, 2465 insertions(+), 2209 deletions(-) rename src/{common => sparse}/KokkosKernels_Controls.hpp (100%) rename src/{common => sparse}/KokkosKernels_Handle.hpp (100%) create mode 100644 src/sparse/KokkosSparse_IOUtils.hpp create mode 100644 src/sparse/KokkosSparse_SortCrs.hpp rename src/{common/KokkosKernels_SparseUtils.hpp => sparse/KokkosSparse_Utils.hpp} (100%) rename src/{common/KokkosKernels_SparseUtils_cusparse.hpp => sparse/KokkosSparse_Utils_cusparse.hpp} (100%) rename src/{common/KokkosKernels_SparseUtils_mkl.hpp => sparse/KokkosSparse_Utils_mkl.hpp} (100%) rename 
src/{common/KokkosKernels_SparseUtils_rocsparse.hpp => sparse/KokkosSparse_Utils_rocsparse.hpp} (100%) create mode 100644 unit_test/sparse/Test_Sparse_SortCrs.hpp rename unit_test/{common/Test_Common_Transpose.hpp => sparse/Test_Sparse_Transpose.hpp} (95%) diff --git a/example/gmres/ex_real_A.cpp b/example/gmres/ex_real_A.cpp index 1e3ba19585..b3e95605f7 100644 --- a/example/gmres/ex_real_A.cpp +++ b/example/gmres/ex_real_A.cpp @@ -43,7 +43,7 @@ */ #include -#include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include #include #include @@ -117,7 +117,7 @@ int main(int argc, char* argv[]) { { // Read in a matrix Market file and use it to test the Kokkos Operator. KokkosSparse::CrsMatrix A = - KokkosKernels::Impl::read_kokkos_crst_matrix< + KokkosSparse::Impl::read_kokkos_crst_matrix< KokkosSparse::CrsMatrix>(filename.c_str()); int n = A.numRows(); diff --git a/example/gmres/test_cmplx_A.cpp b/example/gmres/test_cmplx_A.cpp index bc1ddce35b..ad8d19fb03 100644 --- a/example/gmres/test_cmplx_A.cpp +++ b/example/gmres/test_cmplx_A.cpp @@ -44,6 +44,7 @@ #include #include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include #include #include @@ -77,7 +78,7 @@ int main(int /*argc*/, char** /*argv[]*/) { { // Read in a matrix Market file and use it to test the Kokkos Operator. KokkosSparse::CrsMatrix A = - KokkosKernels::Impl::read_kokkos_crst_matrix< + KokkosSparse::Impl::read_kokkos_crst_matrix< KokkosSparse::CrsMatrix>(filename.c_str()); int n = A.numRows(); diff --git a/example/gmres/test_prec.cpp b/example/gmres/test_prec.cpp index a75c9dc59a..11122edccd 100644 --- a/example/gmres/test_prec.cpp +++ b/example/gmres/test_prec.cpp @@ -48,6 +48,7 @@ #include #include #include +#include "KokkosSparse_IOUtils.hpp" int main(int argc, char* argv[]) { typedef double ST; @@ -114,13 +115,13 @@ int main(int argc, char* argv[]) { { // Generate a diagonal matrix with entries 1, 2, ...., 1000 and its inverse. 
KokkosSparse::CrsMatrix A = - KokkosKernels::Impl::kk_generate_diag_matrix< + KokkosSparse::Impl::kk_generate_diag_matrix< KokkosSparse::CrsMatrix>(n); KokkosSparse::Experimental::MatrixPrec* myPrec = new KokkosSparse::Experimental::MatrixPrec( - KokkosKernels::Impl::kk_generate_diag_matrix< + KokkosSparse::Impl::kk_generate_diag_matrix< KokkosSparse::CrsMatrix>(n, true)); ViewVectorType X(Kokkos::view_alloc(Kokkos::WithoutInitializing, "X"), diff --git a/example/gmres/test_real_A.cpp b/example/gmres/test_real_A.cpp index 26103da035..abfb3f0101 100644 --- a/example/gmres/test_real_A.cpp +++ b/example/gmres/test_real_A.cpp @@ -44,6 +44,7 @@ #include #include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include #include #include @@ -89,7 +90,7 @@ int main(int /*argc*/, char** /*argv[]*/) { cOT diagDominance = 1; nnz = 10 * numRows; sp_matrix_type A = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< sp_matrix_type>(numRows, numCols, nnz, 0, ncOT(0.01 * numRows), diagDominance); diff --git a/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp b/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp index 1fc1fc37d2..57b8ddd4ec 100644 --- a/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp +++ b/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp @@ -2,6 +2,7 @@ #include "KokkosKernels_default_types.hpp" #include "KokkosKernels_Handle.hpp" #include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include "KokkosSparse_spmv.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosSparse_gauss_seidel.hpp" @@ -37,7 +38,7 @@ int main() //Get approx. 20 entries per row //Diagonals are 2x the absolute sum of all other entries. 
Offset nnz = numRows * 20; - Matrix A = KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix(numRows, numRows, nnz, 2, 100, 1.05 * one); + Matrix A = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix(numRows, numRows, nnz, 2, 100, 1.05 * one); std::cout << "Generated a matrix with " << numRows << " rows/cols, and " << nnz << " entries.\n"; //Create a kernel handle, then a Gauss-Seidel handle with the default algorithm Handle handle; diff --git a/perf_test/graph/KokkosGraph_color.cpp b/perf_test/graph/KokkosGraph_color.cpp index 8b16111157..7c6dda889f 100644 --- a/perf_test/graph/KokkosGraph_color.cpp +++ b/perf_test/graph/KokkosGraph_color.cpp @@ -55,6 +55,7 @@ #include "KokkosKernels_TestParameters.hpp" #include "KokkosGraph_Distance1Color.hpp" #include "KokkosKernels_TestUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" void print_options(std::ostream &os, const char *app_name, unsigned int indent = 0) { @@ -376,7 +377,7 @@ void run_multi_mem_experiment(Parameters params) { if (params.a_mem_space == 1) { fast_crstmat_t a_fast_crsmat; a_fast_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( + KokkosSparse::Impl::read_kokkos_crst_matrix( a_mat_file); a_fast_crsgraph = a_fast_crsmat.graph; num_cols = a_fast_crsmat.numCols(); @@ -384,7 +385,7 @@ void run_multi_mem_experiment(Parameters params) { } else { slow_crstmat_t a_slow_crsmat; a_slow_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( + KokkosSparse::Impl::read_kokkos_crst_matrix( a_mat_file); a_slow_crsgraph = a_slow_crsmat.graph; num_cols = a_slow_crsmat.numCols(); diff --git a/perf_test/graph/KokkosGraph_color_d2.cpp b/perf_test/graph/KokkosGraph_color_d2.cpp index 7d6f45889a..b47fe21a70 100644 --- a/perf_test/graph/KokkosGraph_color_d2.cpp +++ b/perf_test/graph/KokkosGraph_color_d2.cpp @@ -65,6 +65,7 @@ #include #include "KokkosKernels_default_types.hpp" #include "KokkosKernels_TestUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" using namespace KokkosGraph; @@ -595,7 
+596,7 @@ void experiment_driver(const D2Parameters& params) { using graph_t = typename crsMat_t::StaticCrsGraphType; crsMat_t A = - KokkosKernels::Impl::read_kokkos_crst_matrix(params.mtx_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(params.mtx_file); graph_t Agraph = A.graph; int num_cols = A.numCols(); diff --git a/perf_test/graph/KokkosGraph_mis_d2.cpp b/perf_test/graph/KokkosGraph_mis_d2.cpp index c68d5f85e2..dfe7715a1d 100644 --- a/perf_test/graph/KokkosGraph_mis_d2.cpp +++ b/perf_test/graph/KokkosGraph_mis_d2.cpp @@ -66,6 +66,7 @@ #include "KokkosGraph_MIS2.hpp" #include "KokkosKernels_default_types.hpp" #include "KokkosKernels_TestUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" using namespace KokkosGraph; @@ -253,7 +254,7 @@ void run_mis2(const MIS2Parameters& params) { Kokkos::Timer t; crsMat_t A_in = - KokkosKernels::Impl::read_kokkos_crst_matrix(params.mtx_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(params.mtx_file); std::cout << "I/O time: " << t.seconds() << " s\n"; t.reset(); // Symmetrize the matrix just in case diff --git a/perf_test/sparse/KokkosSparse_gs.cpp b/perf_test/sparse/KokkosSparse_gs.cpp index 3d2be67676..2136cbb640 100644 --- a/perf_test/sparse/KokkosSparse_gs.cpp +++ b/perf_test/sparse/KokkosSparse_gs.cpp @@ -52,6 +52,7 @@ #include #include #include "KokkosKernels_default_types.hpp" +#include "KokkosSparse_IOUtils.hpp" #include #include #include @@ -177,7 +178,7 @@ crsMat_t generateLongRowMatrix(const GS_Parameters& params) { rowmap.data(), numRows + 1)); crsMat_t A("A", numRows, numRows, totalEntries, valuesView, rowmapView, entriesView); - A = KokkosKernels::sort_and_merge_matrix(A); + A = KokkosSparse::sort_and_merge_matrix(A); if (params.graph_symmetric) { // Symmetrize on host, rather than relying on the parallel versions (those // can be tested for symmetric=false) @@ -203,7 +204,7 @@ void runGS(const GS_Parameters& params) { typedef typename crsMat_t::values_type::non_const_type scalar_view_t; crsMat_t A; if 
(params.matrix_path) - A = KokkosKernels::Impl::read_kokkos_crst_matrix( + A = KokkosSparse::Impl::read_kokkos_crst_matrix( params.matrix_path); else A = generateLongRowMatrix(params); diff --git a/perf_test/sparse/KokkosSparse_kk_spmv.cpp b/perf_test/sparse/KokkosSparse_kk_spmv.cpp index 953294b120..40887d67ec 100644 --- a/perf_test/sparse/KokkosSparse_kk_spmv.cpp +++ b/perf_test/sparse/KokkosSparse_kk_spmv.cpp @@ -55,6 +55,7 @@ #include #include #include +#include #include #include "KokkosKernels_default_types.hpp" @@ -74,11 +75,11 @@ void run_spmv(Ordinal numRows, Ordinal numCols, const char* filename, int loop, srand(17312837); matrix_type A; if (filename) - A = KokkosKernels::Impl::read_kokkos_crst_matrix(filename); + A = KokkosSparse::Impl::read_kokkos_crst_matrix(filename); else { Offset nnz = 10 * numRows; // note: the help text says the bandwidth is fixed at 0.01 * numRows - A = KokkosKernels::Impl::kk_generate_sparse_matrix( + A = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, 0, 0.01 * numRows); } numRows = A.numRows(); diff --git a/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp b/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp index 371f1b1d33..78520d64eb 100644 --- a/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp +++ b/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp @@ -44,6 +44,7 @@ #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosSparse_run_spgemm.hpp" +#include "KokkosSparse_IOUtils.hpp" namespace KokkosKernels { @@ -74,11 +75,11 @@ void run_multi_mem_spgemm(Parameters params) { if (params.a_mem_space == 1) { a_fast_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( + KokkosSparse::Impl::read_kokkos_crst_matrix( a_mat_file); } else { a_slow_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( + KokkosSparse::Impl::read_kokkos_crst_matrix( a_mat_file); } @@ -90,12 +91,12 @@ void run_multi_mem_spgemm(Parameters params) { } else if (params.b_mem_space == 1) { if (b_mat_file == NULL) b_mat_file = 
a_mat_file; b_fast_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( + KokkosSparse::Impl::read_kokkos_crst_matrix( b_mat_file); } else { if (b_mat_file == NULL) b_mat_file = a_mat_file; b_slow_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( + KokkosSparse::Impl::read_kokkos_crst_matrix( b_mat_file); } @@ -222,18 +223,18 @@ void run_multi_mem_spgemm(Parameters params) { if (c_mat_file != NULL) { if (params.c_mem_space == 1) { - KokkosKernels::sort_crs_matrix(c_fast_crsmat); + KokkosSparse::sort_crs_matrix(c_fast_crsmat); - KokkosKernels::Impl::write_graph_bin( + KokkosSparse::Impl::write_graph_bin( (lno_t)(c_fast_crsmat.numRows()), (size_type)(c_fast_crsmat.graph.entries.extent(0)), c_fast_crsmat.graph.row_map.data(), c_fast_crsmat.graph.entries.data(), c_fast_crsmat.values.data(), c_mat_file); } else { - KokkosKernels::sort_crs_matrix(c_slow_crsmat); + KokkosSparse::sort_crs_matrix(c_slow_crsmat); - KokkosKernels::Impl::write_graph_bin( + KokkosSparse::Impl::write_graph_bin( (lno_t)c_slow_crsmat.numRows(), (size_type)c_slow_crsmat.graph.entries.extent(0), c_slow_crsmat.graph.row_map.data(), diff --git a/perf_test/sparse/KokkosSparse_pcg.cpp b/perf_test/sparse/KokkosSparse_pcg.cpp index 5f34ec1cd9..a98a8fcec8 100644 --- a/perf_test/sparse/KokkosSparse_pcg.cpp +++ b/perf_test/sparse/KokkosSparse_pcg.cpp @@ -49,6 +49,7 @@ #include "KokkosKernels_IOUtils.hpp" #include "KokkosKernels_default_types.hpp" #include "KokkosKernels_TestUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include #define MAXVAL 1 @@ -263,7 +264,7 @@ void run_pcg(int *cmdline, const char *mtx_file) { default_lno_t *xadj, *adj; default_scalar *ew; - KokkosKernels::Impl::read_matrix(&nv, &ne, &xadj, &adj, &ew, mtx_file); diff --git a/perf_test/sparse/KokkosSparse_run_spgemm.hpp b/perf_test/sparse/KokkosSparse_run_spgemm.hpp index caedb013c3..5ece07e403 100644 --- a/perf_test/sparse/KokkosSparse_run_spgemm.hpp +++ b/perf_test/sparse/KokkosSparse_run_spgemm.hpp @@ -44,7 +44,7 @@ 
#include "KokkosSparse_spgemm.hpp" #include "KokkosKernels_TestParameters.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_SortCrs.hpp" #define TRANPOSEFIRST false #define TRANPOSESECOND false @@ -67,7 +67,7 @@ bool is_same_matrix(crsMat_t output_mat1, crsMat_t output_mat2) { size_t nentries2 = output_mat2.graph.entries.extent(0); size_t nvals2 = output_mat2.values.extent(0); - KokkosKernels::sort_crs_matrix(output_mat1); + KokkosSparse::sort_crs_matrix(output_mat1); if (nrows1 != nrows2) { std::cerr << "row count is different" << std::endl; @@ -82,7 +82,7 @@ bool is_same_matrix(crsMat_t output_mat1, crsMat_t output_mat2) { return false; } - KokkosKernels::sort_crs_matrix(output_mat2); + KokkosSparse::sort_crs_matrix(output_mat2); bool is_identical = true; is_identical = KokkosKernels::Impl::kk_is_identical_view< diff --git a/perf_test/sparse/KokkosSparse_run_spgemm_jacobi.hpp b/perf_test/sparse/KokkosSparse_run_spgemm_jacobi.hpp index b5ac32a86e..c48066316b 100644 --- a/perf_test/sparse/KokkosSparse_run_spgemm_jacobi.hpp +++ b/perf_test/sparse/KokkosSparse_run_spgemm_jacobi.hpp @@ -45,7 +45,8 @@ #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosKernels_TestParameters.hpp" #include "KokkosSparse_spgemm.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_SortCrs.hpp" +#include "KokkosSparse_IOUtils.hpp" #define TRANSPOSEFIRST false #define TRANSPOSESECOND false @@ -69,7 +70,7 @@ bool is_same_matrix(crsMat_t output_mat1, crsMat_t output_mat2) { size_t nentries2 = output_mat2.graph.entries.extent(0); size_t nvals2 = output_mat2.values.extent(0); - KokkosKernels::sort_crs_matrix(output_mat1); + KokkosSparse::sort_crs_matrix(output_mat1); if (nrows1 != nrows2) { std::cerr << "row count is different" << std::endl; @@ -84,7 +85,7 @@ bool is_same_matrix(crsMat_t output_mat1, crsMat_t output_mat2) { return false; } - KokkosKernels::sort_crs_matrix(output_mat2); + KokkosSparse::sort_crs_matrix(output_mat2); bool is_identical = true; 
is_identical = KokkosKernels::Impl::kk_is_identical_view< @@ -337,11 +338,11 @@ void run_spgemm_jacobi(Parameters params) { if (params.a_mem_space == 1) { a_fast_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( + KokkosSparse::Impl::read_kokkos_crst_matrix( a_mat_file); } else { a_slow_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( + KokkosSparse::Impl::read_kokkos_crst_matrix( a_mat_file); } @@ -353,12 +354,12 @@ void run_spgemm_jacobi(Parameters params) { } else if (params.b_mem_space == 1) { if (b_mat_file == NULL) b_mat_file = a_mat_file; b_fast_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( + KokkosSparse::Impl::read_kokkos_crst_matrix( b_mat_file); } else { if (b_mat_file == NULL) b_mat_file = a_mat_file; b_slow_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( + KokkosSparse::Impl::read_kokkos_crst_matrix( b_mat_file); } @@ -485,18 +486,18 @@ void run_spgemm_jacobi(Parameters params) { if (c_mat_file != NULL) { if (params.c_mem_space == 1) { - KokkosKernels::sort_crs_matrix(c_fast_crsmat); + KokkosSparse::sort_crs_matrix(c_fast_crsmat); - KokkosKernels::Impl::write_graph_bin( + KokkosSparse::Impl::write_graph_bin( (lno_t)(c_fast_crsmat.numRows()), (size_type)(c_fast_crsmat.graph.entries.extent(0)), c_fast_crsmat.graph.row_map.data(), c_fast_crsmat.graph.entries.data(), c_fast_crsmat.values.data(), c_mat_file); } else { - KokkosKernels::sort_crs_matrix(c_slow_crsmat); + KokkosSparse::sort_crs_matrix(c_slow_crsmat); - KokkosKernels::Impl::write_graph_bin( + KokkosSparse::Impl::write_graph_bin( (lno_t)c_slow_crsmat.numRows(), (size_type)c_slow_crsmat.graph.entries.extent(0), c_slow_crsmat.graph.row_map.data(), diff --git a/perf_test/sparse/KokkosSparse_spadd.cpp b/perf_test/sparse/KokkosSparse_spadd.cpp index de8b5fcca8..963ada8836 100644 --- a/perf_test/sparse/KokkosSparse_spadd.cpp +++ b/perf_test/sparse/KokkosSparse_spadd.cpp @@ -46,8 +46,8 @@ #include "KokkosKernels_config.h" #include "KokkosKernels_Handle.hpp" #include 
"KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils_cusparse.hpp" -#include "KokkosKernels_SparseUtils_mkl.hpp" +#include "KokkosSparse_Utils_cusparse.hpp" +#include "KokkosSparse_Utils_mkl.hpp" #include "KokkosSparse_spadd.hpp" #include "KokkosKernels_TestUtils.hpp" diff --git a/perf_test/sparse/KokkosSparse_spiluk.cpp b/perf_test/sparse/KokkosSparse_spiluk.cpp index 2ee9573880..b86ecc352f 100644 --- a/perf_test/sparse/KokkosSparse_spiluk.cpp +++ b/perf_test/sparse/KokkosSparse_spiluk.cpp @@ -58,13 +58,14 @@ #include -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_spiluk.hpp" #include "KokkosSparse_spmv.hpp" #include "KokkosBlas1_nrm2.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosKernels_default_types.hpp" #include +#include #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) && \ (!defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)) @@ -111,7 +112,7 @@ int test_spiluk_perf(std::vector tests, std::string afilename, int kin, if (!afilename.empty()) { std::cout << "ILU(K) Begin: Read matrix filename " << afilename << std::endl; - crsmat_t A = KokkosKernels::Impl::read_kokkos_crst_matrix( + crsmat_t A = KokkosSparse::Impl::read_kokkos_crst_matrix( afilename.c_str()); // in_matrix graph_t graph = A.graph; // in_graph const size_type nrows = graph.numRows(); diff --git a/perf_test/sparse/KokkosSparse_spmv.cpp b/perf_test/sparse/KokkosSparse_spmv.cpp index 6b67905adc..9eec6181a7 100644 --- a/perf_test/sparse/KokkosSparse_spmv.cpp +++ b/perf_test/sparse/KokkosSparse_spmv.cpp @@ -55,6 +55,7 @@ #include #include #include +#include #include #include "KokkosKernels_default_types.hpp" #include @@ -90,12 +91,12 @@ int test_crs_matrix_singlevec(Ordinal numRows, Ordinal numCols, int test, srand(17312837); matrix_type A; if (filename) - A = KokkosKernels::Impl::read_kokkos_crst_matrix(filename); + A = KokkosSparse::Impl::read_kokkos_crst_matrix(filename); else { Offset nnz = 10 * numRows; // note: the 
help text says the bandwidth is fixed at 0.01 * numRows // CAVEAT: small problem sizes are problematic, b/c of 0.01*numRows - A = KokkosKernels::Impl::kk_generate_sparse_matrix( + A = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, 0, 0.01 * numRows); } SPMVTestData test_data = setup_test(&data, A, rows_per_thread, team_size, diff --git a/perf_test/sparse/KokkosSparse_sptrsv.cpp b/perf_test/sparse/KokkosSparse_sptrsv.cpp index c6787242d9..a27ed3f6d2 100644 --- a/perf_test/sparse/KokkosSparse_sptrsv.cpp +++ b/perf_test/sparse/KokkosSparse_sptrsv.cpp @@ -58,12 +58,13 @@ #include -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_sptrsv.hpp" #include "KokkosSparse_spmv.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosKernels_default_types.hpp" #include +#include "KokkosSparse_IOUtils.hpp" //#define INTERNAL_CUSPARSE @@ -159,7 +160,7 @@ int test_sptrsv_perf(std::vector tests, const std::string &lfilename, if (!lfilename.empty()) { std::cout << "Lower Tri Begin: Read matrix filename " << lfilename << std::endl; - crsmat_t triMtx = KokkosKernels::Impl::read_kokkos_crst_matrix( + crsmat_t triMtx = KokkosSparse::Impl::read_kokkos_crst_matrix( lfilename.c_str()); // in_matrix graph_t graph = triMtx.graph; // in_graph const size_type nrows = graph.numRows(); @@ -567,7 +568,7 @@ int test_sptrsv_perf(std::vector tests, const std::string &lfilename, if (!ufilename.empty()) { std::cout << "Upper Tri Begin: Read matrix filename " << ufilename << std::endl; - crsmat_t triMtx = KokkosKernels::Impl::read_kokkos_crst_matrix( + crsmat_t triMtx = KokkosSparse::Impl::read_kokkos_crst_matrix( ufilename.c_str()); // in_matrix graph_t graph = triMtx.graph; // in_graph const size_type nrows = graph.numRows(); diff --git a/perf_test/sparse/KokkosSparse_sptrsv_supernode.cpp b/perf_test/sparse/KokkosSparse_sptrsv_supernode.cpp index 039c88e9c1..ad8e1ba8b9 100644 --- 
a/perf_test/sparse/KokkosSparse_sptrsv_supernode.cpp +++ b/perf_test/sparse/KokkosSparse_sptrsv_supernode.cpp @@ -43,9 +43,10 @@ */ #include "Kokkos_Random.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_spmv.hpp" #include "KokkosSparse_CrsMatrix.hpp" +#include "KokkosSparse_IOUtils.hpp" #include "KokkosSparse_sptrsv.hpp" #include "KokkosSparse_sptrsv_supernode.hpp" @@ -130,7 +131,7 @@ int test_sptrsv_perf(std::vector tests, bool verbose, std::cout << " > Read a triangular-matrix filename " << matrix_filename << std::endl; host_crsmat_t M = - KokkosKernels::Impl::read_kokkos_crst_matrix( + KokkosSparse::Impl::read_kokkos_crst_matrix( matrix_filename.c_str()); const size_type nrows = M.graph.numRows(); // transpose the matrix to be stored in CCS diff --git a/src/common/KokkosKernels_IOUtils.hpp b/src/common/KokkosKernels_IOUtils.hpp index d450221797..fe72d0cbf3 100644 --- a/src/common/KokkosKernels_IOUtils.hpp +++ b/src/common/KokkosKernels_IOUtils.hpp @@ -59,7 +59,6 @@ #include #include "Kokkos_Random.hpp" #include "KokkosKernels_SimpleUtils.hpp" -#include "KokkosSparse_CrsMatrix.hpp" #include namespace KokkosKernels { @@ -89,384 +88,6 @@ inline void getRandomBounds(double mag, Kokkos::complex &start, end = Kokkos::complex(mag, mag); } -// MD: Bases on Christian's sparseMatrix_generate function in test_crsmatrix.cpp -// file. -template -void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols, - SizeType &nnz, OrdinalType row_size_variance, - OrdinalType bandwidth, ScalarType *&values, - SizeType *&rowPtr, OrdinalType *&colInd, - OrdinalType block_elem_count = 1) { - rowPtr = new SizeType[nrows + 1]; - - OrdinalType elements_per_row = nrows ? 
nnz / nrows : 0; - srand(13721); - rowPtr[0] = 0; - for (int row = 0; row < nrows; row++) { - int varianz = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance; - int numRowEntries = elements_per_row + varianz; - if (numRowEntries < 0) numRowEntries = 0; - // Clamping numRowEntries above accomplishes 2 things: - // - If ncols is 0, numRowEntries will also be 0 - // - With numRowEntries at most 2/3 the number of columns, in the worst - // case - // 90% of insertions will succeed after 6 tries - if (numRowEntries > 0.66 * ncols) numRowEntries = 0.66 * ncols; - rowPtr[row + 1] = rowPtr[row] + numRowEntries; - } - nnz = rowPtr[nrows]; - values = new ScalarType[nnz]; - colInd = new OrdinalType[nnz]; - for (OrdinalType row = 0; row < nrows; row++) { - for (SizeType k = rowPtr[row]; k < rowPtr[row + 1]; ++k) { - while (true) { - OrdinalType pos = (1.0 * rand() / RAND_MAX - 0.5) * bandwidth + row; - while (pos < 0) pos += ncols; - while (pos >= ncols) pos -= ncols; - - bool is_already_in_the_row = false; - for (SizeType j = rowPtr[row]; j < k; j++) { - if (colInd[j] == pos) { - is_already_in_the_row = true; - break; - } - } - if (!is_already_in_the_row) { - colInd[k] = pos; - break; - } - } - } - } - // Sample each value from uniform (-50, 50) for real types, or (-50 - 50i, 50 - // + 50i) for complex types. 
- Kokkos::View valuesView( - values, nnz * block_elem_count); - ScalarType randStart, randEnd; - getRandomBounds(50.0, randStart, randEnd); - Kokkos::Random_XorShift64_Pool pool(13718); - Kokkos::fill_random(valuesView, pool, randStart, randEnd); -} - -template -void kk_sparseMatrix_generate_lower_upper_triangle( - char uplo, OrdinalType nrows, OrdinalType ncols, SizeType &nnz, - OrdinalType /*row_size_variance*/, OrdinalType /*bandwidth*/, - ScalarType *&values, SizeType *&rowPtr, OrdinalType *&colInd) { - rowPtr = new SizeType[nrows + 1]; - - // OrdinalType elements_per_row = nnz/nrows; - srand(13721); - rowPtr[0] = 0; - for (int row = 0; row < nrows; row++) { - if (uplo == 'L') - rowPtr[row + 1] = rowPtr[row] + row + 1; - else - rowPtr[row + 1] = rowPtr[row] + ncols - (row); - } - nnz = rowPtr[nrows]; - values = new ScalarType[nnz]; - colInd = new OrdinalType[nnz]; - for (OrdinalType row = 0; row < nrows; row++) { - for (SizeType k = rowPtr[row]; k < rowPtr[row + 1]; k++) { - if (uplo == 'L') - colInd[k] = k - rowPtr[row]; - else - colInd[k] = row + (k - rowPtr[row]); - values[k] = 1.0; - } - } -} - -template -void kk_diagonally_dominant_sparseMatrix_generate( - OrdinalType nrows, OrdinalType ncols, SizeType &nnz, - OrdinalType row_size_variance, OrdinalType bandwidth, ScalarType *&values, - SizeType *&rowPtr, OrdinalType *&colInd, - ScalarType diagDominance = 10 * Kokkos::ArithTraits::one()) { - rowPtr = new SizeType[nrows + 1]; - - OrdinalType elements_per_row = nnz / nrows; - srand(13721); - rowPtr[0] = 0; - for (int row = 0; row < nrows; row++) { - int varianz = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance; - if (varianz < 1) varianz = 1; - if (varianz > 0.75 * ncols) varianz = 0.75 * ncols; - rowPtr[row + 1] = rowPtr[row] + elements_per_row + varianz; - if (rowPtr[row + 1] <= rowPtr[row]) // This makes sure that there is - rowPtr[row + 1] = rowPtr[row] + 1; // at least one nonzero in the row - } - nnz = rowPtr[nrows]; - values = new ScalarType[nnz]; 
- colInd = new OrdinalType[nnz]; - for (OrdinalType row = 0; row < nrows; row++) { - ScalarType total_values = 0; - std::unordered_set entriesInRow; - // We always add the diagonal entry (after this loop) - entriesInRow.insert(row); - for (SizeType k = rowPtr[row]; k < rowPtr[row + 1] - 1; k++) { - while (true) { - OrdinalType pos = (1.0 * rand() / RAND_MAX - 0.5) * bandwidth + row; - while (pos < 0) pos += ncols; - while (pos >= ncols) pos -= ncols; - - if (entriesInRow.find(pos) == entriesInRow.end()) { - entriesInRow.insert(pos); - colInd[k] = pos; - values[k] = 100.0 * rand() / RAND_MAX - 50.0; - total_values += - Kokkos::Details::ArithTraits::abs(values[k]); - break; - } - } - } - - colInd[rowPtr[row + 1] - 1] = row; - values[rowPtr[row + 1] - 1] = total_values * diagDominance; - } -} - -// This function creates a diagonal sparse matrix for testing matrix operations. -// The elements on the diagonal are 1, 2, ..., n-1, n. -// If "invert" is true, it will return the inverse of the above diagonal matrix. 
-template -crsMat_t kk_generate_diag_matrix(typename crsMat_t::const_ordinal_type n, - const bool invert = false) { - typedef typename crsMat_t::ordinal_type ot; - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; - typedef typename crsMat_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::non_const_value_type size_type; - typedef typename cols_view_t::non_const_value_type lno_t; - typedef typename values_view_t::non_const_value_type scalar_t; - - row_map_view_t rowmap_view("rowmap_view", n + 1); - cols_view_t columns_view("colsmap_view", n); - values_view_t values_view("values_view", n); - - { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - typename values_view_t::HostMirror hv = - Kokkos::create_mirror_view(values_view); - - for (lno_t i = 0; i <= n; ++i) { - hr(i) = size_type(i); - } - - for (ot i = 0; i < n; ++i) { - hc(i) = lno_t(i); - if (invert) { - hv(i) = scalar_t(1.0) / (scalar_t(i + 1)); - } else { - hv(i) = scalar_t(i + 1); - } - } - Kokkos::deep_copy(rowmap_view, hr); - Kokkos::deep_copy(columns_view, hc); - Kokkos::deep_copy(values_view, hv); - } - - graph_t static_graph(columns_view, rowmap_view); - crsMat_t crsmat("CrsMatrix", n, values_view, static_graph); - return crsmat; -} - -template -crsMat_t kk_generate_diagonally_dominant_sparse_matrix( - typename crsMat_t::const_ordinal_type nrows, - typename crsMat_t::const_ordinal_type ncols, - typename crsMat_t::non_const_size_type &nnz, - typename crsMat_t::const_ordinal_type row_size_variance, - typename crsMat_t::const_ordinal_type bandwidth, - typename crsMat_t::const_value_type diagDominance = - 10 * Kokkos::ArithTraits::one()) { - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef 
typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; - typedef typename crsMat_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::non_const_value_type size_type; - typedef typename cols_view_t::non_const_value_type lno_t; - typedef typename values_view_t::non_const_value_type scalar_t; - lno_t *adj; - size_type *xadj; //, nnzA; - scalar_t *values; - - kk_diagonally_dominant_sparseMatrix_generate( - nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj, - diagDominance); - - row_map_view_t rowmap_view("rowmap_view", nrows + 1); - cols_view_t columns_view("colsmap_view", nnz); - values_view_t values_view("values_view", nnz); - - { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - typename values_view_t::HostMirror hv = - Kokkos::create_mirror_view(values_view); - - for (lno_t i = 0; i <= nrows; ++i) { - hr(i) = xadj[i]; - } - - for (size_type i = 0; i < nnz; ++i) { - hc(i) = adj[i]; - hv(i) = values[i]; - } - Kokkos::deep_copy(rowmap_view, hr); - Kokkos::deep_copy(columns_view, hc); - Kokkos::deep_copy(values_view, hv); - } - - graph_t static_graph(columns_view, rowmap_view); - crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); - delete[] xadj; - delete[] adj; - delete[] values; - return crsmat; -} - -template -crsMat_t kk_generate_triangular_sparse_matrix( - char uplo, typename crsMat_t::const_ordinal_type nrows, - typename crsMat_t::const_ordinal_type ncols, - typename crsMat_t::non_const_size_type &nnz, - typename crsMat_t::const_ordinal_type row_size_variance, - typename crsMat_t::const_ordinal_type bandwidth) { - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; 
- typedef typename crsMat_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::non_const_value_type size_type; - typedef typename cols_view_t::non_const_value_type lno_t; - typedef typename values_view_t::non_const_value_type scalar_t; - lno_t *adj; - size_type *xadj; //, nnzA; - scalar_t *values; - - kk_sparseMatrix_generate_lower_upper_triangle( - uplo, nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj); - - row_map_view_t rowmap_view("rowmap_view", nrows + 1); - cols_view_t columns_view("colsmap_view", nnz); - values_view_t values_view("values_view", nnz); - - { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - typename values_view_t::HostMirror hv = - Kokkos::create_mirror_view(values_view); - - for (lno_t i = 0; i <= nrows; ++i) { - hr(i) = xadj[i]; - } - - for (size_type i = 0; i < nnz; ++i) { - hc(i) = adj[i]; - hv(i) = values[i]; - } - Kokkos::deep_copy(rowmap_view, hr); - Kokkos::deep_copy(columns_view, hc); - Kokkos::deep_copy(values_view, hv); - Kokkos::fence(); - } - - graph_t static_graph(columns_view, rowmap_view); - crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); - delete[] xadj; - delete[] adj; - delete[] values; - return crsmat; -} - -template -crsMat_t kk_generate_sparse_matrix( - typename crsMat_t::const_ordinal_type nrows, - typename crsMat_t::const_ordinal_type ncols, - typename crsMat_t::non_const_size_type &nnz, - typename crsMat_t::const_ordinal_type row_size_variance, - typename crsMat_t::const_ordinal_type bandwidth) { - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; - typedef typename crsMat_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::non_const_value_type size_type; - 
typedef typename cols_view_t::non_const_value_type lno_t; - typedef typename values_view_t::non_const_value_type scalar_t; - lno_t *adj; - size_type *xadj; //, nnzA; - scalar_t *values; - - kk_sparseMatrix_generate( - nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj); - - row_map_view_t rowmap_view("rowmap_view", nrows + 1); - cols_view_t columns_view("colsmap_view", nnz); - values_view_t values_view("values_view", nnz); - - { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - typename values_view_t::HostMirror hv = - Kokkos::create_mirror_view(values_view); - - for (lno_t i = 0; i <= nrows; ++i) { - hr(i) = xadj[i]; - } - - for (size_type i = 0; i < nnz; ++i) { - hc(i) = adj[i]; - hv(i) = values[i]; - } - Kokkos::deep_copy(rowmap_view, hr); - Kokkos::deep_copy(columns_view, hc); - Kokkos::deep_copy(values_view, hv); - } - - graph_t static_graph(columns_view, rowmap_view); - crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); - delete[] xadj; - delete[] adj; - delete[] values; - return crsmat; -} - -template -bsrMat_t kk_generate_sparse_matrix( - typename bsrMat_t::const_ordinal_type block_dim, - typename bsrMat_t::const_ordinal_type nrows, - typename bsrMat_t::const_ordinal_type ncols, - typename bsrMat_t::non_const_size_type &nnz, - typename bsrMat_t::const_ordinal_type row_size_variance, - typename bsrMat_t::const_ordinal_type bandwidth) { - typedef KokkosSparse::CrsMatrix< - typename bsrMat_t::value_type, typename bsrMat_t::ordinal_type, - typename bsrMat_t::device_type, typename bsrMat_t::memory_traits, - typename bsrMat_t::size_type> - crsMat_t; - - const auto crs_mtx = kk_generate_sparse_matrix( - nrows * block_dim, ncols * block_dim, nnz, row_size_variance, bandwidth); - bsrMat_t bsrmat(crs_mtx, block_dim); - return bsrmat; -} -// TODO: need to fix the size_type. All over the reading inputs are lno_t. 
- template void md_malloc(stype **arr, size_t n, std::string /*alloc_str*/ = "") { *arr = new stype[n]; @@ -647,130 +268,6 @@ inline void kk_read_3Dview_from_file(idx_array_type &view, Kokkos::fence(); } -template -void convert_crs_to_lower_triangle_edge_list(idx nv, idx *xadj, idx *adj, - idx *lower_triangle_srcs, - idx *lower_triangle_dests) { - idx ind = 0; - for (idx i = 0; i < nv; ++i) { - idx xb = xadj[i]; - idx xe = xadj[i + 1]; - for (idx j = xb; j < xe; ++j) { - idx dst = adj[j]; - if (i < dst) { - lower_triangle_srcs[ind] = i; - lower_triangle_dests[ind++] = dst; - } - } - } -} - -template -void convert_crs_to_edge_list(idx nv, idx *xadj, idx *srcs) { - for (idx i = 0; i < nv; ++i) { - idx xb = xadj[i]; - idx xe = xadj[i + 1]; - for (idx j = xb; j < xe; ++j) { - srcs[j] = i; - } - } -} - -template -void convert_edge_list_to_csr(lno_t nv, size_type ne, lno_t *srcs, lno_t *dests, - wt *ew, size_type *xadj, lno_t *adj, wt *crs_ew) { - std::vector> edges(ne); - for (size_type i = 0; i < ne; ++i) { - edges[i].src = srcs[i]; - edges[i].dst = dests[i]; - edges[i].ew = ew[i]; - } - std::sort(edges.begin(), edges.begin() + ne); - - size_type eind = 0; - for (lno_t i = 0; i < nv; ++i) { - (xadj)[i] = eind; - while (edges[eind].src == i) { - (adj)[eind] = edges[eind].dst; - (*crs_ew)[eind] = edges[eind].ew; - ++eind; - } - } - xadj[nv] = eind; -} - -template -void convert_undirected_edge_list_to_csr(lno_t nv, size_type ne, in_lno_t *srcs, - in_lno_t *dests, size_type *xadj, - lno_t *adj) { - std::vector> edges(ne * 2); - for (size_type i = 0; i < ne; ++i) { - edges[i * 2].src = srcs[i]; - edges[i * 2].dst = dests[i]; - - edges[i * 2 + 1].src = dests[i]; - edges[i * 2 + 1].dst = srcs[i]; - } -#ifdef KOKKOSKERNELS_HAVE_OUTER -#include -#include -#include -#include - __gnu_parallel::parallel_sort_mwms *>( - &(edges[0]), &(edges[0]) + ne * 2, - std::less>(), 64); -#else - std::sort(edges.begin(), edges.begin() + ne * 2); -#endif - - size_type eind = 0; - for (lno_t i = 
0; i < nv; ++i) { - (xadj)[i] = eind; - while (edges[eind].src == i) { - (adj)[eind] = edges[eind].dst; - //(*crs_ew)[eind] = edges[eind].ew; - ++eind; - } - } - xadj[nv] = eind; -} -/* - -template -void read_graph_src_dst_bin( - lno_t *nv, size_type *ne - ,size_type **xadj, lno_t **adj, scalar_t **ew, - const char *fnameSrc, const char *fnameTarg){ - - size_t numEdges = 0; - size_t *srcs, *dst; //this type is hard coded - buildEdgeListFromBinSrcTarg_undirected( - fnameSrc, fnameTarg, - &numEdges, - &srcs, &dst); - - lno_t num_vertex = 0; - for (size_t i = 0; i < numEdges; ++i){ - if (num_vertex < srcs[i]) num_vertex = srcs[i]; - if (num_vertex < dst[i]) num_vertex = dst[i]; - } - num_vertex += 1; - - *nv = num_vertex; - *ne = numEdges * 2; - - md_malloc(xadj, num_vertex + 1); - md_malloc(adj, numEdges * 2); - convert_undirected_edge_list_to_csr ( - num_vertex, numEdges, - srcs, dst, - *xadj, *adj); - - delete [] srcs; - delete [] dst; -} -*/ - template void write_edgelist_bin(size_t ne, const idx *edge_begins, const idx *edge_ends, const wt *ew, const char *filename) { @@ -797,270 +294,6 @@ void read_edgelist_bin(idx *ne, idx **edge_begins, idx **edge_ends, wt **ew, myFile.close(); } -template -void write_graph_bin(lno_t nv, size_type ne, const size_type *xadj, - const lno_t *adj, const scalar_t *ew, - const char *filename) { - std::ofstream myFile(filename, std::ios::out | std::ios::binary); - myFile.write((char *)&nv, sizeof(lno_t)); - myFile.write((char *)&ne, sizeof(size_type)); - myFile.write((char *)xadj, sizeof(size_type) * (nv + 1)); - - myFile.write((char *)adj, sizeof(lno_t) * (ne)); - - myFile.write((char *)ew, sizeof(scalar_t) * (ne)); - - myFile.close(); -} - -template -void write_graph_crs(lno_t nv, size_type ne, const size_type *xadj, - const lno_t *adj, const scalar_t *ew, - const char *filename) { - std::ofstream myFile(filename, std::ios::out); - myFile << nv << " " << ne << std::endl; - - for (lno_t i = 0; i <= nv; ++i) { - myFile << xadj[i] << 
" "; - } - myFile << std::endl; - - for (lno_t i = 0; i < nv; ++i) { - size_type b = xadj[i]; - size_type e = xadj[i + 1]; - for (size_type j = b; j < e; ++j) { - myFile << adj[j] << " "; - } - myFile << std::endl; - } - for (size_type i = 0; i < ne; ++i) { - myFile << ew[i] << " "; - } - myFile << std::endl; - - myFile.close(); -} - -template -void write_graph_ligra(lno_t nv, size_type ne, const size_type *xadj, - const lno_t *adj, const scalar_t * /*ew*/, - const char *filename) { - std::ofstream ff(filename); - ff << "AdjacencyGraph" << std::endl; - ff << nv << std::endl << ne << std::endl; - for (lno_t i = 0; i < nv; ++i) { - ff << xadj[i] << std::endl; - } - for (size_type i = 0; i < ne; ++i) { - ff << adj[i] << std::endl; - } - ff.close(); -} - -// MM: types and utility functions for parsing the MatrixMarket format -namespace MM { -enum MtxObject { UNDEFINED_OBJECT, MATRIX, VECTOR }; -enum MtxFormat { UNDEFINED_FORMAT, COORDINATE, ARRAY }; -enum MtxField { - UNDEFINED_FIELD, - REAL, // includes both float and double - COMPLEX, // includes complex and complex - INTEGER, // includes all integer types - PATTERN // not a type, but means the value for every entry is 1 -}; -enum MtxSym { - UNDEFINED_SYMMETRY, - GENERAL, - SYMMETRIC, // A(i, j) = A(j, i) - SKEW_SYMMETRIC, // A(i, j) = -A(j, i) - HERMITIAN // A(i, j) = a + bi; A(j, i) = a - bi -}; - -// readScalar/writeScalar: read and write a scalar in the form that it appears -// in an .mtx file. 
The >> and << operators won't work, because complex appears -// as "real imag", not "(real, imag)" -template -scalar_t readScalar(std::istream &is) { - scalar_t val; - is >> val; - return val; -} - -template <> -inline Kokkos::complex readScalar(std::istream &is) { - float r, i; - is >> r; - is >> i; - return Kokkos::complex(r, i); -} - -template <> -inline Kokkos::complex readScalar(std::istream &is) { - double r, i; - is >> r; - is >> i; - return Kokkos::complex(r, i); -} - -template -void writeScalar(std::ostream &os, scalar_t val) { - os << val; -} - -template <> -inline void writeScalar(std::ostream &os, Kokkos::complex val) { - os << val.real() << ' ' << val.imag(); -} - -template <> -inline void writeScalar(std::ostream &os, Kokkos::complex val) { - os << val.real() << ' ' << val.imag(); -} - -// symmetryFlip: given a value for A(i, j), return the value that -// should be inserted at A(j, i) (if any) -template -scalar_t symmetryFlip(scalar_t val, MtxSym symFlag) { - if (symFlag == SKEW_SYMMETRIC) return -val; - return val; -} - -template <> -inline Kokkos::complex symmetryFlip(Kokkos::complex val, - MtxSym symFlag) { - if (symFlag == HERMITIAN) - return Kokkos::conj(val); - else if (symFlag == SKEW_SYMMETRIC) - return -val; - return val; -} - -template <> -inline Kokkos::complex symmetryFlip(Kokkos::complex val, - MtxSym symFlag) { - if (symFlag == HERMITIAN) - return Kokkos::conj(val); - else if (symFlag == SKEW_SYMMETRIC) - return -val; - return val; -} -} // namespace MM - -template -void write_matrix_mtx(lno_t nrows, lno_t ncols, size_type nentries, - const size_type *xadj, const lno_t *adj, - const scalar_t *vals, const char *filename) { - std::ofstream myFile(filename); - myFile << "%%MatrixMarket matrix coordinate "; - if (std::is_same>::value || - std::is_same>::value) - myFile << "complex"; - else - myFile << "real"; - myFile << " general\n"; - myFile << nrows << " " << ncols << " " << nentries << '\n'; - myFile << std::setprecision(17) << 
std::scientific; - for (lno_t i = 0; i < nrows; ++i) { - size_type b = xadj[i]; - size_type e = xadj[i + 1]; - for (size_type j = b; j < e; ++j) { - myFile << i + 1 << " " << adj[j] + 1 << " "; - MM::writeScalar(myFile, vals[j]); - myFile << '\n'; - } - } - myFile.close(); -} - -template -void write_graph_mtx(lno_t nv, size_type ne, const size_type *xadj, - const lno_t *adj, const scalar_t *ew, - const char *filename) { - std::ofstream myFile(filename); - myFile << "%%MatrixMarket matrix coordinate "; - if (std::is_same>::value || - std::is_same>::value) - myFile << "complex"; - else - myFile << "real"; - myFile << " general\n"; - myFile << nv << " " << nv << " " << ne << '\n'; - myFile << std::setprecision(8) << std::scientific; - for (lno_t i = 0; i < nv; ++i) { - size_type b = xadj[i]; - size_type e = xadj[i + 1]; - for (size_type j = b; j < e; ++j) { - myFile << i + 1 << " " << (adj)[j] + 1 << " "; - MM::writeScalar(myFile, ew[j]); - myFile << '\n'; - } - } - - myFile.close(); -} - -template -void read_graph_bin(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, - scalar_t **ew, const char *filename) { - std::ifstream myFile(filename, std::ios::in | std::ios::binary); - - myFile.read((char *)nv, sizeof(lno_t)); - myFile.read((char *)ne, sizeof(size_type)); - md_malloc(xadj, *nv + 1); - md_malloc(adj, *ne); - md_malloc(ew, *ne); - myFile.read((char *)*xadj, sizeof(size_type) * (*nv + 1)); - myFile.read((char *)*adj, sizeof(lno_t) * (*ne)); - myFile.read((char *)*ew, sizeof(scalar_t) * (*ne)); - myFile.close(); -} - -// When Kokkos issue #2313 is resolved, can delete -// parseScalar and just use operator>> -template -scalar_t parseScalar(std::istream &is) { - scalar_t val; - is >> val; - return val; -} - -template <> -inline Kokkos::complex parseScalar(std::istream &is) { - std::complex val; - is >> val; - return Kokkos::complex(val); -} - -template <> -inline Kokkos::complex parseScalar(std::istream &is) { - std::complex val; - is >> val; - return 
Kokkos::complex(val); -} - -template -void read_graph_crs(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, - scalar_t **ew, const char *filename) { - std::ifstream myFile(filename, std::ios::in); - myFile >> *nv >> *ne; - - md_malloc(xadj, *nv + 1); - md_malloc(adj, *ne); - md_malloc(ew, *ne); - - for (lno_t i = 0; i <= *nv; ++i) { - myFile >> (*xadj)[i]; - } - - for (size_type i = 0; i < *ne; ++i) { - myFile >> (*adj)[i]; - } - for (size_type i = 0; i < *ne; ++i) { - (*ew)[i] = parseScalar(myFile); - } - myFile.close(); -} - inline bool endswith(std::string const &fullString, std::string const &ending) { if (fullString.length() >= ending.length()) { return (0 == fullString.compare(fullString.length() - ending.length(), @@ -1070,491 +303,6 @@ inline bool endswith(std::string const &fullString, std::string const &ending) { } } -template -void write_kokkos_crst_matrix(crs_matrix_t a_crsmat, const char *filename) { - typedef typename crs_matrix_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; - typedef typename crs_matrix_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::value_type offset_t; - typedef typename cols_view_t::value_type lno_t; - typedef typename values_view_t::value_type scalar_t; - typedef typename values_view_t::size_type size_type; - - size_type nnz = a_crsmat.nnz(); - - auto a_rowmap_view = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), a_crsmat.graph.row_map); - auto a_entries_view = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), a_crsmat.graph.entries); - auto a_values_view = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a_crsmat.values); - offset_t *a_rowmap = const_cast(a_rowmap_view.data()); - lno_t *a_entries = a_entries_view.data(); - scalar_t *a_values = a_values_view.data(); - - std::string strfilename(filename); - if (endswith(strfilename, 
".mtx") || endswith(strfilename, ".mm")) { - write_matrix_mtx( - a_crsmat.numRows(), a_crsmat.numCols(), a_crsmat.nnz(), a_rowmap, - a_entries, a_values, filename); - return; - } else if (a_crsmat.numRows() != a_crsmat.numCols()) { - throw std::runtime_error( - "For formats other than MatrixMarket (suffix .mm or .mtx),\n" - "write_kokkos_crst_matrix only supports square matrices"); - } - if (endswith(strfilename, ".bin")) { - write_graph_bin( - a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); - } else if (endswith(strfilename, ".ligra")) { - write_graph_ligra( - a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); - } else if (endswith(strfilename, ".crs")) { - write_graph_crs( - a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); - } else { - std::string errMsg = - std::string("write_kokkos_crst_matrix: File extension on ") + filename + - " does not correspond to a known format"; - throw std::runtime_error(errMsg); - } -} - -template -int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, - size_type **xadj, lno_t **adj, scalar_t **ew, - bool symmetrize = false, bool remove_diagonal = true, - bool transpose = false) { - using namespace MM; - std::ifstream mmf(fileName, std::ifstream::in); - if (!mmf.is_open()) { - throw std::runtime_error("File cannot be opened\n"); - } - - std::string fline = ""; - getline(mmf, fline); - - if (fline.size() < 2 || fline[0] != '%' || fline[1] != '%') { - throw std::runtime_error("Invalid MM file. 
Line-1\n"); - } - - // make sure every required field is in the file, by initializing them to - // UNDEFINED_* - MtxObject mtx_object = UNDEFINED_OBJECT; - MtxFormat mtx_format = UNDEFINED_FORMAT; - MtxField mtx_field = UNDEFINED_FIELD; - MtxSym mtx_sym = UNDEFINED_SYMMETRY; - - if (fline.find("matrix") != std::string::npos) { - mtx_object = MATRIX; - } else if (fline.find("vector") != std::string::npos) { - mtx_object = VECTOR; - throw std::runtime_error( - "MatrixMarket \"vector\" is not supported by KokkosKernels read_mtx()"); - } - - if (fline.find("coordinate") != std::string::npos) { - // sparse - mtx_format = COORDINATE; - } else if (fline.find("array") != std::string::npos) { - // dense - mtx_format = ARRAY; - } - - if (fline.find("real") != std::string::npos || - fline.find("double") != std::string::npos) { - if (std::is_same::value || - std::is_same::value) - mtx_field = REAL; - else { - if (!std::is_floating_point::value) - throw std::runtime_error( - "scalar_t in read_mtx() incompatible with float or double typed " - "MatrixMarket file."); - else - mtx_field = REAL; - } - } else if (fline.find("complex") != std::string::npos) { - if (!(std::is_same>::value || - std::is_same>::value)) - throw std::runtime_error( - "scalar_t in read_mtx() incompatible with complex-typed MatrixMarket " - "file."); - else - mtx_field = COMPLEX; - } else if (fline.find("integer") != std::string::npos) { - if (std::is_integral::value || - std::is_floating_point::value || - std::is_same::value || - std::is_same::value) - mtx_field = INTEGER; - else - throw std::runtime_error( - "scalar_t in read_mtx() incompatible with integer-typed MatrixMarket " - "file."); - } else if (fline.find("pattern") != std::string::npos) { - mtx_field = PATTERN; - // any reasonable choice for scalar_t can represent "1" or "1.0 + 0i", so - // nothing to check here - } - - if (fline.find("general") != std::string::npos) { - mtx_sym = GENERAL; - } else if (fline.find("skew-symmetric") != 
std::string::npos) { - mtx_sym = SKEW_SYMMETRIC; - } else if (fline.find("symmetric") != std::string::npos) { - // checking for "symmetric" after "skew-symmetric" because it's a substring - mtx_sym = SYMMETRIC; - } else if (fline.find("hermitian") != std::string::npos || - fline.find("Hermitian") != std::string::npos) { - mtx_sym = HERMITIAN; - } - // Validate the matrix attributes - if (mtx_format == ARRAY) { - if (mtx_sym == UNDEFINED_SYMMETRY) mtx_sym = GENERAL; - if (mtx_sym != GENERAL) - throw std::runtime_error( - "array format MatrixMarket file must have general symmetry (optional " - "to include \"general\")"); - } - if (mtx_object == UNDEFINED_OBJECT) - throw std::runtime_error( - "MatrixMarket file header is missing the object type."); - if (mtx_format == UNDEFINED_FORMAT) - throw std::runtime_error("MatrixMarket file header is missing the format."); - if (mtx_field == UNDEFINED_FIELD) - throw std::runtime_error( - "MatrixMarket file header is missing the field type."); - if (mtx_sym == UNDEFINED_SYMMETRY) - throw std::runtime_error( - "MatrixMarket file header is missing the symmetry type."); - - while (1) { - getline(mmf, fline); - if (fline[0] != '%') break; - } - std::stringstream ss(fline); - lno_t nr = 0, nc = 0; - size_type nnz = 0; - ss >> nr >> nc; - if (mtx_format == COORDINATE) - ss >> nnz; - else - nnz = nr * nc; - size_type numEdges = nnz; - symmetrize = symmetrize || mtx_sym != GENERAL; - if (symmetrize && nr != nc) { - throw std::runtime_error("A non-square matrix cannot be symmetrized."); - } - if (mtx_format == ARRAY) { - // Array format only supports general symmetry and non-pattern - if (symmetrize) - throw std::runtime_error( - "array format MatrixMarket file cannot be symmetrized."); - if (mtx_field == PATTERN) - throw std::runtime_error( - "array format MatrixMarket file can't have \"pattern\" field type."); - } - if (symmetrize) { - numEdges = 2 * nnz; - } - // numEdges is only an upper bound (diagonal entries may be removed) - 
std::vector> edges(numEdges); - size_type nE = 0; - lno_t numDiagonal = 0; - for (size_type i = 0; i < nnz; ++i) { - getline(mmf, fline); - std::stringstream ss2(fline); - struct Edge tmp; - // read source, dest (edge) and weight (value) - lno_t s, d; - scalar_t w; - if (mtx_format == ARRAY) { - // In array format, entries are listed in column major order, - // so the row and column can be determined just from the index i - //(but make them 1-based indices, to match the way coordinate works) - s = i % nr + 1; // row - d = i / nr + 1; // col - } else { - // In coordinate format, row and col of each entry is read from file - ss2 >> s >> d; - } - if (mtx_field == PATTERN) - w = 1; - else - w = readScalar(ss2); - if (!transpose) { - tmp.src = s - 1; - tmp.dst = d - 1; - tmp.ew = w; - } else { - tmp.src = d - 1; - tmp.dst = s - 1; - tmp.ew = w; - } - if (tmp.src == tmp.dst) { - numDiagonal++; - if (!remove_diagonal) { - edges[nE++] = tmp; - } - continue; - } - edges[nE++] = tmp; - if (symmetrize) { - struct Edge tmp2; - tmp2.src = tmp.dst; - tmp2.dst = tmp.src; - // the symmetrized value is w, -w or conj(w) if mtx_sym is - // SYMMETRIC, SKEW_SYMMETRIC or HERMITIAN, respectively. 
- tmp2.ew = symmetryFlip(tmp.ew, mtx_sym); - edges[nE++] = tmp2; - } - } - mmf.close(); - std::sort(edges.begin(), edges.begin() + nE); - if (transpose) { - lno_t tmp = nr; - nr = nc; - nc = tmp; - } - // idx *nv, idx *ne, idx **xadj, idx **adj, wt **wt - *nrows = nr; - *ncols = nc; - *ne = nE; - //*xadj = new idx[nr + 1]; - md_malloc(xadj, nr + 1); - //*adj = new idx[nE]; - md_malloc(adj, nE); - //*ew = new wt[nE]; - md_malloc(ew, nE); - size_type eind = 0; - size_type actual = 0; - for (lno_t i = 0; i < nr; ++i) { - (*xadj)[i] = actual; - bool is_first = true; - while (eind < nE && edges[eind].src == i) { - if (is_first || !symmetrize || eind == 0 || - (eind > 0 && edges[eind - 1].dst != edges[eind].dst)) { - (*adj)[actual] = edges[eind].dst; - (*ew)[actual] = edges[eind].ew; - ++actual; - } - is_first = false; - ++eind; - } - } - (*xadj)[nr] = actual; - *ne = actual; - return 0; -} - -// Version of read_mtx which does not capture the number of columns. -// This is the old interface; it's kept for backwards compatibility. 
-template -int read_mtx(const char *fileName, lno_t *nv, size_type *ne, size_type **xadj, - lno_t **adj, scalar_t **ew, bool symmetrize = false, - bool remove_diagonal = true, bool transpose = false) { - lno_t ncol; // will discard - return read_mtx(fileName, nv, &ncol, ne, xadj, - adj, ew, symmetrize, - remove_diagonal, transpose); -} - -template -void read_matrix(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, - scalar_t **ew, const char *filename) { - std::string strfilename(filename); - if (endswith(strfilename, ".mtx") || endswith(strfilename, ".mm")) { - read_mtx(filename, nv, ne, xadj, adj, ew, false, false, false); - } - - else if (endswith(strfilename, ".bin")) { - read_graph_bin(nv, ne, xadj, adj, ew, filename); - } - - else if (endswith(strfilename, ".crs")) { - read_graph_crs(nv, ne, xadj, adj, ew, filename); - } - - else { - throw std::runtime_error("Reader is not available\n"); - } -} - -template -crsMat_t read_kokkos_crst_matrix(const char *filename_) { - std::string strfilename(filename_); - bool isMatrixMarket = - endswith(strfilename, ".mtx") || endswith(strfilename, ".mm"); - - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; - typedef typename crsMat_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::value_type size_type; - typedef typename cols_view_t::value_type lno_t; - typedef typename values_view_t::value_type scalar_t; - - lno_t nr, nc, *adj; - size_type *xadj, nnzA; - scalar_t *values; - - if (isMatrixMarket) { - // MatrixMarket file contains the exact number of columns - read_mtx(filename_, &nr, &nc, &nnzA, &xadj, - &adj, &values, false, false, false); - } else { - //.crs and .bin files don't contain #cols, so will compute it later based on - // the entries - read_matrix(&nr, &nnzA, &xadj, &adj, &values, - filename_); - } - - row_map_view_t 
rowmap_view("rowmap_view", nr + 1); - cols_view_t columns_view("colsmap_view", nnzA); - values_view_t values_view("values_view", nnzA); - - { - Kokkos::View> - hr(xadj, nr + 1); - Kokkos::View> - hc(adj, nnzA); - Kokkos::View> - hv(values, nnzA); - Kokkos::deep_copy(rowmap_view, hr); - Kokkos::deep_copy(columns_view, hc); - Kokkos::deep_copy(values_view, hv); - } - - if (!isMatrixMarket) { - KokkosKernels::Impl::kk_view_reduce_max( - nnzA, columns_view, nc); - nc++; - } - - graph_t static_graph(columns_view, rowmap_view); - crsMat_t crsmat("CrsMatrix", nc, values_view, static_graph); - delete[] xadj; - delete[] adj; - delete[] values; - return crsmat; -} - -template -crsGraph_t read_kokkos_crst_graph(const char *filename_) { - typedef typename crsGraph_t::row_map_type::non_const_type row_map_view_t; - typedef typename crsGraph_t::entries_type::non_const_type cols_view_t; - - typedef typename row_map_view_t::value_type size_type; - typedef typename cols_view_t::value_type lno_t; - typedef double scalar_t; - - lno_t nv, *adj; - size_type *xadj, nnzA; - scalar_t *values; - read_matrix(&nv, &nnzA, &xadj, &adj, &values, - filename_); - - row_map_view_t rowmap_view("rowmap_view", nv + 1); - cols_view_t columns_view("colsmap_view", nnzA); - - { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - - for (lno_t i = 0; i <= nv; ++i) { - hr(i) = xadj[i]; - } - - for (size_type i = 0; i < nnzA; ++i) { - hc(i) = adj[i]; - } - Kokkos::deep_copy(rowmap_view, hr); - Kokkos::deep_copy(columns_view, hc); - } - - lno_t ncols = 0; - KokkosKernels::Impl::kk_view_reduce_max( - nnzA, columns_view, ncols); - ncols += 1; - - crsGraph_t static_graph(columns_view, rowmap_view, ncols); - delete[] xadj; - delete[] adj; - delete[] values; - return static_graph; -} - -template -inline void kk_sequential_create_incidence_matrix( - nnz_lno_t num_rows, const size_type *xadj, const 
nnz_lno_t *adj, - size_type *i_adj // output. preallocated -) { - std::vector c_xadj(num_rows); - for (nnz_lno_t i = 0; i < num_rows; i++) { - c_xadj[i] = xadj[i]; - } - int eCnt = 0; - for (nnz_lno_t i = 0; i < num_rows; i++) { - size_type begin = xadj[i]; - size_type end = xadj[i + 1]; - nnz_lno_t adjsize = end - begin; - - for (nnz_lno_t j = 0; j < adjsize; j++) { - size_type aind = j + begin; - nnz_lno_t col = adj[aind]; - if (i < col) { - i_adj[c_xadj[i]++] = eCnt; - i_adj[c_xadj[col]++] = eCnt++; - } - } - } - - for (nnz_lno_t i = 0; i < num_rows; i++) { - if (c_xadj[i] != xadj[i + 1]) { - std::cout << "i:" << i << " c_xadj[i]:" << c_xadj[i] - << " xadj[i+1]:" << xadj[i + 1] << std::endl; - } - } -} - -template -inline void kk_sequential_create_incidence_matrix_transpose( - const nnz_lno_t num_rows, const size_type num_edges, const size_type *xadj, - const nnz_lno_t *adj, - size_type *i_xadj, // output. preallocated - nnz_lno_t *i_adj // output. preallocated -) { - for (nnz_lno_t i = 0; i < num_edges / 2 + 1; i++) { - i_xadj[i] = i * 2; - } - int eCnt = 0; - for (nnz_lno_t i = 0; i < num_rows; i++) { - size_type begin = xadj[i]; - size_type end = xadj[i + 1]; - nnz_lno_t adjsize = end - begin; - - for (nnz_lno_t j = 0; j < adjsize; j++) { - size_type aind = j + begin; - nnz_lno_t col = adj[aind]; - if (i < col) { - i_adj[eCnt++] = i; - i_adj[eCnt++] = col; - } - } - } -} - } // namespace Impl } // namespace KokkosKernels diff --git a/src/common/KokkosKernels_Sorting.hpp b/src/common/KokkosKernels_Sorting.hpp index 208688ae5b..8b897047d9 100644 --- a/src/common/KokkosKernels_Sorting.hpp +++ b/src/common/KokkosKernels_Sorting.hpp @@ -61,48 +61,6 @@ struct DefaultComparator { }; } // namespace Impl -// ---------------------------------- -// BSR matrix/graph sorting utilities -// ---------------------------------- - -template -void sort_bsr_matrix(const bsrMat_t& A); - -// ---------------------------------- -// CRS matrix/graph sorting utilities -// 
---------------------------------- - -// The sort_crs* functions sort the adjacent column list for each row into -// ascending order. - -template -void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, - const values_t& values); - -template -void sort_crs_matrix(const crsMat_t& A); - -template -void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries); - -template -void sort_crs_graph(const crsGraph_t& G); - -// sort_and_merge_matrix produces a new matrix which is equivalent to A but is -// sorted and has no duplicated entries: each (i, j) is unique. Values for -// duplicated entries are summed. -template -crsMat_t sort_and_merge_matrix(const crsMat_t& A); - -template -crsGraph_t sort_and_merge_graph(const crsGraph_t& G); - -template -void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, rowmap_t& rowmap_out, - entries_t& entries_out); - // ---------------------------- // General device-level sorting // ---------------------------- @@ -155,240 +113,6 @@ KOKKOS_INLINE_FUNCTION void TeamBitonicSort2( namespace Impl { -template -struct SortCrsMatrixFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using scalar_t = typename values_t::non_const_value_type; - using team_mem = typename Kokkos::TeamPolicy::member_type; - // The functor owns memory for entriesAux, so it can't have - // MemoryTraits - using entries_managed_t = Kokkos::View; - using values_managed_t = Kokkos::View; - - SortCrsMatrixFunctor(bool usingRangePol, const rowmap_t& rowmap_, - const entries_t& entries_, const values_t& values_) - : rowmap(rowmap_), entries(entries_), values(values_) { - if (usingRangePol) { - entriesAux = entries_managed_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), - entries.extent(0)); - valuesAux = values_managed_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values aux"), - values.extent(0)); 
- } - // otherwise, aux arrays won't be allocated (sorting in place) - } - - KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - // Radix sort requires unsigned keys for comparison - using unsigned_lno_t = typename std::make_unsigned::type; - KokkosKernels::SerialRadixSort2( - (unsigned_lno_t*)entries.data() + rowStart, - (unsigned_lno_t*)entriesAux.data() + rowStart, values.data() + rowStart, - valuesAux.data() + rowStart, rowNum); - } - - KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { - size_type i = t.league_rank(); - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - KokkosKernels::TeamBitonicSort2( - entries.data() + rowStart, values.data() + rowStart, rowNum, t); - } - - rowmap_t rowmap; - entries_t entries; - entries_managed_t entriesAux; - values_t values; - values_managed_t valuesAux; -}; - -template -struct SortCrsGraphFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using team_mem = typename Kokkos::TeamPolicy::member_type; - // The functor owns memory for entriesAux, so it can't have - // MemoryTraits - using entries_managed_t = Kokkos::View; - - SortCrsGraphFunctor(bool usingRangePol, const rowmap_t& rowmap_, - const entries_t& entries_) - : rowmap(rowmap_), entries(entries_) { - if (usingRangePol) { - entriesAux = entries_managed_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), - entries.extent(0)); - } - // otherwise, aux arrays won't be allocated (sorting in place) - } - - KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - // Radix sort requires unsigned keys for comparison - using unsigned_lno_t = typename std::make_unsigned::type; - 
KokkosKernels::SerialRadixSort( - (unsigned_lno_t*)entries.data() + rowStart, - (unsigned_lno_t*)entriesAux.data() + rowStart, rowNum); - } - - KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { - size_type i = t.league_rank(); - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - KokkosKernels::TeamBitonicSort( - entries.data() + rowStart, rowNum, t); - } - - rowmap_t rowmap; - entries_t entries; - entries_managed_t entriesAux; -}; - -template -struct MergedRowmapFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using c_rowmap_t = typename rowmap_t::const_type; - - // Precondition: entries are sorted within each row - MergedRowmapFunctor(const rowmap_t& mergedCounts_, const c_rowmap_t& rowmap_, - const entries_t& entries_) - : mergedCounts(mergedCounts_), rowmap(rowmap_), entries(entries_) {} - - KOKKOS_INLINE_FUNCTION void operator()(lno_t row, size_type& lnewNNZ) const { - size_type rowBegin = rowmap(row); - size_type rowEnd = rowmap(row + 1); - if (rowEnd == rowBegin) { - // Row was empty to begin with - mergedCounts(row) = 0; - return; - } - // Otherwise, the first entry in the row exists - lno_t uniqueEntries = 1; - for (size_type j = rowBegin + 1; j < rowEnd; j++) { - if (entries(j - 1) != entries(j)) uniqueEntries++; - } - mergedCounts(row) = uniqueEntries; - lnewNNZ += uniqueEntries; - if (row == lno_t((rowmap.extent(0) - 1) - 1)) mergedCounts(row + 1) = 0; - } - - rowmap_t mergedCounts; - c_rowmap_t rowmap; - entries_t entries; -}; - -template -struct MatrixMergedEntriesFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using scalar_t = typename values_t::non_const_value_type; - - // Precondition: entries are sorted within each row - MatrixMergedEntriesFunctor(const rowmap_t& rowmap_, const entries_t& entries_, - const values_t& 
values_, - const rowmap_t& mergedRowmap_, - const entries_t& mergedEntries_, - const values_t& mergedValues_) - : rowmap(rowmap_), - entries(entries_), - values(values_), - mergedRowmap(mergedRowmap_), - mergedEntries(mergedEntries_), - mergedValues(mergedValues_) {} - - KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { - size_type rowBegin = rowmap(row); - size_type rowEnd = rowmap(row + 1); - if (rowEnd == rowBegin) { - // Row was empty to begin with, nothing to do - return; - } - // Otherwise, accumulate the value for each column - scalar_t accumVal = values(rowBegin); - lno_t accumCol = entries(rowBegin); - size_type insertPos = mergedRowmap(row); - for (size_type j = rowBegin + 1; j < rowEnd; j++) { - if (accumCol == entries(j)) { - // accumulate - accumVal += values(j); - } else { - // write out and reset - mergedValues(insertPos) = accumVal; - mergedEntries(insertPos) = accumCol; - insertPos++; - accumVal = values(j); - accumCol = entries(j); - } - } - // always left with the last unique entry - mergedValues(insertPos) = accumVal; - mergedEntries(insertPos) = accumCol; - } - - rowmap_t rowmap; - entries_t entries; - values_t values; - rowmap_t mergedRowmap; - entries_t mergedEntries; - values_t mergedValues; -}; - -template -struct GraphMergedEntriesFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - - // Precondition: entries are sorted within each row - GraphMergedEntriesFunctor(const rowmap_t& rowmap_, const entries_t& entries_, - const rowmap_t& mergedRowmap_, - const entries_t& mergedEntries_) - : rowmap(rowmap_), - entries(entries_), - mergedRowmap(mergedRowmap_), - mergedEntries(mergedEntries_) {} - - KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { - size_type rowBegin = rowmap(row); - size_type rowEnd = rowmap(row + 1); - if (rowEnd == rowBegin) { - // Row was empty to begin with, nothing to do - return; - } - // Otherwise, accumulate the value for each 
column - lno_t accumCol = entries(rowBegin); - size_type insertPos = mergedRowmap(row); - for (size_type j = rowBegin + 1; j < rowEnd; j++) { - if (accumCol != entries(j)) { - // write out and reset - mergedEntries(insertPos) = accumCol; - insertPos++; - accumCol = entries(j); - } - } - // always left with the last unique entry - mergedEntries(insertPos) = accumCol; - } - - rowmap_t rowmap; - entries_t entries; - rowmap_t mergedRowmap; - entries_t mergedEntries; -}; - // Functor that sorts a view on one team template @@ -524,274 +248,6 @@ struct BitonicPhase2Functor { } // namespace Impl -// Sort a CRS matrix: within each row, sort entries ascending by column. -// At the same time, permute the values. -template -void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, - const values_t& values) { - using lno_t = typename entries_t::non_const_value_type; - using team_pol = Kokkos::TeamPolicy; - bool useRadix = !Impl::kk_is_gpu_exec_space(); - lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; - if (numRows == 0) return; - Impl::SortCrsMatrixFunctor - funct(useRadix, rowmap, entries, values); - if (useRadix) { - Kokkos::parallel_for("sort_crs_matrix", - Kokkos::RangePolicy(0, numRows), - funct); - } else { - // Try to get teamsize to be largest power of 2 not greater than avg entries - // per row - // TODO (probably important for performnce): add thread-level sort also, and - // use that for small avg degree. But this works for now. 
- lno_t idealTeamSize = 1; - lno_t avgDeg = (entries.extent(0) + numRows - 1) / numRows; - while (idealTeamSize < avgDeg / 2) { - idealTeamSize *= 2; - } - team_pol temp(numRows, 1); - lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); - lno_t teamSize = std::min(idealTeamSize, maxTeamSize); - Kokkos::parallel_for("sort_crs_matrix", team_pol(numRows, teamSize), funct); - } -} - -template -void sort_crs_matrix(const crsMat_t& A) { - // Note: rowmap_t has const values, but that's OK as sorting doesn't modify it - using rowmap_t = typename crsMat_t::row_map_type; - using entries_t = typename crsMat_t::index_type::non_const_type; - using values_t = typename crsMat_t::values_type::non_const_type; - using exec_space = typename crsMat_t::execution_space; - // NOTE: the rowmap of a StaticCrsGraph is const-valued, but the - // entries and CrsMatrix values are non-const (so sorting them directly - // is allowed) - sort_crs_matrix( - A.graph.row_map, A.graph.entries, A.values); -} - -namespace Impl { - -template -KOKKOS_INLINE_FUNCTION void kk_swap(T& a, T& b) { - T t = a; - a = b; - b = t; -} - -template -struct sort_bsr_functor { - using lno_t = typename entries_type::non_const_value_type; - - row_map_type rowmap; - entries_type entries; - values_type values; - const lno_t blocksize; - - sort_bsr_functor(row_map_type rowmap_, entries_type entries_, - values_type values_, const lno_t blocksize_) - : rowmap(rowmap_), - entries(entries_), - values(values_), - blocksize(blocksize_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const lno_t i) const { - const lno_t rowStart = rowmap(i); - const lno_t rowSize = rowmap(i + 1) - rowStart; - auto* e = entries.data() + rowStart; - auto* v = values.data() + rowStart * blocksize; - bool done = false; - while (!done) { - done = true; - for (lno_t j = 1; j < rowSize; ++j) { - const lno_t jp = j - 1; - if (e[jp] <= e[j]) continue; - Impl::kk_swap(e[jp], e[j]); - auto const vb = v + j * blocksize; - auto const vbp = 
v + jp * blocksize; - for (lno_t k = 0; k < blocksize; - ++k) // std::swap_ranges(vb, vb + blocksize, vbp); - Impl::kk_swap(vb[k], vbp[k]); - done = false; - } - } - } -}; - -} // namespace Impl - -// Sort a BRS matrix: within each row, sort entries ascending by column and -// permute the values accordingly. -template -void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, - const entries_t& entries, const values_t& values) { - // TODO: this is O(N^2) mock for debugging - do regular implementation based - // on Radix/Bitonic sort (like CSR) IDEA: maybe we need only one general - // Radix2/Bitonic2 and CSR sorting may call it with blockSize=1 ? - lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; - if (numRows == 0) return; - const lno_t blocksize = blockdim * blockdim; - - assert(values.extent(0) == entries.extent(0) * blocksize); - Impl::sort_bsr_functor bsr_sorter( - rowmap, entries, values, blocksize); - Kokkos::parallel_for("sort_bsr_matrix", - Kokkos::RangePolicy(0, numRows), - bsr_sorter); -} - -// Sort a BSR matrix (like CRS but single values are replaced with contignous -// blocks) -template -void sort_bsr_matrix(const bsrMat_t& A) { - // NOTE: unlike rowmap, entries and values are non-const, so we can sort them - // directly - sort_bsr_matrix( - A.blockDim(), A.graph.row_map, A.graph.entries, A.values); -} - -// Sort a CRS graph: within each row, sort entries ascending by column. -template -void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) { - using lno_t = typename entries_t::non_const_value_type; - using team_pol = Kokkos::TeamPolicy; - bool useRadix = !Impl::kk_is_gpu_exec_space(); - lno_t numRows = rowmap.extent(0) ? 
rowmap.extent(0) - 1 : 0; - if (numRows == 0) return; - Impl::SortCrsGraphFunctor funct( - useRadix, rowmap, entries); - if (useRadix) { - Kokkos::parallel_for("sort_crs_graph", - Kokkos::RangePolicy(0, numRows), - funct); - } else { - // Try to get teamsize to be largest power of 2 less than or equal to - // half the entries per row. 0.5 * #entries is bitonic's parallelism within - // a row. - // TODO (probably important for performnce): add thread-level sort also, and - // use that for small avg degree. But this works for now. - lno_t idealTeamSize = 1; - lno_t avgDeg = (entries.extent(0) + numRows - 1) / numRows; - while (idealTeamSize < avgDeg / 2) { - idealTeamSize *= 2; - } - team_pol temp(numRows, 1); - lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); - lno_t teamSize = std::min(idealTeamSize, maxTeamSize); - Kokkos::parallel_for("sort_crs_graph", team_pol(numRows, teamSize), funct); - } -} - -template -void sort_crs_graph(const crsGraph_t& G) { - static_assert( - !std::is_const::value, - "sort_crs_graph requires StaticCrsGraph entries to be non-const."); - sort_crs_graph(G.row_map, G.entries); -} - -// Sort the rows of matrix, and merge duplicate entries. 
-template -crsMat_t sort_and_merge_matrix(const crsMat_t& A) { - using c_rowmap_t = typename crsMat_t::row_map_type; - using rowmap_t = typename crsMat_t::row_map_type::non_const_type; - using entries_t = typename crsMat_t::index_type::non_const_type; - using values_t = typename crsMat_t::values_type::non_const_type; - using size_type = typename rowmap_t::non_const_value_type; - using exec_space = typename crsMat_t::execution_space; - using range_t = Kokkos::RangePolicy; - sort_crs_matrix(A); - // Count entries per row into a new rowmap, in terms of merges that can be - // done - rowmap_t mergedRowmap( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "SortedMerged rowmap"), - A.numRows() + 1); - size_type numCompressedEntries = 0; - Kokkos::parallel_reduce(range_t(0, A.numRows()), - Impl::MergedRowmapFunctor( - mergedRowmap, A.graph.row_map, A.graph.entries), - numCompressedEntries); - // Prefix sum to get rowmap - Impl::kk_exclusive_parallel_prefix_sum(A.numRows() + 1, - mergedRowmap); - entries_t mergedEntries("SortedMerged entries", numCompressedEntries); - values_t mergedValues("SortedMerged values", numCompressedEntries); - // Compute merged entries and values - Kokkos::parallel_for( - range_t(0, A.numRows()), - Impl::MatrixMergedEntriesFunctor( - A.graph.row_map, A.graph.entries, A.values, mergedRowmap, - mergedEntries, mergedValues)); - // Finally, construct the new compressed matrix - return crsMat_t("SortedMerged", A.numRows(), A.numCols(), - numCompressedEntries, mergedValues, mergedRowmap, - mergedEntries); -} - -template -void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, rowmap_t& rowmap_out, - entries_t& entries_out) { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using range_t = Kokkos::RangePolicy; - using const_rowmap_t = typename rowmap_t::const_type; - lno_t numRows = rowmap_in.extent(0); - if (numRows <= 1) { - // 
Matrix has zero rows - rowmap_out = rowmap_t(); - entries_out = entries_t(); - return; - } - numRows--; - // Sort in place - sort_crs_graph(rowmap_in, entries_in); - // Count entries per row into a new rowmap, in terms of merges that can be - // done - rowmap_out = rowmap_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "SortedMerged rowmap"), - numRows + 1); - size_type numCompressedEntries = 0; - Kokkos::parallel_reduce(range_t(0, numRows), - Impl::MergedRowmapFunctor( - rowmap_out, rowmap_in, entries_in), - numCompressedEntries); - // Prefix sum to get rowmap - Impl::kk_exclusive_parallel_prefix_sum(numRows + 1, - rowmap_out); - entries_out = entries_t("SortedMerged entries", numCompressedEntries); - // Compute merged entries and values - Kokkos::parallel_for( - range_t(0, numRows), - Impl::GraphMergedEntriesFunctor( - rowmap_in, entries_in, rowmap_out, entries_out)); -} - -template -crsGraph_t sort_and_merge_graph(const crsGraph_t& G) { - using rowmap_t = typename crsGraph_t::row_map_type::non_const_type; - using entries_t = typename crsGraph_t::entries_type; - static_assert( - !std::is_const::value, - "sort_and_merge_graph requires StaticCrsGraph entries to be non-const."); - rowmap_t mergedRowmap; - entries_t mergedEntries; - sort_and_merge_graph(G.row_map, G.entries, mergedRowmap, - mergedEntries); - return crsGraph_t(mergedEntries, mergedRowmap); -} - // Version to be called from host on a single array // Generally ~2x slower than Kokkos::sort() for large arrays (> 50 M elements), // but faster for smaller arrays. 
@@ -1125,39 +581,6 @@ KOKKOS_INLINE_FUNCTION void TeamBitonicSort2(ValueType* values, PermType* perm, // For backward compatibility: keep the public interface accessible in // KokkosKernels::Impl:: namespace Impl { -template -[[deprecated]] void sort_crs_graph(const rowmap_t& rowmap, - const entries_t& entries) { - KokkosKernels::sort_crs_graph(rowmap, - entries); -} - -template -[[deprecated]] void sort_crs_matrix(const rowmap_t& rowmap, - const entries_t& entries, - const values_t& values) { - KokkosKernels::sort_crs_matrix(rowmap, entries, values); -} - -template -[[deprecated]] void sort_crs_matrix(const crsMat_t& A) { - KokkosKernels::sort_crs_matrix(A); -} - -template -[[deprecated]] void sort_and_merge_graph( - const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, - rowmap_t& rowmap_out, entries_t& entries_out) { - KokkosKernels::sort_and_merge_graph( - rowmap_in, entries_in, rowmap_out, entries_out); -} - -template -[[deprecated]] crsMat_t sort_and_merge_matrix(const crsMat_t& A) { - return KokkosKernels::sort_and_merge_matrix(A); -} template < typename View, typename ExecSpace, typename Ordinal, diff --git a/src/common/KokkosKernels_Utils.hpp b/src/common/KokkosKernels_Utils.hpp index 655d89ba67..a6649f102b 100644 --- a/src/common/KokkosKernels_Utils.hpp +++ b/src/common/KokkosKernels_Utils.hpp @@ -49,7 +49,7 @@ #include "KokkosKernels_ExecSpaceUtils.hpp" #include "KokkosKernels_SimpleUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosKernels_PrintUtils.hpp" #include "KokkosKernels_VectorUtils.hpp" diff --git a/src/graph/KokkosGraph_ExplicitCoarsening.hpp b/src/graph/KokkosGraph_ExplicitCoarsening.hpp index 8992aa4bb8..322004c0b6 100644 --- a/src/graph/KokkosGraph_ExplicitCoarsening.hpp +++ b/src/graph/KokkosGraph_ExplicitCoarsening.hpp @@ -46,7 +46,7 @@ #define KOKKOSGRAPH_EXPLICIT_COARSEN_HPP #include "KokkosGraph_ExplicitCoarsening_impl.hpp" -#include "KokkosKernels_Sorting.hpp" 
+#include "KokkosSparse_SortCrs.hpp" namespace KokkosGraph { namespace Experimental { @@ -86,8 +86,8 @@ void graph_explicit_coarsen( if (compress) { coarse_rowmap_t mergedRowmap; coarse_entries_t mergedEntries; - KokkosKernels::sort_and_merge_graph( + KokkosSparse::sort_and_merge_graph( coarseRowmap, coarseEntries, mergedRowmap, mergedEntries); coarseRowmap = mergedRowmap; coarseEntries = mergedEntries; @@ -125,8 +125,8 @@ void graph_explicit_coarsen_with_inverse_map( if (compress) { coarse_rowmap_t mergedRowmap; coarse_entries_t mergedEntries; - KokkosKernels::sort_and_merge_graph( + KokkosSparse::sort_and_merge_graph( coarseRowmap, coarseEntries, mergedRowmap, mergedEntries); coarseRowmap = mergedRowmap; coarseEntries = mergedEntries; diff --git a/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp b/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp index 50b2d1c2ef..aef089fd06 100644 --- a/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp +++ b/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp @@ -48,7 +48,7 @@ #include "KokkosBlas_tpl_spec.hpp" #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE -#include "KokkosKernels_SparseUtils_cusparse.hpp" +#include "KokkosSparse_Utils_cusparse.hpp" namespace KokkosKernels { namespace Impl { diff --git a/src/impl/tpls/KokkosKernels_tpl_handles_def.hpp b/src/impl/tpls/KokkosKernels_tpl_handles_def.hpp index 84b5386a00..a5187986e5 100644 --- a/src/impl/tpls/KokkosKernels_tpl_handles_def.hpp +++ b/src/impl/tpls/KokkosKernels_tpl_handles_def.hpp @@ -69,7 +69,7 @@ CusparseSingleton& CusparseSingleton::singleton() { #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE -#include "KokkosKernels_SparseUtils_rocsparse.hpp" +#include "KokkosSparse_Utils_rocsparse.hpp" namespace KokkosKernels { namespace Impl { diff --git a/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp b/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp index 77b76868f3..d0ea5cdc26 100644 --- a/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp +++ 
b/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp @@ -46,7 +46,7 @@ #define KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_DECL_HPP #include "KokkosKernels_Controls.hpp" -#include "KokkosKernels_SparseUtils_mkl.hpp" +#include "KokkosSparse_Utils_mkl.hpp" #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #include @@ -454,7 +454,7 @@ KOKKOSSPARSE_SPMV_MV_MKL(Kokkos::complex, Kokkos::OpenMP, // cuSPARSE #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #include "cusparse.h" -#include "KokkosKernels_SparseUtils_cusparse.hpp" +#include "KokkosSparse_Utils_cusparse.hpp" // // From https://docs.nvidia.com/cuda/cusparse/index.html#bsrmv diff --git a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index d6f36c0a2b..0a92b91eb2 100644 --- a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -50,7 +50,7 @@ // cuSPARSE #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #include "cusparse.h" -#include "KokkosKernels_SparseUtils_cusparse.hpp" +#include "KokkosSparse_Utils_cusparse.hpp" namespace KokkosSparse { namespace Impl { @@ -385,7 +385,7 @@ KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, // rocSPARSE #if defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) #include -#include "KokkosKernels_SparseUtils_rocsparse.hpp" +#include "KokkosSparse_Utils_rocsparse.hpp" namespace KokkosSparse { namespace Impl { @@ -542,7 +542,7 @@ KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutRight, #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #include -#include "KokkosKernels_SparseUtils_mkl.hpp" +#include "KokkosSparse_Utils_mkl.hpp" namespace KokkosSparse { namespace Impl { diff --git a/src/common/KokkosKernels_Controls.hpp b/src/sparse/KokkosKernels_Controls.hpp similarity index 100% rename from src/common/KokkosKernels_Controls.hpp rename to src/sparse/KokkosKernels_Controls.hpp diff --git a/src/common/KokkosKernels_Handle.hpp b/src/sparse/KokkosKernels_Handle.hpp similarity index 100% rename from 
src/common/KokkosKernels_Handle.hpp rename to src/sparse/KokkosKernels_Handle.hpp diff --git a/src/sparse/KokkosSparse_IOUtils.hpp b/src/sparse/KokkosSparse_IOUtils.hpp new file mode 100644 index 0000000000..d847fc9d10 --- /dev/null +++ b/src/sparse/KokkosSparse_IOUtils.hpp @@ -0,0 +1,1270 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef _KOKKOSSPARSE_IOUTILS_HPP +#define _KOKKOSSPARSE_IOUTILS_HPP + +#include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_CrsMatrix.hpp" + +namespace KokkosSparse { +namespace Impl { + +// MD: Bases on Christian's sparseMatrix_generate function in test_crsmatrix.cpp +// file. +template +void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols, + SizeType &nnz, OrdinalType row_size_variance, + OrdinalType bandwidth, ScalarType *&values, + SizeType *&rowPtr, OrdinalType *&colInd, + OrdinalType block_elem_count = 1) { + rowPtr = new SizeType[nrows + 1]; + + OrdinalType elements_per_row = nrows ? 
nnz / nrows : 0; + srand(13721); + rowPtr[0] = 0; + for (int row = 0; row < nrows; row++) { + int varianz = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance; + int numRowEntries = elements_per_row + varianz; + if (numRowEntries < 0) numRowEntries = 0; + // Clamping numRowEntries above accomplishes 2 things: + // - If ncols is 0, numRowEntries will also be 0 + // - With numRowEntries at most 2/3 the number of columns, in the worst + // case + // 90% of insertions will succeed after 6 tries + if (numRowEntries > 0.66 * ncols) numRowEntries = 0.66 * ncols; + rowPtr[row + 1] = rowPtr[row] + numRowEntries; + } + nnz = rowPtr[nrows]; + values = new ScalarType[nnz]; + colInd = new OrdinalType[nnz]; + for (OrdinalType row = 0; row < nrows; row++) { + for (SizeType k = rowPtr[row]; k < rowPtr[row + 1]; ++k) { + while (true) { + OrdinalType pos = (1.0 * rand() / RAND_MAX - 0.5) * bandwidth + row; + while (pos < 0) pos += ncols; + while (pos >= ncols) pos -= ncols; + + bool is_already_in_the_row = false; + for (SizeType j = rowPtr[row]; j < k; j++) { + if (colInd[j] == pos) { + is_already_in_the_row = true; + break; + } + } + if (!is_already_in_the_row) { + colInd[k] = pos; + break; + } + } + } + } + // Sample each value from uniform (-50, 50) for real types, or (-50 - 50i, 50 + // + 50i) for complex types. 
+ Kokkos::View valuesView( + values, nnz * block_elem_count); + ScalarType randStart, randEnd; + KokkosKernels::Impl::getRandomBounds(50.0, randStart, randEnd); + Kokkos::Random_XorShift64_Pool pool(13718); + Kokkos::fill_random(valuesView, pool, randStart, randEnd); +} + +template +void kk_sparseMatrix_generate_lower_upper_triangle( + char uplo, OrdinalType nrows, OrdinalType ncols, SizeType &nnz, + OrdinalType /*row_size_variance*/, OrdinalType /*bandwidth*/, + ScalarType *&values, SizeType *&rowPtr, OrdinalType *&colInd) { + rowPtr = new SizeType[nrows + 1]; + + // OrdinalType elements_per_row = nnz/nrows; + srand(13721); + rowPtr[0] = 0; + for (int row = 0; row < nrows; row++) { + if (uplo == 'L') + rowPtr[row + 1] = rowPtr[row] + row + 1; + else + rowPtr[row + 1] = rowPtr[row] + ncols - (row); + } + nnz = rowPtr[nrows]; + values = new ScalarType[nnz]; + colInd = new OrdinalType[nnz]; + for (OrdinalType row = 0; row < nrows; row++) { + for (SizeType k = rowPtr[row]; k < rowPtr[row + 1]; k++) { + if (uplo == 'L') + colInd[k] = k - rowPtr[row]; + else + colInd[k] = row + (k - rowPtr[row]); + values[k] = 1.0; + } + } +} + +template +void kk_diagonally_dominant_sparseMatrix_generate( + OrdinalType nrows, OrdinalType ncols, SizeType &nnz, + OrdinalType row_size_variance, OrdinalType bandwidth, ScalarType *&values, + SizeType *&rowPtr, OrdinalType *&colInd, + ScalarType diagDominance = 10 * Kokkos::ArithTraits::one()) { + rowPtr = new SizeType[nrows + 1]; + + OrdinalType elements_per_row = nnz / nrows; + srand(13721); + rowPtr[0] = 0; + for (int row = 0; row < nrows; row++) { + int varianz = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance; + if (varianz < 1) varianz = 1; + if (varianz > 0.75 * ncols) varianz = 0.75 * ncols; + rowPtr[row + 1] = rowPtr[row] + elements_per_row + varianz; + if (rowPtr[row + 1] <= rowPtr[row]) // This makes sure that there is + rowPtr[row + 1] = rowPtr[row] + 1; // at least one nonzero in the row + } + nnz = rowPtr[nrows]; + values = 
new ScalarType[nnz]; + colInd = new OrdinalType[nnz]; + for (OrdinalType row = 0; row < nrows; row++) { + ScalarType total_values = 0; + std::unordered_set entriesInRow; + // We always add the diagonal entry (after this loop) + entriesInRow.insert(row); + for (SizeType k = rowPtr[row]; k < rowPtr[row + 1] - 1; k++) { + while (true) { + OrdinalType pos = (1.0 * rand() / RAND_MAX - 0.5) * bandwidth + row; + while (pos < 0) pos += ncols; + while (pos >= ncols) pos -= ncols; + + if (entriesInRow.find(pos) == entriesInRow.end()) { + entriesInRow.insert(pos); + colInd[k] = pos; + values[k] = 100.0 * rand() / RAND_MAX - 50.0; + total_values += + Kokkos::Details::ArithTraits::abs(values[k]); + break; + } + } + } + + colInd[rowPtr[row + 1] - 1] = row; + values[rowPtr[row + 1] - 1] = total_values * diagDominance; + } +} + +// This function creates a diagonal sparse matrix for testing matrix operations. +// The elements on the diagonal are 1, 2, ..., n-1, n. +// If "invert" is true, it will return the inverse of the above diagonal matrix. 
+template +crsMat_t kk_generate_diag_matrix(typename crsMat_t::const_ordinal_type n, + const bool invert = false) { + typedef typename crsMat_t::ordinal_type ot; + typedef typename crsMat_t::StaticCrsGraphType graph_t; + typedef typename graph_t::row_map_type::non_const_type row_map_view_t; + typedef typename graph_t::entries_type::non_const_type cols_view_t; + typedef typename crsMat_t::values_type::non_const_type values_view_t; + + typedef typename row_map_view_t::non_const_value_type size_type; + typedef typename cols_view_t::non_const_value_type lno_t; + typedef typename values_view_t::non_const_value_type scalar_t; + + row_map_view_t rowmap_view("rowmap_view", n + 1); + cols_view_t columns_view("colsmap_view", n); + values_view_t values_view("values_view", n); + + { + typename row_map_view_t::HostMirror hr = + Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = + Kokkos::create_mirror_view(columns_view); + typename values_view_t::HostMirror hv = + Kokkos::create_mirror_view(values_view); + + for (lno_t i = 0; i <= n; ++i) { + hr(i) = size_type(i); + } + + for (ot i = 0; i < n; ++i) { + hc(i) = lno_t(i); + if (invert) { + hv(i) = scalar_t(1.0) / (scalar_t(i + 1)); + } else { + hv(i) = scalar_t(i + 1); + } + } + Kokkos::deep_copy(rowmap_view, hr); + Kokkos::deep_copy(columns_view, hc); + Kokkos::deep_copy(values_view, hv); + } + + graph_t static_graph(columns_view, rowmap_view); + crsMat_t crsmat("CrsMatrix", n, values_view, static_graph); + return crsmat; +} + +template +crsMat_t kk_generate_diagonally_dominant_sparse_matrix( + typename crsMat_t::const_ordinal_type nrows, + typename crsMat_t::const_ordinal_type ncols, + typename crsMat_t::non_const_size_type &nnz, + typename crsMat_t::const_ordinal_type row_size_variance, + typename crsMat_t::const_ordinal_type bandwidth, + typename crsMat_t::const_value_type diagDominance = + 10 * Kokkos::ArithTraits::one()) { + typedef typename crsMat_t::StaticCrsGraphType graph_t; + typedef 
typename graph_t::row_map_type::non_const_type row_map_view_t; + typedef typename graph_t::entries_type::non_const_type cols_view_t; + typedef typename crsMat_t::values_type::non_const_type values_view_t; + + typedef typename row_map_view_t::non_const_value_type size_type; + typedef typename cols_view_t::non_const_value_type lno_t; + typedef typename values_view_t::non_const_value_type scalar_t; + lno_t *adj; + size_type *xadj; //, nnzA; + scalar_t *values; + + kk_diagonally_dominant_sparseMatrix_generate( + nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj, + diagDominance); + + row_map_view_t rowmap_view("rowmap_view", nrows + 1); + cols_view_t columns_view("colsmap_view", nnz); + values_view_t values_view("values_view", nnz); + + { + typename row_map_view_t::HostMirror hr = + Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = + Kokkos::create_mirror_view(columns_view); + typename values_view_t::HostMirror hv = + Kokkos::create_mirror_view(values_view); + + for (lno_t i = 0; i <= nrows; ++i) { + hr(i) = xadj[i]; + } + + for (size_type i = 0; i < nnz; ++i) { + hc(i) = adj[i]; + hv(i) = values[i]; + } + Kokkos::deep_copy(rowmap_view, hr); + Kokkos::deep_copy(columns_view, hc); + Kokkos::deep_copy(values_view, hv); + } + + graph_t static_graph(columns_view, rowmap_view); + crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); + delete[] xadj; + delete[] adj; + delete[] values; + return crsmat; +} + +template +crsMat_t kk_generate_triangular_sparse_matrix( + char uplo, typename crsMat_t::const_ordinal_type nrows, + typename crsMat_t::const_ordinal_type ncols, + typename crsMat_t::non_const_size_type &nnz, + typename crsMat_t::const_ordinal_type row_size_variance, + typename crsMat_t::const_ordinal_type bandwidth) { + typedef typename crsMat_t::StaticCrsGraphType graph_t; + typedef typename graph_t::row_map_type::non_const_type row_map_view_t; + typedef typename graph_t::entries_type::non_const_type cols_view_t; 
+ typedef typename crsMat_t::values_type::non_const_type values_view_t; + + typedef typename row_map_view_t::non_const_value_type size_type; + typedef typename cols_view_t::non_const_value_type lno_t; + typedef typename values_view_t::non_const_value_type scalar_t; + lno_t *adj; + size_type *xadj; //, nnzA; + scalar_t *values; + + kk_sparseMatrix_generate_lower_upper_triangle( + uplo, nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj); + + row_map_view_t rowmap_view("rowmap_view", nrows + 1); + cols_view_t columns_view("colsmap_view", nnz); + values_view_t values_view("values_view", nnz); + + { + typename row_map_view_t::HostMirror hr = + Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = + Kokkos::create_mirror_view(columns_view); + typename values_view_t::HostMirror hv = + Kokkos::create_mirror_view(values_view); + + for (lno_t i = 0; i <= nrows; ++i) { + hr(i) = xadj[i]; + } + + for (size_type i = 0; i < nnz; ++i) { + hc(i) = adj[i]; + hv(i) = values[i]; + } + Kokkos::deep_copy(rowmap_view, hr); + Kokkos::deep_copy(columns_view, hc); + Kokkos::deep_copy(values_view, hv); + Kokkos::fence(); + } + + graph_t static_graph(columns_view, rowmap_view); + crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); + delete[] xadj; + delete[] adj; + delete[] values; + return crsmat; +} + +template +crsMat_t kk_generate_sparse_matrix( + typename crsMat_t::const_ordinal_type nrows, + typename crsMat_t::const_ordinal_type ncols, + typename crsMat_t::non_const_size_type &nnz, + typename crsMat_t::const_ordinal_type row_size_variance, + typename crsMat_t::const_ordinal_type bandwidth) { + typedef typename crsMat_t::StaticCrsGraphType graph_t; + typedef typename graph_t::row_map_type::non_const_type row_map_view_t; + typedef typename graph_t::entries_type::non_const_type cols_view_t; + typedef typename crsMat_t::values_type::non_const_type values_view_t; + + typedef typename row_map_view_t::non_const_value_type size_type; + 
typedef typename cols_view_t::non_const_value_type lno_t; + typedef typename values_view_t::non_const_value_type scalar_t; + lno_t *adj; + size_type *xadj; //, nnzA; + scalar_t *values; + + kk_sparseMatrix_generate( + nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj); + + row_map_view_t rowmap_view("rowmap_view", nrows + 1); + cols_view_t columns_view("colsmap_view", nnz); + values_view_t values_view("values_view", nnz); + + { + typename row_map_view_t::HostMirror hr = + Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = + Kokkos::create_mirror_view(columns_view); + typename values_view_t::HostMirror hv = + Kokkos::create_mirror_view(values_view); + + for (lno_t i = 0; i <= nrows; ++i) { + hr(i) = xadj[i]; + } + + for (size_type i = 0; i < nnz; ++i) { + hc(i) = adj[i]; + hv(i) = values[i]; + } + Kokkos::deep_copy(rowmap_view, hr); + Kokkos::deep_copy(columns_view, hc); + Kokkos::deep_copy(values_view, hv); + } + + graph_t static_graph(columns_view, rowmap_view); + crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); + delete[] xadj; + delete[] adj; + delete[] values; + return crsmat; +} + +template +bsrMat_t kk_generate_sparse_matrix( + typename bsrMat_t::const_ordinal_type block_dim, + typename bsrMat_t::const_ordinal_type nrows, + typename bsrMat_t::const_ordinal_type ncols, + typename bsrMat_t::non_const_size_type &nnz, + typename bsrMat_t::const_ordinal_type row_size_variance, + typename bsrMat_t::const_ordinal_type bandwidth) { + typedef KokkosSparse::CrsMatrix< + typename bsrMat_t::value_type, typename bsrMat_t::ordinal_type, + typename bsrMat_t::device_type, typename bsrMat_t::memory_traits, + typename bsrMat_t::size_type> + crsMat_t; + + const auto crs_mtx = kk_generate_sparse_matrix( + nrows * block_dim, ncols * block_dim, nnz, row_size_variance, bandwidth); + bsrMat_t bsrmat(crs_mtx, block_dim); + return bsrmat; +} +// TODO: need to fix the size_type. All over the reading inputs are lno_t. 
+ +template +void convert_crs_to_lower_triangle_edge_list(idx nv, idx *xadj, idx *adj, + idx *lower_triangle_srcs, + idx *lower_triangle_dests) { + idx ind = 0; + for (idx i = 0; i < nv; ++i) { + idx xb = xadj[i]; + idx xe = xadj[i + 1]; + for (idx j = xb; j < xe; ++j) { + idx dst = adj[j]; + if (i < dst) { + lower_triangle_srcs[ind] = i; + lower_triangle_dests[ind++] = dst; + } + } + } +} + +template +void convert_crs_to_edge_list(idx nv, idx *xadj, idx *srcs) { + for (idx i = 0; i < nv; ++i) { + idx xb = xadj[i]; + idx xe = xadj[i + 1]; + for (idx j = xb; j < xe; ++j) { + srcs[j] = i; + } + } +} + +template +void convert_edge_list_to_csr(lno_t nv, size_type ne, lno_t *srcs, lno_t *dests, + wt *ew, size_type *xadj, lno_t *adj, wt *crs_ew) { + std::vector> edges(ne); + for (size_type i = 0; i < ne; ++i) { + edges[i].src = srcs[i]; + edges[i].dst = dests[i]; + edges[i].ew = ew[i]; + } + std::sort(edges.begin(), edges.begin() + ne); + + size_type eind = 0; + for (lno_t i = 0; i < nv; ++i) { + (xadj)[i] = eind; + while (edges[eind].src == i) { + (adj)[eind] = edges[eind].dst; + (*crs_ew)[eind] = edges[eind].ew; + ++eind; + } + } + xadj[nv] = eind; +} + +template +void convert_undirected_edge_list_to_csr(lno_t nv, size_type ne, in_lno_t *srcs, + in_lno_t *dests, size_type *xadj, + lno_t *adj) { + std::vector> edges(ne * 2); + for (size_type i = 0; i < ne; ++i) { + edges[i * 2].src = srcs[i]; + edges[i * 2].dst = dests[i]; + + edges[i * 2 + 1].src = dests[i]; + edges[i * 2 + 1].dst = srcs[i]; + } +#ifdef KOKKOSKERNELS_HAVE_OUTER +#include +#include +#include +#include + __gnu_parallel::parallel_sort_mwms *>( + &(edges[0]), &(edges[0]) + ne * 2, + std::less>(), 64); +#else + std::sort(edges.begin(), edges.begin() + ne * 2); +#endif + + size_type eind = 0; + for (lno_t i = 0; i < nv; ++i) { + (xadj)[i] = eind; + while (edges[eind].src == i) { + (adj)[eind] = edges[eind].dst; + //(*crs_ew)[eind] = edges[eind].ew; + ++eind; + } + } + xadj[nv] = eind; +} + +template +void 
write_graph_bin(lno_t nv, size_type ne, const size_type *xadj, + const lno_t *adj, const scalar_t *ew, + const char *filename) { + std::ofstream myFile(filename, std::ios::out | std::ios::binary); + myFile.write((char *)&nv, sizeof(lno_t)); + myFile.write((char *)&ne, sizeof(size_type)); + myFile.write((char *)xadj, sizeof(size_type) * (nv + 1)); + + myFile.write((char *)adj, sizeof(lno_t) * (ne)); + + myFile.write((char *)ew, sizeof(scalar_t) * (ne)); + + myFile.close(); +} + +template +void write_graph_crs(lno_t nv, size_type ne, const size_type *xadj, + const lno_t *adj, const scalar_t *ew, + const char *filename) { + std::ofstream myFile(filename, std::ios::out); + myFile << nv << " " << ne << std::endl; + + for (lno_t i = 0; i <= nv; ++i) { + myFile << xadj[i] << " "; + } + myFile << std::endl; + + for (lno_t i = 0; i < nv; ++i) { + size_type b = xadj[i]; + size_type e = xadj[i + 1]; + for (size_type j = b; j < e; ++j) { + myFile << adj[j] << " "; + } + myFile << std::endl; + } + for (size_type i = 0; i < ne; ++i) { + myFile << ew[i] << " "; + } + myFile << std::endl; + + myFile.close(); +} + +template +void write_graph_ligra(lno_t nv, size_type ne, const size_type *xadj, + const lno_t *adj, const scalar_t * /*ew*/, + const char *filename) { + std::ofstream ff(filename); + ff << "AdjacencyGraph" << std::endl; + ff << nv << std::endl << ne << std::endl; + for (lno_t i = 0; i < nv; ++i) { + ff << xadj[i] << std::endl; + } + for (size_type i = 0; i < ne; ++i) { + ff << adj[i] << std::endl; + } + ff.close(); +} + +// MM: types and utility functions for parsing the MatrixMarket format +namespace MM { +enum MtxObject { UNDEFINED_OBJECT, MATRIX, VECTOR }; +enum MtxFormat { UNDEFINED_FORMAT, COORDINATE, ARRAY }; +enum MtxField { + UNDEFINED_FIELD, + REAL, // includes both float and double + COMPLEX, // includes complex and complex + INTEGER, // includes all integer types + PATTERN // not a type, but means the value for every entry is 1 +}; +enum MtxSym { + 
UNDEFINED_SYMMETRY, + GENERAL, + SYMMETRIC, // A(i, j) = A(j, i) + SKEW_SYMMETRIC, // A(i, j) = -A(j, i) + HERMITIAN // A(i, j) = a + bi; A(j, i) = a - bi +}; + +// readScalar/writeScalar: read and write a scalar in the form that it appears +// in an .mtx file. The >> and << operators won't work, because complex appears +// as "real imag", not "(real, imag)" +template +scalar_t readScalar(std::istream &is) { + scalar_t val; + is >> val; + return val; +} + +template <> +inline Kokkos::complex readScalar(std::istream &is) { + float r, i; + is >> r; + is >> i; + return Kokkos::complex(r, i); +} + +template <> +inline Kokkos::complex readScalar(std::istream &is) { + double r, i; + is >> r; + is >> i; + return Kokkos::complex(r, i); +} + +template +void writeScalar(std::ostream &os, scalar_t val) { + os << val; +} + +template <> +inline void writeScalar(std::ostream &os, Kokkos::complex val) { + os << val.real() << ' ' << val.imag(); +} + +template <> +inline void writeScalar(std::ostream &os, Kokkos::complex val) { + os << val.real() << ' ' << val.imag(); +} + +// symmetryFlip: given a value for A(i, j), return the value that +// should be inserted at A(j, i) (if any) +template +scalar_t symmetryFlip(scalar_t val, MtxSym symFlag) { + if (symFlag == SKEW_SYMMETRIC) return -val; + return val; +} + +template <> +inline Kokkos::complex symmetryFlip(Kokkos::complex val, + MtxSym symFlag) { + if (symFlag == HERMITIAN) + return Kokkos::conj(val); + else if (symFlag == SKEW_SYMMETRIC) + return -val; + return val; +} + +template <> +inline Kokkos::complex symmetryFlip(Kokkos::complex val, + MtxSym symFlag) { + if (symFlag == HERMITIAN) + return Kokkos::conj(val); + else if (symFlag == SKEW_SYMMETRIC) + return -val; + return val; +} +} // namespace MM + +template +void write_matrix_mtx(lno_t nrows, lno_t ncols, size_type nentries, + const size_type *xadj, const lno_t *adj, + const scalar_t *vals, const char *filename) { + std::ofstream myFile(filename); + myFile << 
"%%MatrixMarket matrix coordinate "; + if (std::is_same>::value || + std::is_same>::value) + myFile << "complex"; + else + myFile << "real"; + myFile << " general\n"; + myFile << nrows << " " << ncols << " " << nentries << '\n'; + myFile << std::setprecision(17) << std::scientific; + for (lno_t i = 0; i < nrows; ++i) { + size_type b = xadj[i]; + size_type e = xadj[i + 1]; + for (size_type j = b; j < e; ++j) { + myFile << i + 1 << " " << adj[j] + 1 << " "; + MM::writeScalar(myFile, vals[j]); + myFile << '\n'; + } + } + myFile.close(); +} + +template +void write_graph_mtx(lno_t nv, size_type ne, const size_type *xadj, + const lno_t *adj, const scalar_t *ew, + const char *filename) { + std::ofstream myFile(filename); + myFile << "%%MatrixMarket matrix coordinate "; + if (std::is_same>::value || + std::is_same>::value) + myFile << "complex"; + else + myFile << "real"; + myFile << " general\n"; + myFile << nv << " " << nv << " " << ne << '\n'; + myFile << std::setprecision(8) << std::scientific; + for (lno_t i = 0; i < nv; ++i) { + size_type b = xadj[i]; + size_type e = xadj[i + 1]; + for (size_type j = b; j < e; ++j) { + myFile << i + 1 << " " << (adj)[j] + 1 << " "; + MM::writeScalar(myFile, ew[j]); + myFile << '\n'; + } + } + + myFile.close(); +} + +template +void read_graph_bin(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, + scalar_t **ew, const char *filename) { + std::ifstream myFile(filename, std::ios::in | std::ios::binary); + + myFile.read((char *)nv, sizeof(lno_t)); + myFile.read((char *)ne, sizeof(size_type)); + KokkosKernels::Impl::md_malloc(xadj, *nv + 1); + KokkosKernels::Impl::md_malloc(adj, *ne); + KokkosKernels::Impl::md_malloc(ew, *ne); + myFile.read((char *)*xadj, sizeof(size_type) * (*nv + 1)); + myFile.read((char *)*adj, sizeof(lno_t) * (*ne)); + myFile.read((char *)*ew, sizeof(scalar_t) * (*ne)); + myFile.close(); +} + +// When Kokkos issue #2313 is resolved, can delete +// parseScalar and just use operator>> +template +scalar_t 
parseScalar(std::istream &is) { + scalar_t val; + is >> val; + return val; +} + +template <> +inline Kokkos::complex parseScalar(std::istream &is) { + std::complex val; + is >> val; + return Kokkos::complex(val); +} + +template <> +inline Kokkos::complex parseScalar(std::istream &is) { + std::complex val; + is >> val; + return Kokkos::complex(val); +} + +template +void read_graph_crs(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, + scalar_t **ew, const char *filename) { + std::ifstream myFile(filename, std::ios::in); + myFile >> *nv >> *ne; + + KokkosKernels::Impl::md_malloc(xadj, *nv + 1); + KokkosKernels::Impl::md_malloc(adj, *ne); + KokkosKernels::Impl::md_malloc(ew, *ne); + + for (lno_t i = 0; i <= *nv; ++i) { + myFile >> (*xadj)[i]; + } + + for (size_type i = 0; i < *ne; ++i) { + myFile >> (*adj)[i]; + } + for (size_type i = 0; i < *ne; ++i) { + (*ew)[i] = parseScalar(myFile); + } + myFile.close(); +} + +template +void write_kokkos_crst_matrix(crs_matrix_t a_crsmat, const char *filename) { + typedef typename crs_matrix_t::StaticCrsGraphType graph_t; + typedef typename graph_t::row_map_type::non_const_type row_map_view_t; + typedef typename graph_t::entries_type::non_const_type cols_view_t; + typedef typename crs_matrix_t::values_type::non_const_type values_view_t; + + typedef typename row_map_view_t::value_type offset_t; + typedef typename cols_view_t::value_type lno_t; + typedef typename values_view_t::value_type scalar_t; + typedef typename values_view_t::size_type size_type; + + size_type nnz = a_crsmat.nnz(); + + auto a_rowmap_view = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), a_crsmat.graph.row_map); + auto a_entries_view = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), a_crsmat.graph.entries); + auto a_values_view = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a_crsmat.values); + offset_t *a_rowmap = const_cast(a_rowmap_view.data()); + lno_t *a_entries = a_entries_view.data(); + scalar_t *a_values = 
a_values_view.data(); + + std::string strfilename(filename); + if (KokkosKernels::Impl::endswith(strfilename, ".mtx") || KokkosKernels::Impl::endswith(strfilename, ".mm")) { + write_matrix_mtx( + a_crsmat.numRows(), a_crsmat.numCols(), a_crsmat.nnz(), a_rowmap, + a_entries, a_values, filename); + return; + } else if (a_crsmat.numRows() != a_crsmat.numCols()) { + throw std::runtime_error( + "For formats other than MatrixMarket (suffix .mm or .mtx),\n" + "write_kokkos_crst_matrix only supports square matrices"); + } + if (KokkosKernels::Impl::endswith(strfilename, ".bin")) { + write_graph_bin( + a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); + } else if (KokkosKernels::Impl::endswith(strfilename, ".ligra")) { + write_graph_ligra( + a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); + } else if (KokkosKernels::Impl::endswith(strfilename, ".crs")) { + write_graph_crs( + a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); + } else { + std::string errMsg = + std::string("write_kokkos_crst_matrix: File extension on ") + filename + + " does not correspond to a known format"; + throw std::runtime_error(errMsg); + } +} + +template +int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, + size_type **xadj, lno_t **adj, scalar_t **ew, + bool symmetrize = false, bool remove_diagonal = true, + bool transpose = false) { + using namespace MM; + std::ifstream mmf(fileName, std::ifstream::in); + if (!mmf.is_open()) { + throw std::runtime_error("File cannot be opened\n"); + } + + std::string fline = ""; + getline(mmf, fline); + + if (fline.size() < 2 || fline[0] != '%' || fline[1] != '%') { + throw std::runtime_error("Invalid MM file. 
Line-1\n"); + } + + // make sure every required field is in the file, by initializing them to + // UNDEFINED_* + MtxObject mtx_object = UNDEFINED_OBJECT; + MtxFormat mtx_format = UNDEFINED_FORMAT; + MtxField mtx_field = UNDEFINED_FIELD; + MtxSym mtx_sym = UNDEFINED_SYMMETRY; + + if (fline.find("matrix") != std::string::npos) { + mtx_object = MATRIX; + } else if (fline.find("vector") != std::string::npos) { + mtx_object = VECTOR; + throw std::runtime_error( + "MatrixMarket \"vector\" is not supported by KokkosKernels read_mtx()"); + } + + if (fline.find("coordinate") != std::string::npos) { + // sparse + mtx_format = COORDINATE; + } else if (fline.find("array") != std::string::npos) { + // dense + mtx_format = ARRAY; + } + + if (fline.find("real") != std::string::npos || + fline.find("double") != std::string::npos) { + if (std::is_same::value || + std::is_same::value) + mtx_field = REAL; + else { + if (!std::is_floating_point::value) + throw std::runtime_error( + "scalar_t in read_mtx() incompatible with float or double typed " + "MatrixMarket file."); + else + mtx_field = REAL; + } + } else if (fline.find("complex") != std::string::npos) { + if (!(std::is_same>::value || + std::is_same>::value)) + throw std::runtime_error( + "scalar_t in read_mtx() incompatible with complex-typed MatrixMarket " + "file."); + else + mtx_field = COMPLEX; + } else if (fline.find("integer") != std::string::npos) { + if (std::is_integral::value || + std::is_floating_point::value || + std::is_same::value || + std::is_same::value) + mtx_field = INTEGER; + else + throw std::runtime_error( + "scalar_t in read_mtx() incompatible with integer-typed MatrixMarket " + "file."); + } else if (fline.find("pattern") != std::string::npos) { + mtx_field = PATTERN; + // any reasonable choice for scalar_t can represent "1" or "1.0 + 0i", so + // nothing to check here + } + + if (fline.find("general") != std::string::npos) { + mtx_sym = GENERAL; + } else if (fline.find("skew-symmetric") != 
std::string::npos) { + mtx_sym = SKEW_SYMMETRIC; + } else if (fline.find("symmetric") != std::string::npos) { + // checking for "symmetric" after "skew-symmetric" because it's a substring + mtx_sym = SYMMETRIC; + } else if (fline.find("hermitian") != std::string::npos || + fline.find("Hermitian") != std::string::npos) { + mtx_sym = HERMITIAN; + } + // Validate the matrix attributes + if (mtx_format == ARRAY) { + if (mtx_sym == UNDEFINED_SYMMETRY) mtx_sym = GENERAL; + if (mtx_sym != GENERAL) + throw std::runtime_error( + "array format MatrixMarket file must have general symmetry (optional " + "to include \"general\")"); + } + if (mtx_object == UNDEFINED_OBJECT) + throw std::runtime_error( + "MatrixMarket file header is missing the object type."); + if (mtx_format == UNDEFINED_FORMAT) + throw std::runtime_error("MatrixMarket file header is missing the format."); + if (mtx_field == UNDEFINED_FIELD) + throw std::runtime_error( + "MatrixMarket file header is missing the field type."); + if (mtx_sym == UNDEFINED_SYMMETRY) + throw std::runtime_error( + "MatrixMarket file header is missing the symmetry type."); + + while (1) { + getline(mmf, fline); + if (fline[0] != '%') break; + } + std::stringstream ss(fline); + lno_t nr = 0, nc = 0; + size_type nnz = 0; + ss >> nr >> nc; + if (mtx_format == COORDINATE) + ss >> nnz; + else + nnz = nr * nc; + size_type numEdges = nnz; + symmetrize = symmetrize || mtx_sym != GENERAL; + if (symmetrize && nr != nc) { + throw std::runtime_error("A non-square matrix cannot be symmetrized."); + } + if (mtx_format == ARRAY) { + // Array format only supports general symmetry and non-pattern + if (symmetrize) + throw std::runtime_error( + "array format MatrixMarket file cannot be symmetrized."); + if (mtx_field == PATTERN) + throw std::runtime_error( + "array format MatrixMarket file can't have \"pattern\" field type."); + } + if (symmetrize) { + numEdges = 2 * nnz; + } + // numEdges is only an upper bound (diagonal entries may be removed) + 
std::vector> edges(numEdges); + size_type nE = 0; + lno_t numDiagonal = 0; + for (size_type i = 0; i < nnz; ++i) { + getline(mmf, fline); + std::stringstream ss2(fline); + struct KokkosKernels::Impl::Edge tmp; + // read source, dest (edge) and weight (value) + lno_t s, d; + scalar_t w; + if (mtx_format == ARRAY) { + // In array format, entries are listed in column major order, + // so the row and column can be determined just from the index i + //(but make them 1-based indices, to match the way coordinate works) + s = i % nr + 1; // row + d = i / nr + 1; // col + } else { + // In coordinate format, row and col of each entry is read from file + ss2 >> s >> d; + } + if (mtx_field == PATTERN) + w = 1; + else + w = readScalar(ss2); + if (!transpose) { + tmp.src = s - 1; + tmp.dst = d - 1; + tmp.ew = w; + } else { + tmp.src = d - 1; + tmp.dst = s - 1; + tmp.ew = w; + } + if (tmp.src == tmp.dst) { + numDiagonal++; + if (!remove_diagonal) { + edges[nE++] = tmp; + } + continue; + } + edges[nE++] = tmp; + if (symmetrize) { + struct KokkosKernels::Impl::Edge tmp2; + tmp2.src = tmp.dst; + tmp2.dst = tmp.src; + // the symmetrized value is w, -w or conj(w) if mtx_sym is + // SYMMETRIC, SKEW_SYMMETRIC or HERMITIAN, respectively. 
+ tmp2.ew = symmetryFlip(tmp.ew, mtx_sym); + edges[nE++] = tmp2; + } + } + mmf.close(); + std::sort(edges.begin(), edges.begin() + nE); + if (transpose) { + lno_t tmp = nr; + nr = nc; + nc = tmp; + } + // idx *nv, idx *ne, idx **xadj, idx **adj, wt **wt + *nrows = nr; + *ncols = nc; + *ne = nE; + //*xadj = new idx[nr + 1]; + KokkosKernels::Impl::md_malloc(xadj, nr + 1); + //*adj = new idx[nE]; + KokkosKernels::Impl::md_malloc(adj, nE); + //*ew = new wt[nE]; + KokkosKernels::Impl::md_malloc(ew, nE); + size_type eind = 0; + size_type actual = 0; + for (lno_t i = 0; i < nr; ++i) { + (*xadj)[i] = actual; + bool is_first = true; + while (eind < nE && edges[eind].src == i) { + if (is_first || !symmetrize || eind == 0 || + (eind > 0 && edges[eind - 1].dst != edges[eind].dst)) { + (*adj)[actual] = edges[eind].dst; + (*ew)[actual] = edges[eind].ew; + ++actual; + } + is_first = false; + ++eind; + } + } + (*xadj)[nr] = actual; + *ne = actual; + return 0; +} + +// Version of read_mtx which does not capture the number of columns. +// This is the old interface; it's kept for backwards compatibility. 
+template +int read_mtx(const char *fileName, lno_t *nv, size_type *ne, size_type **xadj, + lno_t **adj, scalar_t **ew, bool symmetrize = false, + bool remove_diagonal = true, bool transpose = false) { + lno_t ncol; // will discard + return read_mtx(fileName, nv, &ncol, ne, xadj, + adj, ew, symmetrize, + remove_diagonal, transpose); +} + +template +void read_matrix(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, + scalar_t **ew, const char *filename) { + std::string strfilename(filename); + if (KokkosKernels::Impl::endswith(strfilename, ".mtx") || KokkosKernels::Impl::endswith(strfilename, ".mm")) { + read_mtx(filename, nv, ne, xadj, adj, ew, false, false, false); + } + + else if (KokkosKernels::Impl::endswith(strfilename, ".bin")) { + read_graph_bin(nv, ne, xadj, adj, ew, filename); + } + + else if (KokkosKernels::Impl::endswith(strfilename, ".crs")) { + read_graph_crs(nv, ne, xadj, adj, ew, filename); + } + + else { + throw std::runtime_error("Reader is not available\n"); + } +} + +template +crsMat_t read_kokkos_crst_matrix(const char *filename_) { + std::string strfilename(filename_); + bool isMatrixMarket = + KokkosKernels::Impl::endswith(strfilename, ".mtx") || KokkosKernels::Impl::endswith(strfilename, ".mm"); + + typedef typename crsMat_t::StaticCrsGraphType graph_t; + typedef typename graph_t::row_map_type::non_const_type row_map_view_t; + typedef typename graph_t::entries_type::non_const_type cols_view_t; + typedef typename crsMat_t::values_type::non_const_type values_view_t; + + typedef typename row_map_view_t::value_type size_type; + typedef typename cols_view_t::value_type lno_t; + typedef typename values_view_t::value_type scalar_t; + + lno_t nr, nc, *adj; + size_type *xadj, nnzA; + scalar_t *values; + + if (isMatrixMarket) { + // MatrixMarket file contains the exact number of columns + read_mtx(filename_, &nr, &nc, &nnzA, &xadj, + &adj, &values, false, false, false); + } else { + //.crs and .bin files don't contain #cols, so will compute it 
later based on + // the entries + read_matrix(&nr, &nnzA, &xadj, &adj, &values, + filename_); + } + + row_map_view_t rowmap_view("rowmap_view", nr + 1); + cols_view_t columns_view("colsmap_view", nnzA); + values_view_t values_view("values_view", nnzA); + + { + Kokkos::View> + hr(xadj, nr + 1); + Kokkos::View> + hc(adj, nnzA); + Kokkos::View> + hv(values, nnzA); + Kokkos::deep_copy(rowmap_view, hr); + Kokkos::deep_copy(columns_view, hc); + Kokkos::deep_copy(values_view, hv); + } + + if (!isMatrixMarket) { + KokkosKernels::Impl::kk_view_reduce_max( + nnzA, columns_view, nc); + nc++; + } + + graph_t static_graph(columns_view, rowmap_view); + crsMat_t crsmat("CrsMatrix", nc, values_view, static_graph); + delete[] xadj; + delete[] adj; + delete[] values; + return crsmat; +} + +template +crsGraph_t read_kokkos_crst_graph(const char *filename_) { + typedef typename crsGraph_t::row_map_type::non_const_type row_map_view_t; + typedef typename crsGraph_t::entries_type::non_const_type cols_view_t; + + typedef typename row_map_view_t::value_type size_type; + typedef typename cols_view_t::value_type lno_t; + typedef double scalar_t; + + lno_t nv, *adj; + size_type *xadj, nnzA; + scalar_t *values; + read_matrix(&nv, &nnzA, &xadj, &adj, &values, + filename_); + + row_map_view_t rowmap_view("rowmap_view", nv + 1); + cols_view_t columns_view("colsmap_view", nnzA); + + { + typename row_map_view_t::HostMirror hr = + Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = + Kokkos::create_mirror_view(columns_view); + + for (lno_t i = 0; i <= nv; ++i) { + hr(i) = xadj[i]; + } + + for (size_type i = 0; i < nnzA; ++i) { + hc(i) = adj[i]; + } + Kokkos::deep_copy(rowmap_view, hr); + Kokkos::deep_copy(columns_view, hc); + } + + lno_t ncols = 0; + KokkosKernels::Impl::kk_view_reduce_max( + nnzA, columns_view, ncols); + ncols += 1; + + crsGraph_t static_graph(columns_view, rowmap_view, ncols); + delete[] xadj; + delete[] adj; + delete[] values; + return static_graph; 
+} + +template +inline void kk_sequential_create_incidence_matrix( + nnz_lno_t num_rows, const size_type *xadj, const nnz_lno_t *adj, + size_type *i_adj // output. preallocated +) { + std::vector c_xadj(num_rows); + for (nnz_lno_t i = 0; i < num_rows; i++) { + c_xadj[i] = xadj[i]; + } + int eCnt = 0; + for (nnz_lno_t i = 0; i < num_rows; i++) { + size_type begin = xadj[i]; + size_type end = xadj[i + 1]; + nnz_lno_t adjsize = end - begin; + + for (nnz_lno_t j = 0; j < adjsize; j++) { + size_type aind = j + begin; + nnz_lno_t col = adj[aind]; + if (i < col) { + i_adj[c_xadj[i]++] = eCnt; + i_adj[c_xadj[col]++] = eCnt++; + } + } + } + + for (nnz_lno_t i = 0; i < num_rows; i++) { + if (c_xadj[i] != xadj[i + 1]) { + std::cout << "i:" << i << " c_xadj[i]:" << c_xadj[i] + << " xadj[i+1]:" << xadj[i + 1] << std::endl; + } + } +} + +template +inline void kk_sequential_create_incidence_matrix_transpose( + const nnz_lno_t num_rows, const size_type num_edges, const size_type *xadj, + const nnz_lno_t *adj, + size_type *i_xadj, // output. preallocated + nnz_lno_t *i_adj // output. preallocated +) { + for (nnz_lno_t i = 0; i < num_edges / 2 + 1; i++) { + i_xadj[i] = i * 2; + } + int eCnt = 0; + for (nnz_lno_t i = 0; i < num_rows; i++) { + size_type begin = xadj[i]; + size_type end = xadj[i + 1]; + nnz_lno_t adjsize = end - begin; + + for (nnz_lno_t j = 0; j < adjsize; j++) { + size_type aind = j + begin; + nnz_lno_t col = adj[aind]; + if (i < col) { + i_adj[eCnt++] = i; + i_adj[eCnt++] = col; + } + } + } +} + +} // namespace Impl +} // namespace KokkosKernels +#endif // _KOKKOSSPARSE_IOUTILS_HPP diff --git a/src/sparse/KokkosSparse_SortCrs.hpp b/src/sparse/KokkosSparse_SortCrs.hpp new file mode 100644 index 0000000000..03d51386e5 --- /dev/null +++ b/src/sparse/KokkosSparse_SortCrs.hpp @@ -0,0 +1,725 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef _KOKKOSSPARSE_SORTCRS_HPP +#define _KOKKOSSPARSE_SORTCRS_HPP + +#include "Kokkos_Core.hpp" +#include "KokkosKernels_Sorting.hpp" + +namespace KokkosSparse { + +// ---------------------------------- +// BSR matrix/graph sorting utilities +// ---------------------------------- + +// Sort a BRS matrix: within each row, sort entries ascending by column and +// permute the values accordingly. +template +void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, + const entries_t& entries, const values_t& values); + +template +void sort_bsr_matrix(const bsrMat_t& A); + +// ---------------------------------- +// CRS matrix/graph sorting utilities +// ---------------------------------- + +// The sort_crs* functions sort the adjacent column list for each row into +// ascending order. + +template +void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, + const values_t& values); + +template +void sort_crs_matrix(const crsMat_t& A); + +template +void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries); + +template +void sort_crs_graph(const crsGraph_t& G); + +// sort_and_merge_matrix produces a new matrix which is equivalent to A but is +// sorted and has no duplicated entries: each (i, j) is unique. Values for +// duplicated entries are summed. 
+template +crsMat_t sort_and_merge_matrix(const crsMat_t& A); + +template +crsGraph_t sort_and_merge_graph(const crsGraph_t& G); + +template +void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, + const entries_t& entries_in, rowmap_t& rowmap_out, + entries_t& entries_out); + +namespace Impl { + +template +struct SortCrsMatrixFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using scalar_t = typename values_t::non_const_value_type; + using team_mem = typename Kokkos::TeamPolicy::member_type; + // The functor owns memory for entriesAux, so it can't have + // MemoryTraits + using entries_managed_t = Kokkos::View; + using values_managed_t = Kokkos::View; + + SortCrsMatrixFunctor(bool usingRangePol, const rowmap_t& rowmap_, + const entries_t& entries_, const values_t& values_) + : rowmap(rowmap_), entries(entries_), values(values_) { + if (usingRangePol) { + entriesAux = entries_managed_t( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), + entries.extent(0)); + valuesAux = values_managed_t( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values aux"), + values.extent(0)); + } + // otherwise, aux arrays won't be allocated (sorting in place) + } + + KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { + size_type rowStart = rowmap(i); + size_type rowEnd = rowmap(i + 1); + lno_t rowNum = rowEnd - rowStart; + // Radix sort requires unsigned keys for comparison + using unsigned_lno_t = typename std::make_unsigned::type; + KokkosKernels::SerialRadixSort2( + (unsigned_lno_t*)entries.data() + rowStart, + (unsigned_lno_t*)entriesAux.data() + rowStart, values.data() + rowStart, + valuesAux.data() + rowStart, rowNum); + } + + KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { + size_type i = t.league_rank(); + size_type rowStart = rowmap(i); + size_type rowEnd = rowmap(i + 1); + lno_t rowNum = rowEnd - rowStart; + 
KokkosKernels::TeamBitonicSort2( + entries.data() + rowStart, values.data() + rowStart, rowNum, t); + } + + rowmap_t rowmap; + entries_t entries; + entries_managed_t entriesAux; + values_t values; + values_managed_t valuesAux; +}; + +template +struct SortCrsGraphFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using team_mem = typename Kokkos::TeamPolicy::member_type; + // The functor owns memory for entriesAux, so it can't have + // MemoryTraits + using entries_managed_t = Kokkos::View; + + SortCrsGraphFunctor(bool usingRangePol, const rowmap_t& rowmap_, + const entries_t& entries_) + : rowmap(rowmap_), entries(entries_) { + if (usingRangePol) { + entriesAux = entries_managed_t( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), + entries.extent(0)); + } + // otherwise, aux arrays won't be allocated (sorting in place) + } + + KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { + size_type rowStart = rowmap(i); + size_type rowEnd = rowmap(i + 1); + lno_t rowNum = rowEnd - rowStart; + // Radix sort requires unsigned keys for comparison + using unsigned_lno_t = typename std::make_unsigned::type; + KokkosKernels::SerialRadixSort( + (unsigned_lno_t*)entries.data() + rowStart, + (unsigned_lno_t*)entriesAux.data() + rowStart, rowNum); + } + + KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { + size_type i = t.league_rank(); + size_type rowStart = rowmap(i); + size_type rowEnd = rowmap(i + 1); + lno_t rowNum = rowEnd - rowStart; + KokkosKernels::TeamBitonicSort( + entries.data() + rowStart, rowNum, t); + } + + rowmap_t rowmap; + entries_t entries; + entries_managed_t entriesAux; +}; + +template +struct MergedRowmapFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using c_rowmap_t = typename rowmap_t::const_type; + + // Precondition: entries are sorted within each row + 
MergedRowmapFunctor(const rowmap_t& mergedCounts_, const c_rowmap_t& rowmap_, + const entries_t& entries_) + : mergedCounts(mergedCounts_), rowmap(rowmap_), entries(entries_) {} + + KOKKOS_INLINE_FUNCTION void operator()(lno_t row, size_type& lnewNNZ) const { + size_type rowBegin = rowmap(row); + size_type rowEnd = rowmap(row + 1); + if (rowEnd == rowBegin) { + // Row was empty to begin with + mergedCounts(row) = 0; + return; + } + // Otherwise, the first entry in the row exists + lno_t uniqueEntries = 1; + for (size_type j = rowBegin + 1; j < rowEnd; j++) { + if (entries(j - 1) != entries(j)) uniqueEntries++; + } + mergedCounts(row) = uniqueEntries; + lnewNNZ += uniqueEntries; + if (row == lno_t((rowmap.extent(0) - 1) - 1)) mergedCounts(row + 1) = 0; + } + + rowmap_t mergedCounts; + c_rowmap_t rowmap; + entries_t entries; +}; + +template +struct MatrixMergedEntriesFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using scalar_t = typename values_t::non_const_value_type; + + // Precondition: entries are sorted within each row + MatrixMergedEntriesFunctor(const rowmap_t& rowmap_, const entries_t& entries_, + const values_t& values_, + const rowmap_t& mergedRowmap_, + const entries_t& mergedEntries_, + const values_t& mergedValues_) + : rowmap(rowmap_), + entries(entries_), + values(values_), + mergedRowmap(mergedRowmap_), + mergedEntries(mergedEntries_), + mergedValues(mergedValues_) {} + + KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { + size_type rowBegin = rowmap(row); + size_type rowEnd = rowmap(row + 1); + if (rowEnd == rowBegin) { + // Row was empty to begin with, nothing to do + return; + } + // Otherwise, accumulate the value for each column + scalar_t accumVal = values(rowBegin); + lno_t accumCol = entries(rowBegin); + size_type insertPos = mergedRowmap(row); + for (size_type j = rowBegin + 1; j < rowEnd; j++) { + if (accumCol == entries(j)) { + // accumulate + 
accumVal += values(j); + } else { + // write out and reset + mergedValues(insertPos) = accumVal; + mergedEntries(insertPos) = accumCol; + insertPos++; + accumVal = values(j); + accumCol = entries(j); + } + } + // always left with the last unique entry + mergedValues(insertPos) = accumVal; + mergedEntries(insertPos) = accumCol; + } + + rowmap_t rowmap; + entries_t entries; + values_t values; + rowmap_t mergedRowmap; + entries_t mergedEntries; + values_t mergedValues; +}; + +template +struct GraphMergedEntriesFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + + // Precondition: entries are sorted within each row + GraphMergedEntriesFunctor(const rowmap_t& rowmap_, const entries_t& entries_, + const rowmap_t& mergedRowmap_, + const entries_t& mergedEntries_) + : rowmap(rowmap_), + entries(entries_), + mergedRowmap(mergedRowmap_), + mergedEntries(mergedEntries_) {} + + KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { + size_type rowBegin = rowmap(row); + size_type rowEnd = rowmap(row + 1); + if (rowEnd == rowBegin) { + // Row was empty to begin with, nothing to do + return; + } + // Otherwise, accumulate the value for each column + lno_t accumCol = entries(rowBegin); + size_type insertPos = mergedRowmap(row); + for (size_type j = rowBegin + 1; j < rowEnd; j++) { + if (accumCol != entries(j)) { + // write out and reset + mergedEntries(insertPos) = accumCol; + insertPos++; + accumCol = entries(j); + } + } + // always left with the last unique entry + mergedEntries(insertPos) = accumCol; + } + + rowmap_t rowmap; + entries_t entries; + rowmap_t mergedRowmap; + entries_t mergedEntries; +}; + +template +KOKKOS_INLINE_FUNCTION void kk_swap(T& a, T& b) { + T t = a; + a = b; + b = t; +} + +template +struct sort_bsr_functor { + using lno_t = typename entries_type::non_const_value_type; + + row_map_type rowmap; + entries_type entries; + values_type values; + const lno_t blocksize; + + 
sort_bsr_functor(row_map_type rowmap_, entries_type entries_, + values_type values_, const lno_t blocksize_) + : rowmap(rowmap_), + entries(entries_), + values(values_), + blocksize(blocksize_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const lno_t i) const { + const lno_t rowStart = rowmap(i); + const lno_t rowSize = rowmap(i + 1) - rowStart; + auto* e = entries.data() + rowStart; + auto* v = values.data() + rowStart * blocksize; + bool done = false; + while (!done) { + done = true; + for (lno_t j = 1; j < rowSize; ++j) { + const lno_t jp = j - 1; + if (e[jp] <= e[j]) continue; + Impl::kk_swap(e[jp], e[j]); + auto const vb = v + j * blocksize; + auto const vbp = v + jp * blocksize; + for (lno_t k = 0; k < blocksize; + ++k) // std::swap_ranges(vb, vb + blocksize, vbp); + Impl::kk_swap(vb[k], vbp[k]); + done = false; + } + } + } +}; + +} // namespace Impl + +// Sort a CRS matrix: within each row, sort entries ascending by column. +// At the same time, permute the values. +template +void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, + const values_t& values) { + using lno_t = typename entries_t::non_const_value_type; + using team_pol = Kokkos::TeamPolicy; + bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); + lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; + if (numRows == 0) return; + Impl::SortCrsMatrixFunctor + funct(useRadix, rowmap, entries, values); + if (useRadix) { + Kokkos::parallel_for("sort_crs_matrix", + Kokkos::RangePolicy(0, numRows), + funct); + } else { + // Try to get teamsize to be largest power of 2 not greater than avg entries + // per row + // TODO (probably important for performance): add thread-level sort also, and + // use that for small avg degree. But this works for now.
+ lno_t idealTeamSize = 1; + lno_t avgDeg = (entries.extent(0) + numRows - 1) / numRows; + while (idealTeamSize < avgDeg / 2) { + idealTeamSize *= 2; + } + team_pol temp(numRows, 1); + lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); + lno_t teamSize = std::min(idealTeamSize, maxTeamSize); + Kokkos::parallel_for("sort_crs_matrix", team_pol(numRows, teamSize), funct); + } +} + +template +void sort_crs_matrix(const crsMat_t& A) { + // Note: rowmap_t has const values, but that's OK as sorting doesn't modify it + using rowmap_t = typename crsMat_t::row_map_type; + using entries_t = typename crsMat_t::index_type::non_const_type; + using values_t = typename crsMat_t::values_type::non_const_type; + using exec_space = typename crsMat_t::execution_space; + // NOTE: the rowmap of a StaticCrsGraph is const-valued, but the + // entries and CrsMatrix values are non-const (so sorting them directly + // is allowed) + sort_crs_matrix( + A.graph.row_map, A.graph.entries, A.values); +} + +// Sort a BSR matrix: within each row, sort entries ascending by column and +// permute the values accordingly. +template +void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, + const entries_t& entries, const values_t& values) { + // TODO: this is O(N^2) mock for debugging - do regular implementation based + // on Radix/Bitonic sort (like CSR) IDEA: maybe we need only one general + // Radix2/Bitonic2 and CSR sorting may call it with blockSize=1 ? + lno_t numRows = rowmap.extent(0) ?
rowmap.extent(0) - 1 : 0; + if (numRows == 0) return; + const lno_t blocksize = blockdim * blockdim; + + assert(values.extent(0) == entries.extent(0) * blocksize); + Impl::sort_bsr_functor bsr_sorter( + rowmap, entries, values, blocksize); + Kokkos::parallel_for("sort_bsr_matrix", + Kokkos::RangePolicy(0, numRows), + bsr_sorter); +} + +// Sort a BSR matrix (like CRS but single values are replaced with contiguous +// blocks) +template +void sort_bsr_matrix(const bsrMat_t& A) { + // NOTE: unlike rowmap, entries and values are non-const, so we can sort them + // directly + sort_bsr_matrix( + A.blockDim(), A.graph.row_map, A.graph.entries, A.values); +} + +// Sort a CRS graph: within each row, sort entries ascending by column. +template +void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) { + using lno_t = typename entries_t::non_const_value_type; + using team_pol = Kokkos::TeamPolicy; + bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); + lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; + if (numRows == 0) return; + Impl::SortCrsGraphFunctor funct( + useRadix, rowmap, entries); + if (useRadix) { + Kokkos::parallel_for("sort_crs_graph", + Kokkos::RangePolicy(0, numRows), + funct); + } else { + // Try to get teamsize to be largest power of 2 less than or equal to + // half the entries per row. 0.5 * #entries is bitonic's parallelism within + // a row. + // TODO (probably important for performance): add thread-level sort also, and + // use that for small avg degree. But this works for now.
+ lno_t idealTeamSize = 1; + lno_t avgDeg = (entries.extent(0) + numRows - 1) / numRows; + while (idealTeamSize < avgDeg / 2) { + idealTeamSize *= 2; + } + team_pol temp(numRows, 1); + lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); + lno_t teamSize = std::min(idealTeamSize, maxTeamSize); + Kokkos::parallel_for("sort_crs_graph", team_pol(numRows, teamSize), funct); + } +} + +template +void sort_crs_graph(const crsGraph_t& G) { + static_assert( + !std::is_const::value, + "sort_crs_graph requires StaticCrsGraph entries to be non-const."); + sort_crs_graph(G.row_map, G.entries); +} + +// Sort the rows of matrix, and merge duplicate entries. +template +crsMat_t sort_and_merge_matrix(const crsMat_t& A) { + using c_rowmap_t = typename crsMat_t::row_map_type; + using rowmap_t = typename crsMat_t::row_map_type::non_const_type; + using entries_t = typename crsMat_t::index_type::non_const_type; + using values_t = typename crsMat_t::values_type::non_const_type; + using size_type = typename rowmap_t::non_const_value_type; + using exec_space = typename crsMat_t::execution_space; + using range_t = Kokkos::RangePolicy; + sort_crs_matrix(A); + // Count entries per row into a new rowmap, in terms of merges that can be + // done + rowmap_t mergedRowmap( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "SortedMerged rowmap"), + A.numRows() + 1); + size_type numCompressedEntries = 0; + Kokkos::parallel_reduce(range_t(0, A.numRows()), + Impl::MergedRowmapFunctor( + mergedRowmap, A.graph.row_map, A.graph.entries), + numCompressedEntries); + // Prefix sum to get rowmap + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(A.numRows() + 1, + mergedRowmap); + entries_t mergedEntries("SortedMerged entries", numCompressedEntries); + values_t mergedValues("SortedMerged values", numCompressedEntries); + // Compute merged entries and values + Kokkos::parallel_for( + range_t(0, A.numRows()), + Impl::MatrixMergedEntriesFunctor( + A.graph.row_map, A.graph.entries, 
A.values, mergedRowmap, + mergedEntries, mergedValues)); + // Finally, construct the new compressed matrix + return crsMat_t("SortedMerged", A.numRows(), A.numCols(), + numCompressedEntries, mergedValues, mergedRowmap, + mergedEntries); +} + +template +void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, + const entries_t& entries_in, rowmap_t& rowmap_out, + entries_t& entries_out) { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using range_t = Kokkos::RangePolicy; + using const_rowmap_t = typename rowmap_t::const_type; + lno_t numRows = rowmap_in.extent(0); + if (numRows <= 1) { + // Matrix has zero rows + rowmap_out = rowmap_t(); + entries_out = entries_t(); + return; + } + numRows--; + // Sort in place + sort_crs_graph(rowmap_in, entries_in); + // Count entries per row into a new rowmap, in terms of merges that can be + // done + rowmap_out = rowmap_t( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "SortedMerged rowmap"), + numRows + 1); + size_type numCompressedEntries = 0; + Kokkos::parallel_reduce(range_t(0, numRows), + Impl::MergedRowmapFunctor( + rowmap_out, rowmap_in, entries_in), + numCompressedEntries); + // Prefix sum to get rowmap + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(numRows + 1, + rowmap_out); + entries_out = entries_t("SortedMerged entries", numCompressedEntries); + // Compute merged entries and values + Kokkos::parallel_for( + range_t(0, numRows), + Impl::GraphMergedEntriesFunctor( + rowmap_in, entries_in, rowmap_out, entries_out)); +} + +template +crsGraph_t sort_and_merge_graph(const crsGraph_t& G) { + using rowmap_t = typename crsGraph_t::row_map_type::non_const_type; + using entries_t = typename crsGraph_t::entries_type; + static_assert( + !std::is_const::value, + "sort_and_merge_graph requires StaticCrsGraph entries to be non-const."); + rowmap_t mergedRowmap; + entries_t mergedEntries; + sort_and_merge_graph(G.row_map, 
G.entries, mergedRowmap, + mergedEntries); + return crsGraph_t(mergedEntries, mergedRowmap); +} + +} // namespace KokkosSparse + +namespace KokkosKernels { + +// ---------------------------------- +// BSR matrix/graph sorting utilities +// ---------------------------------- + +// Sort a BSR matrix: within each row, sort entries ascending by column and +// permute the values accordingly. +template +[[deprecated]] +void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, + const entries_t& entries, const values_t& values) { + KokkosSparse::sort_bsr_matrix(blockdim, rowmap, entries, values); +} + +template +[[deprecated]] +void sort_bsr_matrix(const bsrMat_t& A) { + KokkosSparse::sort_bsr_matrix(A); +} + +// ---------------------------------- +// CRS matrix/graph sorting utilities +// ---------------------------------- + +// The sort_crs* functions sort the adjacent column list for each row into +// ascending order. + +template +[[deprecated]] +void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, + const values_t& values) { + KokkosSparse::sort_crs_matrix(rowmap, entries, values); +} + +template +[[deprecated]] +void sort_crs_matrix(const crsMat_t& A) { + KokkosSparse::sort_crs_matrix(A); +} + +template +[[deprecated]] +void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) { + KokkosSparse::sort_crs_graph(rowmap, entries); +} + +template +[[deprecated]] +void sort_crs_graph(const crsGraph_t& G) { + KokkosSparse::sort_crs_graph(G); +} + +// sort_and_merge_matrix produces a new matrix which is equivalent to A but is +// sorted and has no duplicated entries: each (i, j) is unique. Values for +// duplicated entries are summed.
+template +[[deprecated]] +crsMat_t sort_and_merge_matrix(const crsMat_t& A) { + return KokkosSparse::sort_and_merge_matrix(A);  // forward the merged matrix; flowing off the end of a non-void function is UB +} + +template +[[deprecated]] +crsGraph_t sort_and_merge_graph(const crsGraph_t& G) { + return KokkosSparse::sort_and_merge_graph(G);  // forward the merged graph; flowing off the end of a non-void function is UB +} + +template +[[deprecated]] +void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, + const entries_t& entries_in, rowmap_t& rowmap_out, + entries_t& entries_out) { + KokkosSparse::sort_and_merge_graph(rowmap_in, entries_in, rowmap_out, entries_out); +} + +// For backward compatibility: keep the public interface accessible in +// KokkosKernels::Impl:: +namespace Impl { +template +[[deprecated]] void sort_crs_graph(const rowmap_t& rowmap, + const entries_t& entries) { + KokkosKernels::sort_crs_graph(rowmap, + entries); +} + +template +[[deprecated]] void sort_crs_matrix(const rowmap_t& rowmap, + const entries_t& entries, + const values_t& values) { + KokkosKernels::sort_crs_matrix(rowmap, entries, values); +} + +template +[[deprecated]] void sort_crs_matrix(const crsMat_t& A) { + KokkosKernels::sort_crs_matrix(A); +} + +template +[[deprecated]] void sort_and_merge_graph( + const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, + rowmap_t& rowmap_out, entries_t& entries_out) { + KokkosKernels::sort_and_merge_graph( + rowmap_in, entries_in, rowmap_out, entries_out); +} + +template +[[deprecated]] crsMat_t sort_and_merge_matrix(const crsMat_t& A) { + return KokkosKernels::sort_and_merge_matrix(A); +} + +} // namespace Impl +} // namespace KokkosKernels + +#endif // _KOKKOSSPARSE_SORTCRS_HPP diff --git a/src/common/KokkosKernels_SparseUtils.hpp b/src/sparse/KokkosSparse_Utils.hpp similarity index 100% rename from src/common/KokkosKernels_SparseUtils.hpp rename to src/sparse/KokkosSparse_Utils.hpp diff --git a/src/common/KokkosKernels_SparseUtils_cusparse.hpp b/src/sparse/KokkosSparse_Utils_cusparse.hpp similarity index 100% rename from
src/common/KokkosKernels_SparseUtils_cusparse.hpp rename to src/sparse/KokkosSparse_Utils_cusparse.hpp diff --git a/src/common/KokkosKernels_SparseUtils_mkl.hpp b/src/sparse/KokkosSparse_Utils_mkl.hpp similarity index 100% rename from src/common/KokkosKernels_SparseUtils_mkl.hpp rename to src/sparse/KokkosSparse_Utils_mkl.hpp diff --git a/src/common/KokkosKernels_SparseUtils_rocsparse.hpp b/src/sparse/KokkosSparse_Utils_rocsparse.hpp similarity index 100% rename from src/common/KokkosKernels_SparseUtils_rocsparse.hpp rename to src/sparse/KokkosSparse_Utils_rocsparse.hpp diff --git a/src/sparse/KokkosSparse_sptrsv_cholmod.hpp b/src/sparse/KokkosSparse_sptrsv_cholmod.hpp index 796ee579bd..6d354047cf 100644 --- a/src/sparse/KokkosSparse_sptrsv_cholmod.hpp +++ b/src/sparse/KokkosSparse_sptrsv_cholmod.hpp @@ -56,7 +56,7 @@ defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) #include "cholmod.h" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_sptrsv_supernode.hpp" namespace KokkosSparse { diff --git a/src/sparse/KokkosSparse_sptrsv_supernode.hpp b/src/sparse/KokkosSparse_sptrsv_supernode.hpp index fa9a607be7..1c86121bde 100644 --- a/src/sparse/KokkosSparse_sptrsv_supernode.hpp +++ b/src/sparse/KokkosSparse_sptrsv_supernode.hpp @@ -63,7 +63,7 @@ #include "KokkosBatched_Trmm_Decl.hpp" #include "KokkosBatched_Trmm_Serial_Impl.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_SortCrs.hpp" #include "KokkosSparse_sptrsv.hpp" namespace KokkosSparse { @@ -597,7 +597,7 @@ host_graph_t generate_supernodal_graph(bool col_major, graph_t &graph, #endif // sort column ids per row - KokkosKernels::sort_crs_graph(hr, hc); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time_seconds = timer.seconds(); diff --git a/src/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp b/src/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp index 0f265dfbc4..62b86ca72e 100644 --- a/src/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp +++ 
b/src/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp @@ -52,7 +52,7 @@ #include "KokkosKernels_Uniform_Initialized_MemoryPool.hpp" #include "KokkosKernels_BitUtils.hpp" #include "KokkosKernels_SimpleUtils.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_SortCrs.hpp" // FOR DEBUGGING #include "KokkosBlas1_nrm2.hpp" @@ -979,8 +979,8 @@ class PointGaussSeidel { gsHandle->set_long_row_x(long_row_x); } else { // Just sort rows by ID. - KokkosKernels::sort_crs_graph(color_xadj, color_adj); + KokkosSparse::sort_crs_graph(color_xadj, color_adj); } #ifdef KOKKOSSPARSE_IMPL_TIME_REVERSE MyExecSpace().fence(); diff --git a/src/sparse/impl/KokkosSparse_spadd_symbolic_impl.hpp b/src/sparse/impl/KokkosSparse_spadd_symbolic_impl.hpp index 2131cec751..c4ae435f55 100644 --- a/src/sparse/impl/KokkosSparse_spadd_symbolic_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spadd_symbolic_impl.hpp @@ -46,7 +46,7 @@ #define _KOKKOS_SPADD_SYMBOLIC_IMPL_HPP #include "KokkosKernels_Handle.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_SortCrs.hpp" #include "Kokkos_ArithTraits.hpp" namespace KokkosSparse { @@ -593,8 +593,8 @@ void spadd_symbolic_impl( "KokkosSparse::SpAdd:Symbolic::InputNotSorted::UnmergedSum", range_type(0, nrows), unmergedSum); // sort the unmerged sum - KokkosKernels::sort_crs_matrix( + KokkosSparse::sort_crs_matrix( c_rowmap_upperbound, c_entries_uncompressed, ab_perm); ordinal_view_t a_pos( Kokkos::view_alloc(Kokkos::WithoutInitializing, "A entry positions"), diff --git a/src/sparse/impl/KokkosSparse_spgemm_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_impl.hpp index 9b4c28c877..dadc944b09 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_impl.hpp @@ -47,7 +47,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index d1bfb3db5c..9a6ab70f9e 100644 --- 
a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -46,7 +46,7 @@ #define _KOKKOSSPGEMMMKL_HPP #include "KokkosKernels_config.h" -#include "KokkosKernels_SparseUtils_mkl.hpp" +#include "KokkosSparse_Utils_mkl.hpp" #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #include "mkl_spblas.h" diff --git a/src/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp b/src/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp index 19bc5ec163..6adafd6319 100644 --- a/src/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp +++ b/src/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp @@ -57,7 +57,7 @@ // needed for classical GS #include "KokkosSparse_sptrsv.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_gauss_seidel_handle.hpp" @@ -854,10 +854,10 @@ class TwostageGaussSeidel { // values // CuSparse needs matrix sorted by column indexes for each row // TODO: may need to move this to symbolic/numeric of sptrsv - KokkosKernels::sort_crs_matrix( + KokkosSparse::sort_crs_matrix( rowmap_viewL, column_viewL, values_viewL); - KokkosKernels::sort_crs_matrix( rowmap_viewU, column_viewU, values_viewU); diff --git a/unit_test/common/Test_Common.hpp b/unit_test/common/Test_Common.hpp index 9d6958e816..cc4204d076 100644 --- a/unit_test/common/Test_Common.hpp +++ b/unit_test/common/Test_Common.hpp @@ -8,7 +8,6 @@ // #include #include #include -#include #include #include #include diff --git a/unit_test/common/Test_Common_Sorting.hpp b/unit_test/common/Test_Common_Sorting.hpp index 1580a0c98b..f0320cb637 100644 --- a/unit_test/common/Test_Common_Sorting.hpp +++ b/unit_test/common/Test_Common_Sorting.hpp @@ -525,226 +525,6 @@ void testBitonicSortLexicographic() { ASSERT_TRUE(ordered); } -template -void testSortCRS(default_lno_t numRows, default_lno_t numCols, - default_size_type nnz, bool doValues, bool doStructInterface) { - using scalar_t = default_scalar; - using lno_t = 
default_lno_t; - using size_type = default_size_type; - using mem_space = typename exec_space::memory_space; - using device_t = Kokkos::Device; - using crsMat_t = - KokkosSparse::CrsMatrix; - using rowmap_t = typename crsMat_t::row_map_type; - using entries_t = typename crsMat_t::index_type; - using values_t = typename crsMat_t::values_type; - // Create a random matrix on device - // IMPORTANT: kk_generate_sparse_matrix does not sort the rows, if it did this - // wouldn't test anything - crsMat_t A = KokkosKernels::Impl::kk_generate_sparse_matrix( - numRows, numCols, nnz, 2, numCols / 2); - auto rowmap = A.graph.row_map; - auto entries = A.graph.entries; - auto values = A.values; - Kokkos::View rowmapHost("rowmap host", - numRows + 1); - Kokkos::View entriesHost("sorted entries host", - nnz); - Kokkos::View valuesHost("sorted values host", - nnz); - Kokkos::deep_copy(rowmapHost, rowmap); - Kokkos::deep_copy(entriesHost, entries); - Kokkos::deep_copy(valuesHost, values); - struct ColValue { - ColValue() {} - ColValue(lno_t c, scalar_t v) : col(c), val(v) {} - bool operator<(const ColValue& rhs) const { return col < rhs.col; } - bool operator==(const ColValue& rhs) const { - return col == rhs.col && val == rhs.val; - } - lno_t col; - scalar_t val; - }; - // sort one row at a time on host using STL. 
- { - for (lno_t i = 0; i < numRows; i++) { - std::vector rowCopy; - for (size_type j = rowmapHost(i); j < rowmapHost(i + 1); j++) - rowCopy.emplace_back(entriesHost(j), valuesHost(j)); - std::sort(rowCopy.begin(), rowCopy.end()); - // write sorted row back - for (size_t j = 0; j < rowCopy.size(); j++) { - entriesHost(rowmapHost(i) + j) = rowCopy[j].col; - valuesHost(rowmapHost(i) + j) = rowCopy[j].val; - } - } - } - // call the actual sort routine being tested - if (doValues) { - if (doStructInterface) { - KokkosKernels::sort_crs_matrix(A); - } else { - KokkosKernels::sort_crs_matrix( - A.graph.row_map, A.graph.entries, A.values); - } - } else { - if (doStructInterface) { - KokkosKernels::sort_crs_graph(A.graph); - } else { - KokkosKernels::sort_crs_graph( - A.graph.row_map, A.graph.entries); - } - } - // Copy to host and compare - Kokkos::View entriesOut("sorted entries host", - nnz); - Kokkos::View valuesOut("sorted values host", - nnz); - Kokkos::deep_copy(entriesOut, entries); - Kokkos::deep_copy(valuesOut, values); - for (size_type i = 0; i < nnz; i++) { - EXPECT_EQ(entriesHost(i), entriesOut(i)) - << "Sorted column indices are wrong!"; - if (doValues) { - EXPECT_EQ(valuesHost(i), valuesOut(i)) << "Sorted values are wrong!"; - } - } -} - -template -void testSortCRSUnmanaged(bool doValues, bool doStructInterface) { - // This test is about bug #960. 
- using scalar_t = default_scalar; - using lno_t = default_lno_t; - using size_type = default_size_type; - using mem_space = typename exec_space::memory_space; - using device_t = Kokkos::Device; - using crsMat_t = - KokkosSparse::CrsMatrix, - size_type>; - using crsMat_Managed_t = - KokkosSparse::CrsMatrix; - using rowmap_t = typename crsMat_t::row_map_type; - using entries_t = typename crsMat_t::index_type; - using values_t = typename crsMat_t::values_type; - const lno_t numRows = 50; - const lno_t numCols = numRows; - size_type nnz = numRows * 5; - // Create a random matrix on device - // IMPORTANT: kk_generate_sparse_matrix does not sort the rows, if it did this - // wouldn't test anything - crsMat_Managed_t A_managed = - KokkosKernels::Impl::kk_generate_sparse_matrix( - numRows, numCols, nnz, 2, numCols / 2); - crsMat_t A(A_managed); - auto rowmap = A.graph.row_map; - auto entries = A.graph.entries; - auto values = A.values; - if (doValues) { - if (doStructInterface) { - KokkosKernels::sort_crs_matrix(A); - } else { - KokkosKernels::sort_crs_matrix( - A.graph.row_map, A.graph.entries, A.values); - } - } else { - if (doStructInterface) { - KokkosKernels::sort_crs_graph(A.graph); - } else { - KokkosKernels::sort_crs_graph( - A.graph.row_map, A.graph.entries); - } - } -} - -template -void testSortAndMerge() { - using size_type = default_size_type; - using lno_t = default_lno_t; - using scalar_t = default_scalar; - using mem_space = typename exec_space::memory_space; - using device_t = Kokkos::Device; - using crsMat_t = - KokkosSparse::CrsMatrix; - using rowmap_t = typename crsMat_t::row_map_type::non_const_type; - using entries_t = typename crsMat_t::index_type; - using values_t = typename crsMat_t::values_type; - using Kokkos::HostSpace; - using Kokkos::MemoryTraits; - using Kokkos::Unmanaged; - // Create a small CRS matrix on host - std::vector inRowmap = {0, 4, 4, 5, 7, 10}; - std::vector inEntries = { - 4, 3, 5, 3, // row 0 - // row 1 has no entries - 6, // 
row 2 - 2, 2, // row 3 - 0, 1, 2 // row 4 - }; - // note: choosing values that can be represented exactly by float - std::vector inValues = { - 1.5, 4, 1, -3, // row 0 - // row 1 - 2, // row 2 - -1, -2, // row 3 - 0, 3.5, -2.25 // row 4 - }; - lno_t nrows = 5; - lno_t ncols = 7; - size_type nnz = inEntries.size(); - Kokkos::View> hostInRowmap( - inRowmap.data(), nrows + 1); - Kokkos::View> hostInEntries( - inEntries.data(), nnz); - Kokkos::View> hostInValues( - inValues.data(), nnz); - rowmap_t devInRowmap("", nrows + 1); - entries_t devInEntries("", nnz); - values_t devInValues("", nnz); - Kokkos::deep_copy(devInRowmap, hostInRowmap); - Kokkos::deep_copy(devInEntries, hostInEntries); - Kokkos::deep_copy(devInValues, hostInValues); - crsMat_t input("Input", nrows, ncols, nnz, devInValues, devInRowmap, - devInEntries); - crsMat_t output = KokkosKernels::sort_and_merge_matrix(input); - exec_space().fence(); - EXPECT_EQ(output.numRows(), nrows); - EXPECT_EQ(output.numCols(), ncols); - auto outRowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - output.graph.row_map); - auto outEntries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - output.graph.entries); - auto outValues = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output.values); - // Expect 2 merges to have taken place - std::vector goldRowmap = {0, 3, 3, 4, 5, 8}; - std::vector goldEntries = { - 3, 4, 5, // row 0 - // row 1 has no entries - 6, // row 2 - 2, // row 3 - 0, 1, 2 // row 4 - }; - // note: choosing values that can be represented exactly by float - std::vector goldValues = { - 1, 1.5, 1, // row 0 - // row 1 - 2, // row 2 - -3, // row 3 - 0, 3.5, -2.25 // row 4 - }; - EXPECT_EQ(goldRowmap.size(), outRowmap.extent(0)); - EXPECT_EQ(goldEntries.size(), outEntries.extent(0)); - EXPECT_EQ(goldValues.size(), outValues.extent(0)); - EXPECT_EQ(goldValues.size(), output.nnz()); - for (lno_t i = 0; i < nrows + 1; i++) EXPECT_EQ(goldRowmap[i], outRowmap(i)); - for 
(size_type i = 0; i < output.nnz(); i++) { - EXPECT_EQ(goldEntries[i], outEntries(i)); - EXPECT_EQ(goldValues[i], outValues(i)); - } -} - TEST_F(TestCategory, common_serial_radix) { // Test serial radix over some contiguous small arrays // 1st arg is #arrays, 2nd arg is max subarray size @@ -805,31 +585,4 @@ TEST_F(TestCategory, common_device_bitonic) { testBitonicSortLexicographic(); } -TEST_F(TestCategory, common_sort_crsgraph) { - for (int doStructInterface = 0; doStructInterface < 2; doStructInterface++) { - testSortCRS(10, 10, 20, false, doStructInterface); - testSortCRS(100, 100, 2000, false, doStructInterface); - testSortCRS(1000, 1000, 30000, false, doStructInterface); - testSortCRSUnmanaged(false, doStructInterface); - } -} - -TEST_F(TestCategory, common_sort_crsmatrix) { - for (int doStructInterface = 0; doStructInterface < 2; doStructInterface++) { - testSortCRS(10, 10, 20, true, doStructInterface); - testSortCRS(100, 100, 2000, true, doStructInterface); - testSortCRS(1000, 1000, 30000, true, doStructInterface); - testSortCRSUnmanaged(true, doStructInterface); - } -} - -TEST_F(TestCategory, common_sort_crs_longrows) { - testSortCRS(1, 50000, 10000, false, false); - testSortCRS(1, 50000, 10000, true, false); -} - -TEST_F(TestCategory, common_sort_merge_crsmatrix) { - testSortAndMerge(); -} - #endif diff --git a/unit_test/graph/Test_Graph_graph_color.hpp b/unit_test/graph/Test_Graph_graph_color.hpp index ef7c14a931..da86546862 100644 --- a/unit_test/graph/Test_Graph_graph_color.hpp +++ b/unit_test/graph/Test_Graph_graph_color.hpp @@ -47,8 +47,8 @@ #include "KokkosGraph_Distance1Color.hpp" #include "KokkosSparse_CrsMatrix.hpp" -#include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosKernels_Handle.hpp" using namespace KokkosKernels; @@ -115,7 +115,7 @@ void test_coloring(lno_t numRows, size_type nnz, lno_t bandwidth, // typedef typename 
lno_view_t::non_const_value_type size_type; lno_t numCols = numRows; - crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, row_size_variance, bandwidth); typename lno_view_t::non_const_type sym_xadj; diff --git a/unit_test/graph/Test_Graph_graph_color_deterministic.hpp b/unit_test/graph/Test_Graph_graph_color_deterministic.hpp index ec718e9aa4..2fd64675ec 100644 --- a/unit_test/graph/Test_Graph_graph_color_deterministic.hpp +++ b/unit_test/graph/Test_Graph_graph_color_deterministic.hpp @@ -48,7 +48,7 @@ #include "KokkosGraph_Distance1Color.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosKernels_Handle.hpp" using namespace KokkosKernels; diff --git a/unit_test/graph/Test_Graph_graph_color_distance2.hpp b/unit_test/graph/Test_Graph_graph_color_distance2.hpp index 70158941a8..45444cd136 100644 --- a/unit_test/graph/Test_Graph_graph_color_distance2.hpp +++ b/unit_test/graph/Test_Graph_graph_color_distance2.hpp @@ -49,8 +49,8 @@ #include "KokkosGraph_Distance2Color.hpp" #include "KokkosGraph_MIS2.hpp" #include "KokkosSparse_CrsMatrix.hpp" -#include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosKernels_Handle.hpp" #include "KokkosKernels_ExecSpaceUtils.hpp" @@ -159,7 +159,7 @@ void test_dist2_coloring(lno_t numVerts, size_type nnz, lno_t bandwidth, KokkosKernelsHandle; // Generate graph, and add some out-of-bounds columns - crsMat A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph @@ -216,7 +216,7 @@ void test_bipartite_symmetric(lno_t numVerts, size_type nnz, lno_t 
bandwidth, KokkosKernelsHandle; // Generate graph, and add some out-of-bounds columns - crsMat A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph @@ -273,7 +273,7 @@ void test_bipartite(lno_t numRows, lno_t numCols, size_type nnz, KokkosKernelsHandle; // Generate graph - crsMat A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, row_size_variance, bandwidth); auto G = A.graph; rowmap_t t_rowmap("rowmap^T", numCols + 1); diff --git a/unit_test/graph/Test_Graph_mis2.hpp b/unit_test/graph/Test_Graph_mis2.hpp index ed3acc3b85..c1b5e179fe 100644 --- a/unit_test/graph/Test_Graph_mis2.hpp +++ b/unit_test/graph/Test_Graph_mis2.hpp @@ -50,7 +50,8 @@ #include "KokkosGraph_ExplicitCoarsening.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosKernels_Handle.hpp" #include "KokkosKernels_ExecSpaceUtils.hpp" @@ -122,7 +123,7 @@ void test_mis2(lno_t numVerts, size_type nnz, lno_t bandwidth, using rowmap_t = typename c_rowmap_t::non_const_type; using entries_t = typename c_entries_t::non_const_type; // Generate graph, and add some out-of-bounds columns - crsMat A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph @@ -164,7 +165,7 @@ void test_mis2_coarsening(lno_t numVerts, size_type nnz, lno_t bandwidth, using entries_t = typename c_entries_t::non_const_type; using labels_t = entries_t; // Generate graph, and add some out-of-bounds columns - crsMat A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat A = 
KokkosSparse::Impl::kk_generate_sparse_matrix( numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph diff --git a/unit_test/sparse/Test_Sparse.hpp b/unit_test/sparse/Test_Sparse.hpp index 65cbb40ca5..e75eb1ce6a 100644 --- a/unit_test/sparse/Test_Sparse.hpp +++ b/unit_test/sparse/Test_Sparse.hpp @@ -13,12 +13,14 @@ #include "Test_Sparse_spgemm_jacobi.hpp" #include "Test_Sparse_spgemm.hpp" #include "Test_Sparse_bspgemm.hpp" +#include "Test_Sparse_SortCrs.hpp" #include "Test_Sparse_spiluk.hpp" #include "Test_Sparse_spmv.hpp" #include "Test_Sparse_spmv_blockcrs.hpp" #include "Test_Sparse_spmv_bsr.hpp" #include "Test_Sparse_sptrsv.hpp" #include "Test_Sparse_trsv.hpp" +#include "Test_Sparse_Transpose.hpp" #include "Test_Sparse_TestUtils_RandCscMat.hpp" #include "Test_Sparse_csc2csr.hpp" diff --git a/unit_test/sparse/Test_Sparse_SortCrs.hpp b/unit_test/sparse/Test_Sparse_SortCrs.hpp new file mode 100644 index 0000000000..edae86304c --- /dev/null +++ b/unit_test/sparse/Test_Sparse_SortCrs.hpp @@ -0,0 +1,311 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Test_Sparse_SortCrs.hpp +/// \brief Tests for sort_crs_matrix and sort_crs_graph in KokkosSparse_SortCrs.hpp + +#ifndef KOKKOSSPARSE_SORTCRSTEST_HPP +#define KOKKOSSPARSE_SORTCRSTEST_HPP + +#include +#include +#include +#include "KokkosSparse_IOUtils.hpp" +#include +#include +#include +#include +#include +#include + + + +template +void testSortCRS(default_lno_t numRows, default_lno_t numCols, + default_size_type nnz, bool doValues, bool doStructInterface) { + using scalar_t = default_scalar; + using lno_t = default_lno_t; + using size_type = default_size_type; + using mem_space = typename exec_space::memory_space; + using device_t = Kokkos::Device; + using crsMat_t = + KokkosSparse::CrsMatrix; + using rowmap_t = typename crsMat_t::row_map_type; + using entries_t = typename crsMat_t::index_type; + using values_t = typename crsMat_t::values_type; + // Create a 
random matrix on device + // IMPORTANT: kk_generate_sparse_matrix does not sort the rows, if it did this + // wouldn't test anything + crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( + numRows, numCols, nnz, 2, numCols / 2); + auto rowmap = A.graph.row_map; + auto entries = A.graph.entries; + auto values = A.values; + Kokkos::View rowmapHost("rowmap host", + numRows + 1); + Kokkos::View entriesHost("sorted entries host", + nnz); + Kokkos::View valuesHost("sorted values host", + nnz); + Kokkos::deep_copy(rowmapHost, rowmap); + Kokkos::deep_copy(entriesHost, entries); + Kokkos::deep_copy(valuesHost, values); + struct ColValue { + ColValue() {} + ColValue(lno_t c, scalar_t v) : col(c), val(v) {} + bool operator<(const ColValue& rhs) const { return col < rhs.col; } + bool operator==(const ColValue& rhs) const { + return col == rhs.col && val == rhs.val; + } + lno_t col; + scalar_t val; + }; + // sort one row at a time on host using STL. + { + for (lno_t i = 0; i < numRows; i++) { + std::vector rowCopy; + for (size_type j = rowmapHost(i); j < rowmapHost(i + 1); j++) + rowCopy.emplace_back(entriesHost(j), valuesHost(j)); + std::sort(rowCopy.begin(), rowCopy.end()); + // write sorted row back + for (size_t j = 0; j < rowCopy.size(); j++) { + entriesHost(rowmapHost(i) + j) = rowCopy[j].col; + valuesHost(rowmapHost(i) + j) = rowCopy[j].val; + } + } + } + // call the actual sort routine being tested + if (doValues) { + if (doStructInterface) { + KokkosSparse::sort_crs_matrix(A); + } else { + KokkosSparse::sort_crs_matrix( + A.graph.row_map, A.graph.entries, A.values); + } + } else { + if (doStructInterface) { + KokkosSparse::sort_crs_graph(A.graph); + } else { + KokkosSparse::sort_crs_graph( + A.graph.row_map, A.graph.entries); + } + } + // Copy to host and compare + Kokkos::View entriesOut("sorted entries host", + nnz); + Kokkos::View valuesOut("sorted values host", + nnz); + Kokkos::deep_copy(entriesOut, entries); + Kokkos::deep_copy(valuesOut, values); + for 
(size_type i = 0; i < nnz; i++) { + EXPECT_EQ(entriesHost(i), entriesOut(i)) + << "Sorted column indices are wrong!"; + if (doValues) { + EXPECT_EQ(valuesHost(i), valuesOut(i)) << "Sorted values are wrong!"; + } + } +} + +template +void testSortCRSUnmanaged(bool doValues, bool doStructInterface) { + // This test is about bug #960. + using scalar_t = default_scalar; + using lno_t = default_lno_t; + using size_type = default_size_type; + using mem_space = typename exec_space::memory_space; + using device_t = Kokkos::Device; + using crsMat_t = + KokkosSparse::CrsMatrix, + size_type>; + using crsMat_Managed_t = + KokkosSparse::CrsMatrix; + using rowmap_t = typename crsMat_t::row_map_type; + using entries_t = typename crsMat_t::index_type; + using values_t = typename crsMat_t::values_type; + const lno_t numRows = 50; + const lno_t numCols = numRows; + size_type nnz = numRows * 5; + // Create a random matrix on device + // IMPORTANT: kk_generate_sparse_matrix does not sort the rows, if it did this + // wouldn't test anything + crsMat_Managed_t A_managed = + KokkosSparse::Impl::kk_generate_sparse_matrix( + numRows, numCols, nnz, 2, numCols / 2); + crsMat_t A(A_managed); + auto rowmap = A.graph.row_map; + auto entries = A.graph.entries; + auto values = A.values; + if (doValues) { + if (doStructInterface) { + KokkosSparse::sort_crs_matrix(A); + } else { + KokkosSparse::sort_crs_matrix( + A.graph.row_map, A.graph.entries, A.values); + } + } else { + if (doStructInterface) { + KokkosSparse::sort_crs_graph(A.graph); + } else { + KokkosSparse::sort_crs_graph( + A.graph.row_map, A.graph.entries); + } + } +} + +template +void testSortAndMerge() { + using size_type = default_size_type; + using lno_t = default_lno_t; + using scalar_t = default_scalar; + using mem_space = typename exec_space::memory_space; + using device_t = Kokkos::Device; + using crsMat_t = + KokkosSparse::CrsMatrix; + using rowmap_t = typename crsMat_t::row_map_type::non_const_type; + using entries_t = typename 
crsMat_t::index_type; + using values_t = typename crsMat_t::values_type; + using Kokkos::HostSpace; + using Kokkos::MemoryTraits; + using Kokkos::Unmanaged; + // Create a small CRS matrix on host + std::vector inRowmap = {0, 4, 4, 5, 7, 10}; + std::vector inEntries = { + 4, 3, 5, 3, // row 0 + // row 1 has no entries + 6, // row 2 + 2, 2, // row 3 + 0, 1, 2 // row 4 + }; + // note: choosing values that can be represented exactly by float + std::vector inValues = { + 1.5, 4, 1, -3, // row 0 + // row 1 + 2, // row 2 + -1, -2, // row 3 + 0, 3.5, -2.25 // row 4 + }; + lno_t nrows = 5; + lno_t ncols = 7; + size_type nnz = inEntries.size(); + Kokkos::View> hostInRowmap( + inRowmap.data(), nrows + 1); + Kokkos::View> hostInEntries( + inEntries.data(), nnz); + Kokkos::View> hostInValues( + inValues.data(), nnz); + rowmap_t devInRowmap("", nrows + 1); + entries_t devInEntries("", nnz); + values_t devInValues("", nnz); + Kokkos::deep_copy(devInRowmap, hostInRowmap); + Kokkos::deep_copy(devInEntries, hostInEntries); + Kokkos::deep_copy(devInValues, hostInValues); + crsMat_t input("Input", nrows, ncols, nnz, devInValues, devInRowmap, + devInEntries); + crsMat_t output = KokkosSparse::sort_and_merge_matrix(input); + exec_space().fence(); + EXPECT_EQ(output.numRows(), nrows); + EXPECT_EQ(output.numCols(), ncols); + auto outRowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), + output.graph.row_map); + auto outEntries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), + output.graph.entries); + auto outValues = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output.values); + // Expect 2 merges to have taken place + std::vector goldRowmap = {0, 3, 3, 4, 5, 8}; + std::vector goldEntries = { + 3, 4, 5, // row 0 + // row 1 has no entries + 6, // row 2 + 2, // row 3 + 0, 1, 2 // row 4 + }; + // note: choosing values that can be represented exactly by float + std::vector goldValues = { + 1, 1.5, 1, // row 0 + // row 1 + 2, // row 2 + -3, // row 3 + 0, 
3.5, -2.25 // row 4 + }; + EXPECT_EQ(goldRowmap.size(), outRowmap.extent(0)); + EXPECT_EQ(goldEntries.size(), outEntries.extent(0)); + EXPECT_EQ(goldValues.size(), outValues.extent(0)); + EXPECT_EQ(goldValues.size(), output.nnz()); + for (lno_t i = 0; i < nrows + 1; i++) EXPECT_EQ(goldRowmap[i], outRowmap(i)); + for (size_type i = 0; i < output.nnz(); i++) { + EXPECT_EQ(goldEntries[i], outEntries(i)); + EXPECT_EQ(goldValues[i], outValues(i)); + } +} + +TEST_F(TestCategory, common_sort_crsgraph) { + for (int doStructInterface = 0; doStructInterface < 2; doStructInterface++) { + testSortCRS(10, 10, 20, false, doStructInterface); + testSortCRS(100, 100, 2000, false, doStructInterface); + testSortCRS(1000, 1000, 30000, false, doStructInterface); + testSortCRSUnmanaged(false, doStructInterface); + } +} + +TEST_F(TestCategory, common_sort_crsmatrix) { + for (int doStructInterface = 0; doStructInterface < 2; doStructInterface++) { + testSortCRS(10, 10, 20, true, doStructInterface); + testSortCRS(100, 100, 2000, true, doStructInterface); + testSortCRS(1000, 1000, 30000, true, doStructInterface); + testSortCRSUnmanaged(true, doStructInterface); + } +} + +TEST_F(TestCategory, common_sort_crs_longrows) { + testSortCRS(1, 50000, 10000, false, false); + testSortCRS(1, 50000, 10000, true, false); +} + +TEST_F(TestCategory, common_sort_merge_crsmatrix) { + testSortAndMerge(); +} + +#endif // KOKKOSSPARSE_SORTCRSTEST_HPP diff --git a/unit_test/common/Test_Common_Transpose.hpp b/unit_test/sparse/Test_Sparse_Transpose.hpp similarity index 95% rename from unit_test/common/Test_Common_Transpose.hpp rename to unit_test/sparse/Test_Sparse_Transpose.hpp index fba29da81d..7431d0c485 100644 --- a/unit_test/common/Test_Common_Transpose.hpp +++ b/unit_test/sparse/Test_Sparse_Transpose.hpp @@ -49,11 +49,12 @@ #include #include -#include -#include +#include #include +#include #include #include +#include template struct ExactCompare { @@ -85,7 +86,7 @@ void testTranspose(int numRows, int 
numCols, bool doValues) { using values_t = typename crsMat_t::values_type::non_const_type; size_type nnz = 10 * numRows; // Generate a matrix that has 0 entries in some rows - crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, 3 * 10, numRows / 2); // compute the transpose while unsorted, then transpose again rowmap_t t_rowmap("Rowmap^T", numCols + 1); // this view is initialized to 0 @@ -124,8 +125,8 @@ void testTranspose(int numRows, int numCols, bool doValues) { } // Sort both the transpose-transpose, and the original matrix (to compare // directly) - KokkosKernels::sort_crs_matrix(input_mat); - KokkosKernels::sort_crs_matrix( + KokkosSparse::sort_crs_matrix(input_mat); + KokkosSparse::sort_crs_matrix( tt_rowmap, tt_entries, tt_values); // The views should now be exactly identical, since they represent the same // matrix and are sorted diff --git a/unit_test/sparse/Test_Sparse_Utils_cusparse.hpp b/unit_test/sparse/Test_Sparse_Utils_cusparse.hpp index 3d85ec394a..0ad16c54d0 100644 --- a/unit_test/sparse/Test_Sparse_Utils_cusparse.hpp +++ b/unit_test/sparse/Test_Sparse_Utils_cusparse.hpp @@ -7,7 +7,7 @@ #include #include -#include "KokkosKernels_SparseUtils_cusparse.hpp" +#include "KokkosSparse_Utils_cusparse.hpp" void test_cusparse_safe_call() { bool caught_exception = false; diff --git a/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp b/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp index cd90ec39ea..0f4c9b0d67 100644 --- a/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp +++ b/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp @@ -48,7 +48,8 @@ #include "KokkosKernels_TestUtils.hpp" #include "KokkosKernels_Handle.hpp" #include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include #include #include @@ -200,7 +201,7 @@ void 
test_block_gauss_seidel_rank1(lno_t numRows, size_type nnz, lno_t block_size = params.block_size; crsMat_t crsmat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); lno_view_t pf_rm; @@ -288,7 +289,7 @@ void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t block_size = params.block_size; crsMat_t crsmat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); lno_view_t pf_rm; diff --git a/unit_test/sparse/Test_Sparse_bspgemm.hpp b/unit_test/sparse/Test_Sparse_bspgemm.hpp index a3ec84fedf..7374ac6a78 100644 --- a/unit_test/sparse/Test_Sparse_bspgemm.hpp +++ b/unit_test/sparse/Test_Sparse_bspgemm.hpp @@ -45,10 +45,11 @@ #include #include -#include "KokkosKernels_SparseUtils.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_Utils.hpp" +#include "KokkosSparse_SortCrs.hpp" #include "KokkosSparse_spgemm.hpp" #include "KokkosSparse_BsrMatrix.hpp" +#include "KokkosSparse_IOUtils.hpp" using namespace KokkosSparse; @@ -120,8 +121,8 @@ bool is_same_block_matrix(bsrMat_t output_mat_actual, return false; } - KokkosKernels::sort_bsr_matrix(output_mat_actual); - KokkosKernels::sort_bsr_matrix(output_mat_reference); + KokkosSparse::sort_bsr_matrix(output_mat_actual); + KokkosSparse::sort_bsr_matrix(output_mat_reference); bool is_identical = true; is_identical = KokkosKernels::Impl::kk_is_identical_view< @@ -187,9 +188,9 @@ void test_bspgemm(lno_t blkDim, lno_t m, lno_t k, lno_t n, size_type nnz, // Generate random compressed sparse row matrix. Randomly generated (non-zero) // values are stored in a 1-D (1 rank) array. 
- bsrMat_t A = KokkosKernels::Impl::kk_generate_sparse_matrix( + bsrMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( blkDim, m, k, nnz, row_size_variance, bandwidth); - bsrMat_t B = KokkosKernels::Impl::kk_generate_sparse_matrix( + bsrMat_t B = KokkosSparse::Impl::kk_generate_sparse_matrix( blkDim, k, n, nnz, row_size_variance, bandwidth); const bool is_empy_case = m < 1 || n < 1 || k < 1 || nnz < 1; diff --git a/unit_test/sparse/Test_Sparse_gauss_seidel.hpp b/unit_test/sparse/Test_Sparse_gauss_seidel.hpp index 6e9661ea62..627a9fc99e 100644 --- a/unit_test/sparse/Test_Sparse_gauss_seidel.hpp +++ b/unit_test/sparse/Test_Sparse_gauss_seidel.hpp @@ -47,6 +47,7 @@ #include #include "KokkosKernels_Handle.hpp" #include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" //#include #include #include @@ -61,7 +62,7 @@ #include "KokkosSparse_gauss_seidel.hpp" #include "KokkosSparse_partitioning_impl.hpp" #include "KokkosSparse_sor_sequential_impl.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_SortCrs.hpp" #include "KokkosKernels_TestUtils.hpp" // #ifndef kokkos_complex_double @@ -183,7 +184,7 @@ void test_gauss_seidel_rank1(lno_t numRows, size_type nnz, lno_t bandwidth, srand(245); lno_t numCols = numRows; crsMat_t input_mat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); if (symmetric) { // Symmetrize on host, rather than relying on the parallel versions (those @@ -272,7 +273,7 @@ void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t numCols = numRows; crsMat_t input_mat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); if (symmetric) { // Symmetrize on host, rather than relying on the parallel 
versions (those @@ -396,7 +397,7 @@ void test_sequential_sor(lno_t numRows, size_type nnz, lno_t bandwidth, crsMat_t; lno_t numCols = numRows; crsMat_t input_mat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); auto rowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), input_mat.graph.row_map); @@ -472,7 +473,7 @@ void test_balloon_clustering(lno_t numRows, size_type nnzPerRow, srand(245); size_type nnzTotal = nnzPerRow * numRows; lno_t nnzVariance = nnzPerRow / 4; - crsMat_t A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numRows, nnzTotal, nnzVariance, bandwidth); lno_row_view_t symRowmap; lno_nnz_view_t symEntries; @@ -609,7 +610,7 @@ void test_gauss_seidel_long_rows(lno_t numRows, lno_t numLongRows, rowmap.data(), numRows + 1)); crsMat_t input_mat("A", numRows, numRows, totalEntries, valuesView, rowmapView, entriesView); - input_mat = KokkosKernels::sort_and_merge_matrix(input_mat); + input_mat = KokkosSparse::sort_and_merge_matrix(input_mat); if (symmetric) { // Symmetrize on host, rather than relying on the parallel versions (those // can be tested for symmetric=false) @@ -660,11 +661,11 @@ void test_gauss_seidel_custom_coloring(lno_t numRows, lno_t nnzPerRow) { const scalar_t one = Kokkos::ArithTraits::one(); size_type nnz = nnzPerRow * numRows; crsMat_t input_mat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numRows, nnz, 0, numRows / 10, 2.0 * one); input_mat = Test::symmetrize(input_mat); - input_mat = KokkosKernels::sort_and_merge_matrix(input_mat); + input_mat = KokkosSparse::sort_and_merge_matrix(input_mat); scalar_view_t solution_x( Kokkos::view_alloc(Kokkos::WithoutInitializing, "X (correct)"), 
numRows); create_random_x_vector(solution_x); diff --git a/unit_test/sparse/Test_Sparse_rocsparse.hpp b/unit_test/sparse/Test_Sparse_rocsparse.hpp index 27e0b1f9fd..fe1bf8e9b2 100644 --- a/unit_test/sparse/Test_Sparse_rocsparse.hpp +++ b/unit_test/sparse/Test_Sparse_rocsparse.hpp @@ -7,7 +7,7 @@ #include #include #include -#include "KokkosKernels_SparseUtils_rocsparse.hpp" +#include "KokkosSparse_Utils_rocsparse.hpp" void test_rocsparse_version() { // Print version diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp index a7b9432857..a1e33c0ca6 100644 --- a/unit_test/sparse/Test_Sparse_spgemm.hpp +++ b/unit_test/sparse/Test_Sparse_spgemm.hpp @@ -45,8 +45,8 @@ #include #include -#include "KokkosKernels_SparseUtils.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_Utils.hpp" +#include "KokkosSparse_SortCrs.hpp" #include #include #include @@ -58,6 +58,7 @@ #include #include +#include // This file contains the matrix for test_issue402 #include "matrixIssue402.hpp" @@ -197,8 +198,8 @@ bool is_same_matrix(crsMat_t output_mat_actual, crsMat_t output_mat_reference) { return false; } - KokkosKernels::sort_crs_matrix(output_mat_actual); - KokkosKernels::sort_crs_matrix(output_mat_reference); + KokkosSparse::sort_crs_matrix(output_mat_actual); + KokkosSparse::sort_crs_matrix(output_mat_reference); bool is_identical = true; is_identical = KokkosKernels::Impl::kk_is_identical_view< @@ -264,9 +265,9 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, // Generate random compressed sparse row matrix. Randomly generated (non-zero) // values are stored in a 1-D (1 rank) array. 
- crsMat_t A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( m, k, nnz, row_size_variance, bandwidth); - crsMat_t B = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t B = KokkosSparse::Impl::kk_generate_sparse_matrix( k, n, nnz, row_size_variance, bandwidth); const bool is_empy_case = m < 1 || n < 1 || k < 1 || nnz < 1; diff --git a/unit_test/sparse/Test_Sparse_spgemm_jacobi.hpp b/unit_test/sparse/Test_Sparse_spgemm_jacobi.hpp index 885b1a07fe..f9db6f4d8d 100644 --- a/unit_test/sparse/Test_Sparse_spgemm_jacobi.hpp +++ b/unit_test/sparse/Test_Sparse_spgemm_jacobi.hpp @@ -45,8 +45,8 @@ #include #include -#include "KokkosKernels_SparseUtils.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_Utils.hpp" +#include "KokkosSparse_SortCrs.hpp" #include #include #include @@ -58,6 +58,7 @@ #include #include +#include using namespace KokkosSparse; using namespace KokkosSparse::Experimental; @@ -154,7 +155,7 @@ bool is_same_mat(crsMat_t output_mat1, crsMat_t output_mat2) { size_t nentries2 = output_mat2.graph.entries.extent(0); size_t nvals2 = output_mat2.values.extent(0); - KokkosKernels::sort_crs_matrix(output_mat1); + KokkosSparse::sort_crs_matrix(output_mat1); if (nrows1 != nrows2) { std::cout << "nrows1:" << nrows1 << " nrows2:" << nrows2 << std::endl; @@ -170,7 +171,7 @@ bool is_same_mat(crsMat_t output_mat1, crsMat_t output_mat2) { return false; } - KokkosKernels::sort_crs_matrix(output_mat2); + KokkosSparse::sort_crs_matrix(output_mat2); bool is_identical = true; is_identical = KokkosKernels::Impl::kk_is_identical_view< @@ -225,7 +226,7 @@ void test_spgemm_jacobi(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t numCols = numRows; crsMat_t input_mat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); crsMat_t output_mat2; diff 
--git a/unit_test/sparse/Test_Sparse_spiluk.hpp b/unit_test/sparse/Test_Sparse_spiluk.hpp index 353543b751..8f9ef99063 100644 --- a/unit_test/sparse/Test_Sparse_spiluk.hpp +++ b/unit_test/sparse/Test_Sparse_spiluk.hpp @@ -49,7 +49,7 @@ #include #include -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include #include "KokkosBlas1_nrm2.hpp" diff --git a/unit_test/sparse/Test_Sparse_spmv.hpp b/unit_test/sparse/Test_Sparse_spmv.hpp index 5cb729f311..8a15153dce 100644 --- a/unit_test/sparse/Test_Sparse_spmv.hpp +++ b/unit_test/sparse/Test_Sparse_spmv.hpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "KokkosKernels_Controls.hpp" @@ -422,7 +423,7 @@ void test_spmv(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t numCols = numRows; - crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, row_size_variance, bandwidth); lno_t nr = input_mat.numRows(); lno_t nc = input_mat.numCols(); @@ -513,7 +514,7 @@ void test_spmv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, Kokkos::fill_random(b_xt, rand_pool, randomUpperBound(max_x)); Kokkos::fill_random(b_yt, rand_pool, randomUpperBound(max_y)); - crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, row_size_variance, bandwidth); const lno_t max_nnz_per_row = @@ -574,7 +575,7 @@ void test_spmv_mv_heavy(lno_t numRows, size_type nnz, lno_t bandwidth, constexpr mag_t max_y = static_cast(10); constexpr mag_t max_val = static_cast(10); - crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numRows, nnz, row_size_variance, bandwidth); Kokkos::Random_XorShift64_Pool rand_pool( 13718); @@ -889,7 +890,7 @@ void test_spmv_controls(lno_t 
numRows, size_type nnz, lno_t bandwidth, lno_t numCols = numRows; - crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, row_size_variance, bandwidth); lno_t nr = input_mat.numRows(); lno_t nc = input_mat.numCols(); diff --git a/unit_test/sparse/Test_Sparse_sptrsv.hpp b/unit_test/sparse/Test_Sparse_sptrsv.hpp index 0b175da13d..08c5494c88 100644 --- a/unit_test/sparse/Test_Sparse_sptrsv.hpp +++ b/unit_test/sparse/Test_Sparse_sptrsv.hpp @@ -50,7 +50,7 @@ #include #include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_spmv.hpp" #include "KokkosSparse_CrsMatrix.hpp" diff --git a/unit_test/sparse/Test_Sparse_trsv.hpp b/unit_test/sparse/Test_Sparse_trsv.hpp index 4b1f00c98a..776674344a 100644 --- a/unit_test/sparse/Test_Sparse_trsv.hpp +++ b/unit_test/sparse/Test_Sparse_trsv.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -76,7 +77,7 @@ void test_trsv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, // this function creates a dense lower and upper triangular matrix. 
// TODO: SHOULD CHANGE IT TO SPARSE crsMat_t lower_part = - KokkosKernels::Impl::kk_generate_triangular_sparse_matrix( + KokkosSparse::Impl::kk_generate_triangular_sparse_matrix( 'L', numRows, numCols, nnz, row_size_variance, bandwidth); KokkosSparse::spmv("N", alpha, lower_part, b_x_copy, beta, b_y); Test::check_trsv_mv(lower_part, b_x, b_y, b_x_copy, numMV, "L", "N"); @@ -86,7 +87,7 @@ void test_trsv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, // typedef typename Kokkos::View indexview; crsMat_t upper_part = - KokkosKernels::Impl::kk_generate_triangular_sparse_matrix( + KokkosSparse::Impl::kk_generate_triangular_sparse_matrix( 'U', numRows, numCols, nnz, row_size_variance, bandwidth); KokkosSparse::spmv("N", alpha, upper_part, b_x_copy, beta, b_y); Test::check_trsv_mv(upper_part, b_x, b_y, b_x_copy, numMV, "U", "N"); From e2a88fccc4442a254a4c51cc782a191ca7130bfe Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Thu, 2 Jun 2022 17:44:54 -0600 Subject: [PATCH 2/5] Applying clang-format --- .../sparse/KokkosSparse_wiki_gauss_seidel.cpp | 99 ++++++++++--------- perf_test/graph/KokkosGraph_color.cpp | 6 +- .../sparse/KokkosSparse_multimem_spgemm.hpp | 12 +-- perf_test/sparse/KokkosSparse_pcg.cpp | 5 +- .../sparse/KokkosSparse_run_spgemm_jacobi.hpp | 12 +-- src/sparse/KokkosSparse_IOUtils.hpp | 22 +++-- src/sparse/KokkosSparse_SortCrs.hpp | 66 ++++++------- src/sparse/KokkosSparse_sptrsv_supernode.hpp | 2 +- ...okkosSparse_twostage_gauss_seidel_impl.hpp | 2 +- unit_test/sparse/Test_Sparse_SortCrs.hpp | 7 +- 10 files changed, 114 insertions(+), 119 deletions(-) diff --git a/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp b/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp index 57b8ddd4ec..ce171c46bd 100644 --- a/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp +++ b/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp @@ -8,83 +8,90 @@ #include "KokkosSparse_gauss_seidel.hpp" #include "KokkosBlas1_nrm2.hpp" -//Parallel Gauss-Seidel 
Preconditioner/Smoother +// Parallel Gauss-Seidel Preconditioner/Smoother // -Uses graph coloring to find independent row sets, // and applies GS to each set in parallel // -Here, use to solve a diagonally dominant linear system directly. -//Helper to print out colors in the shape of the grid -int main() -{ - using Scalar = default_scalar; - using Mag = Kokkos::ArithTraits::mag_type; - using Ordinal = default_lno_t; - using Offset = default_size_type; +// Helper to print out colors in the shape of the grid +int main() { + using Scalar = default_scalar; + using Mag = Kokkos::ArithTraits::mag_type; + using Ordinal = default_lno_t; + using Offset = default_size_type; using ExecSpace = Kokkos::DefaultExecutionSpace; - using MemSpace = typename ExecSpace::memory_space; - using Device = Kokkos::Device; - using Handle = KokkosKernels::Experimental:: - KokkosKernelsHandle; - using Matrix = KokkosSparse::CrsMatrix; - using Vector = typename Matrix::values_type; + using MemSpace = typename ExecSpace::memory_space; + using Device = Kokkos::Device; + using Handle = KokkosKernels::Experimental::KokkosKernelsHandle< + Offset, Ordinal, default_scalar, ExecSpace, MemSpace, MemSpace>; + using Matrix = KokkosSparse::CrsMatrix; + using Vector = typename Matrix::values_type; constexpr Ordinal numRows = 10000; - const Scalar one = Kokkos::ArithTraits::one(); - const Mag magOne = Kokkos::ArithTraits::one(); - //Solve tolerance + const Scalar one = Kokkos::ArithTraits::one(); + const Mag magOne = Kokkos::ArithTraits::one(); + // Solve tolerance const Mag tolerance = 1e-6 * magOne; Kokkos::initialize(); { - //Generate a square, strictly diagonally dominant, but nonsymmetric matrix on which Gauss-Seidel should converge. - //Get approx. 20 entries per row - //Diagonals are 2x the absolute sum of all other entries. + // Generate a square, strictly diagonally dominant, but nonsymmetric matrix + // on which Gauss-Seidel should converge. Get approx. 
20 entries per row + // Diagonals are 2x the absolute sum of all other entries. Offset nnz = numRows * 20; - Matrix A = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix(numRows, numRows, nnz, 2, 100, 1.05 * one); - std::cout << "Generated a matrix with " << numRows << " rows/cols, and " << nnz << " entries.\n"; - //Create a kernel handle, then a Gauss-Seidel handle with the default algorithm + Matrix A = + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< + Matrix>(numRows, numRows, nnz, 2, 100, 1.05 * one); + std::cout << "Generated a matrix with " << numRows << " rows/cols, and " + << nnz << " entries.\n"; + // Create a kernel handle, then a Gauss-Seidel handle with the default + // algorithm Handle handle; handle.create_gs_handle(KokkosSparse::GS_DEFAULT); - //Set up Gauss-Seidel for the graph (matrix sparsity pattern) - KokkosSparse::Experimental::gauss_seidel_symbolic(&handle, numRows, numRows, A.graph.row_map, A.graph.entries, false); - //Set up Gauss-Seidel for the matrix values (numeric) - //Another matrix with the same sparsity pattern could re-use the handle and symbolic phase, and only call numeric. - KokkosSparse::Experimental::gauss_seidel_numeric(&handle, numRows, numRows, A.graph.row_map, A.graph.entries, A.values, false); - //Now, preconditioner is ready to use. Set up an unknown vector (uninitialized) and randomized right-hand-side vector. + // Set up Gauss-Seidel for the graph (matrix sparsity pattern) + KokkosSparse::Experimental::gauss_seidel_symbolic( + &handle, numRows, numRows, A.graph.row_map, A.graph.entries, false); + // Set up Gauss-Seidel for the matrix values (numeric) + // Another matrix with the same sparsity pattern could re-use the handle and + // symbolic phase, and only call numeric. + KokkosSparse::Experimental::gauss_seidel_numeric( + &handle, numRows, numRows, A.graph.row_map, A.graph.entries, A.values, + false); + // Now, preconditioner is ready to use. 
Set up an unknown vector + // (uninitialized) and randomized right-hand-side vector. Vector x(Kokkos::view_alloc(Kokkos::WithoutInitializing, "x"), numRows); Vector b(Kokkos::view_alloc(Kokkos::WithoutInitializing, "b"), numRows); Vector res(Kokkos::view_alloc(Kokkos::WithoutInitializing, "res"), numRows); auto bHost = Kokkos::create_mirror_view(b); - for(Ordinal i = 0; i < numRows; i++) + for (Ordinal i = 0; i < numRows; i++) bHost(i) = 3 * ((one * rand()) / RAND_MAX); Kokkos::deep_copy(b, bHost); - //Measure initial residual norm ||Ax - b||, where x is 0 - Mag initialRes = KokkosBlas::nrm2(b); + // Measure initial residual norm ||Ax - b||, where x is 0 + Mag initialRes = KokkosBlas::nrm2(b); Mag scaledResNorm = magOne; - bool firstIter = true; - //Iterate until reaching the tolerance + bool firstIter = true; + // Iterate until reaching the tolerance int numIters = 0; - while(scaledResNorm > tolerance) - { - //Run one sweep of forward Gauss-Seidel (SOR with omega = 1.0) - //If this is the first iteration, tell apply: + while (scaledResNorm > tolerance) { + // Run one sweep of forward Gauss-Seidel (SOR with omega = 1.0) + // If this is the first iteration, tell apply: // * to zero out x (it was uninitialized) - // * that b has changed since the previous apply (since there was no previous apply) + // * that b has changed since the previous apply (since there was no + // previous apply) KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply( - &handle, numRows, numRows, - A.graph.row_map, A.graph.entries, A.values, + &handle, numRows, numRows, A.graph.row_map, A.graph.entries, A.values, x, b, firstIter, firstIter, one, 1); firstIter = false; - //Now, compute the new residual norm using SPMV + // Now, compute the new residual norm using SPMV Kokkos::deep_copy(res, b); - //Compute res := Ax - res (since res is now equal to b, this is Ax - b) + // Compute res := Ax - res (since res is now equal to b, this is Ax - b) KokkosSparse::spmv("N", one, A, x, -one, res); - 
//Recompute the scaled norm + // Recompute the scaled norm scaledResNorm = KokkosBlas::nrm2(res) / initialRes; numIters++; - std::cout << "Iteration " << numIters << " scaled residual norm: " << scaledResNorm << '\n'; + std::cout << "Iteration " << numIters + << " scaled residual norm: " << scaledResNorm << '\n'; } std::cout << "SUCCESS: converged in " << numIters << " iterations.\n"; } Kokkos::finalize(); return 0; } - diff --git a/perf_test/graph/KokkosGraph_color.cpp b/perf_test/graph/KokkosGraph_color.cpp index 7c6dda889f..cc19c19675 100644 --- a/perf_test/graph/KokkosGraph_color.cpp +++ b/perf_test/graph/KokkosGraph_color.cpp @@ -377,16 +377,14 @@ void run_multi_mem_experiment(Parameters params) { if (params.a_mem_space == 1) { fast_crstmat_t a_fast_crsmat; a_fast_crsmat = - KokkosSparse::Impl::read_kokkos_crst_matrix( - a_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(a_mat_file); a_fast_crsgraph = a_fast_crsmat.graph; num_cols = a_fast_crsmat.numCols(); } else { slow_crstmat_t a_slow_crsmat; a_slow_crsmat = - KokkosSparse::Impl::read_kokkos_crst_matrix( - a_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(a_mat_file); a_slow_crsgraph = a_slow_crsmat.graph; num_cols = a_slow_crsmat.numCols(); } diff --git a/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp b/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp index 78520d64eb..d7ae6da430 100644 --- a/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp +++ b/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp @@ -75,12 +75,10 @@ void run_multi_mem_spgemm(Parameters params) { if (params.a_mem_space == 1) { a_fast_crsmat = - KokkosSparse::Impl::read_kokkos_crst_matrix( - a_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(a_mat_file); } else { a_slow_crsmat = - KokkosSparse::Impl::read_kokkos_crst_matrix( - a_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(a_mat_file); } if ((b_mat_file == NULL || strcmp(b_mat_file, a_mat_file) == 0) && @@ -91,13 +89,11 @@ void 
run_multi_mem_spgemm(Parameters params) { } else if (params.b_mem_space == 1) { if (b_mat_file == NULL) b_mat_file = a_mat_file; b_fast_crsmat = - KokkosSparse::Impl::read_kokkos_crst_matrix( - b_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(b_mat_file); } else { if (b_mat_file == NULL) b_mat_file = a_mat_file; b_slow_crsmat = - KokkosSparse::Impl::read_kokkos_crst_matrix( - b_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(b_mat_file); } if (params.a_mem_space == 1) { diff --git a/perf_test/sparse/KokkosSparse_pcg.cpp b/perf_test/sparse/KokkosSparse_pcg.cpp index a98a8fcec8..b485158125 100644 --- a/perf_test/sparse/KokkosSparse_pcg.cpp +++ b/perf_test/sparse/KokkosSparse_pcg.cpp @@ -264,9 +264,8 @@ void run_pcg(int *cmdline, const char *mtx_file) { default_lno_t *xadj, *adj; default_scalar *ew; - KokkosSparse::Impl::read_matrix(&nv, &ne, &xadj, &adj, &ew, - mtx_file); + KokkosSparse::Impl::read_matrix( + &nv, &ne, &xadj, &adj, &ew, mtx_file); typedef typename KokkosSparse::CrsMatrix( - a_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(a_mat_file); } else { a_slow_crsmat = - KokkosSparse::Impl::read_kokkos_crst_matrix( - a_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(a_mat_file); } if ((b_mat_file == NULL || strcmp(b_mat_file, a_mat_file) == 0) && @@ -354,13 +352,11 @@ void run_spgemm_jacobi(Parameters params) { } else if (params.b_mem_space == 1) { if (b_mat_file == NULL) b_mat_file = a_mat_file; b_fast_crsmat = - KokkosSparse::Impl::read_kokkos_crst_matrix( - b_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(b_mat_file); } else { if (b_mat_file == NULL) b_mat_file = a_mat_file; b_slow_crsmat = - KokkosSparse::Impl::read_kokkos_crst_matrix( - b_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(b_mat_file); } if (params.a_mem_space == 1) { diff --git a/src/sparse/KokkosSparse_IOUtils.hpp b/src/sparse/KokkosSparse_IOUtils.hpp index d847fc9d10..fa6d08f960 100644 --- a/src/sparse/KokkosSparse_IOUtils.hpp +++ 
b/src/sparse/KokkosSparse_IOUtils.hpp @@ -497,7 +497,8 @@ void convert_undirected_edge_list_to_csr(lno_t nv, size_type ne, in_lno_t *srcs, #include #include #include - __gnu_parallel::parallel_sort_mwms *>( + __gnu_parallel::parallel_sort_mwms< + false, true, struct KokkosKernels::Impl::Edge *>( &(edges[0]), &(edges[0]) + ne * 2, std::less>(), 64); #else @@ -805,7 +806,8 @@ void write_kokkos_crst_matrix(crs_matrix_t a_crsmat, const char *filename) { scalar_t *a_values = a_values_view.data(); std::string strfilename(filename); - if (KokkosKernels::Impl::endswith(strfilename, ".mtx") || KokkosKernels::Impl::endswith(strfilename, ".mm")) { + if (KokkosKernels::Impl::endswith(strfilename, ".mtx") || + KokkosKernels::Impl::endswith(strfilename, ".mm")) { write_matrix_mtx( a_crsmat.numRows(), a_crsmat.numCols(), a_crsmat.nnz(), a_rowmap, a_entries, a_values, filename); @@ -971,7 +973,8 @@ int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, numEdges = 2 * nnz; } // numEdges is only an upper bound (diagonal entries may be removed) - std::vector> edges(numEdges); + std::vector> edges( + numEdges); size_type nE = 0; lno_t numDiagonal = 0; for (size_type i = 0; i < nnz; ++i) { @@ -1076,7 +1079,8 @@ template void read_matrix(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, scalar_t **ew, const char *filename) { std::string strfilename(filename); - if (KokkosKernels::Impl::endswith(strfilename, ".mtx") || KokkosKernels::Impl::endswith(strfilename, ".mm")) { + if (KokkosKernels::Impl::endswith(strfilename, ".mtx") || + KokkosKernels::Impl::endswith(strfilename, ".mm")) { read_mtx(filename, nv, ne, xadj, adj, ew, false, false, false); } @@ -1096,8 +1100,8 @@ void read_matrix(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, template crsMat_t read_kokkos_crst_matrix(const char *filename_) { std::string strfilename(filename_); - bool isMatrixMarket = - KokkosKernels::Impl::endswith(strfilename, ".mtx") || 
KokkosKernels::Impl::endswith(strfilename, ".mm"); + bool isMatrixMarket = KokkosKernels::Impl::endswith(strfilename, ".mtx") || + KokkosKernels::Impl::endswith(strfilename, ".mm"); typedef typename crsMat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type row_map_view_t; @@ -1265,6 +1269,6 @@ inline void kk_sequential_create_incidence_matrix_transpose( } } -} // namespace Impl -} // namespace KokkosKernels -#endif // _KOKKOSSPARSE_IOUTILS_HPP +} // namespace Impl +} // namespace KokkosSparse +#endif // _KOKKOSSPARSE_IOUTILS_HPP diff --git a/src/sparse/KokkosSparse_SortCrs.hpp b/src/sparse/KokkosSparse_SortCrs.hpp index 03d51386e5..11e3b43acb 100644 --- a/src/sparse/KokkosSparse_SortCrs.hpp +++ b/src/sparse/KokkosSparse_SortCrs.hpp @@ -392,8 +392,8 @@ void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { using lno_t = typename entries_t::non_const_value_type; using team_pol = Kokkos::TeamPolicy; - bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); - lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; + bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); + lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; if (numRows == 0) return; Impl::SortCrsMatrixFunctor funct(useRadix, rowmap, entries, values); @@ -472,8 +472,8 @@ template void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) { using lno_t = typename entries_t::non_const_value_type; using team_pol = Kokkos::TeamPolicy; - bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); - lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; + bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); + lno_t numRows = rowmap.extent(0) ? 
rowmap.extent(0) - 1 : 0; if (numRows == 0) return; Impl::SortCrsGraphFunctor funct( useRadix, rowmap, entries); @@ -531,8 +531,8 @@ crsMat_t sort_and_merge_matrix(const crsMat_t& A) { mergedRowmap, A.graph.row_map, A.graph.entries), numCompressedEntries); // Prefix sum to get rowmap - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(A.numRows() + 1, - mergedRowmap); + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( + A.numRows() + 1, mergedRowmap); entries_t mergedEntries("SortedMerged entries", numCompressedEntries); values_t mergedValues("SortedMerged values", numCompressedEntries); // Compute merged entries and values @@ -576,8 +576,8 @@ void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, rowmap_out, rowmap_in, entries_in), numCompressedEntries); // Prefix sum to get rowmap - KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum(numRows + 1, - rowmap_out); + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( + numRows + 1, rowmap_out); entries_out = entries_t("SortedMerged entries", numCompressedEntries); // Compute merged entries and values Kokkos::parallel_for( @@ -601,7 +601,7 @@ crsGraph_t sort_and_merge_graph(const crsGraph_t& G) { return crsGraph_t(mergedEntries, mergedRowmap); } -} // namespace KokkosSparse +} // namespace KokkosSparse namespace KokkosKernels { @@ -614,15 +614,15 @@ namespace KokkosKernels { template -[[deprecated]] -void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, - const entries_t& entries, const values_t& values) { +[[deprecated]] void sort_bsr_matrix(const lno_t blockdim, + const rowmap_t& rowmap, + const entries_t& entries, + const values_t& values) { KokkosSparse::sort_bsr_matrix(blockdim, rowmap, entries, values); } template -[[deprecated]] -void sort_bsr_matrix(const bsrMat_t& A) { +[[deprecated]] void sort_bsr_matrix(const bsrMat_t& A) { KokkosSparse::sort_bsr_matrix(A); } @@ -635,27 +635,25 @@ void sort_bsr_matrix(const bsrMat_t& A) { template -[[deprecated]] -void 
sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, - const values_t& values) { +[[deprecated]] void sort_crs_matrix(const rowmap_t& rowmap, + const entries_t& entries, + const values_t& values) { KokkosSparse::sort_crs_matrix(rowmap, entries, values); } template -[[deprecated]] -void sort_crs_matrix(const crsMat_t& A) { +[[deprecated]] void sort_crs_matrix(const crsMat_t& A) { KokkosSparse::sort_crs_matrix(A); } template -[[deprecated]] -void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) { +[[deprecated]] void sort_crs_graph(const rowmap_t& rowmap, + const entries_t& entries) { KokkosSparse::sort_crs_graph(rowmap, entries); } template -[[deprecated]] -void sort_crs_graph(const crsGraph_t& G) { +[[deprecated]] void sort_crs_graph(const crsGraph_t& G) { KokkosSparse::sort_crs_graph(G); } @@ -663,23 +661,21 @@ void sort_crs_graph(const crsGraph_t& G) { // sorted and has no duplicated entries: each (i, j) is unique. Values for // duplicated entries are summed. 
template -[[deprecated]] -crsMat_t sort_and_merge_matrix(const crsMat_t& A) { +[[deprecated]] crsMat_t sort_and_merge_matrix(const crsMat_t& A) { KokkosSparse::sort_and_merge_matrix(A); } template -[[deprecated]] -crsGraph_t sort_and_merge_graph(const crsGraph_t& G) { +[[deprecated]] crsGraph_t sort_and_merge_graph(const crsGraph_t& G) { KokkosSparse::sort_and_merge_graph(G); } template -[[deprecated]] -void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, rowmap_t& rowmap_out, - entries_t& entries_out) { - KokkosSparse::sort_and_merge_graph(rowmap_in, entries_in, rowmap_out, entries_out); +[[deprecated]] void sort_and_merge_graph( + const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, + rowmap_t& rowmap_out, entries_t& entries_out) { + KokkosSparse::sort_and_merge_graph(rowmap_in, entries_in, rowmap_out, + entries_out); } // For backward compatibility: keep the public interface accessible in @@ -719,7 +715,7 @@ template return KokkosKernels::sort_and_merge_matrix(A); } -} // namespace Impl -} // namespace KokkosKernels +} // namespace Impl +} // namespace KokkosKernels -#endif // _KOKKOSSPARSE_SORTCRS_HPP +#endif // _KOKKOSSPARSE_SORTCRS_HPP diff --git a/src/sparse/KokkosSparse_sptrsv_supernode.hpp b/src/sparse/KokkosSparse_sptrsv_supernode.hpp index 1c86121bde..481bd2cc0a 100644 --- a/src/sparse/KokkosSparse_sptrsv_supernode.hpp +++ b/src/sparse/KokkosSparse_sptrsv_supernode.hpp @@ -598,7 +598,7 @@ host_graph_t generate_supernodal_graph(bool col_major, graph_t &graph, // sort column ids per row KokkosSparse::sort_crs_graph(hr, hc); + row_map_view_host_t, cols_view_host_t>(hr, hc); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time_seconds = timer.seconds(); std::cout << " > Generate Supernodal Graph: sort graph : " diff --git a/src/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp b/src/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp index 6adafd6319..d779ff3e96 100644 --- 
a/src/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp +++ b/src/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp @@ -858,7 +858,7 @@ class TwostageGaussSeidel { entries_view_t, values_view_t>( rowmap_viewL, column_viewL, values_viewL); KokkosSparse::sort_crs_matrix( + entries_view_t, values_view_t>( rowmap_viewU, column_viewU, values_viewU); // now do symbolic diff --git a/unit_test/sparse/Test_Sparse_SortCrs.hpp b/unit_test/sparse/Test_Sparse_SortCrs.hpp index edae86304c..a4d30b40a1 100644 --- a/unit_test/sparse/Test_Sparse_SortCrs.hpp +++ b/unit_test/sparse/Test_Sparse_SortCrs.hpp @@ -43,7 +43,8 @@ */ /// \file Test_Sparse_SortCrs.hpp -/// \brief Tests for sort_crs_matrix and sort_crs_graph in KokkosSparse_SortCrs.hpp +/// \brief Tests for sort_crs_matrix and sort_crs_graph in +/// KokkosSparse_SortCrs.hpp #ifndef KOKKOSSPARSE_SORTCRSTEST_HPP #define KOKKOSSPARSE_SORTCRSTEST_HPP @@ -59,8 +60,6 @@ #include #include - - template void testSortCRS(default_lno_t numRows, default_lno_t numCols, default_size_type nnz, bool doValues, bool doStructInterface) { @@ -308,4 +307,4 @@ TEST_F(TestCategory, common_sort_merge_crsmatrix) { testSortAndMerge(); } -#endif // KOKKOSSPARSE_SORTCRSTEST_HPP +#endif // KOKKOSSPARSE_SORTCRSTEST_HPP From be71d80e81ab4c80213b8c535a8b34939010d30f Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Mon, 6 Jun 2022 11:02:44 -0600 Subject: [PATCH 3/5] common cleanup: fixing issue with sparse performance tests Some tests had not been compiled on my local machine due to the instantiation guards in these tests. Now that the types are enabled the issue was reproduced and fixed.
--- perf_test/sparse/KokkosSparse_block_pcg.cpp | 6 +++--- perf_test/sparse/KokkosSparse_spadd.cpp | 16 ++++++++-------- src/common/KokkosKernels_IOUtils.hpp | 1 + 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/perf_test/sparse/KokkosSparse_block_pcg.cpp b/perf_test/sparse/KokkosSparse_block_pcg.cpp index 89ab0bfdca..25d7a65fdd 100644 --- a/perf_test/sparse/KokkosSparse_block_pcg.cpp +++ b/perf_test/sparse/KokkosSparse_block_pcg.cpp @@ -50,7 +50,7 @@ #include "KokkosSparse_pcg.hpp" #include "KokkosKernels_Utils.hpp" -#include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include "KokkosKernels_TestUtils.hpp" @@ -75,7 +75,7 @@ crsMat_t create_crs_matrix(char *mtx_bin_file) { if (std::string(mtx_bin_file) == "auto") { INDEX_TYPE num_rows = 11, num_cols = 11, nnz = 40; - crsmat = KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + crsmat = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(num_rows, num_cols, nnz, 3, 5); printf("generating test matrix automatically\n"); printf(" num rows: %d", num_rows); @@ -86,7 +86,7 @@ crsMat_t create_crs_matrix(char *mtx_bin_file) { INDEX_TYPE *xadj, *adj; SCALAR_TYPE *ew; - KokkosKernels::Impl::read_matrix( + KokkosSparse::Impl::read_matrix( &nv, &ne, &xadj, &adj, &ew, mtx_bin_file); row_map_view_t rowmap_view("rowmap_view", nv + 1); diff --git a/perf_test/sparse/KokkosSparse_spadd.cpp b/perf_test/sparse/KokkosSparse_spadd.cpp index 963ada8836..877b3c5df1 100644 --- a/perf_test/sparse/KokkosSparse_spadd.cpp +++ b/perf_test/sparse/KokkosSparse_spadd.cpp @@ -45,7 +45,7 @@ #include #include "KokkosKernels_config.h" #include "KokkosKernels_Handle.hpp" -#include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include "KokkosSparse_Utils_cusparse.hpp" #include "KokkosSparse_Utils_mkl.hpp" #include "KokkosSparse_spadd.hpp" @@ -111,19 +111,19 @@ void run_experiment(const Params& params) { lno_t n = params.n; if (params.amtx.length()) { 
std::cout << "Loading A from " << params.amtx << '\n'; - A = KokkosKernels::Impl::read_kokkos_crst_matrix( + A = KokkosSparse::Impl::read_kokkos_crst_matrix( params.amtx.c_str()); m = A.numRows(); n = A.numCols(); } else { std::cout << "Randomly generating A\n"; size_type nnzUnused = m * params.nnzPerRow; - A = KokkosKernels::Impl::kk_generate_sparse_matrix( + A = KokkosSparse::Impl::kk_generate_sparse_matrix( m, n, nnzUnused, 0, (n + 3) / 3); } if (params.bmtx.length()) { std::cout << "Loading B from " << params.bmtx << '\n'; - B = KokkosKernels::Impl::read_kokkos_crst_matrix( + B = KokkosSparse::Impl::read_kokkos_crst_matrix( params.bmtx.c_str()); } else if (params.bDiag) { std::cout << "Generating B as diagonal matrix.\n"; @@ -154,7 +154,7 @@ void run_experiment(const Params& params) { } else { std::cout << "Randomly generating B\n"; size_type nnzUnused = m * params.nnzPerRow; - B = KokkosKernels::Impl::kk_generate_sparse_matrix( + B = KokkosSparse::Impl::kk_generate_sparse_matrix( m, n, nnzUnused, 0, (n + 3) / 3); } // Make sure dimensions are compatible @@ -186,8 +186,8 @@ void run_experiment(const Params& params) { if (params.sorted) { std::cout << "Assuming input matrices are sorted (explicitly sorting just " "in case)\n"; - KokkosKernels::sort_crs_matrix(A); - KokkosKernels::sort_crs_matrix(B); + KokkosSparse::sort_crs_matrix(A); + KokkosSparse::sort_crs_matrix(B); } else std::cout << "Assuming input matrices are not sorted.\n"; kh.create_spadd_handle(params.sorted); @@ -363,7 +363,7 @@ void run_experiment(const Params& params) { std::cout << "Writing C (" << m << "x" << n << ") to " << params.cmtx << "\n"; crsMat_t C("C", m, n, c_nnz, valuesC, row_mapC, entriesC); - KokkosKernels::Impl::write_kokkos_crst_matrix( + KokkosSparse::Impl::write_kokkos_crst_matrix( C, params.cmtx.c_str()); } } diff --git a/src/common/KokkosKernels_IOUtils.hpp b/src/common/KokkosKernels_IOUtils.hpp index fe72d0cbf3..42f31af65a 100644 --- a/src/common/KokkosKernels_IOUtils.hpp +++ 
b/src/common/KokkosKernels_IOUtils.hpp @@ -269,6 +269,7 @@ inline void kk_read_3Dview_from_file(idx_array_type &view, } template +[[deprecated]] void write_edgelist_bin(size_t ne, const idx *edge_begins, const idx *edge_ends, const wt *ew, const char *filename) { std::ofstream myFile(filename, std::ios::out | std::ios::binary); From a64734939a9fe109a7bbe90dabc651159bc40429 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Mon, 6 Jun 2022 14:21:09 -0600 Subject: [PATCH 4/5] common cleanup: fixing an issue with a default template redefinition --- src/sparse/KokkosSparse_SortCrs.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/sparse/KokkosSparse_SortCrs.hpp b/src/sparse/KokkosSparse_SortCrs.hpp index 11e3b43acb..97bad80f39 100644 --- a/src/sparse/KokkosSparse_SortCrs.hpp +++ b/src/sparse/KokkosSparse_SortCrs.hpp @@ -435,8 +435,7 @@ void sort_crs_matrix(const crsMat_t& A) { // Sort a BRS matrix: within each row, sort entries ascending by column and // permute the values accordingly. 
template + typename values_t, typename lno_t> void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { // TODO: this is O(N^2) mock for debugging - do regular implementation based From 142577db1a748895761eb5daca4802974ae403c0 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Tue, 7 Jun 2022 10:04:33 -0600 Subject: [PATCH 5/5] common cleanup: applying clang-format --- perf_test/sparse/KokkosSparse_spadd.cpp | 12 ++++++------ src/common/KokkosKernels_IOUtils.hpp | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/perf_test/sparse/KokkosSparse_spadd.cpp b/perf_test/sparse/KokkosSparse_spadd.cpp index 877b3c5df1..5a273e6694 100644 --- a/perf_test/sparse/KokkosSparse_spadd.cpp +++ b/perf_test/sparse/KokkosSparse_spadd.cpp @@ -118,8 +118,8 @@ void run_experiment(const Params& params) { } else { std::cout << "Randomly generating A\n"; size_type nnzUnused = m * params.nnzPerRow; - A = KokkosSparse::Impl::kk_generate_sparse_matrix( - m, n, nnzUnused, 0, (n + 3) / 3); + A = KokkosSparse::Impl::kk_generate_sparse_matrix(m, n, nnzUnused, + 0, (n + 3) / 3); } if (params.bmtx.length()) { std::cout << "Loading B from " << params.bmtx << '\n'; @@ -154,8 +154,8 @@ void run_experiment(const Params& params) { } else { std::cout << "Randomly generating B\n"; size_type nnzUnused = m * params.nnzPerRow; - B = KokkosSparse::Impl::kk_generate_sparse_matrix( - m, n, nnzUnused, 0, (n + 3) / 3); + B = KokkosSparse::Impl::kk_generate_sparse_matrix(m, n, nnzUnused, + 0, (n + 3) / 3); } // Make sure dimensions are compatible if (A.numRows() != B.numRows() || A.numCols() != B.numCols()) { @@ -363,8 +363,8 @@ void run_experiment(const Params& params) { std::cout << "Writing C (" << m << "x" << n << ") to " << params.cmtx << "\n"; crsMat_t C("C", m, n, c_nnz, valuesC, row_mapC, entriesC); - KokkosSparse::Impl::write_kokkos_crst_matrix( - C, params.cmtx.c_str()); + 
KokkosSparse::Impl::write_kokkos_crst_matrix(C, + params.cmtx.c_str()); } } diff --git a/src/common/KokkosKernels_IOUtils.hpp b/src/common/KokkosKernels_IOUtils.hpp index 42f31af65a..08e6f3cdc7 100644 --- a/src/common/KokkosKernels_IOUtils.hpp +++ b/src/common/KokkosKernels_IOUtils.hpp @@ -269,9 +269,9 @@ inline void kk_read_3Dview_from_file(idx_array_type &view, } template -[[deprecated]] -void write_edgelist_bin(size_t ne, const idx *edge_begins, const idx *edge_ends, - const wt *ew, const char *filename) { +[[deprecated]] void write_edgelist_bin(size_t ne, const idx *edge_begins, + const idx *edge_ends, const wt *ew, + const char *filename) { std::ofstream myFile(filename, std::ios::out | std::ios::binary); myFile.write((char *)&ne, sizeof(idx)); myFile.write((char *)edge_begins, sizeof(idx) * (ne));