diff --git a/example/gmres/ex_real_A.cpp b/example/gmres/ex_real_A.cpp index 1e3ba19585..b3e95605f7 100644 --- a/example/gmres/ex_real_A.cpp +++ b/example/gmres/ex_real_A.cpp @@ -43,7 +43,7 @@ */ #include -#include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include #include #include @@ -117,7 +117,7 @@ int main(int argc, char* argv[]) { { // Read in a matrix Market file and use it to test the Kokkos Operator. KokkosSparse::CrsMatrix A = - KokkosKernels::Impl::read_kokkos_crst_matrix< + KokkosSparse::Impl::read_kokkos_crst_matrix< KokkosSparse::CrsMatrix>(filename.c_str()); int n = A.numRows(); diff --git a/example/gmres/test_cmplx_A.cpp b/example/gmres/test_cmplx_A.cpp index bc1ddce35b..ad8d19fb03 100644 --- a/example/gmres/test_cmplx_A.cpp +++ b/example/gmres/test_cmplx_A.cpp @@ -44,6 +44,7 @@ #include #include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include #include #include @@ -77,7 +78,7 @@ int main(int /*argc*/, char** /*argv[]*/) { { // Read in a matrix Market file and use it to test the Kokkos Operator. KokkosSparse::CrsMatrix A = - KokkosKernels::Impl::read_kokkos_crst_matrix< + KokkosSparse::Impl::read_kokkos_crst_matrix< KokkosSparse::CrsMatrix>(filename.c_str()); int n = A.numRows(); diff --git a/example/gmres/test_prec.cpp b/example/gmres/test_prec.cpp index a75c9dc59a..11122edccd 100644 --- a/example/gmres/test_prec.cpp +++ b/example/gmres/test_prec.cpp @@ -48,6 +48,7 @@ #include #include #include +#include "KokkosSparse_IOUtils.hpp" int main(int argc, char* argv[]) { typedef double ST; @@ -114,13 +115,13 @@ int main(int argc, char* argv[]) { { // Generate a diagonal matrix with entries 1, 2, ...., 1000 and its inverse. 
KokkosSparse::CrsMatrix A = - KokkosKernels::Impl::kk_generate_diag_matrix< + KokkosSparse::Impl::kk_generate_diag_matrix< KokkosSparse::CrsMatrix>(n); KokkosSparse::Experimental::MatrixPrec* myPrec = new KokkosSparse::Experimental::MatrixPrec( - KokkosKernels::Impl::kk_generate_diag_matrix< + KokkosSparse::Impl::kk_generate_diag_matrix< KokkosSparse::CrsMatrix>(n, true)); ViewVectorType X(Kokkos::view_alloc(Kokkos::WithoutInitializing, "X"), diff --git a/example/gmres/test_real_A.cpp b/example/gmres/test_real_A.cpp index 26103da035..abfb3f0101 100644 --- a/example/gmres/test_real_A.cpp +++ b/example/gmres/test_real_A.cpp @@ -44,6 +44,7 @@ #include #include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include #include #include @@ -89,7 +90,7 @@ int main(int /*argc*/, char** /*argv[]*/) { cOT diagDominance = 1; nnz = 10 * numRows; sp_matrix_type A = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< sp_matrix_type>(numRows, numCols, nnz, 0, ncOT(0.01 * numRows), diagDominance); diff --git a/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp b/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp index 1fc1fc37d2..ce171c46bd 100644 --- a/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp +++ b/example/wiki/sparse/KokkosSparse_wiki_gauss_seidel.cpp @@ -2,88 +2,96 @@ #include "KokkosKernels_default_types.hpp" #include "KokkosKernels_Handle.hpp" #include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include "KokkosSparse_spmv.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosSparse_gauss_seidel.hpp" #include "KokkosBlas1_nrm2.hpp" -//Parallel Gauss-Seidel Preconditioner/Smoother +// Parallel Gauss-Seidel Preconditioner/Smoother // -Uses graph coloring to find independent row sets, // and applies GS to each set in parallel // -Here, use to solve a diagonally dominant linear system directly. 
-//Helper to print out colors in the shape of the grid -int main() -{ - using Scalar = default_scalar; - using Mag = Kokkos::ArithTraits::mag_type; - using Ordinal = default_lno_t; - using Offset = default_size_type; +// Helper to print out colors in the shape of the grid +int main() { + using Scalar = default_scalar; + using Mag = Kokkos::ArithTraits::mag_type; + using Ordinal = default_lno_t; + using Offset = default_size_type; using ExecSpace = Kokkos::DefaultExecutionSpace; - using MemSpace = typename ExecSpace::memory_space; - using Device = Kokkos::Device; - using Handle = KokkosKernels::Experimental:: - KokkosKernelsHandle; - using Matrix = KokkosSparse::CrsMatrix; - using Vector = typename Matrix::values_type; + using MemSpace = typename ExecSpace::memory_space; + using Device = Kokkos::Device; + using Handle = KokkosKernels::Experimental::KokkosKernelsHandle< + Offset, Ordinal, default_scalar, ExecSpace, MemSpace, MemSpace>; + using Matrix = KokkosSparse::CrsMatrix; + using Vector = typename Matrix::values_type; constexpr Ordinal numRows = 10000; - const Scalar one = Kokkos::ArithTraits::one(); - const Mag magOne = Kokkos::ArithTraits::one(); - //Solve tolerance + const Scalar one = Kokkos::ArithTraits::one(); + const Mag magOne = Kokkos::ArithTraits::one(); + // Solve tolerance const Mag tolerance = 1e-6 * magOne; Kokkos::initialize(); { - //Generate a square, strictly diagonally dominant, but nonsymmetric matrix on which Gauss-Seidel should converge. - //Get approx. 20 entries per row - //Diagonals are 2x the absolute sum of all other entries. + // Generate a square, strictly diagonally dominant, but nonsymmetric matrix + // on which Gauss-Seidel should converge. Get approx. 20 entries per row + // Diagonals are 2x the absolute sum of all other entries. 
Offset nnz = numRows * 20; - Matrix A = KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix(numRows, numRows, nnz, 2, 100, 1.05 * one); - std::cout << "Generated a matrix with " << numRows << " rows/cols, and " << nnz << " entries.\n"; - //Create a kernel handle, then a Gauss-Seidel handle with the default algorithm + Matrix A = + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< + Matrix>(numRows, numRows, nnz, 2, 100, 1.05 * one); + std::cout << "Generated a matrix with " << numRows << " rows/cols, and " + << nnz << " entries.\n"; + // Create a kernel handle, then a Gauss-Seidel handle with the default + // algorithm Handle handle; handle.create_gs_handle(KokkosSparse::GS_DEFAULT); - //Set up Gauss-Seidel for the graph (matrix sparsity pattern) - KokkosSparse::Experimental::gauss_seidel_symbolic(&handle, numRows, numRows, A.graph.row_map, A.graph.entries, false); - //Set up Gauss-Seidel for the matrix values (numeric) - //Another matrix with the same sparsity pattern could re-use the handle and symbolic phase, and only call numeric. - KokkosSparse::Experimental::gauss_seidel_numeric(&handle, numRows, numRows, A.graph.row_map, A.graph.entries, A.values, false); - //Now, preconditioner is ready to use. Set up an unknown vector (uninitialized) and randomized right-hand-side vector. + // Set up Gauss-Seidel for the graph (matrix sparsity pattern) + KokkosSparse::Experimental::gauss_seidel_symbolic( + &handle, numRows, numRows, A.graph.row_map, A.graph.entries, false); + // Set up Gauss-Seidel for the matrix values (numeric) + // Another matrix with the same sparsity pattern could re-use the handle and + // symbolic phase, and only call numeric. + KokkosSparse::Experimental::gauss_seidel_numeric( + &handle, numRows, numRows, A.graph.row_map, A.graph.entries, A.values, + false); + // Now, preconditioner is ready to use. Set up an unknown vector + // (uninitialized) and randomized right-hand-side vector. 
Vector x(Kokkos::view_alloc(Kokkos::WithoutInitializing, "x"), numRows); Vector b(Kokkos::view_alloc(Kokkos::WithoutInitializing, "b"), numRows); Vector res(Kokkos::view_alloc(Kokkos::WithoutInitializing, "res"), numRows); auto bHost = Kokkos::create_mirror_view(b); - for(Ordinal i = 0; i < numRows; i++) + for (Ordinal i = 0; i < numRows; i++) bHost(i) = 3 * ((one * rand()) / RAND_MAX); Kokkos::deep_copy(b, bHost); - //Measure initial residual norm ||Ax - b||, where x is 0 - Mag initialRes = KokkosBlas::nrm2(b); + // Measure initial residual norm ||Ax - b||, where x is 0 + Mag initialRes = KokkosBlas::nrm2(b); Mag scaledResNorm = magOne; - bool firstIter = true; - //Iterate until reaching the tolerance + bool firstIter = true; + // Iterate until reaching the tolerance int numIters = 0; - while(scaledResNorm > tolerance) - { - //Run one sweep of forward Gauss-Seidel (SOR with omega = 1.0) - //If this is the first iteration, tell apply: + while (scaledResNorm > tolerance) { + // Run one sweep of forward Gauss-Seidel (SOR with omega = 1.0) + // If this is the first iteration, tell apply: // * to zero out x (it was uninitialized) - // * that b has changed since the previous apply (since there was no previous apply) + // * that b has changed since the previous apply (since there was no + // previous apply) KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply( - &handle, numRows, numRows, - A.graph.row_map, A.graph.entries, A.values, + &handle, numRows, numRows, A.graph.row_map, A.graph.entries, A.values, x, b, firstIter, firstIter, one, 1); firstIter = false; - //Now, compute the new residual norm using SPMV + // Now, compute the new residual norm using SPMV Kokkos::deep_copy(res, b); - //Compute res := Ax - res (since res is now equal to b, this is Ax - b) + // Compute res := Ax - res (since res is now equal to b, this is Ax - b) KokkosSparse::spmv("N", one, A, x, -one, res); - //Recompute the scaled norm + // Recompute the scaled norm scaledResNorm = 
KokkosBlas::nrm2(res) / initialRes; numIters++; - std::cout << "Iteration " << numIters << " scaled residual norm: " << scaledResNorm << '\n'; + std::cout << "Iteration " << numIters + << " scaled residual norm: " << scaledResNorm << '\n'; } std::cout << "SUCCESS: converged in " << numIters << " iterations.\n"; } Kokkos::finalize(); return 0; } - diff --git a/perf_test/graph/KokkosGraph_color.cpp b/perf_test/graph/KokkosGraph_color.cpp index 8b16111157..cc19c19675 100644 --- a/perf_test/graph/KokkosGraph_color.cpp +++ b/perf_test/graph/KokkosGraph_color.cpp @@ -55,6 +55,7 @@ #include "KokkosKernels_TestParameters.hpp" #include "KokkosGraph_Distance1Color.hpp" #include "KokkosKernels_TestUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" void print_options(std::ostream &os, const char *app_name, unsigned int indent = 0) { @@ -376,16 +377,14 @@ void run_multi_mem_experiment(Parameters params) { if (params.a_mem_space == 1) { fast_crstmat_t a_fast_crsmat; a_fast_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( - a_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(a_mat_file); a_fast_crsgraph = a_fast_crsmat.graph; num_cols = a_fast_crsmat.numCols(); } else { slow_crstmat_t a_slow_crsmat; a_slow_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( - a_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(a_mat_file); a_slow_crsgraph = a_slow_crsmat.graph; num_cols = a_slow_crsmat.numCols(); } diff --git a/perf_test/graph/KokkosGraph_color_d2.cpp b/perf_test/graph/KokkosGraph_color_d2.cpp index 7d6f45889a..b47fe21a70 100644 --- a/perf_test/graph/KokkosGraph_color_d2.cpp +++ b/perf_test/graph/KokkosGraph_color_d2.cpp @@ -65,6 +65,7 @@ #include #include "KokkosKernels_default_types.hpp" #include "KokkosKernels_TestUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" using namespace KokkosGraph; @@ -595,7 +596,7 @@ void experiment_driver(const D2Parameters& params) { using graph_t = typename crsMat_t::StaticCrsGraphType; crsMat_t A = - 
KokkosKernels::Impl::read_kokkos_crst_matrix(params.mtx_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(params.mtx_file); graph_t Agraph = A.graph; int num_cols = A.numCols(); diff --git a/perf_test/graph/KokkosGraph_mis_d2.cpp b/perf_test/graph/KokkosGraph_mis_d2.cpp index c68d5f85e2..dfe7715a1d 100644 --- a/perf_test/graph/KokkosGraph_mis_d2.cpp +++ b/perf_test/graph/KokkosGraph_mis_d2.cpp @@ -66,6 +66,7 @@ #include "KokkosGraph_MIS2.hpp" #include "KokkosKernels_default_types.hpp" #include "KokkosKernels_TestUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" using namespace KokkosGraph; @@ -253,7 +254,7 @@ void run_mis2(const MIS2Parameters& params) { Kokkos::Timer t; crsMat_t A_in = - KokkosKernels::Impl::read_kokkos_crst_matrix(params.mtx_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(params.mtx_file); std::cout << "I/O time: " << t.seconds() << " s\n"; t.reset(); // Symmetrize the matrix just in case diff --git a/perf_test/sparse/KokkosSparse_block_pcg.cpp b/perf_test/sparse/KokkosSparse_block_pcg.cpp index 89ab0bfdca..25d7a65fdd 100644 --- a/perf_test/sparse/KokkosSparse_block_pcg.cpp +++ b/perf_test/sparse/KokkosSparse_block_pcg.cpp @@ -50,7 +50,7 @@ #include "KokkosSparse_pcg.hpp" #include "KokkosKernels_Utils.hpp" -#include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include "KokkosKernels_TestUtils.hpp" @@ -75,7 +75,7 @@ crsMat_t create_crs_matrix(char *mtx_bin_file) { if (std::string(mtx_bin_file) == "auto") { INDEX_TYPE num_rows = 11, num_cols = 11, nnz = 40; - crsmat = KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + crsmat = KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(num_rows, num_cols, nnz, 3, 5); printf("generating test matrix automatically\n"); printf(" num rows: %d", num_rows); @@ -86,7 +86,7 @@ crsMat_t create_crs_matrix(char *mtx_bin_file) { INDEX_TYPE *xadj, *adj; SCALAR_TYPE *ew; - KokkosKernels::Impl::read_matrix( + KokkosSparse::Impl::read_matrix( &nv, &ne, 
&xadj, &adj, &ew, mtx_bin_file); row_map_view_t rowmap_view("rowmap_view", nv + 1); diff --git a/perf_test/sparse/KokkosSparse_gs.cpp b/perf_test/sparse/KokkosSparse_gs.cpp index 3d2be67676..2136cbb640 100644 --- a/perf_test/sparse/KokkosSparse_gs.cpp +++ b/perf_test/sparse/KokkosSparse_gs.cpp @@ -52,6 +52,7 @@ #include #include #include "KokkosKernels_default_types.hpp" +#include "KokkosSparse_IOUtils.hpp" #include #include #include @@ -177,7 +178,7 @@ crsMat_t generateLongRowMatrix(const GS_Parameters& params) { rowmap.data(), numRows + 1)); crsMat_t A("A", numRows, numRows, totalEntries, valuesView, rowmapView, entriesView); - A = KokkosKernels::sort_and_merge_matrix(A); + A = KokkosSparse::sort_and_merge_matrix(A); if (params.graph_symmetric) { // Symmetrize on host, rather than relying on the parallel versions (those // can be tested for symmetric=false) @@ -203,7 +204,7 @@ void runGS(const GS_Parameters& params) { typedef typename crsMat_t::values_type::non_const_type scalar_view_t; crsMat_t A; if (params.matrix_path) - A = KokkosKernels::Impl::read_kokkos_crst_matrix( + A = KokkosSparse::Impl::read_kokkos_crst_matrix( params.matrix_path); else A = generateLongRowMatrix(params); diff --git a/perf_test/sparse/KokkosSparse_kk_spmv.cpp b/perf_test/sparse/KokkosSparse_kk_spmv.cpp index 953294b120..40887d67ec 100644 --- a/perf_test/sparse/KokkosSparse_kk_spmv.cpp +++ b/perf_test/sparse/KokkosSparse_kk_spmv.cpp @@ -55,6 +55,7 @@ #include #include #include +#include #include #include "KokkosKernels_default_types.hpp" @@ -74,11 +75,11 @@ void run_spmv(Ordinal numRows, Ordinal numCols, const char* filename, int loop, srand(17312837); matrix_type A; if (filename) - A = KokkosKernels::Impl::read_kokkos_crst_matrix(filename); + A = KokkosSparse::Impl::read_kokkos_crst_matrix(filename); else { Offset nnz = 10 * numRows; // note: the help text says the bandwidth is fixed at 0.01 * numRows - A = KokkosKernels::Impl::kk_generate_sparse_matrix( + A = 
KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, 0, 0.01 * numRows); } numRows = A.numRows(); diff --git a/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp b/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp index 371f1b1d33..d7ae6da430 100644 --- a/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp +++ b/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp @@ -44,6 +44,7 @@ #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosSparse_run_spgemm.hpp" +#include "KokkosSparse_IOUtils.hpp" namespace KokkosKernels { @@ -74,12 +75,10 @@ void run_multi_mem_spgemm(Parameters params) { if (params.a_mem_space == 1) { a_fast_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( - a_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(a_mat_file); } else { a_slow_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( - a_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(a_mat_file); } if ((b_mat_file == NULL || strcmp(b_mat_file, a_mat_file) == 0) && @@ -90,13 +89,11 @@ void run_multi_mem_spgemm(Parameters params) { } else if (params.b_mem_space == 1) { if (b_mat_file == NULL) b_mat_file = a_mat_file; b_fast_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( - b_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(b_mat_file); } else { if (b_mat_file == NULL) b_mat_file = a_mat_file; b_slow_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( - b_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(b_mat_file); } if (params.a_mem_space == 1) { @@ -222,18 +219,18 @@ void run_multi_mem_spgemm(Parameters params) { if (c_mat_file != NULL) { if (params.c_mem_space == 1) { - KokkosKernels::sort_crs_matrix(c_fast_crsmat); + KokkosSparse::sort_crs_matrix(c_fast_crsmat); - KokkosKernels::Impl::write_graph_bin( + KokkosSparse::Impl::write_graph_bin( (lno_t)(c_fast_crsmat.numRows()), (size_type)(c_fast_crsmat.graph.entries.extent(0)), c_fast_crsmat.graph.row_map.data(), c_fast_crsmat.graph.entries.data(), c_fast_crsmat.values.data(), 
c_mat_file); } else { - KokkosKernels::sort_crs_matrix(c_slow_crsmat); + KokkosSparse::sort_crs_matrix(c_slow_crsmat); - KokkosKernels::Impl::write_graph_bin( + KokkosSparse::Impl::write_graph_bin( (lno_t)c_slow_crsmat.numRows(), (size_type)c_slow_crsmat.graph.entries.extent(0), c_slow_crsmat.graph.row_map.data(), diff --git a/perf_test/sparse/KokkosSparse_pcg.cpp b/perf_test/sparse/KokkosSparse_pcg.cpp index 5f34ec1cd9..b485158125 100644 --- a/perf_test/sparse/KokkosSparse_pcg.cpp +++ b/perf_test/sparse/KokkosSparse_pcg.cpp @@ -49,6 +49,7 @@ #include "KokkosKernels_IOUtils.hpp" #include "KokkosKernels_default_types.hpp" #include "KokkosKernels_TestUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" #include #define MAXVAL 1 @@ -263,9 +264,8 @@ void run_pcg(int *cmdline, const char *mtx_file) { default_lno_t *xadj, *adj; default_scalar *ew; - KokkosKernels::Impl::read_matrix(&nv, &ne, &xadj, &adj, &ew, - mtx_file); + KokkosSparse::Impl::read_matrix( + &nv, &ne, &xadj, &adj, &ew, mtx_file); typedef typename KokkosSparse::CrsMatrix( - a_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(a_mat_file); } else { a_slow_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( - a_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(a_mat_file); } if ((b_mat_file == NULL || strcmp(b_mat_file, a_mat_file) == 0) && @@ -353,13 +352,11 @@ void run_spgemm_jacobi(Parameters params) { } else if (params.b_mem_space == 1) { if (b_mat_file == NULL) b_mat_file = a_mat_file; b_fast_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( - b_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(b_mat_file); } else { if (b_mat_file == NULL) b_mat_file = a_mat_file; b_slow_crsmat = - KokkosKernels::Impl::read_kokkos_crst_matrix( - b_mat_file); + KokkosSparse::Impl::read_kokkos_crst_matrix(b_mat_file); } if (params.a_mem_space == 1) { @@ -485,18 +482,18 @@ void run_spgemm_jacobi(Parameters params) { if (c_mat_file != NULL) { if (params.c_mem_space == 1) { - 
KokkosKernels::sort_crs_matrix(c_fast_crsmat); + KokkosSparse::sort_crs_matrix(c_fast_crsmat); - KokkosKernels::Impl::write_graph_bin( + KokkosSparse::Impl::write_graph_bin( (lno_t)(c_fast_crsmat.numRows()), (size_type)(c_fast_crsmat.graph.entries.extent(0)), c_fast_crsmat.graph.row_map.data(), c_fast_crsmat.graph.entries.data(), c_fast_crsmat.values.data(), c_mat_file); } else { - KokkosKernels::sort_crs_matrix(c_slow_crsmat); + KokkosSparse::sort_crs_matrix(c_slow_crsmat); - KokkosKernels::Impl::write_graph_bin( + KokkosSparse::Impl::write_graph_bin( (lno_t)c_slow_crsmat.numRows(), (size_type)c_slow_crsmat.graph.entries.extent(0), c_slow_crsmat.graph.row_map.data(), diff --git a/perf_test/sparse/KokkosSparse_spadd.cpp b/perf_test/sparse/KokkosSparse_spadd.cpp index de8b5fcca8..5a273e6694 100644 --- a/perf_test/sparse/KokkosSparse_spadd.cpp +++ b/perf_test/sparse/KokkosSparse_spadd.cpp @@ -45,9 +45,9 @@ #include #include "KokkosKernels_config.h" #include "KokkosKernels_Handle.hpp" -#include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils_cusparse.hpp" -#include "KokkosKernels_SparseUtils_mkl.hpp" +#include "KokkosSparse_IOUtils.hpp" +#include "KokkosSparse_Utils_cusparse.hpp" +#include "KokkosSparse_Utils_mkl.hpp" #include "KokkosSparse_spadd.hpp" #include "KokkosKernels_TestUtils.hpp" @@ -111,19 +111,19 @@ void run_experiment(const Params& params) { lno_t n = params.n; if (params.amtx.length()) { std::cout << "Loading A from " << params.amtx << '\n'; - A = KokkosKernels::Impl::read_kokkos_crst_matrix( + A = KokkosSparse::Impl::read_kokkos_crst_matrix( params.amtx.c_str()); m = A.numRows(); n = A.numCols(); } else { std::cout << "Randomly generating A\n"; size_type nnzUnused = m * params.nnzPerRow; - A = KokkosKernels::Impl::kk_generate_sparse_matrix( - m, n, nnzUnused, 0, (n + 3) / 3); + A = KokkosSparse::Impl::kk_generate_sparse_matrix(m, n, nnzUnused, + 0, (n + 3) / 3); } if (params.bmtx.length()) { std::cout << "Loading B from " << params.bmtx 
<< '\n'; - B = KokkosKernels::Impl::read_kokkos_crst_matrix( + B = KokkosSparse::Impl::read_kokkos_crst_matrix( params.bmtx.c_str()); } else if (params.bDiag) { std::cout << "Generating B as diagonal matrix.\n"; @@ -154,8 +154,8 @@ void run_experiment(const Params& params) { } else { std::cout << "Randomly generating B\n"; size_type nnzUnused = m * params.nnzPerRow; - B = KokkosKernels::Impl::kk_generate_sparse_matrix( - m, n, nnzUnused, 0, (n + 3) / 3); + B = KokkosSparse::Impl::kk_generate_sparse_matrix(m, n, nnzUnused, + 0, (n + 3) / 3); } // Make sure dimensions are compatible if (A.numRows() != B.numRows() || A.numCols() != B.numCols()) { @@ -186,8 +186,8 @@ void run_experiment(const Params& params) { if (params.sorted) { std::cout << "Assuming input matrices are sorted (explicitly sorting just " "in case)\n"; - KokkosKernels::sort_crs_matrix(A); - KokkosKernels::sort_crs_matrix(B); + KokkosSparse::sort_crs_matrix(A); + KokkosSparse::sort_crs_matrix(B); } else std::cout << "Assuming input matrices are not sorted.\n"; kh.create_spadd_handle(params.sorted); @@ -363,8 +363,8 @@ void run_experiment(const Params& params) { std::cout << "Writing C (" << m << "x" << n << ") to " << params.cmtx << "\n"; crsMat_t C("C", m, n, c_nnz, valuesC, row_mapC, entriesC); - KokkosKernels::Impl::write_kokkos_crst_matrix( - C, params.cmtx.c_str()); + KokkosSparse::Impl::write_kokkos_crst_matrix(C, + params.cmtx.c_str()); } } diff --git a/perf_test/sparse/KokkosSparse_spiluk.cpp b/perf_test/sparse/KokkosSparse_spiluk.cpp index 2ee9573880..b86ecc352f 100644 --- a/perf_test/sparse/KokkosSparse_spiluk.cpp +++ b/perf_test/sparse/KokkosSparse_spiluk.cpp @@ -58,13 +58,14 @@ #include -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_spiluk.hpp" #include "KokkosSparse_spmv.hpp" #include "KokkosBlas1_nrm2.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosKernels_default_types.hpp" #include +#include #if 
defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) && \ (!defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)) @@ -111,7 +112,7 @@ int test_spiluk_perf(std::vector tests, std::string afilename, int kin, if (!afilename.empty()) { std::cout << "ILU(K) Begin: Read matrix filename " << afilename << std::endl; - crsmat_t A = KokkosKernels::Impl::read_kokkos_crst_matrix( + crsmat_t A = KokkosSparse::Impl::read_kokkos_crst_matrix( afilename.c_str()); // in_matrix graph_t graph = A.graph; // in_graph const size_type nrows = graph.numRows(); diff --git a/perf_test/sparse/KokkosSparse_spmv.cpp b/perf_test/sparse/KokkosSparse_spmv.cpp index 6b67905adc..9eec6181a7 100644 --- a/perf_test/sparse/KokkosSparse_spmv.cpp +++ b/perf_test/sparse/KokkosSparse_spmv.cpp @@ -55,6 +55,7 @@ #include #include #include +#include #include #include "KokkosKernels_default_types.hpp" #include @@ -90,12 +91,12 @@ int test_crs_matrix_singlevec(Ordinal numRows, Ordinal numCols, int test, srand(17312837); matrix_type A; if (filename) - A = KokkosKernels::Impl::read_kokkos_crst_matrix(filename); + A = KokkosSparse::Impl::read_kokkos_crst_matrix(filename); else { Offset nnz = 10 * numRows; // note: the help text says the bandwidth is fixed at 0.01 * numRows // CAVEAT: small problem sizes are problematic, b/c of 0.01*numRows - A = KokkosKernels::Impl::kk_generate_sparse_matrix( + A = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, 0, 0.01 * numRows); } SPMVTestData test_data = setup_test(&data, A, rows_per_thread, team_size, diff --git a/perf_test/sparse/KokkosSparse_sptrsv.cpp b/perf_test/sparse/KokkosSparse_sptrsv.cpp index c6787242d9..a27ed3f6d2 100644 --- a/perf_test/sparse/KokkosSparse_sptrsv.cpp +++ b/perf_test/sparse/KokkosSparse_sptrsv.cpp @@ -58,12 +58,13 @@ #include -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_sptrsv.hpp" #include "KokkosSparse_spmv.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include 
"KokkosKernels_default_types.hpp" #include +#include "KokkosSparse_IOUtils.hpp" //#define INTERNAL_CUSPARSE @@ -159,7 +160,7 @@ int test_sptrsv_perf(std::vector tests, const std::string &lfilename, if (!lfilename.empty()) { std::cout << "Lower Tri Begin: Read matrix filename " << lfilename << std::endl; - crsmat_t triMtx = KokkosKernels::Impl::read_kokkos_crst_matrix( + crsmat_t triMtx = KokkosSparse::Impl::read_kokkos_crst_matrix( lfilename.c_str()); // in_matrix graph_t graph = triMtx.graph; // in_graph const size_type nrows = graph.numRows(); @@ -567,7 +568,7 @@ int test_sptrsv_perf(std::vector tests, const std::string &lfilename, if (!ufilename.empty()) { std::cout << "Upper Tri Begin: Read matrix filename " << ufilename << std::endl; - crsmat_t triMtx = KokkosKernels::Impl::read_kokkos_crst_matrix( + crsmat_t triMtx = KokkosSparse::Impl::read_kokkos_crst_matrix( ufilename.c_str()); // in_matrix graph_t graph = triMtx.graph; // in_graph const size_type nrows = graph.numRows(); diff --git a/perf_test/sparse/KokkosSparse_sptrsv_supernode.cpp b/perf_test/sparse/KokkosSparse_sptrsv_supernode.cpp index 039c88e9c1..ad8e1ba8b9 100644 --- a/perf_test/sparse/KokkosSparse_sptrsv_supernode.cpp +++ b/perf_test/sparse/KokkosSparse_sptrsv_supernode.cpp @@ -43,9 +43,10 @@ */ #include "Kokkos_Random.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_spmv.hpp" #include "KokkosSparse_CrsMatrix.hpp" +#include "KokkosSparse_IOUtils.hpp" #include "KokkosSparse_sptrsv.hpp" #include "KokkosSparse_sptrsv_supernode.hpp" @@ -130,7 +131,7 @@ int test_sptrsv_perf(std::vector tests, bool verbose, std::cout << " > Read a triangular-matrix filename " << matrix_filename << std::endl; host_crsmat_t M = - KokkosKernels::Impl::read_kokkos_crst_matrix( + KokkosSparse::Impl::read_kokkos_crst_matrix( matrix_filename.c_str()); const size_type nrows = M.graph.numRows(); // transpose the matrix to be stored in CCS diff --git 
a/src/common/KokkosKernels_IOUtils.hpp b/src/common/KokkosKernels_IOUtils.hpp index d450221797..08e6f3cdc7 100644 --- a/src/common/KokkosKernels_IOUtils.hpp +++ b/src/common/KokkosKernels_IOUtils.hpp @@ -59,7 +59,6 @@ #include #include "Kokkos_Random.hpp" #include "KokkosKernels_SimpleUtils.hpp" -#include "KokkosSparse_CrsMatrix.hpp" #include namespace KokkosKernels { @@ -89,384 +88,6 @@ inline void getRandomBounds(double mag, Kokkos::complex &start, end = Kokkos::complex(mag, mag); } -// MD: Bases on Christian's sparseMatrix_generate function in test_crsmatrix.cpp -// file. -template -void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols, - SizeType &nnz, OrdinalType row_size_variance, - OrdinalType bandwidth, ScalarType *&values, - SizeType *&rowPtr, OrdinalType *&colInd, - OrdinalType block_elem_count = 1) { - rowPtr = new SizeType[nrows + 1]; - - OrdinalType elements_per_row = nrows ? nnz / nrows : 0; - srand(13721); - rowPtr[0] = 0; - for (int row = 0; row < nrows; row++) { - int varianz = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance; - int numRowEntries = elements_per_row + varianz; - if (numRowEntries < 0) numRowEntries = 0; - // Clamping numRowEntries above accomplishes 2 things: - // - If ncols is 0, numRowEntries will also be 0 - // - With numRowEntries at most 2/3 the number of columns, in the worst - // case - // 90% of insertions will succeed after 6 tries - if (numRowEntries > 0.66 * ncols) numRowEntries = 0.66 * ncols; - rowPtr[row + 1] = rowPtr[row] + numRowEntries; - } - nnz = rowPtr[nrows]; - values = new ScalarType[nnz]; - colInd = new OrdinalType[nnz]; - for (OrdinalType row = 0; row < nrows; row++) { - for (SizeType k = rowPtr[row]; k < rowPtr[row + 1]; ++k) { - while (true) { - OrdinalType pos = (1.0 * rand() / RAND_MAX - 0.5) * bandwidth + row; - while (pos < 0) pos += ncols; - while (pos >= ncols) pos -= ncols; - - bool is_already_in_the_row = false; - for (SizeType j = rowPtr[row]; j < k; j++) { - if (colInd[j] == pos) { 
- is_already_in_the_row = true; - break; - } - } - if (!is_already_in_the_row) { - colInd[k] = pos; - break; - } - } - } - } - // Sample each value from uniform (-50, 50) for real types, or (-50 - 50i, 50 - // + 50i) for complex types. - Kokkos::View valuesView( - values, nnz * block_elem_count); - ScalarType randStart, randEnd; - getRandomBounds(50.0, randStart, randEnd); - Kokkos::Random_XorShift64_Pool pool(13718); - Kokkos::fill_random(valuesView, pool, randStart, randEnd); -} - -template -void kk_sparseMatrix_generate_lower_upper_triangle( - char uplo, OrdinalType nrows, OrdinalType ncols, SizeType &nnz, - OrdinalType /*row_size_variance*/, OrdinalType /*bandwidth*/, - ScalarType *&values, SizeType *&rowPtr, OrdinalType *&colInd) { - rowPtr = new SizeType[nrows + 1]; - - // OrdinalType elements_per_row = nnz/nrows; - srand(13721); - rowPtr[0] = 0; - for (int row = 0; row < nrows; row++) { - if (uplo == 'L') - rowPtr[row + 1] = rowPtr[row] + row + 1; - else - rowPtr[row + 1] = rowPtr[row] + ncols - (row); - } - nnz = rowPtr[nrows]; - values = new ScalarType[nnz]; - colInd = new OrdinalType[nnz]; - for (OrdinalType row = 0; row < nrows; row++) { - for (SizeType k = rowPtr[row]; k < rowPtr[row + 1]; k++) { - if (uplo == 'L') - colInd[k] = k - rowPtr[row]; - else - colInd[k] = row + (k - rowPtr[row]); - values[k] = 1.0; - } - } -} - -template -void kk_diagonally_dominant_sparseMatrix_generate( - OrdinalType nrows, OrdinalType ncols, SizeType &nnz, - OrdinalType row_size_variance, OrdinalType bandwidth, ScalarType *&values, - SizeType *&rowPtr, OrdinalType *&colInd, - ScalarType diagDominance = 10 * Kokkos::ArithTraits::one()) { - rowPtr = new SizeType[nrows + 1]; - - OrdinalType elements_per_row = nnz / nrows; - srand(13721); - rowPtr[0] = 0; - for (int row = 0; row < nrows; row++) { - int varianz = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance; - if (varianz < 1) varianz = 1; - if (varianz > 0.75 * ncols) varianz = 0.75 * ncols; - rowPtr[row + 1] = 
rowPtr[row] + elements_per_row + varianz; - if (rowPtr[row + 1] <= rowPtr[row]) // This makes sure that there is - rowPtr[row + 1] = rowPtr[row] + 1; // at least one nonzero in the row - } - nnz = rowPtr[nrows]; - values = new ScalarType[nnz]; - colInd = new OrdinalType[nnz]; - for (OrdinalType row = 0; row < nrows; row++) { - ScalarType total_values = 0; - std::unordered_set entriesInRow; - // We always add the diagonal entry (after this loop) - entriesInRow.insert(row); - for (SizeType k = rowPtr[row]; k < rowPtr[row + 1] - 1; k++) { - while (true) { - OrdinalType pos = (1.0 * rand() / RAND_MAX - 0.5) * bandwidth + row; - while (pos < 0) pos += ncols; - while (pos >= ncols) pos -= ncols; - - if (entriesInRow.find(pos) == entriesInRow.end()) { - entriesInRow.insert(pos); - colInd[k] = pos; - values[k] = 100.0 * rand() / RAND_MAX - 50.0; - total_values += - Kokkos::Details::ArithTraits::abs(values[k]); - break; - } - } - } - - colInd[rowPtr[row + 1] - 1] = row; - values[rowPtr[row + 1] - 1] = total_values * diagDominance; - } -} - -// This function creates a diagonal sparse matrix for testing matrix operations. -// The elements on the diagonal are 1, 2, ..., n-1, n. -// If "invert" is true, it will return the inverse of the above diagonal matrix. 
-template -crsMat_t kk_generate_diag_matrix(typename crsMat_t::const_ordinal_type n, - const bool invert = false) { - typedef typename crsMat_t::ordinal_type ot; - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; - typedef typename crsMat_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::non_const_value_type size_type; - typedef typename cols_view_t::non_const_value_type lno_t; - typedef typename values_view_t::non_const_value_type scalar_t; - - row_map_view_t rowmap_view("rowmap_view", n + 1); - cols_view_t columns_view("colsmap_view", n); - values_view_t values_view("values_view", n); - - { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - typename values_view_t::HostMirror hv = - Kokkos::create_mirror_view(values_view); - - for (lno_t i = 0; i <= n; ++i) { - hr(i) = size_type(i); - } - - for (ot i = 0; i < n; ++i) { - hc(i) = lno_t(i); - if (invert) { - hv(i) = scalar_t(1.0) / (scalar_t(i + 1)); - } else { - hv(i) = scalar_t(i + 1); - } - } - Kokkos::deep_copy(rowmap_view, hr); - Kokkos::deep_copy(columns_view, hc); - Kokkos::deep_copy(values_view, hv); - } - - graph_t static_graph(columns_view, rowmap_view); - crsMat_t crsmat("CrsMatrix", n, values_view, static_graph); - return crsmat; -} - -template -crsMat_t kk_generate_diagonally_dominant_sparse_matrix( - typename crsMat_t::const_ordinal_type nrows, - typename crsMat_t::const_ordinal_type ncols, - typename crsMat_t::non_const_size_type &nnz, - typename crsMat_t::const_ordinal_type row_size_variance, - typename crsMat_t::const_ordinal_type bandwidth, - typename crsMat_t::const_value_type diagDominance = - 10 * Kokkos::ArithTraits::one()) { - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef 
typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; - typedef typename crsMat_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::non_const_value_type size_type; - typedef typename cols_view_t::non_const_value_type lno_t; - typedef typename values_view_t::non_const_value_type scalar_t; - lno_t *adj; - size_type *xadj; //, nnzA; - scalar_t *values; - - kk_diagonally_dominant_sparseMatrix_generate( - nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj, - diagDominance); - - row_map_view_t rowmap_view("rowmap_view", nrows + 1); - cols_view_t columns_view("colsmap_view", nnz); - values_view_t values_view("values_view", nnz); - - { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - typename values_view_t::HostMirror hv = - Kokkos::create_mirror_view(values_view); - - for (lno_t i = 0; i <= nrows; ++i) { - hr(i) = xadj[i]; - } - - for (size_type i = 0; i < nnz; ++i) { - hc(i) = adj[i]; - hv(i) = values[i]; - } - Kokkos::deep_copy(rowmap_view, hr); - Kokkos::deep_copy(columns_view, hc); - Kokkos::deep_copy(values_view, hv); - } - - graph_t static_graph(columns_view, rowmap_view); - crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); - delete[] xadj; - delete[] adj; - delete[] values; - return crsmat; -} - -template -crsMat_t kk_generate_triangular_sparse_matrix( - char uplo, typename crsMat_t::const_ordinal_type nrows, - typename crsMat_t::const_ordinal_type ncols, - typename crsMat_t::non_const_size_type &nnz, - typename crsMat_t::const_ordinal_type row_size_variance, - typename crsMat_t::const_ordinal_type bandwidth) { - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; 
- typedef typename crsMat_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::non_const_value_type size_type; - typedef typename cols_view_t::non_const_value_type lno_t; - typedef typename values_view_t::non_const_value_type scalar_t; - lno_t *adj; - size_type *xadj; //, nnzA; - scalar_t *values; - - kk_sparseMatrix_generate_lower_upper_triangle( - uplo, nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj); - - row_map_view_t rowmap_view("rowmap_view", nrows + 1); - cols_view_t columns_view("colsmap_view", nnz); - values_view_t values_view("values_view", nnz); - - { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - typename values_view_t::HostMirror hv = - Kokkos::create_mirror_view(values_view); - - for (lno_t i = 0; i <= nrows; ++i) { - hr(i) = xadj[i]; - } - - for (size_type i = 0; i < nnz; ++i) { - hc(i) = adj[i]; - hv(i) = values[i]; - } - Kokkos::deep_copy(rowmap_view, hr); - Kokkos::deep_copy(columns_view, hc); - Kokkos::deep_copy(values_view, hv); - Kokkos::fence(); - } - - graph_t static_graph(columns_view, rowmap_view); - crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); - delete[] xadj; - delete[] adj; - delete[] values; - return crsmat; -} - -template -crsMat_t kk_generate_sparse_matrix( - typename crsMat_t::const_ordinal_type nrows, - typename crsMat_t::const_ordinal_type ncols, - typename crsMat_t::non_const_size_type &nnz, - typename crsMat_t::const_ordinal_type row_size_variance, - typename crsMat_t::const_ordinal_type bandwidth) { - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; - typedef typename crsMat_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::non_const_value_type size_type; - 
typedef typename cols_view_t::non_const_value_type lno_t; - typedef typename values_view_t::non_const_value_type scalar_t; - lno_t *adj; - size_type *xadj; //, nnzA; - scalar_t *values; - - kk_sparseMatrix_generate( - nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj); - - row_map_view_t rowmap_view("rowmap_view", nrows + 1); - cols_view_t columns_view("colsmap_view", nnz); - values_view_t values_view("values_view", nnz); - - { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - typename values_view_t::HostMirror hv = - Kokkos::create_mirror_view(values_view); - - for (lno_t i = 0; i <= nrows; ++i) { - hr(i) = xadj[i]; - } - - for (size_type i = 0; i < nnz; ++i) { - hc(i) = adj[i]; - hv(i) = values[i]; - } - Kokkos::deep_copy(rowmap_view, hr); - Kokkos::deep_copy(columns_view, hc); - Kokkos::deep_copy(values_view, hv); - } - - graph_t static_graph(columns_view, rowmap_view); - crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); - delete[] xadj; - delete[] adj; - delete[] values; - return crsmat; -} - -template -bsrMat_t kk_generate_sparse_matrix( - typename bsrMat_t::const_ordinal_type block_dim, - typename bsrMat_t::const_ordinal_type nrows, - typename bsrMat_t::const_ordinal_type ncols, - typename bsrMat_t::non_const_size_type &nnz, - typename bsrMat_t::const_ordinal_type row_size_variance, - typename bsrMat_t::const_ordinal_type bandwidth) { - typedef KokkosSparse::CrsMatrix< - typename bsrMat_t::value_type, typename bsrMat_t::ordinal_type, - typename bsrMat_t::device_type, typename bsrMat_t::memory_traits, - typename bsrMat_t::size_type> - crsMat_t; - - const auto crs_mtx = kk_generate_sparse_matrix( - nrows * block_dim, ncols * block_dim, nnz, row_size_variance, bandwidth); - bsrMat_t bsrmat(crs_mtx, block_dim); - return bsrmat; -} -// TODO: need to fix the size_type. All over the reading inputs are lno_t. 
- template void md_malloc(stype **arr, size_t n, std::string /*alloc_str*/ = "") { *arr = new stype[n]; @@ -647,133 +268,10 @@ inline void kk_read_3Dview_from_file(idx_array_type &view, Kokkos::fence(); } -template -void convert_crs_to_lower_triangle_edge_list(idx nv, idx *xadj, idx *adj, - idx *lower_triangle_srcs, - idx *lower_triangle_dests) { - idx ind = 0; - for (idx i = 0; i < nv; ++i) { - idx xb = xadj[i]; - idx xe = xadj[i + 1]; - for (idx j = xb; j < xe; ++j) { - idx dst = adj[j]; - if (i < dst) { - lower_triangle_srcs[ind] = i; - lower_triangle_dests[ind++] = dst; - } - } - } -} - -template -void convert_crs_to_edge_list(idx nv, idx *xadj, idx *srcs) { - for (idx i = 0; i < nv; ++i) { - idx xb = xadj[i]; - idx xe = xadj[i + 1]; - for (idx j = xb; j < xe; ++j) { - srcs[j] = i; - } - } -} - -template -void convert_edge_list_to_csr(lno_t nv, size_type ne, lno_t *srcs, lno_t *dests, - wt *ew, size_type *xadj, lno_t *adj, wt *crs_ew) { - std::vector> edges(ne); - for (size_type i = 0; i < ne; ++i) { - edges[i].src = srcs[i]; - edges[i].dst = dests[i]; - edges[i].ew = ew[i]; - } - std::sort(edges.begin(), edges.begin() + ne); - - size_type eind = 0; - for (lno_t i = 0; i < nv; ++i) { - (xadj)[i] = eind; - while (edges[eind].src == i) { - (adj)[eind] = edges[eind].dst; - (*crs_ew)[eind] = edges[eind].ew; - ++eind; - } - } - xadj[nv] = eind; -} - -template -void convert_undirected_edge_list_to_csr(lno_t nv, size_type ne, in_lno_t *srcs, - in_lno_t *dests, size_type *xadj, - lno_t *adj) { - std::vector> edges(ne * 2); - for (size_type i = 0; i < ne; ++i) { - edges[i * 2].src = srcs[i]; - edges[i * 2].dst = dests[i]; - - edges[i * 2 + 1].src = dests[i]; - edges[i * 2 + 1].dst = srcs[i]; - } -#ifdef KOKKOSKERNELS_HAVE_OUTER -#include -#include -#include -#include - __gnu_parallel::parallel_sort_mwms *>( - &(edges[0]), &(edges[0]) + ne * 2, - std::less>(), 64); -#else - std::sort(edges.begin(), edges.begin() + ne * 2); -#endif - - size_type eind = 0; - for (lno_t i = 
0; i < nv; ++i) { - (xadj)[i] = eind; - while (edges[eind].src == i) { - (adj)[eind] = edges[eind].dst; - //(*crs_ew)[eind] = edges[eind].ew; - ++eind; - } - } - xadj[nv] = eind; -} -/* - -template -void read_graph_src_dst_bin( - lno_t *nv, size_type *ne - ,size_type **xadj, lno_t **adj, scalar_t **ew, - const char *fnameSrc, const char *fnameTarg){ - - size_t numEdges = 0; - size_t *srcs, *dst; //this type is hard coded - buildEdgeListFromBinSrcTarg_undirected( - fnameSrc, fnameTarg, - &numEdges, - &srcs, &dst); - - lno_t num_vertex = 0; - for (size_t i = 0; i < numEdges; ++i){ - if (num_vertex < srcs[i]) num_vertex = srcs[i]; - if (num_vertex < dst[i]) num_vertex = dst[i]; - } - num_vertex += 1; - - *nv = num_vertex; - *ne = numEdges * 2; - - md_malloc(xadj, num_vertex + 1); - md_malloc(adj, numEdges * 2); - convert_undirected_edge_list_to_csr ( - num_vertex, numEdges, - srcs, dst, - *xadj, *adj); - - delete [] srcs; - delete [] dst; -} -*/ - template -void write_edgelist_bin(size_t ne, const idx *edge_begins, const idx *edge_ends, - const wt *ew, const char *filename) { +[[deprecated]] void write_edgelist_bin(size_t ne, const idx *edge_begins, + const idx *edge_ends, const wt *ew, + const char *filename) { std::ofstream myFile(filename, std::ios::out | std::ios::binary); myFile.write((char *)&ne, sizeof(idx)); myFile.write((char *)edge_begins, sizeof(idx) * (ne)); @@ -797,270 +295,6 @@ void read_edgelist_bin(idx *ne, idx **edge_begins, idx **edge_ends, wt **ew, myFile.close(); } -template -void write_graph_bin(lno_t nv, size_type ne, const size_type *xadj, - const lno_t *adj, const scalar_t *ew, - const char *filename) { - std::ofstream myFile(filename, std::ios::out | std::ios::binary); - myFile.write((char *)&nv, sizeof(lno_t)); - myFile.write((char *)&ne, sizeof(size_type)); - myFile.write((char *)xadj, sizeof(size_type) * (nv + 1)); - - myFile.write((char *)adj, sizeof(lno_t) * (ne)); - - myFile.write((char *)ew, sizeof(scalar_t) * (ne)); - - myFile.close(); 
-} - -template -void write_graph_crs(lno_t nv, size_type ne, const size_type *xadj, - const lno_t *adj, const scalar_t *ew, - const char *filename) { - std::ofstream myFile(filename, std::ios::out); - myFile << nv << " " << ne << std::endl; - - for (lno_t i = 0; i <= nv; ++i) { - myFile << xadj[i] << " "; - } - myFile << std::endl; - - for (lno_t i = 0; i < nv; ++i) { - size_type b = xadj[i]; - size_type e = xadj[i + 1]; - for (size_type j = b; j < e; ++j) { - myFile << adj[j] << " "; - } - myFile << std::endl; - } - for (size_type i = 0; i < ne; ++i) { - myFile << ew[i] << " "; - } - myFile << std::endl; - - myFile.close(); -} - -template -void write_graph_ligra(lno_t nv, size_type ne, const size_type *xadj, - const lno_t *adj, const scalar_t * /*ew*/, - const char *filename) { - std::ofstream ff(filename); - ff << "AdjacencyGraph" << std::endl; - ff << nv << std::endl << ne << std::endl; - for (lno_t i = 0; i < nv; ++i) { - ff << xadj[i] << std::endl; - } - for (size_type i = 0; i < ne; ++i) { - ff << adj[i] << std::endl; - } - ff.close(); -} - -// MM: types and utility functions for parsing the MatrixMarket format -namespace MM { -enum MtxObject { UNDEFINED_OBJECT, MATRIX, VECTOR }; -enum MtxFormat { UNDEFINED_FORMAT, COORDINATE, ARRAY }; -enum MtxField { - UNDEFINED_FIELD, - REAL, // includes both float and double - COMPLEX, // includes complex and complex - INTEGER, // includes all integer types - PATTERN // not a type, but means the value for every entry is 1 -}; -enum MtxSym { - UNDEFINED_SYMMETRY, - GENERAL, - SYMMETRIC, // A(i, j) = A(j, i) - SKEW_SYMMETRIC, // A(i, j) = -A(j, i) - HERMITIAN // A(i, j) = a + bi; A(j, i) = a - bi -}; - -// readScalar/writeScalar: read and write a scalar in the form that it appears -// in an .mtx file. 
The >> and << operators won't work, because complex appears -// as "real imag", not "(real, imag)" -template -scalar_t readScalar(std::istream &is) { - scalar_t val; - is >> val; - return val; -} - -template <> -inline Kokkos::complex readScalar(std::istream &is) { - float r, i; - is >> r; - is >> i; - return Kokkos::complex(r, i); -} - -template <> -inline Kokkos::complex readScalar(std::istream &is) { - double r, i; - is >> r; - is >> i; - return Kokkos::complex(r, i); -} - -template -void writeScalar(std::ostream &os, scalar_t val) { - os << val; -} - -template <> -inline void writeScalar(std::ostream &os, Kokkos::complex val) { - os << val.real() << ' ' << val.imag(); -} - -template <> -inline void writeScalar(std::ostream &os, Kokkos::complex val) { - os << val.real() << ' ' << val.imag(); -} - -// symmetryFlip: given a value for A(i, j), return the value that -// should be inserted at A(j, i) (if any) -template -scalar_t symmetryFlip(scalar_t val, MtxSym symFlag) { - if (symFlag == SKEW_SYMMETRIC) return -val; - return val; -} - -template <> -inline Kokkos::complex symmetryFlip(Kokkos::complex val, - MtxSym symFlag) { - if (symFlag == HERMITIAN) - return Kokkos::conj(val); - else if (symFlag == SKEW_SYMMETRIC) - return -val; - return val; -} - -template <> -inline Kokkos::complex symmetryFlip(Kokkos::complex val, - MtxSym symFlag) { - if (symFlag == HERMITIAN) - return Kokkos::conj(val); - else if (symFlag == SKEW_SYMMETRIC) - return -val; - return val; -} -} // namespace MM - -template -void write_matrix_mtx(lno_t nrows, lno_t ncols, size_type nentries, - const size_type *xadj, const lno_t *adj, - const scalar_t *vals, const char *filename) { - std::ofstream myFile(filename); - myFile << "%%MatrixMarket matrix coordinate "; - if (std::is_same>::value || - std::is_same>::value) - myFile << "complex"; - else - myFile << "real"; - myFile << " general\n"; - myFile << nrows << " " << ncols << " " << nentries << '\n'; - myFile << std::setprecision(17) << 
std::scientific; - for (lno_t i = 0; i < nrows; ++i) { - size_type b = xadj[i]; - size_type e = xadj[i + 1]; - for (size_type j = b; j < e; ++j) { - myFile << i + 1 << " " << adj[j] + 1 << " "; - MM::writeScalar(myFile, vals[j]); - myFile << '\n'; - } - } - myFile.close(); -} - -template -void write_graph_mtx(lno_t nv, size_type ne, const size_type *xadj, - const lno_t *adj, const scalar_t *ew, - const char *filename) { - std::ofstream myFile(filename); - myFile << "%%MatrixMarket matrix coordinate "; - if (std::is_same>::value || - std::is_same>::value) - myFile << "complex"; - else - myFile << "real"; - myFile << " general\n"; - myFile << nv << " " << nv << " " << ne << '\n'; - myFile << std::setprecision(8) << std::scientific; - for (lno_t i = 0; i < nv; ++i) { - size_type b = xadj[i]; - size_type e = xadj[i + 1]; - for (size_type j = b; j < e; ++j) { - myFile << i + 1 << " " << (adj)[j] + 1 << " "; - MM::writeScalar(myFile, ew[j]); - myFile << '\n'; - } - } - - myFile.close(); -} - -template -void read_graph_bin(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, - scalar_t **ew, const char *filename) { - std::ifstream myFile(filename, std::ios::in | std::ios::binary); - - myFile.read((char *)nv, sizeof(lno_t)); - myFile.read((char *)ne, sizeof(size_type)); - md_malloc(xadj, *nv + 1); - md_malloc(adj, *ne); - md_malloc(ew, *ne); - myFile.read((char *)*xadj, sizeof(size_type) * (*nv + 1)); - myFile.read((char *)*adj, sizeof(lno_t) * (*ne)); - myFile.read((char *)*ew, sizeof(scalar_t) * (*ne)); - myFile.close(); -} - -// When Kokkos issue #2313 is resolved, can delete -// parseScalar and just use operator>> -template -scalar_t parseScalar(std::istream &is) { - scalar_t val; - is >> val; - return val; -} - -template <> -inline Kokkos::complex parseScalar(std::istream &is) { - std::complex val; - is >> val; - return Kokkos::complex(val); -} - -template <> -inline Kokkos::complex parseScalar(std::istream &is) { - std::complex val; - is >> val; - return 
Kokkos::complex(val); -} - -template -void read_graph_crs(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, - scalar_t **ew, const char *filename) { - std::ifstream myFile(filename, std::ios::in); - myFile >> *nv >> *ne; - - md_malloc(xadj, *nv + 1); - md_malloc(adj, *ne); - md_malloc(ew, *ne); - - for (lno_t i = 0; i <= *nv; ++i) { - myFile >> (*xadj)[i]; - } - - for (size_type i = 0; i < *ne; ++i) { - myFile >> (*adj)[i]; - } - for (size_type i = 0; i < *ne; ++i) { - (*ew)[i] = parseScalar(myFile); - } - myFile.close(); -} - inline bool endswith(std::string const &fullString, std::string const &ending) { if (fullString.length() >= ending.length()) { return (0 == fullString.compare(fullString.length() - ending.length(), @@ -1070,491 +304,6 @@ inline bool endswith(std::string const &fullString, std::string const &ending) { } } -template -void write_kokkos_crst_matrix(crs_matrix_t a_crsmat, const char *filename) { - typedef typename crs_matrix_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; - typedef typename crs_matrix_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::value_type offset_t; - typedef typename cols_view_t::value_type lno_t; - typedef typename values_view_t::value_type scalar_t; - typedef typename values_view_t::size_type size_type; - - size_type nnz = a_crsmat.nnz(); - - auto a_rowmap_view = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), a_crsmat.graph.row_map); - auto a_entries_view = Kokkos::create_mirror_view_and_copy( - Kokkos::HostSpace(), a_crsmat.graph.entries); - auto a_values_view = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a_crsmat.values); - offset_t *a_rowmap = const_cast(a_rowmap_view.data()); - lno_t *a_entries = a_entries_view.data(); - scalar_t *a_values = a_values_view.data(); - - std::string strfilename(filename); - if (endswith(strfilename, 
".mtx") || endswith(strfilename, ".mm")) { - write_matrix_mtx( - a_crsmat.numRows(), a_crsmat.numCols(), a_crsmat.nnz(), a_rowmap, - a_entries, a_values, filename); - return; - } else if (a_crsmat.numRows() != a_crsmat.numCols()) { - throw std::runtime_error( - "For formats other than MatrixMarket (suffix .mm or .mtx),\n" - "write_kokkos_crst_matrix only supports square matrices"); - } - if (endswith(strfilename, ".bin")) { - write_graph_bin( - a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); - } else if (endswith(strfilename, ".ligra")) { - write_graph_ligra( - a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); - } else if (endswith(strfilename, ".crs")) { - write_graph_crs( - a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); - } else { - std::string errMsg = - std::string("write_kokkos_crst_matrix: File extension on ") + filename + - " does not correspond to a known format"; - throw std::runtime_error(errMsg); - } -} - -template -int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, - size_type **xadj, lno_t **adj, scalar_t **ew, - bool symmetrize = false, bool remove_diagonal = true, - bool transpose = false) { - using namespace MM; - std::ifstream mmf(fileName, std::ifstream::in); - if (!mmf.is_open()) { - throw std::runtime_error("File cannot be opened\n"); - } - - std::string fline = ""; - getline(mmf, fline); - - if (fline.size() < 2 || fline[0] != '%' || fline[1] != '%') { - throw std::runtime_error("Invalid MM file. 
Line-1\n"); - } - - // make sure every required field is in the file, by initializing them to - // UNDEFINED_* - MtxObject mtx_object = UNDEFINED_OBJECT; - MtxFormat mtx_format = UNDEFINED_FORMAT; - MtxField mtx_field = UNDEFINED_FIELD; - MtxSym mtx_sym = UNDEFINED_SYMMETRY; - - if (fline.find("matrix") != std::string::npos) { - mtx_object = MATRIX; - } else if (fline.find("vector") != std::string::npos) { - mtx_object = VECTOR; - throw std::runtime_error( - "MatrixMarket \"vector\" is not supported by KokkosKernels read_mtx()"); - } - - if (fline.find("coordinate") != std::string::npos) { - // sparse - mtx_format = COORDINATE; - } else if (fline.find("array") != std::string::npos) { - // dense - mtx_format = ARRAY; - } - - if (fline.find("real") != std::string::npos || - fline.find("double") != std::string::npos) { - if (std::is_same::value || - std::is_same::value) - mtx_field = REAL; - else { - if (!std::is_floating_point::value) - throw std::runtime_error( - "scalar_t in read_mtx() incompatible with float or double typed " - "MatrixMarket file."); - else - mtx_field = REAL; - } - } else if (fline.find("complex") != std::string::npos) { - if (!(std::is_same>::value || - std::is_same>::value)) - throw std::runtime_error( - "scalar_t in read_mtx() incompatible with complex-typed MatrixMarket " - "file."); - else - mtx_field = COMPLEX; - } else if (fline.find("integer") != std::string::npos) { - if (std::is_integral::value || - std::is_floating_point::value || - std::is_same::value || - std::is_same::value) - mtx_field = INTEGER; - else - throw std::runtime_error( - "scalar_t in read_mtx() incompatible with integer-typed MatrixMarket " - "file."); - } else if (fline.find("pattern") != std::string::npos) { - mtx_field = PATTERN; - // any reasonable choice for scalar_t can represent "1" or "1.0 + 0i", so - // nothing to check here - } - - if (fline.find("general") != std::string::npos) { - mtx_sym = GENERAL; - } else if (fline.find("skew-symmetric") != 
std::string::npos) { - mtx_sym = SKEW_SYMMETRIC; - } else if (fline.find("symmetric") != std::string::npos) { - // checking for "symmetric" after "skew-symmetric" because it's a substring - mtx_sym = SYMMETRIC; - } else if (fline.find("hermitian") != std::string::npos || - fline.find("Hermitian") != std::string::npos) { - mtx_sym = HERMITIAN; - } - // Validate the matrix attributes - if (mtx_format == ARRAY) { - if (mtx_sym == UNDEFINED_SYMMETRY) mtx_sym = GENERAL; - if (mtx_sym != GENERAL) - throw std::runtime_error( - "array format MatrixMarket file must have general symmetry (optional " - "to include \"general\")"); - } - if (mtx_object == UNDEFINED_OBJECT) - throw std::runtime_error( - "MatrixMarket file header is missing the object type."); - if (mtx_format == UNDEFINED_FORMAT) - throw std::runtime_error("MatrixMarket file header is missing the format."); - if (mtx_field == UNDEFINED_FIELD) - throw std::runtime_error( - "MatrixMarket file header is missing the field type."); - if (mtx_sym == UNDEFINED_SYMMETRY) - throw std::runtime_error( - "MatrixMarket file header is missing the symmetry type."); - - while (1) { - getline(mmf, fline); - if (fline[0] != '%') break; - } - std::stringstream ss(fline); - lno_t nr = 0, nc = 0; - size_type nnz = 0; - ss >> nr >> nc; - if (mtx_format == COORDINATE) - ss >> nnz; - else - nnz = nr * nc; - size_type numEdges = nnz; - symmetrize = symmetrize || mtx_sym != GENERAL; - if (symmetrize && nr != nc) { - throw std::runtime_error("A non-square matrix cannot be symmetrized."); - } - if (mtx_format == ARRAY) { - // Array format only supports general symmetry and non-pattern - if (symmetrize) - throw std::runtime_error( - "array format MatrixMarket file cannot be symmetrized."); - if (mtx_field == PATTERN) - throw std::runtime_error( - "array format MatrixMarket file can't have \"pattern\" field type."); - } - if (symmetrize) { - numEdges = 2 * nnz; - } - // numEdges is only an upper bound (diagonal entries may be removed) - 
std::vector> edges(numEdges); - size_type nE = 0; - lno_t numDiagonal = 0; - for (size_type i = 0; i < nnz; ++i) { - getline(mmf, fline); - std::stringstream ss2(fline); - struct Edge tmp; - // read source, dest (edge) and weight (value) - lno_t s, d; - scalar_t w; - if (mtx_format == ARRAY) { - // In array format, entries are listed in column major order, - // so the row and column can be determined just from the index i - //(but make them 1-based indices, to match the way coordinate works) - s = i % nr + 1; // row - d = i / nr + 1; // col - } else { - // In coordinate format, row and col of each entry is read from file - ss2 >> s >> d; - } - if (mtx_field == PATTERN) - w = 1; - else - w = readScalar(ss2); - if (!transpose) { - tmp.src = s - 1; - tmp.dst = d - 1; - tmp.ew = w; - } else { - tmp.src = d - 1; - tmp.dst = s - 1; - tmp.ew = w; - } - if (tmp.src == tmp.dst) { - numDiagonal++; - if (!remove_diagonal) { - edges[nE++] = tmp; - } - continue; - } - edges[nE++] = tmp; - if (symmetrize) { - struct Edge tmp2; - tmp2.src = tmp.dst; - tmp2.dst = tmp.src; - // the symmetrized value is w, -w or conj(w) if mtx_sym is - // SYMMETRIC, SKEW_SYMMETRIC or HERMITIAN, respectively. 
- tmp2.ew = symmetryFlip(tmp.ew, mtx_sym); - edges[nE++] = tmp2; - } - } - mmf.close(); - std::sort(edges.begin(), edges.begin() + nE); - if (transpose) { - lno_t tmp = nr; - nr = nc; - nc = tmp; - } - // idx *nv, idx *ne, idx **xadj, idx **adj, wt **wt - *nrows = nr; - *ncols = nc; - *ne = nE; - //*xadj = new idx[nr + 1]; - md_malloc(xadj, nr + 1); - //*adj = new idx[nE]; - md_malloc(adj, nE); - //*ew = new wt[nE]; - md_malloc(ew, nE); - size_type eind = 0; - size_type actual = 0; - for (lno_t i = 0; i < nr; ++i) { - (*xadj)[i] = actual; - bool is_first = true; - while (eind < nE && edges[eind].src == i) { - if (is_first || !symmetrize || eind == 0 || - (eind > 0 && edges[eind - 1].dst != edges[eind].dst)) { - (*adj)[actual] = edges[eind].dst; - (*ew)[actual] = edges[eind].ew; - ++actual; - } - is_first = false; - ++eind; - } - } - (*xadj)[nr] = actual; - *ne = actual; - return 0; -} - -// Version of read_mtx which does not capture the number of columns. -// This is the old interface; it's kept for backwards compatibility. 
-template -int read_mtx(const char *fileName, lno_t *nv, size_type *ne, size_type **xadj, - lno_t **adj, scalar_t **ew, bool symmetrize = false, - bool remove_diagonal = true, bool transpose = false) { - lno_t ncol; // will discard - return read_mtx(fileName, nv, &ncol, ne, xadj, - adj, ew, symmetrize, - remove_diagonal, transpose); -} - -template -void read_matrix(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, - scalar_t **ew, const char *filename) { - std::string strfilename(filename); - if (endswith(strfilename, ".mtx") || endswith(strfilename, ".mm")) { - read_mtx(filename, nv, ne, xadj, adj, ew, false, false, false); - } - - else if (endswith(strfilename, ".bin")) { - read_graph_bin(nv, ne, xadj, adj, ew, filename); - } - - else if (endswith(strfilename, ".crs")) { - read_graph_crs(nv, ne, xadj, adj, ew, filename); - } - - else { - throw std::runtime_error("Reader is not available\n"); - } -} - -template -crsMat_t read_kokkos_crst_matrix(const char *filename_) { - std::string strfilename(filename_); - bool isMatrixMarket = - endswith(strfilename, ".mtx") || endswith(strfilename, ".mm"); - - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef typename graph_t::row_map_type::non_const_type row_map_view_t; - typedef typename graph_t::entries_type::non_const_type cols_view_t; - typedef typename crsMat_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::value_type size_type; - typedef typename cols_view_t::value_type lno_t; - typedef typename values_view_t::value_type scalar_t; - - lno_t nr, nc, *adj; - size_type *xadj, nnzA; - scalar_t *values; - - if (isMatrixMarket) { - // MatrixMarket file contains the exact number of columns - read_mtx(filename_, &nr, &nc, &nnzA, &xadj, - &adj, &values, false, false, false); - } else { - //.crs and .bin files don't contain #cols, so will compute it later based on - // the entries - read_matrix(&nr, &nnzA, &xadj, &adj, &values, - filename_); - } - - row_map_view_t 
rowmap_view("rowmap_view", nr + 1); - cols_view_t columns_view("colsmap_view", nnzA); - values_view_t values_view("values_view", nnzA); - - { - Kokkos::View> - hr(xadj, nr + 1); - Kokkos::View> - hc(adj, nnzA); - Kokkos::View> - hv(values, nnzA); - Kokkos::deep_copy(rowmap_view, hr); - Kokkos::deep_copy(columns_view, hc); - Kokkos::deep_copy(values_view, hv); - } - - if (!isMatrixMarket) { - KokkosKernels::Impl::kk_view_reduce_max( - nnzA, columns_view, nc); - nc++; - } - - graph_t static_graph(columns_view, rowmap_view); - crsMat_t crsmat("CrsMatrix", nc, values_view, static_graph); - delete[] xadj; - delete[] adj; - delete[] values; - return crsmat; -} - -template -crsGraph_t read_kokkos_crst_graph(const char *filename_) { - typedef typename crsGraph_t::row_map_type::non_const_type row_map_view_t; - typedef typename crsGraph_t::entries_type::non_const_type cols_view_t; - - typedef typename row_map_view_t::value_type size_type; - typedef typename cols_view_t::value_type lno_t; - typedef double scalar_t; - - lno_t nv, *adj; - size_type *xadj, nnzA; - scalar_t *values; - read_matrix(&nv, &nnzA, &xadj, &adj, &values, - filename_); - - row_map_view_t rowmap_view("rowmap_view", nv + 1); - cols_view_t columns_view("colsmap_view", nnzA); - - { - typename row_map_view_t::HostMirror hr = - Kokkos::create_mirror_view(rowmap_view); - typename cols_view_t::HostMirror hc = - Kokkos::create_mirror_view(columns_view); - - for (lno_t i = 0; i <= nv; ++i) { - hr(i) = xadj[i]; - } - - for (size_type i = 0; i < nnzA; ++i) { - hc(i) = adj[i]; - } - Kokkos::deep_copy(rowmap_view, hr); - Kokkos::deep_copy(columns_view, hc); - } - - lno_t ncols = 0; - KokkosKernels::Impl::kk_view_reduce_max( - nnzA, columns_view, ncols); - ncols += 1; - - crsGraph_t static_graph(columns_view, rowmap_view, ncols); - delete[] xadj; - delete[] adj; - delete[] values; - return static_graph; -} - -template -inline void kk_sequential_create_incidence_matrix( - nnz_lno_t num_rows, const size_type *xadj, const 
nnz_lno_t *adj, - size_type *i_adj // output. preallocated -) { - std::vector c_xadj(num_rows); - for (nnz_lno_t i = 0; i < num_rows; i++) { - c_xadj[i] = xadj[i]; - } - int eCnt = 0; - for (nnz_lno_t i = 0; i < num_rows; i++) { - size_type begin = xadj[i]; - size_type end = xadj[i + 1]; - nnz_lno_t adjsize = end - begin; - - for (nnz_lno_t j = 0; j < adjsize; j++) { - size_type aind = j + begin; - nnz_lno_t col = adj[aind]; - if (i < col) { - i_adj[c_xadj[i]++] = eCnt; - i_adj[c_xadj[col]++] = eCnt++; - } - } - } - - for (nnz_lno_t i = 0; i < num_rows; i++) { - if (c_xadj[i] != xadj[i + 1]) { - std::cout << "i:" << i << " c_xadj[i]:" << c_xadj[i] - << " xadj[i+1]:" << xadj[i + 1] << std::endl; - } - } -} - -template -inline void kk_sequential_create_incidence_matrix_transpose( - const nnz_lno_t num_rows, const size_type num_edges, const size_type *xadj, - const nnz_lno_t *adj, - size_type *i_xadj, // output. preallocated - nnz_lno_t *i_adj // output. preallocated -) { - for (nnz_lno_t i = 0; i < num_edges / 2 + 1; i++) { - i_xadj[i] = i * 2; - } - int eCnt = 0; - for (nnz_lno_t i = 0; i < num_rows; i++) { - size_type begin = xadj[i]; - size_type end = xadj[i + 1]; - nnz_lno_t adjsize = end - begin; - - for (nnz_lno_t j = 0; j < adjsize; j++) { - size_type aind = j + begin; - nnz_lno_t col = adj[aind]; - if (i < col) { - i_adj[eCnt++] = i; - i_adj[eCnt++] = col; - } - } - } -} - } // namespace Impl } // namespace KokkosKernels diff --git a/src/common/KokkosKernels_Sorting.hpp b/src/common/KokkosKernels_Sorting.hpp index 208688ae5b..8b897047d9 100644 --- a/src/common/KokkosKernels_Sorting.hpp +++ b/src/common/KokkosKernels_Sorting.hpp @@ -61,48 +61,6 @@ struct DefaultComparator { }; } // namespace Impl -// ---------------------------------- -// BSR matrix/graph sorting utilities -// ---------------------------------- - -template -void sort_bsr_matrix(const bsrMat_t& A); - -// ---------------------------------- -// CRS matrix/graph sorting utilities -// 
---------------------------------- - -// The sort_crs* functions sort the adjacent column list for each row into -// ascending order. - -template -void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, - const values_t& values); - -template -void sort_crs_matrix(const crsMat_t& A); - -template -void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries); - -template -void sort_crs_graph(const crsGraph_t& G); - -// sort_and_merge_matrix produces a new matrix which is equivalent to A but is -// sorted and has no duplicated entries: each (i, j) is unique. Values for -// duplicated entries are summed. -template -crsMat_t sort_and_merge_matrix(const crsMat_t& A); - -template -crsGraph_t sort_and_merge_graph(const crsGraph_t& G); - -template -void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, rowmap_t& rowmap_out, - entries_t& entries_out); - // ---------------------------- // General device-level sorting // ---------------------------- @@ -155,240 +113,6 @@ KOKKOS_INLINE_FUNCTION void TeamBitonicSort2( namespace Impl { -template -struct SortCrsMatrixFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using scalar_t = typename values_t::non_const_value_type; - using team_mem = typename Kokkos::TeamPolicy::member_type; - // The functor owns memory for entriesAux, so it can't have - // MemoryTraits - using entries_managed_t = Kokkos::View; - using values_managed_t = Kokkos::View; - - SortCrsMatrixFunctor(bool usingRangePol, const rowmap_t& rowmap_, - const entries_t& entries_, const values_t& values_) - : rowmap(rowmap_), entries(entries_), values(values_) { - if (usingRangePol) { - entriesAux = entries_managed_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), - entries.extent(0)); - valuesAux = values_managed_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values aux"), - values.extent(0)); 
- } - // otherwise, aux arrays won't be allocated (sorting in place) - } - - KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - // Radix sort requires unsigned keys for comparison - using unsigned_lno_t = typename std::make_unsigned::type; - KokkosKernels::SerialRadixSort2( - (unsigned_lno_t*)entries.data() + rowStart, - (unsigned_lno_t*)entriesAux.data() + rowStart, values.data() + rowStart, - valuesAux.data() + rowStart, rowNum); - } - - KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { - size_type i = t.league_rank(); - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - KokkosKernels::TeamBitonicSort2( - entries.data() + rowStart, values.data() + rowStart, rowNum, t); - } - - rowmap_t rowmap; - entries_t entries; - entries_managed_t entriesAux; - values_t values; - values_managed_t valuesAux; -}; - -template -struct SortCrsGraphFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using team_mem = typename Kokkos::TeamPolicy::member_type; - // The functor owns memory for entriesAux, so it can't have - // MemoryTraits - using entries_managed_t = Kokkos::View; - - SortCrsGraphFunctor(bool usingRangePol, const rowmap_t& rowmap_, - const entries_t& entries_) - : rowmap(rowmap_), entries(entries_) { - if (usingRangePol) { - entriesAux = entries_managed_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), - entries.extent(0)); - } - // otherwise, aux arrays won't be allocated (sorting in place) - } - - KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - // Radix sort requires unsigned keys for comparison - using unsigned_lno_t = typename std::make_unsigned::type; - 
KokkosKernels::SerialRadixSort( - (unsigned_lno_t*)entries.data() + rowStart, - (unsigned_lno_t*)entriesAux.data() + rowStart, rowNum); - } - - KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { - size_type i = t.league_rank(); - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - KokkosKernels::TeamBitonicSort( - entries.data() + rowStart, rowNum, t); - } - - rowmap_t rowmap; - entries_t entries; - entries_managed_t entriesAux; -}; - -template -struct MergedRowmapFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using c_rowmap_t = typename rowmap_t::const_type; - - // Precondition: entries are sorted within each row - MergedRowmapFunctor(const rowmap_t& mergedCounts_, const c_rowmap_t& rowmap_, - const entries_t& entries_) - : mergedCounts(mergedCounts_), rowmap(rowmap_), entries(entries_) {} - - KOKKOS_INLINE_FUNCTION void operator()(lno_t row, size_type& lnewNNZ) const { - size_type rowBegin = rowmap(row); - size_type rowEnd = rowmap(row + 1); - if (rowEnd == rowBegin) { - // Row was empty to begin with - mergedCounts(row) = 0; - return; - } - // Otherwise, the first entry in the row exists - lno_t uniqueEntries = 1; - for (size_type j = rowBegin + 1; j < rowEnd; j++) { - if (entries(j - 1) != entries(j)) uniqueEntries++; - } - mergedCounts(row) = uniqueEntries; - lnewNNZ += uniqueEntries; - if (row == lno_t((rowmap.extent(0) - 1) - 1)) mergedCounts(row + 1) = 0; - } - - rowmap_t mergedCounts; - c_rowmap_t rowmap; - entries_t entries; -}; - -template -struct MatrixMergedEntriesFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using scalar_t = typename values_t::non_const_value_type; - - // Precondition: entries are sorted within each row - MatrixMergedEntriesFunctor(const rowmap_t& rowmap_, const entries_t& entries_, - const values_t& 
values_, - const rowmap_t& mergedRowmap_, - const entries_t& mergedEntries_, - const values_t& mergedValues_) - : rowmap(rowmap_), - entries(entries_), - values(values_), - mergedRowmap(mergedRowmap_), - mergedEntries(mergedEntries_), - mergedValues(mergedValues_) {} - - KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { - size_type rowBegin = rowmap(row); - size_type rowEnd = rowmap(row + 1); - if (rowEnd == rowBegin) { - // Row was empty to begin with, nothing to do - return; - } - // Otherwise, accumulate the value for each column - scalar_t accumVal = values(rowBegin); - lno_t accumCol = entries(rowBegin); - size_type insertPos = mergedRowmap(row); - for (size_type j = rowBegin + 1; j < rowEnd; j++) { - if (accumCol == entries(j)) { - // accumulate - accumVal += values(j); - } else { - // write out and reset - mergedValues(insertPos) = accumVal; - mergedEntries(insertPos) = accumCol; - insertPos++; - accumVal = values(j); - accumCol = entries(j); - } - } - // always left with the last unique entry - mergedValues(insertPos) = accumVal; - mergedEntries(insertPos) = accumCol; - } - - rowmap_t rowmap; - entries_t entries; - values_t values; - rowmap_t mergedRowmap; - entries_t mergedEntries; - values_t mergedValues; -}; - -template -struct GraphMergedEntriesFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - - // Precondition: entries are sorted within each row - GraphMergedEntriesFunctor(const rowmap_t& rowmap_, const entries_t& entries_, - const rowmap_t& mergedRowmap_, - const entries_t& mergedEntries_) - : rowmap(rowmap_), - entries(entries_), - mergedRowmap(mergedRowmap_), - mergedEntries(mergedEntries_) {} - - KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { - size_type rowBegin = rowmap(row); - size_type rowEnd = rowmap(row + 1); - if (rowEnd == rowBegin) { - // Row was empty to begin with, nothing to do - return; - } - // Otherwise, accumulate the value for each 
column - lno_t accumCol = entries(rowBegin); - size_type insertPos = mergedRowmap(row); - for (size_type j = rowBegin + 1; j < rowEnd; j++) { - if (accumCol != entries(j)) { - // write out and reset - mergedEntries(insertPos) = accumCol; - insertPos++; - accumCol = entries(j); - } - } - // always left with the last unique entry - mergedEntries(insertPos) = accumCol; - } - - rowmap_t rowmap; - entries_t entries; - rowmap_t mergedRowmap; - entries_t mergedEntries; -}; - // Functor that sorts a view on one team template @@ -524,274 +248,6 @@ struct BitonicPhase2Functor { } // namespace Impl -// Sort a CRS matrix: within each row, sort entries ascending by column. -// At the same time, permute the values. -template -void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, - const values_t& values) { - using lno_t = typename entries_t::non_const_value_type; - using team_pol = Kokkos::TeamPolicy; - bool useRadix = !Impl::kk_is_gpu_exec_space(); - lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; - if (numRows == 0) return; - Impl::SortCrsMatrixFunctor - funct(useRadix, rowmap, entries, values); - if (useRadix) { - Kokkos::parallel_for("sort_crs_matrix", - Kokkos::RangePolicy(0, numRows), - funct); - } else { - // Try to get teamsize to be largest power of 2 not greater than avg entries - // per row - // TODO (probably important for performnce): add thread-level sort also, and - // use that for small avg degree. But this works for now. 
- lno_t idealTeamSize = 1; - lno_t avgDeg = (entries.extent(0) + numRows - 1) / numRows; - while (idealTeamSize < avgDeg / 2) { - idealTeamSize *= 2; - } - team_pol temp(numRows, 1); - lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); - lno_t teamSize = std::min(idealTeamSize, maxTeamSize); - Kokkos::parallel_for("sort_crs_matrix", team_pol(numRows, teamSize), funct); - } -} - -template -void sort_crs_matrix(const crsMat_t& A) { - // Note: rowmap_t has const values, but that's OK as sorting doesn't modify it - using rowmap_t = typename crsMat_t::row_map_type; - using entries_t = typename crsMat_t::index_type::non_const_type; - using values_t = typename crsMat_t::values_type::non_const_type; - using exec_space = typename crsMat_t::execution_space; - // NOTE: the rowmap of a StaticCrsGraph is const-valued, but the - // entries and CrsMatrix values are non-const (so sorting them directly - // is allowed) - sort_crs_matrix( - A.graph.row_map, A.graph.entries, A.values); -} - -namespace Impl { - -template -KOKKOS_INLINE_FUNCTION void kk_swap(T& a, T& b) { - T t = a; - a = b; - b = t; -} - -template -struct sort_bsr_functor { - using lno_t = typename entries_type::non_const_value_type; - - row_map_type rowmap; - entries_type entries; - values_type values; - const lno_t blocksize; - - sort_bsr_functor(row_map_type rowmap_, entries_type entries_, - values_type values_, const lno_t blocksize_) - : rowmap(rowmap_), - entries(entries_), - values(values_), - blocksize(blocksize_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const lno_t i) const { - const lno_t rowStart = rowmap(i); - const lno_t rowSize = rowmap(i + 1) - rowStart; - auto* e = entries.data() + rowStart; - auto* v = values.data() + rowStart * blocksize; - bool done = false; - while (!done) { - done = true; - for (lno_t j = 1; j < rowSize; ++j) { - const lno_t jp = j - 1; - if (e[jp] <= e[j]) continue; - Impl::kk_swap(e[jp], e[j]); - auto const vb = v + j * blocksize; - auto const vbp = 
v + jp * blocksize; - for (lno_t k = 0; k < blocksize; - ++k) // std::swap_ranges(vb, vb + blocksize, vbp); - Impl::kk_swap(vb[k], vbp[k]); - done = false; - } - } - } -}; - -} // namespace Impl - -// Sort a BRS matrix: within each row, sort entries ascending by column and -// permute the values accordingly. -template -void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, - const entries_t& entries, const values_t& values) { - // TODO: this is O(N^2) mock for debugging - do regular implementation based - // on Radix/Bitonic sort (like CSR) IDEA: maybe we need only one general - // Radix2/Bitonic2 and CSR sorting may call it with blockSize=1 ? - lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; - if (numRows == 0) return; - const lno_t blocksize = blockdim * blockdim; - - assert(values.extent(0) == entries.extent(0) * blocksize); - Impl::sort_bsr_functor bsr_sorter( - rowmap, entries, values, blocksize); - Kokkos::parallel_for("sort_bsr_matrix", - Kokkos::RangePolicy(0, numRows), - bsr_sorter); -} - -// Sort a BSR matrix (like CRS but single values are replaced with contignous -// blocks) -template -void sort_bsr_matrix(const bsrMat_t& A) { - // NOTE: unlike rowmap, entries and values are non-const, so we can sort them - // directly - sort_bsr_matrix( - A.blockDim(), A.graph.row_map, A.graph.entries, A.values); -} - -// Sort a CRS graph: within each row, sort entries ascending by column. -template -void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) { - using lno_t = typename entries_t::non_const_value_type; - using team_pol = Kokkos::TeamPolicy; - bool useRadix = !Impl::kk_is_gpu_exec_space(); - lno_t numRows = rowmap.extent(0) ? 
rowmap.extent(0) - 1 : 0; - if (numRows == 0) return; - Impl::SortCrsGraphFunctor funct( - useRadix, rowmap, entries); - if (useRadix) { - Kokkos::parallel_for("sort_crs_graph", - Kokkos::RangePolicy(0, numRows), - funct); - } else { - // Try to get teamsize to be largest power of 2 less than or equal to - // half the entries per row. 0.5 * #entries is bitonic's parallelism within - // a row. - // TODO (probably important for performnce): add thread-level sort also, and - // use that for small avg degree. But this works for now. - lno_t idealTeamSize = 1; - lno_t avgDeg = (entries.extent(0) + numRows - 1) / numRows; - while (idealTeamSize < avgDeg / 2) { - idealTeamSize *= 2; - } - team_pol temp(numRows, 1); - lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); - lno_t teamSize = std::min(idealTeamSize, maxTeamSize); - Kokkos::parallel_for("sort_crs_graph", team_pol(numRows, teamSize), funct); - } -} - -template -void sort_crs_graph(const crsGraph_t& G) { - static_assert( - !std::is_const::value, - "sort_crs_graph requires StaticCrsGraph entries to be non-const."); - sort_crs_graph(G.row_map, G.entries); -} - -// Sort the rows of matrix, and merge duplicate entries. 
-template -crsMat_t sort_and_merge_matrix(const crsMat_t& A) { - using c_rowmap_t = typename crsMat_t::row_map_type; - using rowmap_t = typename crsMat_t::row_map_type::non_const_type; - using entries_t = typename crsMat_t::index_type::non_const_type; - using values_t = typename crsMat_t::values_type::non_const_type; - using size_type = typename rowmap_t::non_const_value_type; - using exec_space = typename crsMat_t::execution_space; - using range_t = Kokkos::RangePolicy; - sort_crs_matrix(A); - // Count entries per row into a new rowmap, in terms of merges that can be - // done - rowmap_t mergedRowmap( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "SortedMerged rowmap"), - A.numRows() + 1); - size_type numCompressedEntries = 0; - Kokkos::parallel_reduce(range_t(0, A.numRows()), - Impl::MergedRowmapFunctor( - mergedRowmap, A.graph.row_map, A.graph.entries), - numCompressedEntries); - // Prefix sum to get rowmap - Impl::kk_exclusive_parallel_prefix_sum(A.numRows() + 1, - mergedRowmap); - entries_t mergedEntries("SortedMerged entries", numCompressedEntries); - values_t mergedValues("SortedMerged values", numCompressedEntries); - // Compute merged entries and values - Kokkos::parallel_for( - range_t(0, A.numRows()), - Impl::MatrixMergedEntriesFunctor( - A.graph.row_map, A.graph.entries, A.values, mergedRowmap, - mergedEntries, mergedValues)); - // Finally, construct the new compressed matrix - return crsMat_t("SortedMerged", A.numRows(), A.numCols(), - numCompressedEntries, mergedValues, mergedRowmap, - mergedEntries); -} - -template -void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, rowmap_t& rowmap_out, - entries_t& entries_out) { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using range_t = Kokkos::RangePolicy; - using const_rowmap_t = typename rowmap_t::const_type; - lno_t numRows = rowmap_in.extent(0); - if (numRows <= 1) { - // 
Matrix has zero rows - rowmap_out = rowmap_t(); - entries_out = entries_t(); - return; - } - numRows--; - // Sort in place - sort_crs_graph(rowmap_in, entries_in); - // Count entries per row into a new rowmap, in terms of merges that can be - // done - rowmap_out = rowmap_t( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "SortedMerged rowmap"), - numRows + 1); - size_type numCompressedEntries = 0; - Kokkos::parallel_reduce(range_t(0, numRows), - Impl::MergedRowmapFunctor( - rowmap_out, rowmap_in, entries_in), - numCompressedEntries); - // Prefix sum to get rowmap - Impl::kk_exclusive_parallel_prefix_sum(numRows + 1, - rowmap_out); - entries_out = entries_t("SortedMerged entries", numCompressedEntries); - // Compute merged entries and values - Kokkos::parallel_for( - range_t(0, numRows), - Impl::GraphMergedEntriesFunctor( - rowmap_in, entries_in, rowmap_out, entries_out)); -} - -template -crsGraph_t sort_and_merge_graph(const crsGraph_t& G) { - using rowmap_t = typename crsGraph_t::row_map_type::non_const_type; - using entries_t = typename crsGraph_t::entries_type; - static_assert( - !std::is_const::value, - "sort_and_merge_graph requires StaticCrsGraph entries to be non-const."); - rowmap_t mergedRowmap; - entries_t mergedEntries; - sort_and_merge_graph(G.row_map, G.entries, mergedRowmap, - mergedEntries); - return crsGraph_t(mergedEntries, mergedRowmap); -} - // Version to be called from host on a single array // Generally ~2x slower than Kokkos::sort() for large arrays (> 50 M elements), // but faster for smaller arrays. 
@@ -1125,39 +581,6 @@ KOKKOS_INLINE_FUNCTION void TeamBitonicSort2(ValueType* values, PermType* perm, // For backward compatibility: keep the public interface accessible in // KokkosKernels::Impl:: namespace Impl { -template -[[deprecated]] void sort_crs_graph(const rowmap_t& rowmap, - const entries_t& entries) { - KokkosKernels::sort_crs_graph(rowmap, - entries); -} - -template -[[deprecated]] void sort_crs_matrix(const rowmap_t& rowmap, - const entries_t& entries, - const values_t& values) { - KokkosKernels::sort_crs_matrix(rowmap, entries, values); -} - -template -[[deprecated]] void sort_crs_matrix(const crsMat_t& A) { - KokkosKernels::sort_crs_matrix(A); -} - -template -[[deprecated]] void sort_and_merge_graph( - const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, - rowmap_t& rowmap_out, entries_t& entries_out) { - KokkosKernels::sort_and_merge_graph( - rowmap_in, entries_in, rowmap_out, entries_out); -} - -template -[[deprecated]] crsMat_t sort_and_merge_matrix(const crsMat_t& A) { - return KokkosKernels::sort_and_merge_matrix(A); -} template < typename View, typename ExecSpace, typename Ordinal, diff --git a/src/common/KokkosKernels_Utils.hpp b/src/common/KokkosKernels_Utils.hpp index 655d89ba67..a6649f102b 100644 --- a/src/common/KokkosKernels_Utils.hpp +++ b/src/common/KokkosKernels_Utils.hpp @@ -49,7 +49,7 @@ #include "KokkosKernels_ExecSpaceUtils.hpp" #include "KokkosKernels_SimpleUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosKernels_PrintUtils.hpp" #include "KokkosKernels_VectorUtils.hpp" diff --git a/src/graph/KokkosGraph_ExplicitCoarsening.hpp b/src/graph/KokkosGraph_ExplicitCoarsening.hpp index 8992aa4bb8..322004c0b6 100644 --- a/src/graph/KokkosGraph_ExplicitCoarsening.hpp +++ b/src/graph/KokkosGraph_ExplicitCoarsening.hpp @@ -46,7 +46,7 @@ #define KOKKOSGRAPH_EXPLICIT_COARSEN_HPP #include "KokkosGraph_ExplicitCoarsening_impl.hpp" -#include "KokkosKernels_Sorting.hpp" 
+#include "KokkosSparse_SortCrs.hpp" namespace KokkosGraph { namespace Experimental { @@ -86,8 +86,8 @@ void graph_explicit_coarsen( if (compress) { coarse_rowmap_t mergedRowmap; coarse_entries_t mergedEntries; - KokkosKernels::sort_and_merge_graph( + KokkosSparse::sort_and_merge_graph( coarseRowmap, coarseEntries, mergedRowmap, mergedEntries); coarseRowmap = mergedRowmap; coarseEntries = mergedEntries; @@ -125,8 +125,8 @@ void graph_explicit_coarsen_with_inverse_map( if (compress) { coarse_rowmap_t mergedRowmap; coarse_entries_t mergedEntries; - KokkosKernels::sort_and_merge_graph( + KokkosSparse::sort_and_merge_graph( coarseRowmap, coarseEntries, mergedRowmap, mergedEntries); coarseRowmap = mergedRowmap; coarseEntries = mergedEntries; diff --git a/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp b/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp index 50b2d1c2ef..aef089fd06 100644 --- a/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp +++ b/src/impl/tpls/KokkosKernels_tpl_handles_decl.hpp @@ -48,7 +48,7 @@ #include "KokkosBlas_tpl_spec.hpp" #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE -#include "KokkosKernels_SparseUtils_cusparse.hpp" +#include "KokkosSparse_Utils_cusparse.hpp" namespace KokkosKernels { namespace Impl { diff --git a/src/impl/tpls/KokkosKernels_tpl_handles_def.hpp b/src/impl/tpls/KokkosKernels_tpl_handles_def.hpp index 84b5386a00..a5187986e5 100644 --- a/src/impl/tpls/KokkosKernels_tpl_handles_def.hpp +++ b/src/impl/tpls/KokkosKernels_tpl_handles_def.hpp @@ -69,7 +69,7 @@ CusparseSingleton& CusparseSingleton::singleton() { #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE -#include "KokkosKernels_SparseUtils_rocsparse.hpp" +#include "KokkosSparse_Utils_rocsparse.hpp" namespace KokkosKernels { namespace Impl { diff --git a/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp b/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp index 77b76868f3..d0ea5cdc26 100644 --- a/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp +++ 
b/src/impl/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp @@ -46,7 +46,7 @@ #define KOKKOSSPARSE_SPMV_BSRMATRIX_TPL_SPEC_DECL_HPP #include "KokkosKernels_Controls.hpp" -#include "KokkosKernels_SparseUtils_mkl.hpp" +#include "KokkosSparse_Utils_mkl.hpp" #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #include @@ -454,7 +454,7 @@ KOKKOSSPARSE_SPMV_MV_MKL(Kokkos::complex, Kokkos::OpenMP, // cuSPARSE #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #include "cusparse.h" -#include "KokkosKernels_SparseUtils_cusparse.hpp" +#include "KokkosSparse_Utils_cusparse.hpp" // // From https://docs.nvidia.com/cuda/cusparse/index.html#bsrmv diff --git a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index d6f36c0a2b..0a92b91eb2 100644 --- a/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -50,7 +50,7 @@ // cuSPARSE #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #include "cusparse.h" -#include "KokkosKernels_SparseUtils_cusparse.hpp" +#include "KokkosSparse_Utils_cusparse.hpp" namespace KokkosSparse { namespace Impl { @@ -385,7 +385,7 @@ KOKKOSSPARSE_SPMV_CUSPARSE(Kokkos::complex, int64_t, size_t, // rocSPARSE #if defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) #include -#include "KokkosKernels_SparseUtils_rocsparse.hpp" +#include "KokkosSparse_Utils_rocsparse.hpp" namespace KokkosSparse { namespace Impl { @@ -542,7 +542,7 @@ KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, Kokkos::LayoutRight, #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #include -#include "KokkosKernels_SparseUtils_mkl.hpp" +#include "KokkosSparse_Utils_mkl.hpp" namespace KokkosSparse { namespace Impl { diff --git a/src/common/KokkosKernels_Controls.hpp b/src/sparse/KokkosKernels_Controls.hpp similarity index 100% rename from src/common/KokkosKernels_Controls.hpp rename to src/sparse/KokkosKernels_Controls.hpp diff --git a/src/common/KokkosKernels_Handle.hpp b/src/sparse/KokkosKernels_Handle.hpp similarity index 100% rename from 
src/common/KokkosKernels_Handle.hpp rename to src/sparse/KokkosKernels_Handle.hpp diff --git a/src/sparse/KokkosSparse_IOUtils.hpp b/src/sparse/KokkosSparse_IOUtils.hpp new file mode 100644 index 0000000000..fa6d08f960 --- /dev/null +++ b/src/sparse/KokkosSparse_IOUtils.hpp @@ -0,0 +1,1274 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef _KOKKOSSPARSE_IOUTILS_HPP +#define _KOKKOSSPARSE_IOUTILS_HPP + +#include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_CrsMatrix.hpp" + +namespace KokkosSparse { +namespace Impl { + +// MD: Bases on Christian's sparseMatrix_generate function in test_crsmatrix.cpp +// file. +template +void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols, + SizeType &nnz, OrdinalType row_size_variance, + OrdinalType bandwidth, ScalarType *&values, + SizeType *&rowPtr, OrdinalType *&colInd, + OrdinalType block_elem_count = 1) { + rowPtr = new SizeType[nrows + 1]; + + OrdinalType elements_per_row = nrows ? 
nnz / nrows : 0; + srand(13721); + rowPtr[0] = 0; + for (int row = 0; row < nrows; row++) { + int varianz = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance; + int numRowEntries = elements_per_row + varianz; + if (numRowEntries < 0) numRowEntries = 0; + // Clamping numRowEntries above accomplishes 2 things: + // - If ncols is 0, numRowEntries will also be 0 + // - With numRowEntries at most 2/3 the number of columns, in the worst + // case + // 90% of insertions will succeed after 6 tries + if (numRowEntries > 0.66 * ncols) numRowEntries = 0.66 * ncols; + rowPtr[row + 1] = rowPtr[row] + numRowEntries; + } + nnz = rowPtr[nrows]; + values = new ScalarType[nnz]; + colInd = new OrdinalType[nnz]; + for (OrdinalType row = 0; row < nrows; row++) { + for (SizeType k = rowPtr[row]; k < rowPtr[row + 1]; ++k) { + while (true) { + OrdinalType pos = (1.0 * rand() / RAND_MAX - 0.5) * bandwidth + row; + while (pos < 0) pos += ncols; + while (pos >= ncols) pos -= ncols; + + bool is_already_in_the_row = false; + for (SizeType j = rowPtr[row]; j < k; j++) { + if (colInd[j] == pos) { + is_already_in_the_row = true; + break; + } + } + if (!is_already_in_the_row) { + colInd[k] = pos; + break; + } + } + } + } + // Sample each value from uniform (-50, 50) for real types, or (-50 - 50i, 50 + // + 50i) for complex types. 
+ Kokkos::View valuesView( + values, nnz * block_elem_count); + ScalarType randStart, randEnd; + KokkosKernels::Impl::getRandomBounds(50.0, randStart, randEnd); + Kokkos::Random_XorShift64_Pool pool(13718); + Kokkos::fill_random(valuesView, pool, randStart, randEnd); +} + +template +void kk_sparseMatrix_generate_lower_upper_triangle( + char uplo, OrdinalType nrows, OrdinalType ncols, SizeType &nnz, + OrdinalType /*row_size_variance*/, OrdinalType /*bandwidth*/, + ScalarType *&values, SizeType *&rowPtr, OrdinalType *&colInd) { + rowPtr = new SizeType[nrows + 1]; + + // OrdinalType elements_per_row = nnz/nrows; + srand(13721); + rowPtr[0] = 0; + for (int row = 0; row < nrows; row++) { + if (uplo == 'L') + rowPtr[row + 1] = rowPtr[row] + row + 1; + else + rowPtr[row + 1] = rowPtr[row] + ncols - (row); + } + nnz = rowPtr[nrows]; + values = new ScalarType[nnz]; + colInd = new OrdinalType[nnz]; + for (OrdinalType row = 0; row < nrows; row++) { + for (SizeType k = rowPtr[row]; k < rowPtr[row + 1]; k++) { + if (uplo == 'L') + colInd[k] = k - rowPtr[row]; + else + colInd[k] = row + (k - rowPtr[row]); + values[k] = 1.0; + } + } +} + +template +void kk_diagonally_dominant_sparseMatrix_generate( + OrdinalType nrows, OrdinalType ncols, SizeType &nnz, + OrdinalType row_size_variance, OrdinalType bandwidth, ScalarType *&values, + SizeType *&rowPtr, OrdinalType *&colInd, + ScalarType diagDominance = 10 * Kokkos::ArithTraits::one()) { + rowPtr = new SizeType[nrows + 1]; + + OrdinalType elements_per_row = nnz / nrows; + srand(13721); + rowPtr[0] = 0; + for (int row = 0; row < nrows; row++) { + int varianz = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance; + if (varianz < 1) varianz = 1; + if (varianz > 0.75 * ncols) varianz = 0.75 * ncols; + rowPtr[row + 1] = rowPtr[row] + elements_per_row + varianz; + if (rowPtr[row + 1] <= rowPtr[row]) // This makes sure that there is + rowPtr[row + 1] = rowPtr[row] + 1; // at least one nonzero in the row + } + nnz = rowPtr[nrows]; + values = 
new ScalarType[nnz]; + colInd = new OrdinalType[nnz]; + for (OrdinalType row = 0; row < nrows; row++) { + ScalarType total_values = 0; + std::unordered_set entriesInRow; + // We always add the diagonal entry (after this loop) + entriesInRow.insert(row); + for (SizeType k = rowPtr[row]; k < rowPtr[row + 1] - 1; k++) { + while (true) { + OrdinalType pos = (1.0 * rand() / RAND_MAX - 0.5) * bandwidth + row; + while (pos < 0) pos += ncols; + while (pos >= ncols) pos -= ncols; + + if (entriesInRow.find(pos) == entriesInRow.end()) { + entriesInRow.insert(pos); + colInd[k] = pos; + values[k] = 100.0 * rand() / RAND_MAX - 50.0; + total_values += + Kokkos::Details::ArithTraits::abs(values[k]); + break; + } + } + } + + colInd[rowPtr[row + 1] - 1] = row; + values[rowPtr[row + 1] - 1] = total_values * diagDominance; + } +} + +// This function creates a diagonal sparse matrix for testing matrix operations. +// The elements on the diagonal are 1, 2, ..., n-1, n. +// If "invert" is true, it will return the inverse of the above diagonal matrix. 
+template +crsMat_t kk_generate_diag_matrix(typename crsMat_t::const_ordinal_type n, + const bool invert = false) { + typedef typename crsMat_t::ordinal_type ot; + typedef typename crsMat_t::StaticCrsGraphType graph_t; + typedef typename graph_t::row_map_type::non_const_type row_map_view_t; + typedef typename graph_t::entries_type::non_const_type cols_view_t; + typedef typename crsMat_t::values_type::non_const_type values_view_t; + + typedef typename row_map_view_t::non_const_value_type size_type; + typedef typename cols_view_t::non_const_value_type lno_t; + typedef typename values_view_t::non_const_value_type scalar_t; + + row_map_view_t rowmap_view("rowmap_view", n + 1); + cols_view_t columns_view("colsmap_view", n); + values_view_t values_view("values_view", n); + + { + typename row_map_view_t::HostMirror hr = + Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = + Kokkos::create_mirror_view(columns_view); + typename values_view_t::HostMirror hv = + Kokkos::create_mirror_view(values_view); + + for (lno_t i = 0; i <= n; ++i) { + hr(i) = size_type(i); + } + + for (ot i = 0; i < n; ++i) { + hc(i) = lno_t(i); + if (invert) { + hv(i) = scalar_t(1.0) / (scalar_t(i + 1)); + } else { + hv(i) = scalar_t(i + 1); + } + } + Kokkos::deep_copy(rowmap_view, hr); + Kokkos::deep_copy(columns_view, hc); + Kokkos::deep_copy(values_view, hv); + } + + graph_t static_graph(columns_view, rowmap_view); + crsMat_t crsmat("CrsMatrix", n, values_view, static_graph); + return crsmat; +} + +template +crsMat_t kk_generate_diagonally_dominant_sparse_matrix( + typename crsMat_t::const_ordinal_type nrows, + typename crsMat_t::const_ordinal_type ncols, + typename crsMat_t::non_const_size_type &nnz, + typename crsMat_t::const_ordinal_type row_size_variance, + typename crsMat_t::const_ordinal_type bandwidth, + typename crsMat_t::const_value_type diagDominance = + 10 * Kokkos::ArithTraits::one()) { + typedef typename crsMat_t::StaticCrsGraphType graph_t; + typedef 
typename graph_t::row_map_type::non_const_type row_map_view_t; + typedef typename graph_t::entries_type::non_const_type cols_view_t; + typedef typename crsMat_t::values_type::non_const_type values_view_t; + + typedef typename row_map_view_t::non_const_value_type size_type; + typedef typename cols_view_t::non_const_value_type lno_t; + typedef typename values_view_t::non_const_value_type scalar_t; + lno_t *adj; + size_type *xadj; //, nnzA; + scalar_t *values; + + kk_diagonally_dominant_sparseMatrix_generate( + nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj, + diagDominance); + + row_map_view_t rowmap_view("rowmap_view", nrows + 1); + cols_view_t columns_view("colsmap_view", nnz); + values_view_t values_view("values_view", nnz); + + { + typename row_map_view_t::HostMirror hr = + Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = + Kokkos::create_mirror_view(columns_view); + typename values_view_t::HostMirror hv = + Kokkos::create_mirror_view(values_view); + + for (lno_t i = 0; i <= nrows; ++i) { + hr(i) = xadj[i]; + } + + for (size_type i = 0; i < nnz; ++i) { + hc(i) = adj[i]; + hv(i) = values[i]; + } + Kokkos::deep_copy(rowmap_view, hr); + Kokkos::deep_copy(columns_view, hc); + Kokkos::deep_copy(values_view, hv); + } + + graph_t static_graph(columns_view, rowmap_view); + crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); + delete[] xadj; + delete[] adj; + delete[] values; + return crsmat; +} + +template +crsMat_t kk_generate_triangular_sparse_matrix( + char uplo, typename crsMat_t::const_ordinal_type nrows, + typename crsMat_t::const_ordinal_type ncols, + typename crsMat_t::non_const_size_type &nnz, + typename crsMat_t::const_ordinal_type row_size_variance, + typename crsMat_t::const_ordinal_type bandwidth) { + typedef typename crsMat_t::StaticCrsGraphType graph_t; + typedef typename graph_t::row_map_type::non_const_type row_map_view_t; + typedef typename graph_t::entries_type::non_const_type cols_view_t; 
+ typedef typename crsMat_t::values_type::non_const_type values_view_t; + + typedef typename row_map_view_t::non_const_value_type size_type; + typedef typename cols_view_t::non_const_value_type lno_t; + typedef typename values_view_t::non_const_value_type scalar_t; + lno_t *adj; + size_type *xadj; //, nnzA; + scalar_t *values; + + kk_sparseMatrix_generate_lower_upper_triangle( + uplo, nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj); + + row_map_view_t rowmap_view("rowmap_view", nrows + 1); + cols_view_t columns_view("colsmap_view", nnz); + values_view_t values_view("values_view", nnz); + + { + typename row_map_view_t::HostMirror hr = + Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = + Kokkos::create_mirror_view(columns_view); + typename values_view_t::HostMirror hv = + Kokkos::create_mirror_view(values_view); + + for (lno_t i = 0; i <= nrows; ++i) { + hr(i) = xadj[i]; + } + + for (size_type i = 0; i < nnz; ++i) { + hc(i) = adj[i]; + hv(i) = values[i]; + } + Kokkos::deep_copy(rowmap_view, hr); + Kokkos::deep_copy(columns_view, hc); + Kokkos::deep_copy(values_view, hv); + Kokkos::fence(); + } + + graph_t static_graph(columns_view, rowmap_view); + crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); + delete[] xadj; + delete[] adj; + delete[] values; + return crsmat; +} + +template +crsMat_t kk_generate_sparse_matrix( + typename crsMat_t::const_ordinal_type nrows, + typename crsMat_t::const_ordinal_type ncols, + typename crsMat_t::non_const_size_type &nnz, + typename crsMat_t::const_ordinal_type row_size_variance, + typename crsMat_t::const_ordinal_type bandwidth) { + typedef typename crsMat_t::StaticCrsGraphType graph_t; + typedef typename graph_t::row_map_type::non_const_type row_map_view_t; + typedef typename graph_t::entries_type::non_const_type cols_view_t; + typedef typename crsMat_t::values_type::non_const_type values_view_t; + + typedef typename row_map_view_t::non_const_value_type size_type; + 
typedef typename cols_view_t::non_const_value_type lno_t; + typedef typename values_view_t::non_const_value_type scalar_t; + lno_t *adj; + size_type *xadj; //, nnzA; + scalar_t *values; + + kk_sparseMatrix_generate( + nrows, ncols, nnz, row_size_variance, bandwidth, values, xadj, adj); + + row_map_view_t rowmap_view("rowmap_view", nrows + 1); + cols_view_t columns_view("colsmap_view", nnz); + values_view_t values_view("values_view", nnz); + + { + typename row_map_view_t::HostMirror hr = + Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = + Kokkos::create_mirror_view(columns_view); + typename values_view_t::HostMirror hv = + Kokkos::create_mirror_view(values_view); + + for (lno_t i = 0; i <= nrows; ++i) { + hr(i) = xadj[i]; + } + + for (size_type i = 0; i < nnz; ++i) { + hc(i) = adj[i]; + hv(i) = values[i]; + } + Kokkos::deep_copy(rowmap_view, hr); + Kokkos::deep_copy(columns_view, hc); + Kokkos::deep_copy(values_view, hv); + } + + graph_t static_graph(columns_view, rowmap_view); + crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); + delete[] xadj; + delete[] adj; + delete[] values; + return crsmat; +} + +template +bsrMat_t kk_generate_sparse_matrix( + typename bsrMat_t::const_ordinal_type block_dim, + typename bsrMat_t::const_ordinal_type nrows, + typename bsrMat_t::const_ordinal_type ncols, + typename bsrMat_t::non_const_size_type &nnz, + typename bsrMat_t::const_ordinal_type row_size_variance, + typename bsrMat_t::const_ordinal_type bandwidth) { + typedef KokkosSparse::CrsMatrix< + typename bsrMat_t::value_type, typename bsrMat_t::ordinal_type, + typename bsrMat_t::device_type, typename bsrMat_t::memory_traits, + typename bsrMat_t::size_type> + crsMat_t; + + const auto crs_mtx = kk_generate_sparse_matrix( + nrows * block_dim, ncols * block_dim, nnz, row_size_variance, bandwidth); + bsrMat_t bsrmat(crs_mtx, block_dim); + return bsrmat; +} +// TODO: need to fix the size_type. All over the reading inputs are lno_t. 
+ +template +void convert_crs_to_lower_triangle_edge_list(idx nv, idx *xadj, idx *adj, + idx *lower_triangle_srcs, + idx *lower_triangle_dests) { + idx ind = 0; + for (idx i = 0; i < nv; ++i) { + idx xb = xadj[i]; + idx xe = xadj[i + 1]; + for (idx j = xb; j < xe; ++j) { + idx dst = adj[j]; + if (i < dst) { + lower_triangle_srcs[ind] = i; + lower_triangle_dests[ind++] = dst; + } + } + } +} + +template +void convert_crs_to_edge_list(idx nv, idx *xadj, idx *srcs) { + for (idx i = 0; i < nv; ++i) { + idx xb = xadj[i]; + idx xe = xadj[i + 1]; + for (idx j = xb; j < xe; ++j) { + srcs[j] = i; + } + } +} + +template +void convert_edge_list_to_csr(lno_t nv, size_type ne, lno_t *srcs, lno_t *dests, + wt *ew, size_type *xadj, lno_t *adj, wt *crs_ew) { + std::vector> edges(ne); + for (size_type i = 0; i < ne; ++i) { + edges[i].src = srcs[i]; + edges[i].dst = dests[i]; + edges[i].ew = ew[i]; + } + std::sort(edges.begin(), edges.begin() + ne); + + size_type eind = 0; + for (lno_t i = 0; i < nv; ++i) { + (xadj)[i] = eind; + while (edges[eind].src == i) { + (adj)[eind] = edges[eind].dst; + (*crs_ew)[eind] = edges[eind].ew; + ++eind; + } + } + xadj[nv] = eind; +} + +template +void convert_undirected_edge_list_to_csr(lno_t nv, size_type ne, in_lno_t *srcs, + in_lno_t *dests, size_type *xadj, + lno_t *adj) { + std::vector> edges(ne * 2); + for (size_type i = 0; i < ne; ++i) { + edges[i * 2].src = srcs[i]; + edges[i * 2].dst = dests[i]; + + edges[i * 2 + 1].src = dests[i]; + edges[i * 2 + 1].dst = srcs[i]; + } +#ifdef KOKKOSKERNELS_HAVE_OUTER +#include +#include +#include +#include + __gnu_parallel::parallel_sort_mwms< + false, true, struct KokkosKernels::Impl::Edge *>( + &(edges[0]), &(edges[0]) + ne * 2, + std::less>(), 64); +#else + std::sort(edges.begin(), edges.begin() + ne * 2); +#endif + + size_type eind = 0; + for (lno_t i = 0; i < nv; ++i) { + (xadj)[i] = eind; + while (edges[eind].src == i) { + (adj)[eind] = edges[eind].dst; + //(*crs_ew)[eind] = edges[eind].ew; + ++eind; + 
} + } + xadj[nv] = eind; +} + +template +void write_graph_bin(lno_t nv, size_type ne, const size_type *xadj, + const lno_t *adj, const scalar_t *ew, + const char *filename) { + std::ofstream myFile(filename, std::ios::out | std::ios::binary); + myFile.write((char *)&nv, sizeof(lno_t)); + myFile.write((char *)&ne, sizeof(size_type)); + myFile.write((char *)xadj, sizeof(size_type) * (nv + 1)); + + myFile.write((char *)adj, sizeof(lno_t) * (ne)); + + myFile.write((char *)ew, sizeof(scalar_t) * (ne)); + + myFile.close(); +} + +template +void write_graph_crs(lno_t nv, size_type ne, const size_type *xadj, + const lno_t *adj, const scalar_t *ew, + const char *filename) { + std::ofstream myFile(filename, std::ios::out); + myFile << nv << " " << ne << std::endl; + + for (lno_t i = 0; i <= nv; ++i) { + myFile << xadj[i] << " "; + } + myFile << std::endl; + + for (lno_t i = 0; i < nv; ++i) { + size_type b = xadj[i]; + size_type e = xadj[i + 1]; + for (size_type j = b; j < e; ++j) { + myFile << adj[j] << " "; + } + myFile << std::endl; + } + for (size_type i = 0; i < ne; ++i) { + myFile << ew[i] << " "; + } + myFile << std::endl; + + myFile.close(); +} + +template +void write_graph_ligra(lno_t nv, size_type ne, const size_type *xadj, + const lno_t *adj, const scalar_t * /*ew*/, + const char *filename) { + std::ofstream ff(filename); + ff << "AdjacencyGraph" << std::endl; + ff << nv << std::endl << ne << std::endl; + for (lno_t i = 0; i < nv; ++i) { + ff << xadj[i] << std::endl; + } + for (size_type i = 0; i < ne; ++i) { + ff << adj[i] << std::endl; + } + ff.close(); +} + +// MM: types and utility functions for parsing the MatrixMarket format +namespace MM { +enum MtxObject { UNDEFINED_OBJECT, MATRIX, VECTOR }; +enum MtxFormat { UNDEFINED_FORMAT, COORDINATE, ARRAY }; +enum MtxField { + UNDEFINED_FIELD, + REAL, // includes both float and double + COMPLEX, // includes complex and complex + INTEGER, // includes all integer types + PATTERN // not a type, but means the value for 
every entry is 1 +}; +enum MtxSym { + UNDEFINED_SYMMETRY, + GENERAL, + SYMMETRIC, // A(i, j) = A(j, i) + SKEW_SYMMETRIC, // A(i, j) = -A(j, i) + HERMITIAN // A(i, j) = a + bi; A(j, i) = a - bi +}; + +// readScalar/writeScalar: read and write a scalar in the form that it appears +// in an .mtx file. The >> and << operators won't work, because complex appears +// as "real imag", not "(real, imag)" +template +scalar_t readScalar(std::istream &is) { + scalar_t val; + is >> val; + return val; +} + +template <> +inline Kokkos::complex readScalar(std::istream &is) { + float r, i; + is >> r; + is >> i; + return Kokkos::complex(r, i); +} + +template <> +inline Kokkos::complex readScalar(std::istream &is) { + double r, i; + is >> r; + is >> i; + return Kokkos::complex(r, i); +} + +template +void writeScalar(std::ostream &os, scalar_t val) { + os << val; +} + +template <> +inline void writeScalar(std::ostream &os, Kokkos::complex val) { + os << val.real() << ' ' << val.imag(); +} + +template <> +inline void writeScalar(std::ostream &os, Kokkos::complex val) { + os << val.real() << ' ' << val.imag(); +} + +// symmetryFlip: given a value for A(i, j), return the value that +// should be inserted at A(j, i) (if any) +template +scalar_t symmetryFlip(scalar_t val, MtxSym symFlag) { + if (symFlag == SKEW_SYMMETRIC) return -val; + return val; +} + +template <> +inline Kokkos::complex symmetryFlip(Kokkos::complex val, + MtxSym symFlag) { + if (symFlag == HERMITIAN) + return Kokkos::conj(val); + else if (symFlag == SKEW_SYMMETRIC) + return -val; + return val; +} + +template <> +inline Kokkos::complex symmetryFlip(Kokkos::complex val, + MtxSym symFlag) { + if (symFlag == HERMITIAN) + return Kokkos::conj(val); + else if (symFlag == SKEW_SYMMETRIC) + return -val; + return val; +} +} // namespace MM + +template +void write_matrix_mtx(lno_t nrows, lno_t ncols, size_type nentries, + const size_type *xadj, const lno_t *adj, + const scalar_t *vals, const char *filename) { + std::ofstream 
myFile(filename); + myFile << "%%MatrixMarket matrix coordinate "; + if (std::is_same>::value || + std::is_same>::value) + myFile << "complex"; + else + myFile << "real"; + myFile << " general\n"; + myFile << nrows << " " << ncols << " " << nentries << '\n'; + myFile << std::setprecision(17) << std::scientific; + for (lno_t i = 0; i < nrows; ++i) { + size_type b = xadj[i]; + size_type e = xadj[i + 1]; + for (size_type j = b; j < e; ++j) { + myFile << i + 1 << " " << adj[j] + 1 << " "; + MM::writeScalar(myFile, vals[j]); + myFile << '\n'; + } + } + myFile.close(); +} + +template +void write_graph_mtx(lno_t nv, size_type ne, const size_type *xadj, + const lno_t *adj, const scalar_t *ew, + const char *filename) { + std::ofstream myFile(filename); + myFile << "%%MatrixMarket matrix coordinate "; + if (std::is_same>::value || + std::is_same>::value) + myFile << "complex"; + else + myFile << "real"; + myFile << " general\n"; + myFile << nv << " " << nv << " " << ne << '\n'; + myFile << std::setprecision(8) << std::scientific; + for (lno_t i = 0; i < nv; ++i) { + size_type b = xadj[i]; + size_type e = xadj[i + 1]; + for (size_type j = b; j < e; ++j) { + myFile << i + 1 << " " << (adj)[j] + 1 << " "; + MM::writeScalar(myFile, ew[j]); + myFile << '\n'; + } + } + + myFile.close(); +} + +template +void read_graph_bin(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, + scalar_t **ew, const char *filename) { + std::ifstream myFile(filename, std::ios::in | std::ios::binary); + + myFile.read((char *)nv, sizeof(lno_t)); + myFile.read((char *)ne, sizeof(size_type)); + KokkosKernels::Impl::md_malloc(xadj, *nv + 1); + KokkosKernels::Impl::md_malloc(adj, *ne); + KokkosKernels::Impl::md_malloc(ew, *ne); + myFile.read((char *)*xadj, sizeof(size_type) * (*nv + 1)); + myFile.read((char *)*adj, sizeof(lno_t) * (*ne)); + myFile.read((char *)*ew, sizeof(scalar_t) * (*ne)); + myFile.close(); +} + +// When Kokkos issue #2313 is resolved, can delete +// parseScalar and just use 
operator>> +template +scalar_t parseScalar(std::istream &is) { + scalar_t val; + is >> val; + return val; +} + +template <> +inline Kokkos::complex parseScalar(std::istream &is) { + std::complex val; + is >> val; + return Kokkos::complex(val); +} + +template <> +inline Kokkos::complex parseScalar(std::istream &is) { + std::complex val; + is >> val; + return Kokkos::complex(val); +} + +template +void read_graph_crs(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, + scalar_t **ew, const char *filename) { + std::ifstream myFile(filename, std::ios::in); + myFile >> *nv >> *ne; + + KokkosKernels::Impl::md_malloc(xadj, *nv + 1); + KokkosKernels::Impl::md_malloc(adj, *ne); + KokkosKernels::Impl::md_malloc(ew, *ne); + + for (lno_t i = 0; i <= *nv; ++i) { + myFile >> (*xadj)[i]; + } + + for (size_type i = 0; i < *ne; ++i) { + myFile >> (*adj)[i]; + } + for (size_type i = 0; i < *ne; ++i) { + (*ew)[i] = parseScalar(myFile); + } + myFile.close(); +} + +template +void write_kokkos_crst_matrix(crs_matrix_t a_crsmat, const char *filename) { + typedef typename crs_matrix_t::StaticCrsGraphType graph_t; + typedef typename graph_t::row_map_type::non_const_type row_map_view_t; + typedef typename graph_t::entries_type::non_const_type cols_view_t; + typedef typename crs_matrix_t::values_type::non_const_type values_view_t; + + typedef typename row_map_view_t::value_type offset_t; + typedef typename cols_view_t::value_type lno_t; + typedef typename values_view_t::value_type scalar_t; + typedef typename values_view_t::size_type size_type; + + size_type nnz = a_crsmat.nnz(); + + auto a_rowmap_view = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), a_crsmat.graph.row_map); + auto a_entries_view = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), a_crsmat.graph.entries); + auto a_values_view = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a_crsmat.values); + offset_t *a_rowmap = const_cast(a_rowmap_view.data()); + lno_t *a_entries = 
a_entries_view.data(); + scalar_t *a_values = a_values_view.data(); + + std::string strfilename(filename); + if (KokkosKernels::Impl::endswith(strfilename, ".mtx") || + KokkosKernels::Impl::endswith(strfilename, ".mm")) { + write_matrix_mtx( + a_crsmat.numRows(), a_crsmat.numCols(), a_crsmat.nnz(), a_rowmap, + a_entries, a_values, filename); + return; + } else if (a_crsmat.numRows() != a_crsmat.numCols()) { + throw std::runtime_error( + "For formats other than MatrixMarket (suffix .mm or .mtx),\n" + "write_kokkos_crst_matrix only supports square matrices"); + } + if (KokkosKernels::Impl::endswith(strfilename, ".bin")) { + write_graph_bin( + a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); + } else if (KokkosKernels::Impl::endswith(strfilename, ".ligra")) { + write_graph_ligra( + a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); + } else if (KokkosKernels::Impl::endswith(strfilename, ".crs")) { + write_graph_crs( + a_crsmat.numRows(), nnz, a_rowmap, a_entries, a_values, filename); + } else { + std::string errMsg = + std::string("write_kokkos_crst_matrix: File extension on ") + filename + + " does not correspond to a known format"; + throw std::runtime_error(errMsg); + } +} + +template +int read_mtx(const char *fileName, lno_t *nrows, lno_t *ncols, size_type *ne, + size_type **xadj, lno_t **adj, scalar_t **ew, + bool symmetrize = false, bool remove_diagonal = true, + bool transpose = false) { + using namespace MM; + std::ifstream mmf(fileName, std::ifstream::in); + if (!mmf.is_open()) { + throw std::runtime_error("File cannot be opened\n"); + } + + std::string fline = ""; + getline(mmf, fline); + + if (fline.size() < 2 || fline[0] != '%' || fline[1] != '%') { + throw std::runtime_error("Invalid MM file. 
Line-1\n"); + } + + // make sure every required field is in the file, by initializing them to + // UNDEFINED_* + MtxObject mtx_object = UNDEFINED_OBJECT; + MtxFormat mtx_format = UNDEFINED_FORMAT; + MtxField mtx_field = UNDEFINED_FIELD; + MtxSym mtx_sym = UNDEFINED_SYMMETRY; + + if (fline.find("matrix") != std::string::npos) { + mtx_object = MATRIX; + } else if (fline.find("vector") != std::string::npos) { + mtx_object = VECTOR; + throw std::runtime_error( + "MatrixMarket \"vector\" is not supported by KokkosKernels read_mtx()"); + } + + if (fline.find("coordinate") != std::string::npos) { + // sparse + mtx_format = COORDINATE; + } else if (fline.find("array") != std::string::npos) { + // dense + mtx_format = ARRAY; + } + + if (fline.find("real") != std::string::npos || + fline.find("double") != std::string::npos) { + if (std::is_same::value || + std::is_same::value) + mtx_field = REAL; + else { + if (!std::is_floating_point::value) + throw std::runtime_error( + "scalar_t in read_mtx() incompatible with float or double typed " + "MatrixMarket file."); + else + mtx_field = REAL; + } + } else if (fline.find("complex") != std::string::npos) { + if (!(std::is_same>::value || + std::is_same>::value)) + throw std::runtime_error( + "scalar_t in read_mtx() incompatible with complex-typed MatrixMarket " + "file."); + else + mtx_field = COMPLEX; + } else if (fline.find("integer") != std::string::npos) { + if (std::is_integral::value || + std::is_floating_point::value || + std::is_same::value || + std::is_same::value) + mtx_field = INTEGER; + else + throw std::runtime_error( + "scalar_t in read_mtx() incompatible with integer-typed MatrixMarket " + "file."); + } else if (fline.find("pattern") != std::string::npos) { + mtx_field = PATTERN; + // any reasonable choice for scalar_t can represent "1" or "1.0 + 0i", so + // nothing to check here + } + + if (fline.find("general") != std::string::npos) { + mtx_sym = GENERAL; + } else if (fline.find("skew-symmetric") != 
std::string::npos) { + mtx_sym = SKEW_SYMMETRIC; + } else if (fline.find("symmetric") != std::string::npos) { + // checking for "symmetric" after "skew-symmetric" because it's a substring + mtx_sym = SYMMETRIC; + } else if (fline.find("hermitian") != std::string::npos || + fline.find("Hermitian") != std::string::npos) { + mtx_sym = HERMITIAN; + } + // Validate the matrix attributes + if (mtx_format == ARRAY) { + if (mtx_sym == UNDEFINED_SYMMETRY) mtx_sym = GENERAL; + if (mtx_sym != GENERAL) + throw std::runtime_error( + "array format MatrixMarket file must have general symmetry (optional " + "to include \"general\")"); + } + if (mtx_object == UNDEFINED_OBJECT) + throw std::runtime_error( + "MatrixMarket file header is missing the object type."); + if (mtx_format == UNDEFINED_FORMAT) + throw std::runtime_error("MatrixMarket file header is missing the format."); + if (mtx_field == UNDEFINED_FIELD) + throw std::runtime_error( + "MatrixMarket file header is missing the field type."); + if (mtx_sym == UNDEFINED_SYMMETRY) + throw std::runtime_error( + "MatrixMarket file header is missing the symmetry type."); + + while (1) { + getline(mmf, fline); + if (fline[0] != '%') break; + } + std::stringstream ss(fline); + lno_t nr = 0, nc = 0; + size_type nnz = 0; + ss >> nr >> nc; + if (mtx_format == COORDINATE) + ss >> nnz; + else + nnz = nr * nc; + size_type numEdges = nnz; + symmetrize = symmetrize || mtx_sym != GENERAL; + if (symmetrize && nr != nc) { + throw std::runtime_error("A non-square matrix cannot be symmetrized."); + } + if (mtx_format == ARRAY) { + // Array format only supports general symmetry and non-pattern + if (symmetrize) + throw std::runtime_error( + "array format MatrixMarket file cannot be symmetrized."); + if (mtx_field == PATTERN) + throw std::runtime_error( + "array format MatrixMarket file can't have \"pattern\" field type."); + } + if (symmetrize) { + numEdges = 2 * nnz; + } + // numEdges is only an upper bound (diagonal entries may be removed) + 
std::vector> edges( + numEdges); + size_type nE = 0; + lno_t numDiagonal = 0; + for (size_type i = 0; i < nnz; ++i) { + getline(mmf, fline); + std::stringstream ss2(fline); + struct KokkosKernels::Impl::Edge tmp; + // read source, dest (edge) and weight (value) + lno_t s, d; + scalar_t w; + if (mtx_format == ARRAY) { + // In array format, entries are listed in column major order, + // so the row and column can be determined just from the index i + //(but make them 1-based indices, to match the way coordinate works) + s = i % nr + 1; // row + d = i / nr + 1; // col + } else { + // In coordinate format, row and col of each entry is read from file + ss2 >> s >> d; + } + if (mtx_field == PATTERN) + w = 1; + else + w = readScalar(ss2); + if (!transpose) { + tmp.src = s - 1; + tmp.dst = d - 1; + tmp.ew = w; + } else { + tmp.src = d - 1; + tmp.dst = s - 1; + tmp.ew = w; + } + if (tmp.src == tmp.dst) { + numDiagonal++; + if (!remove_diagonal) { + edges[nE++] = tmp; + } + continue; + } + edges[nE++] = tmp; + if (symmetrize) { + struct KokkosKernels::Impl::Edge tmp2; + tmp2.src = tmp.dst; + tmp2.dst = tmp.src; + // the symmetrized value is w, -w or conj(w) if mtx_sym is + // SYMMETRIC, SKEW_SYMMETRIC or HERMITIAN, respectively. 
+ tmp2.ew = symmetryFlip(tmp.ew, mtx_sym); + edges[nE++] = tmp2; + } + } + mmf.close(); + std::sort(edges.begin(), edges.begin() + nE); + if (transpose) { + lno_t tmp = nr; + nr = nc; + nc = tmp; + } + // idx *nv, idx *ne, idx **xadj, idx **adj, wt **wt + *nrows = nr; + *ncols = nc; + *ne = nE; + //*xadj = new idx[nr + 1]; + KokkosKernels::Impl::md_malloc(xadj, nr + 1); + //*adj = new idx[nE]; + KokkosKernels::Impl::md_malloc(adj, nE); + //*ew = new wt[nE]; + KokkosKernels::Impl::md_malloc(ew, nE); + size_type eind = 0; + size_type actual = 0; + for (lno_t i = 0; i < nr; ++i) { + (*xadj)[i] = actual; + bool is_first = true; + while (eind < nE && edges[eind].src == i) { + if (is_first || !symmetrize || eind == 0 || + (eind > 0 && edges[eind - 1].dst != edges[eind].dst)) { + (*adj)[actual] = edges[eind].dst; + (*ew)[actual] = edges[eind].ew; + ++actual; + } + is_first = false; + ++eind; + } + } + (*xadj)[nr] = actual; + *ne = actual; + return 0; +} + +// Version of read_mtx which does not capture the number of columns. +// This is the old interface; it's kept for backwards compatibility. 
+template +int read_mtx(const char *fileName, lno_t *nv, size_type *ne, size_type **xadj, + lno_t **adj, scalar_t **ew, bool symmetrize = false, + bool remove_diagonal = true, bool transpose = false) { + lno_t ncol; // will discard + return read_mtx(fileName, nv, &ncol, ne, xadj, + adj, ew, symmetrize, + remove_diagonal, transpose); +} + +template +void read_matrix(lno_t *nv, size_type *ne, size_type **xadj, lno_t **adj, + scalar_t **ew, const char *filename) { + std::string strfilename(filename); + if (KokkosKernels::Impl::endswith(strfilename, ".mtx") || + KokkosKernels::Impl::endswith(strfilename, ".mm")) { + read_mtx(filename, nv, ne, xadj, adj, ew, false, false, false); + } + + else if (KokkosKernels::Impl::endswith(strfilename, ".bin")) { + read_graph_bin(nv, ne, xadj, adj, ew, filename); + } + + else if (KokkosKernels::Impl::endswith(strfilename, ".crs")) { + read_graph_crs(nv, ne, xadj, adj, ew, filename); + } + + else { + throw std::runtime_error("Reader is not available\n"); + } +} + +template +crsMat_t read_kokkos_crst_matrix(const char *filename_) { + std::string strfilename(filename_); + bool isMatrixMarket = KokkosKernels::Impl::endswith(strfilename, ".mtx") || + KokkosKernels::Impl::endswith(strfilename, ".mm"); + + typedef typename crsMat_t::StaticCrsGraphType graph_t; + typedef typename graph_t::row_map_type::non_const_type row_map_view_t; + typedef typename graph_t::entries_type::non_const_type cols_view_t; + typedef typename crsMat_t::values_type::non_const_type values_view_t; + + typedef typename row_map_view_t::value_type size_type; + typedef typename cols_view_t::value_type lno_t; + typedef typename values_view_t::value_type scalar_t; + + lno_t nr, nc, *adj; + size_type *xadj, nnzA; + scalar_t *values; + + if (isMatrixMarket) { + // MatrixMarket file contains the exact number of columns + read_mtx(filename_, &nr, &nc, &nnzA, &xadj, + &adj, &values, false, false, false); + } else { + //.crs and .bin files don't contain #cols, so will compute it 
later based on + // the entries + read_matrix(&nr, &nnzA, &xadj, &adj, &values, + filename_); + } + + row_map_view_t rowmap_view("rowmap_view", nr + 1); + cols_view_t columns_view("colsmap_view", nnzA); + values_view_t values_view("values_view", nnzA); + + { + Kokkos::View> + hr(xadj, nr + 1); + Kokkos::View> + hc(adj, nnzA); + Kokkos::View> + hv(values, nnzA); + Kokkos::deep_copy(rowmap_view, hr); + Kokkos::deep_copy(columns_view, hc); + Kokkos::deep_copy(values_view, hv); + } + + if (!isMatrixMarket) { + KokkosKernels::Impl::kk_view_reduce_max( + nnzA, columns_view, nc); + nc++; + } + + graph_t static_graph(columns_view, rowmap_view); + crsMat_t crsmat("CrsMatrix", nc, values_view, static_graph); + delete[] xadj; + delete[] adj; + delete[] values; + return crsmat; +} + +template +crsGraph_t read_kokkos_crst_graph(const char *filename_) { + typedef typename crsGraph_t::row_map_type::non_const_type row_map_view_t; + typedef typename crsGraph_t::entries_type::non_const_type cols_view_t; + + typedef typename row_map_view_t::value_type size_type; + typedef typename cols_view_t::value_type lno_t; + typedef double scalar_t; + + lno_t nv, *adj; + size_type *xadj, nnzA; + scalar_t *values; + read_matrix(&nv, &nnzA, &xadj, &adj, &values, + filename_); + + row_map_view_t rowmap_view("rowmap_view", nv + 1); + cols_view_t columns_view("colsmap_view", nnzA); + + { + typename row_map_view_t::HostMirror hr = + Kokkos::create_mirror_view(rowmap_view); + typename cols_view_t::HostMirror hc = + Kokkos::create_mirror_view(columns_view); + + for (lno_t i = 0; i <= nv; ++i) { + hr(i) = xadj[i]; + } + + for (size_type i = 0; i < nnzA; ++i) { + hc(i) = adj[i]; + } + Kokkos::deep_copy(rowmap_view, hr); + Kokkos::deep_copy(columns_view, hc); + } + + lno_t ncols = 0; + KokkosKernels::Impl::kk_view_reduce_max( + nnzA, columns_view, ncols); + ncols += 1; + + crsGraph_t static_graph(columns_view, rowmap_view, ncols); + delete[] xadj; + delete[] adj; + delete[] values; + return static_graph; 
+} + +template +inline void kk_sequential_create_incidence_matrix( + nnz_lno_t num_rows, const size_type *xadj, const nnz_lno_t *adj, + size_type *i_adj // output. preallocated +) { + std::vector c_xadj(num_rows); + for (nnz_lno_t i = 0; i < num_rows; i++) { + c_xadj[i] = xadj[i]; + } + int eCnt = 0; + for (nnz_lno_t i = 0; i < num_rows; i++) { + size_type begin = xadj[i]; + size_type end = xadj[i + 1]; + nnz_lno_t adjsize = end - begin; + + for (nnz_lno_t j = 0; j < adjsize; j++) { + size_type aind = j + begin; + nnz_lno_t col = adj[aind]; + if (i < col) { + i_adj[c_xadj[i]++] = eCnt; + i_adj[c_xadj[col]++] = eCnt++; + } + } + } + + for (nnz_lno_t i = 0; i < num_rows; i++) { + if (c_xadj[i] != xadj[i + 1]) { + std::cout << "i:" << i << " c_xadj[i]:" << c_xadj[i] + << " xadj[i+1]:" << xadj[i + 1] << std::endl; + } + } +} + +template +inline void kk_sequential_create_incidence_matrix_transpose( + const nnz_lno_t num_rows, const size_type num_edges, const size_type *xadj, + const nnz_lno_t *adj, + size_type *i_xadj, // output. preallocated + nnz_lno_t *i_adj // output. preallocated +) { + for (nnz_lno_t i = 0; i < num_edges / 2 + 1; i++) { + i_xadj[i] = i * 2; + } + int eCnt = 0; + for (nnz_lno_t i = 0; i < num_rows; i++) { + size_type begin = xadj[i]; + size_type end = xadj[i + 1]; + nnz_lno_t adjsize = end - begin; + + for (nnz_lno_t j = 0; j < adjsize; j++) { + size_type aind = j + begin; + nnz_lno_t col = adj[aind]; + if (i < col) { + i_adj[eCnt++] = i; + i_adj[eCnt++] = col; + } + } + } +} + +} // namespace Impl +} // namespace KokkosSparse +#endif // _KOKKOSSPARSE_IOUTILS_HPP diff --git a/src/sparse/KokkosSparse_SortCrs.hpp b/src/sparse/KokkosSparse_SortCrs.hpp new file mode 100644 index 0000000000..97bad80f39 --- /dev/null +++ b/src/sparse/KokkosSparse_SortCrs.hpp @@ -0,0 +1,720 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef _KOKKOSSPARSE_SORTCRS_HPP +#define _KOKKOSSPARSE_SORTCRS_HPP + +#include "Kokkos_Core.hpp" +#include "KokkosKernels_Sorting.hpp" + +namespace KokkosSparse { + +// ---------------------------------- +// BSR matrix/graph sorting utilities +// ---------------------------------- + +// Sort a BSR matrix: within each row, sort entries ascending by column and +// permute the values accordingly. +template +void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, + const entries_t& entries, const values_t& values); + +template +void sort_bsr_matrix(const bsrMat_t& A); + +// ---------------------------------- +// CRS matrix/graph sorting utilities +// ---------------------------------- + +// The sort_crs* functions sort the adjacent column list for each row into +// ascending order. + +template +void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, + const values_t& values); + +template +void sort_crs_matrix(const crsMat_t& A); + +template +void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries); + +template +void sort_crs_graph(const crsGraph_t& G); + +// sort_and_merge_matrix produces a new matrix which is equivalent to A but is +// sorted and has no duplicated entries: each (i, j) is unique. Values for +// duplicated entries are summed. 
+template +crsMat_t sort_and_merge_matrix(const crsMat_t& A); + +template +crsGraph_t sort_and_merge_graph(const crsGraph_t& G); + +template +void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, + const entries_t& entries_in, rowmap_t& rowmap_out, + entries_t& entries_out); + +namespace Impl { + +template +struct SortCrsMatrixFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using scalar_t = typename values_t::non_const_value_type; + using team_mem = typename Kokkos::TeamPolicy::member_type; + // The functor owns memory for entriesAux, so it can't have + // MemoryTraits + using entries_managed_t = Kokkos::View; + using values_managed_t = Kokkos::View; + + SortCrsMatrixFunctor(bool usingRangePol, const rowmap_t& rowmap_, + const entries_t& entries_, const values_t& values_) + : rowmap(rowmap_), entries(entries_), values(values_) { + if (usingRangePol) { + entriesAux = entries_managed_t( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), + entries.extent(0)); + valuesAux = values_managed_t( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values aux"), + values.extent(0)); + } + // otherwise, aux arrays won't be allocated (sorting in place) + } + + KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { + size_type rowStart = rowmap(i); + size_type rowEnd = rowmap(i + 1); + lno_t rowNum = rowEnd - rowStart; + // Radix sort requires unsigned keys for comparison + using unsigned_lno_t = typename std::make_unsigned::type; + KokkosKernels::SerialRadixSort2( + (unsigned_lno_t*)entries.data() + rowStart, + (unsigned_lno_t*)entriesAux.data() + rowStart, values.data() + rowStart, + valuesAux.data() + rowStart, rowNum); + } + + KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { + size_type i = t.league_rank(); + size_type rowStart = rowmap(i); + size_type rowEnd = rowmap(i + 1); + lno_t rowNum = rowEnd - rowStart; + 
KokkosKernels::TeamBitonicSort2( + entries.data() + rowStart, values.data() + rowStart, rowNum, t); + } + + rowmap_t rowmap; + entries_t entries; + entries_managed_t entriesAux; + values_t values; + values_managed_t valuesAux; +}; + +template +struct SortCrsGraphFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using team_mem = typename Kokkos::TeamPolicy::member_type; + // The functor owns memory for entriesAux, so it can't have + // MemoryTraits + using entries_managed_t = Kokkos::View; + + SortCrsGraphFunctor(bool usingRangePol, const rowmap_t& rowmap_, + const entries_t& entries_) + : rowmap(rowmap_), entries(entries_) { + if (usingRangePol) { + entriesAux = entries_managed_t( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), + entries.extent(0)); + } + // otherwise, aux arrays won't be allocated (sorting in place) + } + + KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { + size_type rowStart = rowmap(i); + size_type rowEnd = rowmap(i + 1); + lno_t rowNum = rowEnd - rowStart; + // Radix sort requires unsigned keys for comparison + using unsigned_lno_t = typename std::make_unsigned::type; + KokkosKernels::SerialRadixSort( + (unsigned_lno_t*)entries.data() + rowStart, + (unsigned_lno_t*)entriesAux.data() + rowStart, rowNum); + } + + KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { + size_type i = t.league_rank(); + size_type rowStart = rowmap(i); + size_type rowEnd = rowmap(i + 1); + lno_t rowNum = rowEnd - rowStart; + KokkosKernels::TeamBitonicSort( + entries.data() + rowStart, rowNum, t); + } + + rowmap_t rowmap; + entries_t entries; + entries_managed_t entriesAux; +}; + +template +struct MergedRowmapFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using c_rowmap_t = typename rowmap_t::const_type; + + // Precondition: entries are sorted within each row + 
MergedRowmapFunctor(const rowmap_t& mergedCounts_, const c_rowmap_t& rowmap_, + const entries_t& entries_) + : mergedCounts(mergedCounts_), rowmap(rowmap_), entries(entries_) {} + + // Reduction body: stores the number of distinct columns of `row` in + // mergedCounts(row) and adds that count to the running total lnewNNZ. + KOKKOS_INLINE_FUNCTION void operator()(lno_t row, size_type& lnewNNZ) const { + size_type rowBegin = rowmap(row); + size_type rowEnd = rowmap(row + 1); + if (rowEnd == rowBegin) { + // Row was empty to begin with + mergedCounts(row) = 0; + return; + } + // Otherwise, the first entry in the row exists + lno_t uniqueEntries = 1; + for (size_type j = rowBegin + 1; j < rowEnd; j++) { + if (entries(j - 1) != entries(j)) uniqueEntries++; + } + mergedCounts(row) = uniqueEntries; + lnewNNZ += uniqueEntries; + // The last row also zeroes the trailing slot of mergedCounts. + // NOTE(review): if the last row is empty, the early return above skips + // this store and the trailing slot is never written here; presumably + // harmless because the callers run an exclusive prefix sum afterwards - + // TODO confirm that the scan never reads the final input element. + if (row == lno_t((rowmap.extent(0) - 1) - 1)) mergedCounts(row + 1) = 0; + } + + rowmap_t mergedCounts; + c_rowmap_t rowmap; + entries_t entries; +}; + +template +struct MatrixMergedEntriesFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using scalar_t = typename values_t::non_const_value_type; + + // Precondition: entries are sorted within each row + MatrixMergedEntriesFunctor(const rowmap_t& rowmap_, const entries_t& entries_, + const values_t& values_, + const rowmap_t& mergedRowmap_, + const entries_t& mergedEntries_, + const values_t& mergedValues_) + : rowmap(rowmap_), + entries(entries_), + values(values_), + mergedRowmap(mergedRowmap_), + mergedEntries(mergedEntries_), + mergedValues(mergedValues_) {} + + KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { + size_type rowBegin = rowmap(row); + size_type rowEnd = rowmap(row + 1); + if (rowEnd == rowBegin) { + // Row was empty to begin with, nothing to do + return; + } + // Otherwise, accumulate the value for each column + scalar_t accumVal = values(rowBegin); + lno_t accumCol = entries(rowBegin); + size_type insertPos = mergedRowmap(row); + for (size_type j = rowBegin + 1; j < rowEnd; j++) { + if (accumCol == entries(j)) { + // accumulate +
accumVal += values(j); + } else { + // write out and reset + mergedValues(insertPos) = accumVal; + mergedEntries(insertPos) = accumCol; + insertPos++; + accumVal = values(j); + accumCol = entries(j); + } + } + // always left with the last unique entry + mergedValues(insertPos) = accumVal; + mergedEntries(insertPos) = accumCol; + } + + rowmap_t rowmap; + entries_t entries; + values_t values; + rowmap_t mergedRowmap; + entries_t mergedEntries; + values_t mergedValues; +}; + +template +struct GraphMergedEntriesFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + + // Precondition: entries are sorted within each row + GraphMergedEntriesFunctor(const rowmap_t& rowmap_, const entries_t& entries_, + const rowmap_t& mergedRowmap_, + const entries_t& mergedEntries_) + : rowmap(rowmap_), + entries(entries_), + mergedRowmap(mergedRowmap_), + mergedEntries(mergedEntries_) {} + + KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { + size_type rowBegin = rowmap(row); + size_type rowEnd = rowmap(row + 1); + if (rowEnd == rowBegin) { + // Row was empty to begin with, nothing to do + return; + } + // Otherwise, accumulate the value for each column + lno_t accumCol = entries(rowBegin); + size_type insertPos = mergedRowmap(row); + for (size_type j = rowBegin + 1; j < rowEnd; j++) { + if (accumCol != entries(j)) { + // write out and reset + mergedEntries(insertPos) = accumCol; + insertPos++; + accumCol = entries(j); + } + } + // always left with the last unique entry + mergedEntries(insertPos) = accumCol; + } + + rowmap_t rowmap; + entries_t entries; + rowmap_t mergedRowmap; + entries_t mergedEntries; +}; + +template +KOKKOS_INLINE_FUNCTION void kk_swap(T& a, T& b) { + T t = a; + a = b; + b = t; +} + +template +struct sort_bsr_functor { + using lno_t = typename entries_type::non_const_value_type; + + row_map_type rowmap; + entries_type entries; + values_type values; + const lno_t blocksize; + + 
sort_bsr_functor(row_map_type rowmap_, entries_type entries_, + values_type values_, const lno_t blocksize_) + : rowmap(rowmap_), + entries(entries_), + values(values_), + blocksize(blocksize_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const lno_t i) const { + const lno_t rowStart = rowmap(i); + const lno_t rowSize = rowmap(i + 1) - rowStart; + auto* e = entries.data() + rowStart; + auto* v = values.data() + rowStart * blocksize; + // Bubble sort over the column indices of row i; every swap of two indices + // also swaps the two blocksize-long value blocks that belong to them. + bool done = false; + while (!done) { + done = true; + for (lno_t j = 1; j < rowSize; ++j) { + const lno_t jp = j - 1; + if (e[jp] <= e[j]) continue; + Impl::kk_swap(e[jp], e[j]); + auto const vb = v + j * blocksize; + auto const vbp = v + jp * blocksize; + for (lno_t k = 0; k < blocksize; + ++k) // std::swap_ranges(vb, vb + blocksize, vbp); + Impl::kk_swap(vb[k], vbp[k]); + done = false; + } + } + } +}; + +} // namespace Impl + +// Sort a CRS matrix: within each row, sort entries ascending by column. +// At the same time, permute the values. +template +void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, + const values_t& values) { + using lno_t = typename entries_t::non_const_value_type; + using team_pol = Kokkos::TeamPolicy; + bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); + lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; + if (numRows == 0) return; + Impl::SortCrsMatrixFunctor + funct(useRadix, rowmap, entries, values); + if (useRadix) { + Kokkos::parallel_for("sort_crs_matrix", + Kokkos::RangePolicy(0, numRows), + funct); + } else { + // Try to get teamsize to be largest power of 2 not greater than avg entries + // per row + // TODO (probably important for performance): add thread-level sort also, and + // use that for small avg degree. But this works for now.
+ lno_t idealTeamSize = 1; + // Average number of entries per row, rounded up. + lno_t avgDeg = (entries.extent(0) + numRows - 1) / numRows; + while (idealTeamSize < avgDeg / 2) { + idealTeamSize *= 2; + } + team_pol temp(numRows, 1); + lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); + lno_t teamSize = std::min(idealTeamSize, maxTeamSize); + Kokkos::parallel_for("sort_crs_matrix", team_pol(numRows, teamSize), funct); + } +} + +template +void sort_crs_matrix(const crsMat_t& A) { + // Note: rowmap_t has const values, but that's OK as sorting doesn't modify it + using rowmap_t = typename crsMat_t::row_map_type; + using entries_t = typename crsMat_t::index_type::non_const_type; + using values_t = typename crsMat_t::values_type::non_const_type; + using exec_space = typename crsMat_t::execution_space; + // NOTE: the rowmap of a StaticCrsGraph is const-valued, but the + // entries and CrsMatrix values are non-const (so sorting them directly + // is allowed) + sort_crs_matrix( + A.graph.row_map, A.graph.entries, A.values); +} + +// Sort a BSR matrix: within each row, sort entries ascending by column and +// permute the values accordingly. +template +void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, + const entries_t& entries, const values_t& values) { + // TODO: this is O(N^2) mock for debugging - do regular implementation based + // on Radix/Bitonic sort (like CSR) IDEA: maybe we need only one general + // Radix2/Bitonic2 and CSR sorting may call it with blockSize=1 ? + lno_t numRows = rowmap.extent(0) ?
rowmap.extent(0) - 1 : 0; + if (numRows == 0) return; + // Number of scalar values stored per block (blockdim x blockdim). + const lno_t blocksize = blockdim * blockdim; + + assert(values.extent(0) == entries.extent(0) * blocksize); + Impl::sort_bsr_functor bsr_sorter( + rowmap, entries, values, blocksize); + Kokkos::parallel_for("sort_bsr_matrix", + Kokkos::RangePolicy(0, numRows), + bsr_sorter); +} + +// Sort a BSR matrix (like CRS but single values are replaced with contiguous +// blocks) +template +void sort_bsr_matrix(const bsrMat_t& A) { + // NOTE: unlike rowmap, entries and values are non-const, so we can sort them + // directly + sort_bsr_matrix( + A.blockDim(), A.graph.row_map, A.graph.entries, A.values); +} + +// Sort a CRS graph: within each row, sort entries ascending by column. +template +void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) { + using lno_t = typename entries_t::non_const_value_type; + using team_pol = Kokkos::TeamPolicy; + bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); + lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; + if (numRows == 0) return; + Impl::SortCrsGraphFunctor funct( + useRadix, rowmap, entries); + if (useRadix) { + Kokkos::parallel_for("sort_crs_graph", + Kokkos::RangePolicy(0, numRows), + funct); + } else { + // Try to get teamsize to be largest power of 2 less than or equal to + // half the entries per row. 0.5 * #entries is bitonic's parallelism within + // a row. + // TODO (probably important for performance): add thread-level sort also, and + // use that for small avg degree. But this works for now.
+ lno_t idealTeamSize = 1; + lno_t avgDeg = (entries.extent(0) + numRows - 1) / numRows; + while (idealTeamSize < avgDeg / 2) { + idealTeamSize *= 2; + } + team_pol temp(numRows, 1); + lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); + lno_t teamSize = std::min(idealTeamSize, maxTeamSize); + Kokkos::parallel_for("sort_crs_graph", team_pol(numRows, teamSize), funct); + } +} + +template +void sort_crs_graph(const crsGraph_t& G) { + static_assert( + !std::is_const::value, + "sort_crs_graph requires StaticCrsGraph entries to be non-const."); + sort_crs_graph(G.row_map, G.entries); +} + +// Sort the rows of matrix, and merge duplicate entries. +template +crsMat_t sort_and_merge_matrix(const crsMat_t& A) { + using c_rowmap_t = typename crsMat_t::row_map_type; + using rowmap_t = typename crsMat_t::row_map_type::non_const_type; + using entries_t = typename crsMat_t::index_type::non_const_type; + using values_t = typename crsMat_t::values_type::non_const_type; + using size_type = typename rowmap_t::non_const_value_type; + using exec_space = typename crsMat_t::execution_space; + using range_t = Kokkos::RangePolicy; + sort_crs_matrix(A); + // Count entries per row into a new rowmap, in terms of merges that can be + // done + rowmap_t mergedRowmap( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "SortedMerged rowmap"), + A.numRows() + 1); + size_type numCompressedEntries = 0; + Kokkos::parallel_reduce(range_t(0, A.numRows()), + Impl::MergedRowmapFunctor( + mergedRowmap, A.graph.row_map, A.graph.entries), + numCompressedEntries); + // Prefix sum to get rowmap + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( + A.numRows() + 1, mergedRowmap); + entries_t mergedEntries("SortedMerged entries", numCompressedEntries); + values_t mergedValues("SortedMerged values", numCompressedEntries); + // Compute merged entries and values + Kokkos::parallel_for( + range_t(0, A.numRows()), + Impl::MatrixMergedEntriesFunctor( + A.graph.row_map, A.graph.entries, 
A.values, mergedRowmap, + mergedEntries, mergedValues)); + // Finally, construct the new compressed matrix + return crsMat_t("SortedMerged", A.numRows(), A.numCols(), + numCompressedEntries, mergedValues, mergedRowmap, + mergedEntries); +} + +template +void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, + const entries_t& entries_in, rowmap_t& rowmap_out, + entries_t& entries_out) { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using range_t = Kokkos::RangePolicy; + using const_rowmap_t = typename rowmap_t::const_type; + lno_t numRows = rowmap_in.extent(0); + if (numRows <= 1) { + // Matrix has zero rows + rowmap_out = rowmap_t(); + entries_out = entries_t(); + return; + } + numRows--; + // Sort in place + sort_crs_graph(rowmap_in, entries_in); + // Count entries per row into a new rowmap, in terms of merges that can be + // done + rowmap_out = rowmap_t( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "SortedMerged rowmap"), + numRows + 1); + size_type numCompressedEntries = 0; + Kokkos::parallel_reduce(range_t(0, numRows), + Impl::MergedRowmapFunctor( + rowmap_out, rowmap_in, entries_in), + numCompressedEntries); + // Prefix sum to get rowmap + KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum( + numRows + 1, rowmap_out); + entries_out = entries_t("SortedMerged entries", numCompressedEntries); + // Compute merged entries and values + Kokkos::parallel_for( + range_t(0, numRows), + Impl::GraphMergedEntriesFunctor( + rowmap_in, entries_in, rowmap_out, entries_out)); +} + +template +crsGraph_t sort_and_merge_graph(const crsGraph_t& G) { + using rowmap_t = typename crsGraph_t::row_map_type::non_const_type; + using entries_t = typename crsGraph_t::entries_type; + static_assert( + !std::is_const::value, + "sort_and_merge_graph requires StaticCrsGraph entries to be non-const."); + rowmap_t mergedRowmap; + entries_t mergedEntries; + sort_and_merge_graph(G.row_map, 
G.entries, mergedRowmap, + mergedEntries); + return crsGraph_t(mergedEntries, mergedRowmap); +} + +} // namespace KokkosSparse + +namespace KokkosKernels { + +// ---------------------------------- +// BSR matrix/graph sorting utilities +// ---------------------------------- + +// Sort a BSR matrix: within each row, sort entries ascending by column and +// permute the values accordingly. +template +[[deprecated]] void sort_bsr_matrix(const lno_t blockdim, + const rowmap_t& rowmap, + const entries_t& entries, + const values_t& values) { + KokkosSparse::sort_bsr_matrix(blockdim, rowmap, entries, values); +} + +template +[[deprecated]] void sort_bsr_matrix(const bsrMat_t& A) { + KokkosSparse::sort_bsr_matrix(A); +} + +// ---------------------------------- +// CRS matrix/graph sorting utilities +// ---------------------------------- + +// The sort_crs* functions sort the adjacent column list for each row into +// ascending order. + +template +[[deprecated]] void sort_crs_matrix(const rowmap_t& rowmap, + const entries_t& entries, + const values_t& values) { + KokkosSparse::sort_crs_matrix(rowmap, entries, values); +} + +template +[[deprecated]] void sort_crs_matrix(const crsMat_t& A) { + KokkosSparse::sort_crs_matrix(A); +} + +template +[[deprecated]] void sort_crs_graph(const rowmap_t& rowmap, + const entries_t& entries) { + KokkosSparse::sort_crs_graph(rowmap, entries); +} + +template +[[deprecated]] void sort_crs_graph(const crsGraph_t& G) { + KokkosSparse::sort_crs_graph(G); +} + +// sort_and_merge_matrix produces a new matrix which is equivalent to A but is +// sorted and has no duplicated entries: each (i, j) is unique. Values for +// duplicated entries are summed.
+template +[[deprecated]] crsMat_t sort_and_merge_matrix(const crsMat_t& A) { + // Hand the merged matrix back to the caller; without the return, control + // flows off the end of a non-void function (undefined behavior). + return KokkosSparse::sort_and_merge_matrix(A); +} + +// Deprecated alias of KokkosSparse::sort_and_merge_graph: returns the sorted, +// duplicate-free graph built from G. +template +[[deprecated]] crsGraph_t sort_and_merge_graph(const crsGraph_t& G) { + return KokkosSparse::sort_and_merge_graph(G); +} + +template +[[deprecated]] void sort_and_merge_graph( + const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, + rowmap_t& rowmap_out, entries_t& entries_out) { + KokkosSparse::sort_and_merge_graph(rowmap_in, entries_in, rowmap_out, + entries_out); +} + +// For backward compatibility: keep the public interface accessible in +// KokkosKernels::Impl:: +namespace Impl { +template +[[deprecated]] void sort_crs_graph(const rowmap_t& rowmap, + const entries_t& entries) { + KokkosKernels::sort_crs_graph(rowmap, + entries); +} + +template +[[deprecated]] void sort_crs_matrix(const rowmap_t& rowmap, + const entries_t& entries, + const values_t& values) { + KokkosKernels::sort_crs_matrix(rowmap, entries, values); +} + +template +[[deprecated]] void sort_crs_matrix(const crsMat_t& A) { + KokkosKernels::sort_crs_matrix(A); +} + +template +[[deprecated]] void sort_and_merge_graph( + const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, + rowmap_t& rowmap_out, entries_t& entries_out) { + KokkosKernels::sort_and_merge_graph( + rowmap_in, entries_in, rowmap_out, entries_out); +} + +template +[[deprecated]] crsMat_t sort_and_merge_matrix(const crsMat_t& A) { + return KokkosKernels::sort_and_merge_matrix(A); +} + +} // namespace Impl +} // namespace KokkosKernels + +#endif // _KOKKOSSPARSE_SORTCRS_HPP diff --git a/src/common/KokkosKernels_SparseUtils.hpp b/src/sparse/KokkosSparse_Utils.hpp similarity index 100% rename from src/common/KokkosKernels_SparseUtils.hpp rename to src/sparse/KokkosSparse_Utils.hpp diff --git a/src/common/KokkosKernels_SparseUtils_cusparse.hpp b/src/sparse/KokkosSparse_Utils_cusparse.hpp similarity index 100% rename from
src/common/KokkosKernels_SparseUtils_cusparse.hpp rename to src/sparse/KokkosSparse_Utils_cusparse.hpp diff --git a/src/common/KokkosKernels_SparseUtils_mkl.hpp b/src/sparse/KokkosSparse_Utils_mkl.hpp similarity index 100% rename from src/common/KokkosKernels_SparseUtils_mkl.hpp rename to src/sparse/KokkosSparse_Utils_mkl.hpp diff --git a/src/common/KokkosKernels_SparseUtils_rocsparse.hpp b/src/sparse/KokkosSparse_Utils_rocsparse.hpp similarity index 100% rename from src/common/KokkosKernels_SparseUtils_rocsparse.hpp rename to src/sparse/KokkosSparse_Utils_rocsparse.hpp diff --git a/src/sparse/KokkosSparse_sptrsv_cholmod.hpp b/src/sparse/KokkosSparse_sptrsv_cholmod.hpp index 796ee579bd..6d354047cf 100644 --- a/src/sparse/KokkosSparse_sptrsv_cholmod.hpp +++ b/src/sparse/KokkosSparse_sptrsv_cholmod.hpp @@ -56,7 +56,7 @@ defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) #include "cholmod.h" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_sptrsv_supernode.hpp" namespace KokkosSparse { diff --git a/src/sparse/KokkosSparse_sptrsv_supernode.hpp b/src/sparse/KokkosSparse_sptrsv_supernode.hpp index fa9a607be7..481bd2cc0a 100644 --- a/src/sparse/KokkosSparse_sptrsv_supernode.hpp +++ b/src/sparse/KokkosSparse_sptrsv_supernode.hpp @@ -63,7 +63,7 @@ #include "KokkosBatched_Trmm_Decl.hpp" #include "KokkosBatched_Trmm_Serial_Impl.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_SortCrs.hpp" #include "KokkosSparse_sptrsv.hpp" namespace KokkosSparse { @@ -597,8 +597,8 @@ host_graph_t generate_supernodal_graph(bool col_major, graph_t &graph, #endif // sort column ids per row - KokkosKernels::sort_crs_graph(hr, hc); + KokkosSparse::sort_crs_graph(hr, hc); #ifdef KOKKOS_SPTRSV_SUPERNODE_PROFILE time_seconds = timer.seconds(); std::cout << " > Generate Supernodal Graph: sort graph : " diff --git a/src/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp b/src/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp index 
0f265dfbc4..62b86ca72e 100644 --- a/src/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp +++ b/src/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp @@ -52,7 +52,7 @@ #include "KokkosKernels_Uniform_Initialized_MemoryPool.hpp" #include "KokkosKernels_BitUtils.hpp" #include "KokkosKernels_SimpleUtils.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_SortCrs.hpp" // FOR DEBUGGING #include "KokkosBlas1_nrm2.hpp" @@ -979,8 +979,8 @@ class PointGaussSeidel { gsHandle->set_long_row_x(long_row_x); } else { // Just sort rows by ID. - KokkosKernels::sort_crs_graph(color_xadj, color_adj); + KokkosSparse::sort_crs_graph(color_xadj, color_adj); } #ifdef KOKKOSSPARSE_IMPL_TIME_REVERSE MyExecSpace().fence(); diff --git a/src/sparse/impl/KokkosSparse_spadd_symbolic_impl.hpp b/src/sparse/impl/KokkosSparse_spadd_symbolic_impl.hpp index 2131cec751..c4ae435f55 100644 --- a/src/sparse/impl/KokkosSparse_spadd_symbolic_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spadd_symbolic_impl.hpp @@ -46,7 +46,7 @@ #define _KOKKOS_SPADD_SYMBOLIC_IMPL_HPP #include "KokkosKernels_Handle.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_SortCrs.hpp" #include "Kokkos_ArithTraits.hpp" namespace KokkosSparse { @@ -593,8 +593,8 @@ void spadd_symbolic_impl( "KokkosSparse::SpAdd:Symbolic::InputNotSorted::UnmergedSum", range_type(0, nrows), unmergedSum); // sort the unmerged sum - KokkosKernels::sort_crs_matrix( + KokkosSparse::sort_crs_matrix( c_rowmap_upperbound, c_entries_uncompressed, ab_perm); ordinal_view_t a_pos( Kokkos::view_alloc(Kokkos::WithoutInitializing, "A entry positions"), diff --git a/src/sparse/impl/KokkosSparse_spgemm_impl.hpp b/src/sparse/impl/KokkosSparse_spgemm_impl.hpp index 9b4c28c877..dadc944b09 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_impl.hpp @@ -47,7 +47,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp 
b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp index d1bfb3db5c..9a6ab70f9e 100644 --- a/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spgemm_mkl_impl.hpp @@ -46,7 +46,7 @@ #define _KOKKOSSPGEMMMKL_HPP #include "KokkosKernels_config.h" -#include "KokkosKernels_SparseUtils_mkl.hpp" +#include "KokkosSparse_Utils_mkl.hpp" #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL #include "mkl_spblas.h" diff --git a/src/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp b/src/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp index 19bc5ec163..d779ff3e96 100644 --- a/src/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp +++ b/src/sparse/impl/KokkosSparse_twostage_gauss_seidel_impl.hpp @@ -57,7 +57,7 @@ // needed for classical GS #include "KokkosSparse_sptrsv.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_gauss_seidel_handle.hpp" @@ -854,11 +854,11 @@ class TwostageGaussSeidel { // values // CuSparse needs matrix sorted by column indexes for each row // TODO: may need to move this to symbolic/numeric of sptrsv - KokkosKernels::sort_crs_matrix( + KokkosSparse::sort_crs_matrix( rowmap_viewL, column_viewL, values_viewL); - KokkosKernels::sort_crs_matrix( + KokkosSparse::sort_crs_matrix( rowmap_viewU, column_viewU, values_viewU); // now do symbolic diff --git a/unit_test/common/Test_Common.hpp b/unit_test/common/Test_Common.hpp index 9d6958e816..cc4204d076 100644 --- a/unit_test/common/Test_Common.hpp +++ b/unit_test/common/Test_Common.hpp @@ -8,7 +8,6 @@ // #include #include #include -#include #include #include #include diff --git a/unit_test/common/Test_Common_Sorting.hpp b/unit_test/common/Test_Common_Sorting.hpp index 1580a0c98b..f0320cb637 100644 --- a/unit_test/common/Test_Common_Sorting.hpp +++ b/unit_test/common/Test_Common_Sorting.hpp @@ -525,226 +525,6 @@ void testBitonicSortLexicographic() { ASSERT_TRUE(ordered); } -template -void testSortCRS(default_lno_t numRows, 
default_lno_t numCols, - default_size_type nnz, bool doValues, bool doStructInterface) { - using scalar_t = default_scalar; - using lno_t = default_lno_t; - using size_type = default_size_type; - using mem_space = typename exec_space::memory_space; - using device_t = Kokkos::Device; - using crsMat_t = - KokkosSparse::CrsMatrix; - using rowmap_t = typename crsMat_t::row_map_type; - using entries_t = typename crsMat_t::index_type; - using values_t = typename crsMat_t::values_type; - // Create a random matrix on device - // IMPORTANT: kk_generate_sparse_matrix does not sort the rows, if it did this - // wouldn't test anything - crsMat_t A = KokkosKernels::Impl::kk_generate_sparse_matrix( - numRows, numCols, nnz, 2, numCols / 2); - auto rowmap = A.graph.row_map; - auto entries = A.graph.entries; - auto values = A.values; - Kokkos::View rowmapHost("rowmap host", - numRows + 1); - Kokkos::View entriesHost("sorted entries host", - nnz); - Kokkos::View valuesHost("sorted values host", - nnz); - Kokkos::deep_copy(rowmapHost, rowmap); - Kokkos::deep_copy(entriesHost, entries); - Kokkos::deep_copy(valuesHost, values); - struct ColValue { - ColValue() {} - ColValue(lno_t c, scalar_t v) : col(c), val(v) {} - bool operator<(const ColValue& rhs) const { return col < rhs.col; } - bool operator==(const ColValue& rhs) const { - return col == rhs.col && val == rhs.val; - } - lno_t col; - scalar_t val; - }; - // sort one row at a time on host using STL. 
- { - for (lno_t i = 0; i < numRows; i++) { - std::vector rowCopy; - for (size_type j = rowmapHost(i); j < rowmapHost(i + 1); j++) - rowCopy.emplace_back(entriesHost(j), valuesHost(j)); - std::sort(rowCopy.begin(), rowCopy.end()); - // write sorted row back - for (size_t j = 0; j < rowCopy.size(); j++) { - entriesHost(rowmapHost(i) + j) = rowCopy[j].col; - valuesHost(rowmapHost(i) + j) = rowCopy[j].val; - } - } - } - // call the actual sort routine being tested - if (doValues) { - if (doStructInterface) { - KokkosKernels::sort_crs_matrix(A); - } else { - KokkosKernels::sort_crs_matrix( - A.graph.row_map, A.graph.entries, A.values); - } - } else { - if (doStructInterface) { - KokkosKernels::sort_crs_graph(A.graph); - } else { - KokkosKernels::sort_crs_graph( - A.graph.row_map, A.graph.entries); - } - } - // Copy to host and compare - Kokkos::View entriesOut("sorted entries host", - nnz); - Kokkos::View valuesOut("sorted values host", - nnz); - Kokkos::deep_copy(entriesOut, entries); - Kokkos::deep_copy(valuesOut, values); - for (size_type i = 0; i < nnz; i++) { - EXPECT_EQ(entriesHost(i), entriesOut(i)) - << "Sorted column indices are wrong!"; - if (doValues) { - EXPECT_EQ(valuesHost(i), valuesOut(i)) << "Sorted values are wrong!"; - } - } -} - -template -void testSortCRSUnmanaged(bool doValues, bool doStructInterface) { - // This test is about bug #960. 
- using scalar_t = default_scalar; - using lno_t = default_lno_t; - using size_type = default_size_type; - using mem_space = typename exec_space::memory_space; - using device_t = Kokkos::Device; - using crsMat_t = - KokkosSparse::CrsMatrix, - size_type>; - using crsMat_Managed_t = - KokkosSparse::CrsMatrix; - using rowmap_t = typename crsMat_t::row_map_type; - using entries_t = typename crsMat_t::index_type; - using values_t = typename crsMat_t::values_type; - const lno_t numRows = 50; - const lno_t numCols = numRows; - size_type nnz = numRows * 5; - // Create a random matrix on device - // IMPORTANT: kk_generate_sparse_matrix does not sort the rows, if it did this - // wouldn't test anything - crsMat_Managed_t A_managed = - KokkosKernels::Impl::kk_generate_sparse_matrix( - numRows, numCols, nnz, 2, numCols / 2); - crsMat_t A(A_managed); - auto rowmap = A.graph.row_map; - auto entries = A.graph.entries; - auto values = A.values; - if (doValues) { - if (doStructInterface) { - KokkosKernels::sort_crs_matrix(A); - } else { - KokkosKernels::sort_crs_matrix( - A.graph.row_map, A.graph.entries, A.values); - } - } else { - if (doStructInterface) { - KokkosKernels::sort_crs_graph(A.graph); - } else { - KokkosKernels::sort_crs_graph( - A.graph.row_map, A.graph.entries); - } - } -} - -template -void testSortAndMerge() { - using size_type = default_size_type; - using lno_t = default_lno_t; - using scalar_t = default_scalar; - using mem_space = typename exec_space::memory_space; - using device_t = Kokkos::Device; - using crsMat_t = - KokkosSparse::CrsMatrix; - using rowmap_t = typename crsMat_t::row_map_type::non_const_type; - using entries_t = typename crsMat_t::index_type; - using values_t = typename crsMat_t::values_type; - using Kokkos::HostSpace; - using Kokkos::MemoryTraits; - using Kokkos::Unmanaged; - // Create a small CRS matrix on host - std::vector inRowmap = {0, 4, 4, 5, 7, 10}; - std::vector inEntries = { - 4, 3, 5, 3, // row 0 - // row 1 has no entries - 6, // 
row 2 - 2, 2, // row 3 - 0, 1, 2 // row 4 - }; - // note: choosing values that can be represented exactly by float - std::vector inValues = { - 1.5, 4, 1, -3, // row 0 - // row 1 - 2, // row 2 - -1, -2, // row 3 - 0, 3.5, -2.25 // row 4 - }; - lno_t nrows = 5; - lno_t ncols = 7; - size_type nnz = inEntries.size(); - Kokkos::View> hostInRowmap( - inRowmap.data(), nrows + 1); - Kokkos::View> hostInEntries( - inEntries.data(), nnz); - Kokkos::View> hostInValues( - inValues.data(), nnz); - rowmap_t devInRowmap("", nrows + 1); - entries_t devInEntries("", nnz); - values_t devInValues("", nnz); - Kokkos::deep_copy(devInRowmap, hostInRowmap); - Kokkos::deep_copy(devInEntries, hostInEntries); - Kokkos::deep_copy(devInValues, hostInValues); - crsMat_t input("Input", nrows, ncols, nnz, devInValues, devInRowmap, - devInEntries); - crsMat_t output = KokkosKernels::sort_and_merge_matrix(input); - exec_space().fence(); - EXPECT_EQ(output.numRows(), nrows); - EXPECT_EQ(output.numCols(), ncols); - auto outRowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - output.graph.row_map); - auto outEntries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), - output.graph.entries); - auto outValues = - Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output.values); - // Expect 2 merges to have taken place - std::vector goldRowmap = {0, 3, 3, 4, 5, 8}; - std::vector goldEntries = { - 3, 4, 5, // row 0 - // row 1 has no entries - 6, // row 2 - 2, // row 3 - 0, 1, 2 // row 4 - }; - // note: choosing values that can be represented exactly by float - std::vector goldValues = { - 1, 1.5, 1, // row 0 - // row 1 - 2, // row 2 - -3, // row 3 - 0, 3.5, -2.25 // row 4 - }; - EXPECT_EQ(goldRowmap.size(), outRowmap.extent(0)); - EXPECT_EQ(goldEntries.size(), outEntries.extent(0)); - EXPECT_EQ(goldValues.size(), outValues.extent(0)); - EXPECT_EQ(goldValues.size(), output.nnz()); - for (lno_t i = 0; i < nrows + 1; i++) EXPECT_EQ(goldRowmap[i], outRowmap(i)); - for 
(size_type i = 0; i < output.nnz(); i++) { - EXPECT_EQ(goldEntries[i], outEntries(i)); - EXPECT_EQ(goldValues[i], outValues(i)); - } -} - TEST_F(TestCategory, common_serial_radix) { // Test serial radix over some contiguous small arrays // 1st arg is #arrays, 2nd arg is max subarray size @@ -805,31 +585,4 @@ TEST_F(TestCategory, common_device_bitonic) { testBitonicSortLexicographic(); } -TEST_F(TestCategory, common_sort_crsgraph) { - for (int doStructInterface = 0; doStructInterface < 2; doStructInterface++) { - testSortCRS(10, 10, 20, false, doStructInterface); - testSortCRS(100, 100, 2000, false, doStructInterface); - testSortCRS(1000, 1000, 30000, false, doStructInterface); - testSortCRSUnmanaged(false, doStructInterface); - } -} - -TEST_F(TestCategory, common_sort_crsmatrix) { - for (int doStructInterface = 0; doStructInterface < 2; doStructInterface++) { - testSortCRS(10, 10, 20, true, doStructInterface); - testSortCRS(100, 100, 2000, true, doStructInterface); - testSortCRS(1000, 1000, 30000, true, doStructInterface); - testSortCRSUnmanaged(true, doStructInterface); - } -} - -TEST_F(TestCategory, common_sort_crs_longrows) { - testSortCRS(1, 50000, 10000, false, false); - testSortCRS(1, 50000, 10000, true, false); -} - -TEST_F(TestCategory, common_sort_merge_crsmatrix) { - testSortAndMerge(); -} - #endif diff --git a/unit_test/graph/Test_Graph_graph_color.hpp b/unit_test/graph/Test_Graph_graph_color.hpp index ef7c14a931..da86546862 100644 --- a/unit_test/graph/Test_Graph_graph_color.hpp +++ b/unit_test/graph/Test_Graph_graph_color.hpp @@ -47,8 +47,8 @@ #include "KokkosGraph_Distance1Color.hpp" #include "KokkosSparse_CrsMatrix.hpp" -#include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosKernels_Handle.hpp" using namespace KokkosKernels; @@ -115,7 +115,7 @@ void test_coloring(lno_t numRows, size_type nnz, lno_t bandwidth, // typedef typename 
lno_view_t::non_const_value_type size_type; lno_t numCols = numRows; - crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, row_size_variance, bandwidth); typename lno_view_t::non_const_type sym_xadj; diff --git a/unit_test/graph/Test_Graph_graph_color_deterministic.hpp b/unit_test/graph/Test_Graph_graph_color_deterministic.hpp index ec718e9aa4..2fd64675ec 100644 --- a/unit_test/graph/Test_Graph_graph_color_deterministic.hpp +++ b/unit_test/graph/Test_Graph_graph_color_deterministic.hpp @@ -48,7 +48,7 @@ #include "KokkosGraph_Distance1Color.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosKernels_Handle.hpp" using namespace KokkosKernels; diff --git a/unit_test/graph/Test_Graph_graph_color_distance2.hpp b/unit_test/graph/Test_Graph_graph_color_distance2.hpp index 70158941a8..45444cd136 100644 --- a/unit_test/graph/Test_Graph_graph_color_distance2.hpp +++ b/unit_test/graph/Test_Graph_graph_color_distance2.hpp @@ -49,8 +49,8 @@ #include "KokkosGraph_Distance2Color.hpp" #include "KokkosGraph_MIS2.hpp" #include "KokkosSparse_CrsMatrix.hpp" -#include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosKernels_Handle.hpp" #include "KokkosKernels_ExecSpaceUtils.hpp" @@ -159,7 +159,7 @@ void test_dist2_coloring(lno_t numVerts, size_type nnz, lno_t bandwidth, KokkosKernelsHandle; // Generate graph, and add some out-of-bounds columns - crsMat A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph @@ -216,7 +216,7 @@ void test_bipartite_symmetric(lno_t numVerts, size_type nnz, lno_t 
bandwidth, KokkosKernelsHandle; // Generate graph, and add some out-of-bounds columns - crsMat A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph @@ -273,7 +273,7 @@ void test_bipartite(lno_t numRows, lno_t numCols, size_type nnz, KokkosKernelsHandle; // Generate graph - crsMat A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, row_size_variance, bandwidth); auto G = A.graph; rowmap_t t_rowmap("rowmap^T", numCols + 1); diff --git a/unit_test/graph/Test_Graph_mis2.hpp b/unit_test/graph/Test_Graph_mis2.hpp index ed3acc3b85..c1b5e179fe 100644 --- a/unit_test/graph/Test_Graph_mis2.hpp +++ b/unit_test/graph/Test_Graph_mis2.hpp @@ -50,7 +50,8 @@ #include "KokkosGraph_ExplicitCoarsening.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosKernels_Handle.hpp" #include "KokkosKernels_ExecSpaceUtils.hpp" @@ -122,7 +123,7 @@ void test_mis2(lno_t numVerts, size_type nnz, lno_t bandwidth, using rowmap_t = typename c_rowmap_t::non_const_type; using entries_t = typename c_entries_t::non_const_type; // Generate graph, and add some out-of-bounds columns - crsMat A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat A = KokkosSparse::Impl::kk_generate_sparse_matrix( numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph @@ -164,7 +165,7 @@ void test_mis2_coarsening(lno_t numVerts, size_type nnz, lno_t bandwidth, using entries_t = typename c_entries_t::non_const_type; using labels_t = entries_t; // Generate graph, and add some out-of-bounds columns - crsMat A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat A = 
KokkosSparse::Impl::kk_generate_sparse_matrix( numVerts, numVerts, nnz, row_size_variance, bandwidth); auto G = A.graph; // Symmetrize the graph diff --git a/unit_test/sparse/Test_Sparse.hpp b/unit_test/sparse/Test_Sparse.hpp index 65cbb40ca5..e75eb1ce6a 100644 --- a/unit_test/sparse/Test_Sparse.hpp +++ b/unit_test/sparse/Test_Sparse.hpp @@ -13,12 +13,14 @@ #include "Test_Sparse_spgemm_jacobi.hpp" #include "Test_Sparse_spgemm.hpp" #include "Test_Sparse_bspgemm.hpp" +#include "Test_Sparse_SortCrs.hpp" #include "Test_Sparse_spiluk.hpp" #include "Test_Sparse_spmv.hpp" #include "Test_Sparse_spmv_blockcrs.hpp" #include "Test_Sparse_spmv_bsr.hpp" #include "Test_Sparse_sptrsv.hpp" #include "Test_Sparse_trsv.hpp" +#include "Test_Sparse_Transpose.hpp" #include "Test_Sparse_TestUtils_RandCscMat.hpp" #include "Test_Sparse_csc2csr.hpp" diff --git a/unit_test/sparse/Test_Sparse_SortCrs.hpp b/unit_test/sparse/Test_Sparse_SortCrs.hpp new file mode 100644 index 0000000000..a4d30b40a1 --- /dev/null +++ b/unit_test/sparse/Test_Sparse_SortCrs.hpp @@ -0,0 +1,310 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Test_Sparse_SortCrs.hpp +/// \brief Tests for sort_crs_matrix and sort_crs_graph in +/// KokkosSparse_SortCrs.hpp + +#ifndef KOKKOSSPARSE_SORTCRSTEST_HPP +#define KOKKOSSPARSE_SORTCRSTEST_HPP + +#include +#include +#include +#include "KokkosSparse_IOUtils.hpp" +#include +#include +#include +#include +#include +#include + +template +void testSortCRS(default_lno_t numRows, default_lno_t numCols, + default_size_type nnz, bool doValues, bool doStructInterface) { + using scalar_t = default_scalar; + using lno_t = default_lno_t; + using size_type = default_size_type; + using mem_space = typename exec_space::memory_space; + using device_t = Kokkos::Device; + using crsMat_t = + KokkosSparse::CrsMatrix; + using rowmap_t = typename crsMat_t::row_map_type; + using entries_t = typename crsMat_t::index_type; + using values_t = typename crsMat_t::values_type; + // Create a 
random matrix on device + // IMPORTANT: kk_generate_sparse_matrix does not sort the rows, if it did this + // wouldn't test anything + crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( + numRows, numCols, nnz, 2, numCols / 2); + auto rowmap = A.graph.row_map; + auto entries = A.graph.entries; + auto values = A.values; + Kokkos::View rowmapHost("rowmap host", + numRows + 1); + Kokkos::View entriesHost("sorted entries host", + nnz); + Kokkos::View valuesHost("sorted values host", + nnz); + Kokkos::deep_copy(rowmapHost, rowmap); + Kokkos::deep_copy(entriesHost, entries); + Kokkos::deep_copy(valuesHost, values); + struct ColValue { + ColValue() {} + ColValue(lno_t c, scalar_t v) : col(c), val(v) {} + bool operator<(const ColValue& rhs) const { return col < rhs.col; } + bool operator==(const ColValue& rhs) const { + return col == rhs.col && val == rhs.val; + } + lno_t col; + scalar_t val; + }; + // sort one row at a time on host using STL. + { + for (lno_t i = 0; i < numRows; i++) { + std::vector rowCopy; + for (size_type j = rowmapHost(i); j < rowmapHost(i + 1); j++) + rowCopy.emplace_back(entriesHost(j), valuesHost(j)); + std::sort(rowCopy.begin(), rowCopy.end()); + // write sorted row back + for (size_t j = 0; j < rowCopy.size(); j++) { + entriesHost(rowmapHost(i) + j) = rowCopy[j].col; + valuesHost(rowmapHost(i) + j) = rowCopy[j].val; + } + } + } + // call the actual sort routine being tested + if (doValues) { + if (doStructInterface) { + KokkosSparse::sort_crs_matrix(A); + } else { + KokkosSparse::sort_crs_matrix( + A.graph.row_map, A.graph.entries, A.values); + } + } else { + if (doStructInterface) { + KokkosSparse::sort_crs_graph(A.graph); + } else { + KokkosSparse::sort_crs_graph( + A.graph.row_map, A.graph.entries); + } + } + // Copy to host and compare + Kokkos::View entriesOut("sorted entries host", + nnz); + Kokkos::View valuesOut("sorted values host", + nnz); + Kokkos::deep_copy(entriesOut, entries); + Kokkos::deep_copy(valuesOut, values); + for 
(size_type i = 0; i < nnz; i++) { + EXPECT_EQ(entriesHost(i), entriesOut(i)) + << "Sorted column indices are wrong!"; + if (doValues) { + EXPECT_EQ(valuesHost(i), valuesOut(i)) << "Sorted values are wrong!"; + } + } +} + +template +void testSortCRSUnmanaged(bool doValues, bool doStructInterface) { + // This test is about bug #960. + using scalar_t = default_scalar; + using lno_t = default_lno_t; + using size_type = default_size_type; + using mem_space = typename exec_space::memory_space; + using device_t = Kokkos::Device; + using crsMat_t = + KokkosSparse::CrsMatrix, + size_type>; + using crsMat_Managed_t = + KokkosSparse::CrsMatrix; + using rowmap_t = typename crsMat_t::row_map_type; + using entries_t = typename crsMat_t::index_type; + using values_t = typename crsMat_t::values_type; + const lno_t numRows = 50; + const lno_t numCols = numRows; + size_type nnz = numRows * 5; + // Create a random matrix on device + // IMPORTANT: kk_generate_sparse_matrix does not sort the rows, if it did this + // wouldn't test anything + crsMat_Managed_t A_managed = + KokkosSparse::Impl::kk_generate_sparse_matrix( + numRows, numCols, nnz, 2, numCols / 2); + crsMat_t A(A_managed); + auto rowmap = A.graph.row_map; + auto entries = A.graph.entries; + auto values = A.values; + if (doValues) { + if (doStructInterface) { + KokkosSparse::sort_crs_matrix(A); + } else { + KokkosSparse::sort_crs_matrix( + A.graph.row_map, A.graph.entries, A.values); + } + } else { + if (doStructInterface) { + KokkosSparse::sort_crs_graph(A.graph); + } else { + KokkosSparse::sort_crs_graph( + A.graph.row_map, A.graph.entries); + } + } +} + +template +void testSortAndMerge() { + using size_type = default_size_type; + using lno_t = default_lno_t; + using scalar_t = default_scalar; + using mem_space = typename exec_space::memory_space; + using device_t = Kokkos::Device; + using crsMat_t = + KokkosSparse::CrsMatrix; + using rowmap_t = typename crsMat_t::row_map_type::non_const_type; + using entries_t = typename 
crsMat_t::index_type; + using values_t = typename crsMat_t::values_type; + using Kokkos::HostSpace; + using Kokkos::MemoryTraits; + using Kokkos::Unmanaged; + // Create a small CRS matrix on host + std::vector inRowmap = {0, 4, 4, 5, 7, 10}; + std::vector inEntries = { + 4, 3, 5, 3, // row 0 + // row 1 has no entries + 6, // row 2 + 2, 2, // row 3 + 0, 1, 2 // row 4 + }; + // note: choosing values that can be represented exactly by float + std::vector inValues = { + 1.5, 4, 1, -3, // row 0 + // row 1 + 2, // row 2 + -1, -2, // row 3 + 0, 3.5, -2.25 // row 4 + }; + lno_t nrows = 5; + lno_t ncols = 7; + size_type nnz = inEntries.size(); + Kokkos::View> hostInRowmap( + inRowmap.data(), nrows + 1); + Kokkos::View> hostInEntries( + inEntries.data(), nnz); + Kokkos::View> hostInValues( + inValues.data(), nnz); + rowmap_t devInRowmap("", nrows + 1); + entries_t devInEntries("", nnz); + values_t devInValues("", nnz); + Kokkos::deep_copy(devInRowmap, hostInRowmap); + Kokkos::deep_copy(devInEntries, hostInEntries); + Kokkos::deep_copy(devInValues, hostInValues); + crsMat_t input("Input", nrows, ncols, nnz, devInValues, devInRowmap, + devInEntries); + crsMat_t output = KokkosSparse::sort_and_merge_matrix(input); + exec_space().fence(); + EXPECT_EQ(output.numRows(), nrows); + EXPECT_EQ(output.numCols(), ncols); + auto outRowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), + output.graph.row_map); + auto outEntries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), + output.graph.entries); + auto outValues = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), output.values); + // Expect 2 merges to have taken place + std::vector goldRowmap = {0, 3, 3, 4, 5, 8}; + std::vector goldEntries = { + 3, 4, 5, // row 0 + // row 1 has no entries + 6, // row 2 + 2, // row 3 + 0, 1, 2 // row 4 + }; + // note: choosing values that can be represented exactly by float + std::vector goldValues = { + 1, 1.5, 1, // row 0 + // row 1 + 2, // row 2 + -3, // row 3 + 0, 
3.5, -2.25 // row 4 + }; + EXPECT_EQ(goldRowmap.size(), outRowmap.extent(0)); + EXPECT_EQ(goldEntries.size(), outEntries.extent(0)); + EXPECT_EQ(goldValues.size(), outValues.extent(0)); + EXPECT_EQ(goldValues.size(), output.nnz()); + for (lno_t i = 0; i < nrows + 1; i++) EXPECT_EQ(goldRowmap[i], outRowmap(i)); + for (size_type i = 0; i < output.nnz(); i++) { + EXPECT_EQ(goldEntries[i], outEntries(i)); + EXPECT_EQ(goldValues[i], outValues(i)); + } +} + +TEST_F(TestCategory, common_sort_crsgraph) { + for (int doStructInterface = 0; doStructInterface < 2; doStructInterface++) { + testSortCRS(10, 10, 20, false, doStructInterface); + testSortCRS(100, 100, 2000, false, doStructInterface); + testSortCRS(1000, 1000, 30000, false, doStructInterface); + testSortCRSUnmanaged(false, doStructInterface); + } +} + +TEST_F(TestCategory, common_sort_crsmatrix) { + for (int doStructInterface = 0; doStructInterface < 2; doStructInterface++) { + testSortCRS(10, 10, 20, true, doStructInterface); + testSortCRS(100, 100, 2000, true, doStructInterface); + testSortCRS(1000, 1000, 30000, true, doStructInterface); + testSortCRSUnmanaged(true, doStructInterface); + } +} + +TEST_F(TestCategory, common_sort_crs_longrows) { + testSortCRS(1, 50000, 10000, false, false); + testSortCRS(1, 50000, 10000, true, false); +} + +TEST_F(TestCategory, common_sort_merge_crsmatrix) { + testSortAndMerge(); +} + +#endif // KOKKOSSPARSE_SORTCRSTEST_HPP diff --git a/unit_test/common/Test_Common_Transpose.hpp b/unit_test/sparse/Test_Sparse_Transpose.hpp similarity index 95% rename from unit_test/common/Test_Common_Transpose.hpp rename to unit_test/sparse/Test_Sparse_Transpose.hpp index fba29da81d..7431d0c485 100644 --- a/unit_test/common/Test_Common_Transpose.hpp +++ b/unit_test/sparse/Test_Sparse_Transpose.hpp @@ -49,11 +49,12 @@ #include #include -#include -#include +#include #include +#include #include #include +#include template struct ExactCompare { @@ -85,7 +86,7 @@ void testTranspose(int numRows, int 
numCols, bool doValues) { using values_t = typename crsMat_t::values_type::non_const_type; size_type nnz = 10 * numRows; // Generate a matrix that has 0 entries in some rows - crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, 3 * 10, numRows / 2); // compute the transpose while unsorted, then transpose again rowmap_t t_rowmap("Rowmap^T", numCols + 1); // this view is initialized to 0 @@ -124,8 +125,8 @@ void testTranspose(int numRows, int numCols, bool doValues) { } // Sort both the transpose-transpose, and the original matrix (to compare // directly) - KokkosKernels::sort_crs_matrix(input_mat); - KokkosKernels::sort_crs_matrix( + KokkosSparse::sort_crs_matrix(input_mat); + KokkosSparse::sort_crs_matrix( tt_rowmap, tt_entries, tt_values); // The views should now be exactly identical, since they represent the same // matrix and are sorted diff --git a/unit_test/sparse/Test_Sparse_Utils_cusparse.hpp b/unit_test/sparse/Test_Sparse_Utils_cusparse.hpp index 3d85ec394a..0ad16c54d0 100644 --- a/unit_test/sparse/Test_Sparse_Utils_cusparse.hpp +++ b/unit_test/sparse/Test_Sparse_Utils_cusparse.hpp @@ -7,7 +7,7 @@ #include #include -#include "KokkosKernels_SparseUtils_cusparse.hpp" +#include "KokkosSparse_Utils_cusparse.hpp" void test_cusparse_safe_call() { bool caught_exception = false; diff --git a/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp b/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp index cd90ec39ea..0f4c9b0d67 100644 --- a/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp +++ b/unit_test/sparse/Test_Sparse_block_gauss_seidel.hpp @@ -48,7 +48,8 @@ #include "KokkosKernels_TestUtils.hpp" #include "KokkosKernels_Handle.hpp" #include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include #include #include @@ -200,7 +201,7 @@ void 
test_block_gauss_seidel_rank1(lno_t numRows, size_type nnz, lno_t block_size = params.block_size; crsMat_t crsmat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); lno_view_t pf_rm; @@ -288,7 +289,7 @@ void test_block_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t block_size = params.block_size; crsMat_t crsmat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); lno_view_t pf_rm; diff --git a/unit_test/sparse/Test_Sparse_bspgemm.hpp b/unit_test/sparse/Test_Sparse_bspgemm.hpp index a3ec84fedf..7374ac6a78 100644 --- a/unit_test/sparse/Test_Sparse_bspgemm.hpp +++ b/unit_test/sparse/Test_Sparse_bspgemm.hpp @@ -45,10 +45,11 @@ #include #include -#include "KokkosKernels_SparseUtils.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_Utils.hpp" +#include "KokkosSparse_SortCrs.hpp" #include "KokkosSparse_spgemm.hpp" #include "KokkosSparse_BsrMatrix.hpp" +#include "KokkosSparse_IOUtils.hpp" using namespace KokkosSparse; @@ -120,8 +121,8 @@ bool is_same_block_matrix(bsrMat_t output_mat_actual, return false; } - KokkosKernels::sort_bsr_matrix(output_mat_actual); - KokkosKernels::sort_bsr_matrix(output_mat_reference); + KokkosSparse::sort_bsr_matrix(output_mat_actual); + KokkosSparse::sort_bsr_matrix(output_mat_reference); bool is_identical = true; is_identical = KokkosKernels::Impl::kk_is_identical_view< @@ -187,9 +188,9 @@ void test_bspgemm(lno_t blkDim, lno_t m, lno_t k, lno_t n, size_type nnz, // Generate random compressed sparse row matrix. Randomly generated (non-zero) // values are stored in a 1-D (1 rank) array. 
- bsrMat_t A = KokkosKernels::Impl::kk_generate_sparse_matrix( + bsrMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( blkDim, m, k, nnz, row_size_variance, bandwidth); - bsrMat_t B = KokkosKernels::Impl::kk_generate_sparse_matrix( + bsrMat_t B = KokkosSparse::Impl::kk_generate_sparse_matrix( blkDim, k, n, nnz, row_size_variance, bandwidth); const bool is_empy_case = m < 1 || n < 1 || k < 1 || nnz < 1; diff --git a/unit_test/sparse/Test_Sparse_gauss_seidel.hpp b/unit_test/sparse/Test_Sparse_gauss_seidel.hpp index 6e9661ea62..627a9fc99e 100644 --- a/unit_test/sparse/Test_Sparse_gauss_seidel.hpp +++ b/unit_test/sparse/Test_Sparse_gauss_seidel.hpp @@ -47,6 +47,7 @@ #include #include "KokkosKernels_Handle.hpp" #include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_IOUtils.hpp" //#include #include #include @@ -61,7 +62,7 @@ #include "KokkosSparse_gauss_seidel.hpp" #include "KokkosSparse_partitioning_impl.hpp" #include "KokkosSparse_sor_sequential_impl.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_SortCrs.hpp" #include "KokkosKernels_TestUtils.hpp" // #ifndef kokkos_complex_double @@ -183,7 +184,7 @@ void test_gauss_seidel_rank1(lno_t numRows, size_type nnz, lno_t bandwidth, srand(245); lno_t numCols = numRows; crsMat_t input_mat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); if (symmetric) { // Symmetrize on host, rather than relying on the parallel versions (those @@ -272,7 +273,7 @@ void test_gauss_seidel_rank2(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t numCols = numRows; crsMat_t input_mat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); if (symmetric) { // Symmetrize on host, rather than relying on the parallel 
versions (those @@ -396,7 +397,7 @@ void test_sequential_sor(lno_t numRows, size_type nnz, lno_t bandwidth, crsMat_t; lno_t numCols = numRows; crsMat_t input_mat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); auto rowmap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), input_mat.graph.row_map); @@ -472,7 +473,7 @@ void test_balloon_clustering(lno_t numRows, size_type nnzPerRow, srand(245); size_type nnzTotal = nnzPerRow * numRows; lno_t nnzVariance = nnzPerRow / 4; - crsMat_t A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numRows, nnzTotal, nnzVariance, bandwidth); lno_row_view_t symRowmap; lno_nnz_view_t symEntries; @@ -609,7 +610,7 @@ void test_gauss_seidel_long_rows(lno_t numRows, lno_t numLongRows, rowmap.data(), numRows + 1)); crsMat_t input_mat("A", numRows, numRows, totalEntries, valuesView, rowmapView, entriesView); - input_mat = KokkosKernels::sort_and_merge_matrix(input_mat); + input_mat = KokkosSparse::sort_and_merge_matrix(input_mat); if (symmetric) { // Symmetrize on host, rather than relying on the parallel versions (those // can be tested for symmetric=false) @@ -660,11 +661,11 @@ void test_gauss_seidel_custom_coloring(lno_t numRows, lno_t nnzPerRow) { const scalar_t one = Kokkos::ArithTraits::one(); size_type nnz = nnzPerRow * numRows; crsMat_t input_mat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numRows, nnz, 0, numRows / 10, 2.0 * one); input_mat = Test::symmetrize(input_mat); - input_mat = KokkosKernels::sort_and_merge_matrix(input_mat); + input_mat = KokkosSparse::sort_and_merge_matrix(input_mat); scalar_view_t solution_x( Kokkos::view_alloc(Kokkos::WithoutInitializing, "X (correct)"), 
numRows); create_random_x_vector(solution_x); diff --git a/unit_test/sparse/Test_Sparse_rocsparse.hpp b/unit_test/sparse/Test_Sparse_rocsparse.hpp index 27e0b1f9fd..fe1bf8e9b2 100644 --- a/unit_test/sparse/Test_Sparse_rocsparse.hpp +++ b/unit_test/sparse/Test_Sparse_rocsparse.hpp @@ -7,7 +7,7 @@ #include #include #include -#include "KokkosKernels_SparseUtils_rocsparse.hpp" +#include "KokkosSparse_Utils_rocsparse.hpp" void test_rocsparse_version() { // Print version diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp index a7b9432857..a1e33c0ca6 100644 --- a/unit_test/sparse/Test_Sparse_spgemm.hpp +++ b/unit_test/sparse/Test_Sparse_spgemm.hpp @@ -45,8 +45,8 @@ #include #include -#include "KokkosKernels_SparseUtils.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_Utils.hpp" +#include "KokkosSparse_SortCrs.hpp" #include #include #include @@ -58,6 +58,7 @@ #include #include +#include // This file contains the matrix for test_issue402 #include "matrixIssue402.hpp" @@ -197,8 +198,8 @@ bool is_same_matrix(crsMat_t output_mat_actual, crsMat_t output_mat_reference) { return false; } - KokkosKernels::sort_crs_matrix(output_mat_actual); - KokkosKernels::sort_crs_matrix(output_mat_reference); + KokkosSparse::sort_crs_matrix(output_mat_actual); + KokkosSparse::sort_crs_matrix(output_mat_reference); bool is_identical = true; is_identical = KokkosKernels::Impl::kk_is_identical_view< @@ -264,9 +265,9 @@ void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth, // Generate random compressed sparse row matrix. Randomly generated (non-zero) // values are stored in a 1-D (1 rank) array. 
- crsMat_t A = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t A = KokkosSparse::Impl::kk_generate_sparse_matrix( m, k, nnz, row_size_variance, bandwidth); - crsMat_t B = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t B = KokkosSparse::Impl::kk_generate_sparse_matrix( k, n, nnz, row_size_variance, bandwidth); const bool is_empy_case = m < 1 || n < 1 || k < 1 || nnz < 1; diff --git a/unit_test/sparse/Test_Sparse_spgemm_jacobi.hpp b/unit_test/sparse/Test_Sparse_spgemm_jacobi.hpp index 885b1a07fe..f9db6f4d8d 100644 --- a/unit_test/sparse/Test_Sparse_spgemm_jacobi.hpp +++ b/unit_test/sparse/Test_Sparse_spgemm_jacobi.hpp @@ -45,8 +45,8 @@ #include #include -#include "KokkosKernels_SparseUtils.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_Utils.hpp" +#include "KokkosSparse_SortCrs.hpp" #include #include #include @@ -58,6 +58,7 @@ #include #include +#include using namespace KokkosSparse; using namespace KokkosSparse::Experimental; @@ -154,7 +155,7 @@ bool is_same_mat(crsMat_t output_mat1, crsMat_t output_mat2) { size_t nentries2 = output_mat2.graph.entries.extent(0); size_t nvals2 = output_mat2.values.extent(0); - KokkosKernels::sort_crs_matrix(output_mat1); + KokkosSparse::sort_crs_matrix(output_mat1); if (nrows1 != nrows2) { std::cout << "nrows1:" << nrows1 << " nrows2:" << nrows2 << std::endl; @@ -170,7 +171,7 @@ bool is_same_mat(crsMat_t output_mat1, crsMat_t output_mat2) { return false; } - KokkosKernels::sort_crs_matrix(output_mat2); + KokkosSparse::sort_crs_matrix(output_mat2); bool is_identical = true; is_identical = KokkosKernels::Impl::kk_is_identical_view< @@ -225,7 +226,7 @@ void test_spgemm_jacobi(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t numCols = numRows; crsMat_t input_mat = - KokkosKernels::Impl::kk_generate_diagonally_dominant_sparse_matrix< + KokkosSparse::Impl::kk_generate_diagonally_dominant_sparse_matrix< crsMat_t>(numRows, numCols, nnz, row_size_variance, bandwidth); crsMat_t output_mat2; diff 
--git a/unit_test/sparse/Test_Sparse_spiluk.hpp b/unit_test/sparse/Test_Sparse_spiluk.hpp index 353543b751..8f9ef99063 100644 --- a/unit_test/sparse/Test_Sparse_spiluk.hpp +++ b/unit_test/sparse/Test_Sparse_spiluk.hpp @@ -49,7 +49,7 @@ #include #include -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_CrsMatrix.hpp" #include #include "KokkosBlas1_nrm2.hpp" diff --git a/unit_test/sparse/Test_Sparse_spmv.hpp b/unit_test/sparse/Test_Sparse_spmv.hpp index 5cb729f311..8a15153dce 100644 --- a/unit_test/sparse/Test_Sparse_spmv.hpp +++ b/unit_test/sparse/Test_Sparse_spmv.hpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "KokkosKernels_Controls.hpp" @@ -422,7 +423,7 @@ void test_spmv(lno_t numRows, size_type nnz, lno_t bandwidth, lno_t numCols = numRows; - crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, row_size_variance, bandwidth); lno_t nr = input_mat.numRows(); lno_t nc = input_mat.numCols(); @@ -513,7 +514,7 @@ void test_spmv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, Kokkos::fill_random(b_xt, rand_pool, randomUpperBound(max_x)); Kokkos::fill_random(b_yt, rand_pool, randomUpperBound(max_y)); - crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, row_size_variance, bandwidth); const lno_t max_nnz_per_row = @@ -574,7 +575,7 @@ void test_spmv_mv_heavy(lno_t numRows, size_type nnz, lno_t bandwidth, constexpr mag_t max_y = static_cast(10); constexpr mag_t max_val = static_cast(10); - crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numRows, nnz, row_size_variance, bandwidth); Kokkos::Random_XorShift64_Pool rand_pool( 13718); @@ -889,7 +890,7 @@ void test_spmv_controls(lno_t 
numRows, size_type nnz, lno_t bandwidth, lno_t numCols = numRows; - crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix( + crsMat_t input_mat = KokkosSparse::Impl::kk_generate_sparse_matrix( numRows, numCols, nnz, row_size_variance, bandwidth); lno_t nr = input_mat.numRows(); lno_t nc = input_mat.numCols(); diff --git a/unit_test/sparse/Test_Sparse_sptrsv.hpp b/unit_test/sparse/Test_Sparse_sptrsv.hpp index 0b175da13d..08c5494c88 100644 --- a/unit_test/sparse/Test_Sparse_sptrsv.hpp +++ b/unit_test/sparse/Test_Sparse_sptrsv.hpp @@ -50,7 +50,7 @@ #include #include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_SparseUtils.hpp" +#include "KokkosSparse_Utils.hpp" #include "KokkosSparse_spmv.hpp" #include "KokkosSparse_CrsMatrix.hpp" diff --git a/unit_test/sparse/Test_Sparse_trsv.hpp b/unit_test/sparse/Test_Sparse_trsv.hpp index 4b1f00c98a..776674344a 100644 --- a/unit_test/sparse/Test_Sparse_trsv.hpp +++ b/unit_test/sparse/Test_Sparse_trsv.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -76,7 +77,7 @@ void test_trsv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, // this function creates a dense lower and upper triangular matrix. 
// TODO: SHOULD CHANGE IT TO SPARSE crsMat_t lower_part = - KokkosKernels::Impl::kk_generate_triangular_sparse_matrix( + KokkosSparse::Impl::kk_generate_triangular_sparse_matrix( 'L', numRows, numCols, nnz, row_size_variance, bandwidth); KokkosSparse::spmv("N", alpha, lower_part, b_x_copy, beta, b_y); Test::check_trsv_mv(lower_part, b_x, b_y, b_x_copy, numMV, "L", "N"); @@ -86,7 +87,7 @@ void test_trsv_mv(lno_t numRows, size_type nnz, lno_t bandwidth, // typedef typename Kokkos::View indexview; crsMat_t upper_part = - KokkosKernels::Impl::kk_generate_triangular_sparse_matrix( + KokkosSparse::Impl::kk_generate_triangular_sparse_matrix( 'U', numRows, numCols, nnz, row_size_variance, bandwidth); KokkosSparse::spmv("N", alpha, upper_part, b_x_copy, beta, b_y); Test::check_trsv_mv(upper_part, b_x, b_y, b_x_copy, numMV, "U", "N");