From a6407290f4b393f5b5f20068b6de921c490b544a Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Mon, 21 Aug 2023 00:54:13 -0700 Subject: [PATCH 01/12] Add an utility function to extract diagonal blocks from a crsmatrix --- sparse/src/KokkosSparse_Utils.hpp | 114 ++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/sparse/src/KokkosSparse_Utils.hpp b/sparse/src/KokkosSparse_Utils.hpp index 4039b6f5a7..a33d3e9d8e 100644 --- a/sparse/src/KokkosSparse_Utils.hpp +++ b/sparse/src/KokkosSparse_Utils.hpp @@ -2330,6 +2330,120 @@ void validateCrsMatrix(int m, int n, const Rowmap &rowmapIn, } } +template +void kk_extract_diagonal_blocks_crsmatrix_sequential(const crsMat_t &A, + std::vector& DiagBlk_v) { + using row_map_type = typename crsMat_t::row_map_type; + using entries_type = typename crsMat_t::index_type; + using values_type = typename crsMat_t::values_type; + using row_map_hostmirror_type = typename row_map_type::HostMirror; + using entries_hostmirror_type = typename entries_type::HostMirror; + using values_hostmirror_type = typename values_type::HostMirror; + using int_view1d_type = Kokkos::View; + + using graph_t = typename crsMat_t::StaticCrsGraphType; + using out_row_map_type = typename graph_t::row_map_type::non_const_type; + using out_entries_type = typename graph_t::entries_type::non_const_type; + using out_values_type = typename crsMat_t::values_type::non_const_type; + using out_row_map_hostmirror_type = typename out_row_map_type::HostMirror; + using out_entries_hostmirror_type = typename out_entries_type::HostMirror; + using out_values_hostmirror_type = typename out_values_type::HostMirror; + + row_map_type A_row_map = A.graph.row_map; + entries_type A_entries = A.graph.entries; + values_type A_values = A.values; + + row_map_hostmirror_type A_row_map_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_row_map); + entries_hostmirror_type A_entries_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_entries); + values_hostmirror_type A_values_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_values); + + int A_nrows = static_cast(A_row_map.extent(0))-1; + int n_blocks = static_cast(DiagBlk_v.size()); + + int rows_per_block = ((A_nrows%n_blocks)==0) ? (A_nrows/n_blocks) : (A_nrows/n_blocks+1); + + std::vector row_map_v(n_blocks); + std::vector entries_v(n_blocks); + std::vector values_v (n_blocks); + std::vector row_map_h_v(n_blocks); + std::vector entries_h_v(n_blocks); + std::vector values_h_v (n_blocks); + + int row_start = 0; // first row index of i-th diagonal block + int col_start = 0; // first col index of i-th diagonal block + int nrows, ncols; // Nrows, Ncols of i-th diagonal block + for (int i = 0; i < n_blocks; i++) { + nrows = rows_per_block; + if ((row_start + rows_per_block) > A_nrows) { + nrows = A_nrows - row_start; + } + col_start = row_start; + ncols = nrows; + + // Rowmap of i-th row-oriented sub-matrix + auto A_row_map_sub = Kokkos::subview(A_row_map_h, Kokkos::make_pair(row_start, row_start + nrows + 1)); + + // First round: count i-th non-zeros or size of entries_v[i] + int n_entries = 0; + int_view1d_type first("first", nrows); // first position per row + int_view1d_type last ("last", nrows); // last position per row + + for (int j = 0; j < nrows; j++) { // loop through each row + int k1 = static_cast(A_row_map_sub(j)); + int k2 = static_cast(A_row_map_sub(j + 1)); + int k; + // Assume column indices are sorted in ascending order + // Find the position of the start column in the row + for (k = k1; k < k2; k++) { + int col = static_cast(A_entries_h(k)); + if (col >= col_start) { + break; + } + } + first(j) = k; + // Find the position of the last column in the row + for (k = k2-1; k >= k1; k--) { + int col = static_cast(A_entries_h(k)); + if (col < col_start + ncols) { + break; + } + } + last(j) = k; + n_entries += (last(j) - first(j) + 1); + } + + // Second round: + // - create row_map_v[i] + // - copy A_entries to entries_v[i] and update entries_v[i] with local column indices + // - copy A_values to values_v[i] + row_map_v[i] = out_row_map_type("row_map_v", nrows + 1); + entries_v[i] = out_entries_type("entries_v", n_entries); + values_v[i] = out_values_type ("values_v", n_entries); + row_map_h_v[i] = out_row_map_hostmirror_type("row_map_h_v", nrows + 1); + entries_h_v[i] = out_entries_hostmirror_type("entries_h_v", n_entries); + values_h_v[i] = out_values_hostmirror_type ("values_h_v", n_entries); + int first_ = 0; + for (int j = 0; j < nrows; j++) { // loop through each row + int nnz = last(j) - first(j) + 1; + row_map_h_v[i](j) = first_; + for (int k = 0; k < nnz; k++) { + entries_h_v[i](first_ + k) = A_entries_h(first(j) + k) - col_start; + values_h_v[i] (first_ + k) = A_values_h (first(j) + k); + } + first_ += nnz; + } + row_map_h_v[i](nrows) = n_entries; // last element + + Kokkos::deep_copy(row_map_v[i], row_map_h_v[i]); + Kokkos::deep_copy(entries_v[i], entries_h_v[i]); + Kokkos::deep_copy(values_v[i], values_h_v[i]); + + DiagBlk_v[i] = crsMat_t("CrsMatrix", nrows, ncols, n_entries, values_v[i], row_map_v[i], entries_v[i]); + + row_start += nrows; + } +} + } // namespace Impl using Impl::isCrsGraphSorted; From 90bef9f9efdb00a132b1a7710fa0e8f1d87d04b6 Mon Sep 17 00:00:00 2001 From: "Vinh Quang Dang (-EXP)" Date: Mon, 21 Aug 2023 02:08:32 -0600 Subject: [PATCH 02/12] Apply clang format --- sparse/src/KokkosSparse_Utils.hpp | 80 +++++++++++++++++-------------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/sparse/src/KokkosSparse_Utils.hpp b/sparse/src/KokkosSparse_Utils.hpp index a33d3e9d8e..0e5dc6d9b0 100644 --- a/sparse/src/KokkosSparse_Utils.hpp +++ b/sparse/src/KokkosSparse_Utils.hpp @@ -2331,17 +2331,18 @@ void validateCrsMatrix(int m, int n, const Rowmap &rowmapIn, } template -void kk_extract_diagonal_blocks_crsmatrix_sequential(const crsMat_t &A, - std::vector& DiagBlk_v) { - using row_map_type = typename crsMat_t::row_map_type; - using entries_type = typename crsMat_t::index_type; - using values_type = typename crsMat_t::values_type; +void kk_extract_diagonal_blocks_crsmatrix_sequential( + const crsMat_t &A, std::vector &DiagBlk_v) { + using row_map_type = typename crsMat_t::row_map_type; + using entries_type = typename crsMat_t::index_type; + using values_type = typename crsMat_t::values_type; using row_map_hostmirror_type = typename row_map_type::HostMirror; using entries_hostmirror_type = typename entries_type::HostMirror; using values_hostmirror_type = typename values_type::HostMirror; - using int_view1d_type = Kokkos::View; + using int_view1d_type = + Kokkos::View; - using graph_t = typename crsMat_t::StaticCrsGraphType; + using graph_t = typename crsMat_t::StaticCrsGraphType; using out_row_map_type = typename graph_t::row_map_type::non_const_type; using out_entries_type = typename graph_t::entries_type::non_const_type; using out_values_type = typename crsMat_t::values_type::non_const_type; @@ -2351,44 +2352,49 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential(const crsMat_t &A, row_map_type A_row_map = A.graph.row_map; entries_type A_entries = A.graph.entries; - values_type A_values = A.values; + values_type A_values = A.values; - row_map_hostmirror_type A_row_map_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_row_map); - entries_hostmirror_type A_entries_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_entries); - values_hostmirror_type A_values_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_values); + row_map_hostmirror_type A_row_map_h = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_row_map); + entries_hostmirror_type A_entries_h = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_entries); + values_hostmirror_type A_values_h = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_values); - int A_nrows = static_cast(A_row_map.extent(0))-1; + int A_nrows = static_cast(A_row_map.extent(0)) - 1; int n_blocks = static_cast(DiagBlk_v.size()); - - int rows_per_block = ((A_nrows%n_blocks)==0) ? (A_nrows/n_blocks) : (A_nrows/n_blocks+1); + + int rows_per_block = ((A_nrows % n_blocks) == 0) ? (A_nrows / n_blocks) + : (A_nrows / n_blocks + 1); std::vector row_map_v(n_blocks); std::vector entries_v(n_blocks); - std::vector values_v (n_blocks); + std::vector values_v(n_blocks); std::vector row_map_h_v(n_blocks); std::vector entries_h_v(n_blocks); - std::vector values_h_v (n_blocks); + std::vector values_h_v(n_blocks); - int row_start = 0; // first row index of i-th diagonal block - int col_start = 0; // first col index of i-th diagonal block - int nrows, ncols; // Nrows, Ncols of i-th diagonal block + int row_start = 0; // first row index of i-th diagonal block + int col_start = 0; // first col index of i-th diagonal block + int nrows, ncols; // Nrows, Ncols of i-th diagonal block for (int i = 0; i < n_blocks; i++) { nrows = rows_per_block; if ((row_start + rows_per_block) > A_nrows) { nrows = A_nrows - row_start; } col_start = row_start; - ncols = nrows; + ncols = nrows; // Rowmap of i-th row-oriented sub-matrix - auto A_row_map_sub = Kokkos::subview(A_row_map_h, Kokkos::make_pair(row_start, row_start + nrows + 1)); + auto A_row_map_sub = Kokkos::subview( + A_row_map_h, Kokkos::make_pair(row_start, row_start + nrows + 1)); // First round: count i-th non-zeros or size of entries_v[i] int n_entries = 0; - int_view1d_type first("first", nrows); // first position per row - int_view1d_type last ("last", nrows); // last position per row + int_view1d_type first("first", nrows); // first position per row + int_view1d_type last("last", nrows); // last position per row - for (int j = 0; j < nrows; j++) { // loop through each row + for (int j = 0; j < nrows; j++) { // loop through each row int k1 = static_cast(A_row_map_sub(j)); int k2 = static_cast(A_row_map_sub(j + 1)); int k; @@ -2402,7 +2408,7 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential(const crsMat_t &A, } first(j) = k; // Find the position of the last column in the row - for (k = k2-1; k >= k1; k--) { + for (k = k2 - 1; k >= k1; k--) { int col = static_cast(A_entries_h(k)); if (col < col_start + ncols) { break; @@ -2414,31 +2420,33 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential(const crsMat_t &A, // Second round: // - create row_map_v[i] - // - copy A_entries to entries_v[i] and update entries_v[i] with local column indices + // - copy A_entries to entries_v[i] and update entries_v[i] with local + // column indices // - copy A_values to values_v[i] - row_map_v[i] = out_row_map_type("row_map_v", nrows + 1); - entries_v[i] = out_entries_type("entries_v", n_entries); - values_v[i] = out_values_type ("values_v", n_entries); + row_map_v[i] = out_row_map_type("row_map_v", nrows + 1); + entries_v[i] = out_entries_type("entries_v", n_entries); + values_v[i] = out_values_type("values_v", n_entries); row_map_h_v[i] = out_row_map_hostmirror_type("row_map_h_v", nrows + 1); entries_h_v[i] = out_entries_hostmirror_type("entries_h_v", n_entries); - values_h_v[i] = out_values_hostmirror_type ("values_h_v", n_entries); - int first_ = 0; - for (int j = 0; j < nrows; j++) { // loop through each row - int nnz = last(j) - first(j) + 1; + values_h_v[i] = out_values_hostmirror_type("values_h_v", n_entries); + int first_ = 0; + for (int j = 0; j < nrows; j++) { // loop through each row + int nnz = last(j) - first(j) + 1; row_map_h_v[i](j) = first_; for (int k = 0; k < nnz; k++) { entries_h_v[i](first_ + k) = A_entries_h(first(j) + k) - col_start; - values_h_v[i] (first_ + k) = A_values_h (first(j) + k); + values_h_v[i](first_ + k) = A_values_h(first(j) + k); } first_ += nnz; } - row_map_h_v[i](nrows) = n_entries; // last element + row_map_h_v[i](nrows) = n_entries; // last element Kokkos::deep_copy(row_map_v[i], row_map_h_v[i]); Kokkos::deep_copy(entries_v[i], entries_h_v[i]); Kokkos::deep_copy(values_v[i], values_h_v[i]); - DiagBlk_v[i] = crsMat_t("CrsMatrix", nrows, ncols, n_entries, values_v[i], row_map_v[i], entries_v[i]); + DiagBlk_v[i] = crsMat_t("CrsMatrix", nrows, ncols, n_entries, values_v[i], + row_map_v[i], entries_v[i]); row_start += nrows; } From 25a55f47e384e371c2bec9e4e84def273f1f48f6 Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Tue, 29 Aug 2023 10:35:07 -0700 Subject: [PATCH 03/12] Address PR review comments --- sparse/src/KokkosSparse_Utils.hpp | 193 +++++++++++++++++------------- 1 file changed, 109 insertions(+), 84 deletions(-) diff --git a/sparse/src/KokkosSparse_Utils.hpp b/sparse/src/KokkosSparse_Utils.hpp index 0e5dc6d9b0..45b547a5da 100644 --- a/sparse/src/KokkosSparse_Utils.hpp +++ b/sparse/src/KokkosSparse_Utils.hpp @@ -2339,8 +2339,6 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( using row_map_hostmirror_type = typename row_map_type::HostMirror; using entries_hostmirror_type = typename entries_type::HostMirror; using values_hostmirror_type = typename values_type::HostMirror; - using int_view1d_type = - Kokkos::View; using graph_t = typename crsMat_t::StaticCrsGraphType; using out_row_map_type = typename graph_t::row_map_type::non_const_type; @@ -2350,6 +2348,10 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( using out_entries_hostmirror_type = typename out_entries_type::HostMirror; using out_values_hostmirror_type = typename out_values_type::HostMirror; + using ordinal_type = typename crsMat_t::non_const_ordinal_type; + using size_type = typename crsMat_t::non_const_size_type; + using offset_view1d_type = Kokkos::View; + row_map_type A_row_map = A.graph.row_map; entries_type A_entries = A.graph.entries; values_type A_values = A.values; @@ -2361,95 +2363,118 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( values_hostmirror_type A_values_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_values); - int A_nrows = static_cast(A_row_map.extent(0)) - 1; - int n_blocks = static_cast(DiagBlk_v.size()); - - int rows_per_block = ((A_nrows % n_blocks) == 0) ? (A_nrows / n_blocks) - : (A_nrows / n_blocks + 1); - - std::vector row_map_v(n_blocks); - std::vector entries_v(n_blocks); - std::vector values_v(n_blocks); - std::vector row_map_h_v(n_blocks); - std::vector entries_h_v(n_blocks); - std::vector values_h_v(n_blocks); - - int row_start = 0; // first row index of i-th diagonal block - int col_start = 0; // first col index of i-th diagonal block - int nrows, ncols; // Nrows, Ncols of i-th diagonal block - for (int i = 0; i < n_blocks; i++) { - nrows = rows_per_block; - if ((row_start + rows_per_block) > A_nrows) { - nrows = A_nrows - row_start; + ordinal_type A_nrows = static_cast(A_row_map.extent(0)) - 1; + ordinal_type A_ncols = static_cast(A.numCols()); + ordinal_type n_blocks = static_cast(DiagBlk_v.size()); + + if (A_nrows != A_ncols) { + std::ostringstream os; + os << "The diagonal block extraction only works with square matrices -- matrix A: " << A_nrows << " x " << A_ncols; + throw std::runtime_error(os.str()); + } + + if (n_blocks == 1) { + // One block case: simply shallow copy A to DiagBlk_v[0] + DiagBlk_v[0] = crsMat_t(A); + } + else { + // n_blocks > 1 + if (A_nrows == 0) { + // Degenerate case: A is an empty matrix + for (ordinal_type i = 0; i < n_blocks; i++) { + DiagBlk_v[i] = crsMat_t(); + } } - col_start = row_start; - ncols = nrows; - - // Rowmap of i-th row-oriented sub-matrix - auto A_row_map_sub = Kokkos::subview( - A_row_map_h, Kokkos::make_pair(row_start, row_start + nrows + 1)); - - // First round: count i-th non-zeros or size of entries_v[i] - int n_entries = 0; - int_view1d_type first("first", nrows); // first position per row - int_view1d_type last("last", nrows); // last position per row - - for (int j = 0; j < nrows; j++) { // loop through each row - int k1 = static_cast(A_row_map_sub(j)); - int k2 = static_cast(A_row_map_sub(j + 1)); - int k; - // Assume column indices are sorted in ascending order - // Find the position of the start column in the row - for (k = k1; k < k2; k++) { - int col = static_cast(A_entries_h(k)); - if (col >= col_start) { - break; + else { + // A_nrows >= 1 + ordinal_type rows_per_block = ((A_nrows % n_blocks) == 0) ? (A_nrows / n_blocks) : (A_nrows / n_blocks + 1); + + std::vector row_map_v(n_blocks); + std::vector entries_v(n_blocks); + std::vector values_v(n_blocks); + std::vector row_map_h_v(n_blocks); + std::vector entries_h_v(n_blocks); + std::vector values_h_v(n_blocks); + + ordinal_type row_start = 0; // first row index of i-th diagonal block + ordinal_type col_start = 0; // first col index of i-th diagonal block + ordinal_type nrows, ncols; // Nrows, Ncols of i-th diagonal block + + for (ordinal_type i = 0; i < n_blocks; i++) { + nrows = rows_per_block; + if ((row_start + rows_per_block) > A_nrows) { + nrows = A_nrows - row_start; } - } - first(j) = k; - // Find the position of the last column in the row - for (k = k2 - 1; k >= k1; k--) { - int col = static_cast(A_entries_h(k)); - if (col < col_start + ncols) { - break; + col_start = row_start; + ncols = nrows; + + // Rowmap of i-th row-oriented sub-matrix + auto A_row_map_sub = Kokkos::subview( + A_row_map_h, Kokkos::make_pair(row_start, row_start + nrows + 1)); + + // First round: count i-th non-zeros or size of entries_v[i] + size_type n_entries = 0; + offset_view1d_type first("first", nrows); // first position per row + offset_view1d_type last("last", nrows); // last position per row + + for (ordinal_type j = 0; j < nrows; j++) { // loop through each row + size_type k1 = A_row_map_sub(j); + size_type k2 = A_row_map_sub(j + 1); + size_type k; + // Assume column indices are sorted in ascending order + // Find the position of the start column in the row + for (k = k1; k < k2; k++) { + ordinal_type col = A_entries_h(k); + if (col >= col_start) { + break; + } + } + first(j) = k; + // Find the position of the last column in the row + for (k = k2 - 1; k >= k1; k--) { + ordinal_type col = A_entries_h(k); + if (col < col_start + ncols) { + break; + } + } + last(j) = k; + n_entries += (last(j) - first(j) + 1); } - } - last(j) = k; - n_entries += (last(j) - first(j) + 1); - } - // Second round: - // - create row_map_v[i] - // - copy A_entries to entries_v[i] and update entries_v[i] with local - // column indices - // - copy A_values to values_v[i] - row_map_v[i] = out_row_map_type("row_map_v", nrows + 1); - entries_v[i] = out_entries_type("entries_v", n_entries); - values_v[i] = out_values_type("values_v", n_entries); - row_map_h_v[i] = out_row_map_hostmirror_type("row_map_h_v", nrows + 1); - entries_h_v[i] = out_entries_hostmirror_type("entries_h_v", n_entries); - values_h_v[i] = out_values_hostmirror_type("values_h_v", n_entries); - int first_ = 0; - for (int j = 0; j < nrows; j++) { // loop through each row - int nnz = last(j) - first(j) + 1; - row_map_h_v[i](j) = first_; - for (int k = 0; k < nnz; k++) { - entries_h_v[i](first_ + k) = A_entries_h(first(j) + k) - col_start; - values_h_v[i](first_ + k) = A_values_h(first(j) + k); - } - first_ += nnz; - } - row_map_h_v[i](nrows) = n_entries; // last element + // Second round: + // - create row_map_v[i] + // - copy A_entries to entries_v[i] and update entries_v[i] with local + // column indices + // - copy A_values to values_v[i] + row_map_v[i] = out_row_map_type("row_map_v", nrows + 1); + entries_v[i] = out_entries_type("entries_v", n_entries); + values_v[i] = out_values_type("values_v", n_entries); + row_map_h_v[i] = out_row_map_hostmirror_type("row_map_h_v", nrows + 1); + entries_h_v[i] = out_entries_hostmirror_type("entries_h_v", n_entries); + values_h_v[i] = out_values_hostmirror_type("values_h_v", n_entries); + size_type first_ = 0; + for (ordinal_type j = 0; j < nrows; j++) { // loop through each row + size_type nnz = last(j) - first(j) + 1; + row_map_h_v[i](j) = first_; + for (size_type k = 0; k < nnz; k++) { + entries_h_v[i](first_ + k) = A_entries_h(first(j) + k) - col_start; + values_h_v[i](first_ + k) = A_values_h(first(j) + k); + } + first_ += nnz; + } + row_map_h_v[i](nrows) = n_entries; // last element - Kokkos::deep_copy(row_map_v[i], row_map_h_v[i]); - Kokkos::deep_copy(entries_v[i], entries_h_v[i]); - Kokkos::deep_copy(values_v[i], values_h_v[i]); + Kokkos::deep_copy(row_map_v[i], row_map_h_v[i]); + Kokkos::deep_copy(entries_v[i], entries_h_v[i]); + Kokkos::deep_copy(values_v[i], values_h_v[i]); - DiagBlk_v[i] = crsMat_t("CrsMatrix", nrows, ncols, n_entries, values_v[i], - row_map_v[i], entries_v[i]); + DiagBlk_v[i] = crsMat_t("CrsMatrix", nrows, ncols, n_entries, values_v[i], + row_map_v[i], entries_v[i]); - row_start += nrows; - } + row_start += nrows; + } // for (ordinal_type i = 0; i < n_blocks; i++) + } // A_nrows >= 1 + } // n_blocks > 1 } } // namespace Impl From 58f27b0e9c8677e459eb35a6adedb6303287f51d Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Tue, 29 Aug 2023 11:51:23 -0600 Subject: [PATCH 04/12] sparse/src: Add doxygen-style comment --- sparse/src/KokkosSparse_Utils.hpp | 46 ++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/sparse/src/KokkosSparse_Utils.hpp b/sparse/src/KokkosSparse_Utils.hpp index 45b547a5da..2c20dc71f4 100644 --- a/sparse/src/KokkosSparse_Utils.hpp +++ b/sparse/src/KokkosSparse_Utils.hpp @@ -2330,6 +2330,17 @@ void validateCrsMatrix(int m, int n, const Rowmap &rowmapIn, } } +/** + * @brief Extract the diagonal blocks out of a crs matrix. + * This is a blocking function that runs on the host. + * + * @tparam crsMat_t The type of the CRS matrix + * @param A The CrsMatrix. + * @param DiagBlk_v [in/out] The location for extracting the diagonal blocks. + * + * Usage Example: + * kk_extract_diagonal_blocks_crsmatrix_sequential(A_in, diagBlk_in_b); + */ template void kk_extract_diagonal_blocks_crsmatrix_sequential( const crsMat_t &A, std::vector &DiagBlk_v) { @@ -2350,7 +2361,8 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( using ordinal_type = typename crsMat_t::non_const_ordinal_type; using size_type = typename crsMat_t::non_const_size_type; - using offset_view1d_type = Kokkos::View; + using offset_view1d_type = + Kokkos::View; row_map_type A_row_map = A.graph.row_map; entries_type A_entries = A.graph.entries; @@ -2369,33 +2381,35 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( if (A_nrows != A_ncols) { std::ostringstream os; - os << "The diagonal block extraction only works with square matrices -- matrix A: " << A_nrows << " x " << A_ncols; + os << "The diagonal block extraction only works with square matrices -- " + "matrix A: " + << A_nrows << " x " << A_ncols; throw std::runtime_error(os.str()); } if (n_blocks == 1) { // One block case: simply shallow copy A to DiagBlk_v[0] DiagBlk_v[0] = crsMat_t(A); - } - else { + } else { // n_blocks > 1 if (A_nrows == 0) { // Degenerate case: A is an empty matrix for (ordinal_type i = 0; i < n_blocks; i++) { DiagBlk_v[i] = crsMat_t(); } - } - else { + } else { // A_nrows >= 1 - ordinal_type rows_per_block = ((A_nrows % n_blocks) == 0) ? (A_nrows / n_blocks) : (A_nrows / n_blocks + 1); - + ordinal_type rows_per_block = ((A_nrows % n_blocks) == 0) + ? (A_nrows / n_blocks) + : (A_nrows / n_blocks + 1); + std::vector row_map_v(n_blocks); std::vector entries_v(n_blocks); std::vector values_v(n_blocks); std::vector row_map_h_v(n_blocks); std::vector entries_h_v(n_blocks); std::vector values_h_v(n_blocks); - + ordinal_type row_start = 0; // first row index of i-th diagonal block ordinal_type col_start = 0; // first col index of i-th diagonal block ordinal_type nrows, ncols; // Nrows, Ncols of i-th diagonal block @@ -2452,9 +2466,9 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( row_map_h_v[i] = out_row_map_hostmirror_type("row_map_h_v", nrows + 1); entries_h_v[i] = out_entries_hostmirror_type("entries_h_v", n_entries); values_h_v[i] = out_values_hostmirror_type("values_h_v", n_entries); - size_type first_ = 0; + size_type first_ = 0; for (ordinal_type j = 0; j < nrows; j++) { // loop through each row - size_type nnz = last(j) - first(j) + 1; + size_type nnz = last(j) - first(j) + 1; row_map_h_v[i](j) = first_; for (size_type k = 0; k < nnz; k++) { entries_h_v[i](first_ + k) = A_entries_h(first(j) + k) - col_start; @@ -2468,13 +2482,13 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( Kokkos::deep_copy(entries_v[i], entries_h_v[i]); Kokkos::deep_copy(values_v[i], values_h_v[i]); - DiagBlk_v[i] = crsMat_t("CrsMatrix", nrows, ncols, n_entries, values_v[i], - row_map_v[i], entries_v[i]); + DiagBlk_v[i] = crsMat_t("CrsMatrix", nrows, ncols, n_entries, + values_v[i], row_map_v[i], entries_v[i]); row_start += nrows; - } // for (ordinal_type i = 0; i < n_blocks; i++) - } // A_nrows >= 1 - } // n_blocks > 1 + } // for (ordinal_type i = 0; i < n_blocks; i++) + } // A_nrows >= 1 + } // n_blocks > 1 } } // namespace Impl From 2d963e9f6657057bdb2cb8e6ef3fb9dc9980654e Mon Sep 17 00:00:00 2001 From: "Vinh Quang Dang (-EXP)" Date: Tue, 29 Aug 2023 11:55:34 -0600 Subject: [PATCH 05/12] Apply clang format --- sparse/src/KokkosSparse_Utils.hpp | 35 +++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/sparse/src/KokkosSparse_Utils.hpp b/sparse/src/KokkosSparse_Utils.hpp index 45b547a5da..c2125c5c96 100644 --- a/sparse/src/KokkosSparse_Utils.hpp +++ b/sparse/src/KokkosSparse_Utils.hpp @@ -2350,7 +2350,8 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( using ordinal_type = typename crsMat_t::non_const_ordinal_type; using size_type = typename crsMat_t::non_const_size_type; - using offset_view1d_type = Kokkos::View; + using offset_view1d_type = + Kokkos::View; row_map_type A_row_map = A.graph.row_map; entries_type A_entries = A.graph.entries; @@ -2369,33 +2370,35 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( if (A_nrows != A_ncols) { std::ostringstream os; - os << "The diagonal block extraction only works with square matrices -- matrix A: " << A_nrows << " x " << A_ncols; + os << "The diagonal block extraction only works with square matrices -- " + "matrix A: " + << A_nrows << " x " << A_ncols; throw std::runtime_error(os.str()); } if (n_blocks == 1) { // One block case: simply shallow copy A to DiagBlk_v[0] DiagBlk_v[0] = crsMat_t(A); - } - else { + } else { // n_blocks > 1 if (A_nrows == 0) { // Degenerate case: A is an empty matrix for (ordinal_type i = 0; i < n_blocks; i++) { DiagBlk_v[i] = crsMat_t(); } - } - else { + } else { // A_nrows >= 1 - ordinal_type rows_per_block = ((A_nrows % n_blocks) == 0) ? (A_nrows / n_blocks) : (A_nrows / n_blocks + 1); - + ordinal_type rows_per_block = ((A_nrows % n_blocks) == 0) + ? (A_nrows / n_blocks) + : (A_nrows / n_blocks + 1); + std::vector row_map_v(n_blocks); std::vector entries_v(n_blocks); std::vector values_v(n_blocks); std::vector row_map_h_v(n_blocks); std::vector entries_h_v(n_blocks); std::vector values_h_v(n_blocks); - + ordinal_type row_start = 0; // first row index of i-th diagonal block ordinal_type col_start = 0; // first col index of i-th diagonal block ordinal_type nrows, ncols; // Nrows, Ncols of i-th diagonal block @@ -2452,9 +2455,9 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( row_map_h_v[i] = out_row_map_hostmirror_type("row_map_h_v", nrows + 1); entries_h_v[i] = out_entries_hostmirror_type("entries_h_v", n_entries); values_h_v[i] = out_values_hostmirror_type("values_h_v", n_entries); - size_type first_ = 0; + size_type first_ = 0; for (ordinal_type j = 0; j < nrows; j++) { // loop through each row - size_type nnz = last(j) - first(j) + 1; + size_type nnz = last(j) - first(j) + 1; row_map_h_v[i](j) = first_; for (size_type k = 0; k < nnz; k++) { entries_h_v[i](first_ + k) = A_entries_h(first(j) + k) - col_start; @@ -2468,13 +2471,13 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( Kokkos::deep_copy(entries_v[i], entries_h_v[i]); Kokkos::deep_copy(values_v[i], values_h_v[i]); - DiagBlk_v[i] = crsMat_t("CrsMatrix", nrows, ncols, n_entries, values_v[i], - row_map_v[i], entries_v[i]); + DiagBlk_v[i] = crsMat_t("CrsMatrix", nrows, ncols, n_entries, + values_v[i], row_map_v[i], entries_v[i]); row_start += nrows; - } // for (ordinal_type i = 0; i < n_blocks; i++) - } // A_nrows >= 1 - } // n_blocks > 1 + } // for (ordinal_type i = 0; i < n_blocks; i++) + } // A_nrows >= 1 + } // n_blocks > 1 } } // namespace Impl From 1b9ff060f08da51f7fcda236d3324e349585363a Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Tue, 29 Aug 2023 11:06:07 -0700 Subject: [PATCH 06/12] Update document --- sparse/src/KokkosSparse_Utils.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sparse/src/KokkosSparse_Utils.hpp b/sparse/src/KokkosSparse_Utils.hpp index 2c20dc71f4..c5ef1ad039 100644 --- a/sparse/src/KokkosSparse_Utils.hpp +++ b/sparse/src/KokkosSparse_Utils.hpp @@ -2335,8 +2335,8 @@ void validateCrsMatrix(int m, int n, const Rowmap &rowmapIn, * This is a blocking function that runs on the host. * * @tparam crsMat_t The type of the CRS matrix - * @param A The CrsMatrix. - * @param DiagBlk_v [in/out] The location for extracting the diagonal blocks. + * @param A [in] The CrsMatrix. + * @param DiagBlk_v [out] The vector of extracted the CRS diagonal blocks. * * Usage Example: * kk_extract_diagonal_blocks_crsmatrix_sequential(A_in, diagBlk_in_b); From d00b62570dd47bca79c4ea06e30ca9402e85964b Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Wed, 30 Aug 2023 02:29:08 -0700 Subject: [PATCH 07/12] Add unit test for kk_extract_diagonal_blocks_crsmatrix_sequential --- sparse/src/KokkosSparse_Utils.hpp | 18 +-- sparse/unit_test/Test_Sparse.hpp | 1 + .../Test_Sparse_extractCrsDiagonalBlocks.hpp | 150 ++++++++++++++++++ 3 files changed, 158 insertions(+), 11 deletions(-) create mode 100644 sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp diff --git a/sparse/src/KokkosSparse_Utils.hpp b/sparse/src/KokkosSparse_Utils.hpp index c5ef1ad039..65e7e4243d 100644 --- a/sparse/src/KokkosSparse_Utils.hpp +++ b/sparse/src/KokkosSparse_Utils.hpp @@ -2344,13 +2344,9 @@ void validateCrsMatrix(int m, int n, const Rowmap &rowmapIn, template void kk_extract_diagonal_blocks_crsmatrix_sequential( const crsMat_t &A, std::vector &DiagBlk_v) { - using row_map_type = typename crsMat_t::row_map_type; - using entries_type = typename crsMat_t::index_type; - using values_type = typename crsMat_t::values_type; - using row_map_hostmirror_type = typename row_map_type::HostMirror; - using entries_hostmirror_type = typename entries_type::HostMirror; - using values_hostmirror_type = typename values_type::HostMirror; - + using row_map_type = typename crsMat_t::row_map_type; + using entries_type = typename crsMat_t::index_type; + using values_type = typename crsMat_t::values_type; using graph_t = typename crsMat_t::StaticCrsGraphType; using out_row_map_type = typename graph_t::row_map_type::non_const_type; using out_entries_type = typename graph_t::entries_type::non_const_type; @@ -2368,14 +2364,14 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( entries_type A_entries = A.graph.entries; values_type A_values = A.values; - row_map_hostmirror_type A_row_map_h = + auto A_row_map_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_row_map); - entries_hostmirror_type A_entries_h = + auto A_entries_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_entries); - values_hostmirror_type A_values_h = + auto A_values_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_values); - ordinal_type A_nrows = static_cast(A_row_map.extent(0)) - 1; + ordinal_type A_nrows = static_cast(A.numRows()); ordinal_type A_ncols = static_cast(A.numCols()); ordinal_type n_blocks = static_cast(DiagBlk_v.size()); diff --git a/sparse/unit_test/Test_Sparse.hpp b/sparse/unit_test/Test_Sparse.hpp index 2eb9f6f122..8ae06b598a 100644 --- a/sparse/unit_test/Test_Sparse.hpp +++ b/sparse/unit_test/Test_Sparse.hpp @@ -43,6 +43,7 @@ #include "Test_Sparse_ccs2crs.hpp" #include "Test_Sparse_crs2ccs.hpp" #include "Test_Sparse_removeCrsMatrixZeros.hpp" +#include "Test_Sparse_extractCrsDiagonalBlocks.hpp" // TPL specific tests, these require // particular pairs of backend and TPL diff --git a/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp b/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp new file mode 100644 index 0000000000..f74c095ccb --- /dev/null +++ b/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp @@ -0,0 +1,150 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include "KokkosSparse_Utils.hpp" +#include "KokkosKernels_TestUtils.hpp" + +namespace Test { +template +void run_test_extract_diagonal_blocks(int nrows, int nblocks) { + using RowMapType = Kokkos::View; + using EntriesType = Kokkos::View; + using ValuesType = Kokkos::View; + using RowMapType_hm = typename RowMapType::HostMirror; + using EntriesType_hm = typename EntriesType::HostMirror; + using ValuesType_hm = typename ValuesType::HostMirror; + using crsMat_t = CrsMatrix; + using AT = Kokkos::ArithTraits; + + crsMat_t A; + std::vector DiagBlks(nblocks); + + if (nrows != 0) { + // Generate test matrix + const size_type nnz = 2 + (nrows - 2) * 3 + 2; + RowMapType_hm hrow_map("hrow_map", nrows + 1); + EntriesType_hm hentries("hentries", nnz); + ValuesType_hm hvalues ("hvalues", nnz); + + // first row + hrow_map(0) = 0; + hentries(0) = 0; + hentries(1) = 1; + hvalues(0) = 0; + hvalues(1) = 1; + // rows in between + int cnt = 2; + for(int i = 1; i <= (nrows-2); i++) { + hrow_map(i) = cnt; + hentries(cnt) = -1 + i; + hentries(cnt+1) = 0 + i; + hentries(cnt+2) = 1 + i; + hvalues(cnt) = -1 + i; + hvalues(cnt+1) = 0 + i; + hvalues(cnt+2) = 1 + i; + cnt += 3; + } + // last row + hrow_map(nrows-1) = cnt; + hentries(nnz-2) = nrows-2; + hentries(nnz-1) = nrows-1; + hvalues(nnz-2) = nrows-2; + hvalues(nnz-1) = nrows-1; + // last element of row_map + hrow_map(nrows) = nnz; + + // Allocate A on device memory + RowMapType row_map("row_map", nrows + 1); + EntriesType entries("entries", nnz); + ValuesType values ("values", nnz); + + // Copy from host to device + Kokkos::deep_copy(row_map, hrow_map); + Kokkos::deep_copy(entries, hentries); + Kokkos::deep_copy(values, hvalues); + + // Construct a CRS matrix + A = crsMat_t("CrsMatrix", nrows, nrows, nnz, values, row_map, entries); + } + + // Extract + KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(A, DiagBlks); + + // Checking + lno_t numRows = 0; + lno_t numCols = 0; + for(int i = 0; i < nblocks; i++) { + numRows += DiagBlks[i].numRows(); + numCols += DiagBlks[i].numCols(); + } + + EXPECT_TRUE(numRows == static_cast(nrows)); + EXPECT_TRUE(numCols == static_cast(nrows)); + + if (nrows > 0) { + bool flag = true; + lno_t col_start = 0; + for(int i = 0; i < nblocks; i++) { + RowMapType_hm hrow_map_diagblk("hrow_map_diagblk", DiagBlks[i].numRows() + 1); + EntriesType_hm hentries_diagblk("hentries_diagblk", DiagBlks[i].nnz()); + ValuesType_hm hvalues_diagblk ("hvalues_diagblk", DiagBlks[i].nnz()); + + Kokkos::deep_copy(hrow_map_diagblk, DiagBlks[i].graph.row_map); + Kokkos::deep_copy(hentries_diagblk, DiagBlks[i].graph.entries); + Kokkos::deep_copy(hvalues_diagblk, DiagBlks[i].values); + + for(int j = 0; j < static_cast(DiagBlks[i].numRows()); j++) { + size_type k1 = hrow_map_diagblk(j); + size_type k2 = hrow_map_diagblk(j + 1); + for(size_type k = k1; k < k2; k++) { + scalar_t col = static_cast(hentries_diagblk(k) + col_start); + scalar_t val = hvalues_diagblk(k); + if (Kokkos::abs(col- val) != 0) { + flag = false; + break; + } + } + if (flag == false) break; + } + if (flag == false) break; + col_start += DiagBlks[i].numCols(); + } + EXPECT_TRUE(flag); + } +} +} // namespace Test + +template +void test_extract_diagonal_blocks() { + for (int s = 1; s <= 8; s++) { + Test::run_test_extract_diagonal_blocks(0, s); + Test::run_test_extract_diagonal_blocks(3, s); + Test::run_test_extract_diagonal_blocks(12, s); + Test::run_test_extract_diagonal_blocks(123, s); + } +} + +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, \ + sparse##_##extract_diagonal_blocks##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_extract_diagonal_blocks(); \ + } + +#include + +#undef KOKKOSKERNELS_EXECUTE_TEST From 8176fe8106454f7c6707708c43fc6e4f91944024 Mon Sep 17 00:00:00 2001 From: "Vinh Quang Dang (-EXP)" Date: Wed, 30 Aug 2023 03:37:28 -0600 Subject: [PATCH 08/12] Apply clang format --- .../Test_Sparse_extractCrsDiagonalBlocks.hpp | 113 +- sparse/unit_test/Test_Sparse_spiluk.hpp_ | 517 +++++++ sparse/unit_test/Test_Sparse_sptrsv.hpp_ | 1359 +++++++++++++++++ 3 files changed, 1936 insertions(+), 53 deletions(-) create mode 100644 sparse/unit_test/Test_Sparse_spiluk.hpp_ create mode 100644 sparse/unit_test/Test_Sparse_sptrsv.hpp_ diff --git a/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp b/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp index f74c095ccb..f7b48c6945 100644 --- a/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp +++ b/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp @@ -27,8 +27,8 @@ void run_test_extract_diagonal_blocks(int nrows, int nblocks) { using RowMapType_hm = typename RowMapType::HostMirror; using EntriesType_hm = typename EntriesType::HostMirror; using ValuesType_hm = typename ValuesType::HostMirror; - using crsMat_t = CrsMatrix; - using AT = Kokkos::ArithTraits; + using crsMat_t = CrsMatrix; + using AT = Kokkos::ArithTraits; crsMat_t A; std::vector DiagBlks(nblocks); @@ -36,58 +36,59 @@ void run_test_extract_diagonal_blocks(int nrows, int nblocks) { if (nrows != 0) { // Generate test matrix const size_type nnz = 2 + (nrows - 2) * 3 + 2; - RowMapType_hm hrow_map("hrow_map", nrows + 1); + RowMapType_hm hrow_map("hrow_map", nrows + 1); EntriesType_hm hentries("hentries", nnz); - ValuesType_hm hvalues ("hvalues", nnz); - + ValuesType_hm hvalues("hvalues", nnz); + // first row hrow_map(0) = 0; hentries(0) = 0; hentries(1) = 1; hvalues(0) = 0; hvalues(1) = 1; - // rows in between + // rows in between int cnt = 2; - for(int i = 1; i <= (nrows-2); i++) { - hrow_map(i) = cnt; - hentries(cnt) = -1 + i; - hentries(cnt+1) = 0 + i; - hentries(cnt+2) = 1 + i; - hvalues(cnt) = -1 + i; - hvalues(cnt+1) = 0 + i; - hvalues(cnt+2) = 1 + i; + for (int i = 1; i <= (nrows - 2); i++) { + hrow_map(i) = cnt; + hentries(cnt) = -1 + i; + hentries(cnt + 1) = 0 + i; + hentries(cnt + 2) = 1 + i; + hvalues(cnt) = -1 + i; + hvalues(cnt + 1) = 0 + i; + hvalues(cnt + 2) = 1 + i; cnt += 3; } // last row - hrow_map(nrows-1) = cnt; - hentries(nnz-2) = nrows-2; - hentries(nnz-1) = nrows-1; - hvalues(nnz-2) = nrows-2; - hvalues(nnz-1) = nrows-1; + hrow_map(nrows - 1) = cnt; + hentries(nnz - 2) = nrows - 2; + hentries(nnz - 1) = nrows - 1; + hvalues(nnz - 2) = nrows - 2; + hvalues(nnz - 1) = nrows - 1; // last element of row_map - hrow_map(nrows) = nnz; - + hrow_map(nrows) = nnz; + // Allocate A on device memory - RowMapType row_map("row_map", nrows + 1); + RowMapType row_map("row_map", nrows + 1); EntriesType entries("entries", nnz); - ValuesType values ("values", nnz); - + ValuesType values("values", nnz); + // Copy from host to device Kokkos::deep_copy(row_map, hrow_map); Kokkos::deep_copy(entries, hentries); - Kokkos::deep_copy(values, hvalues); - + Kokkos::deep_copy(values, hvalues); + // Construct a CRS matrix A = crsMat_t("CrsMatrix", nrows, nrows, nnz, values, row_map, entries); } // Extract - KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(A, DiagBlks); - + KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(A, + DiagBlks); + // Checking lno_t numRows = 0; lno_t numCols = 0; - for(int i = 0; i < nblocks; i++) { + for (int i = 0; i < nblocks; i++) { numRows += DiagBlks[i].numRows(); numCols += DiagBlks[i].numCols(); } @@ -96,29 +97,30 @@ void run_test_extract_diagonal_blocks(int nrows, int nblocks) { EXPECT_TRUE(numCols == static_cast(nrows)); if (nrows > 0) { - bool flag = true; + bool flag = true; lno_t col_start = 0; - for(int i = 0; i < nblocks; i++) { - RowMapType_hm hrow_map_diagblk("hrow_map_diagblk", DiagBlks[i].numRows() + 1); + for (int i = 0; i < nblocks; i++) { + RowMapType_hm hrow_map_diagblk("hrow_map_diagblk", + DiagBlks[i].numRows() + 1); EntriesType_hm hentries_diagblk("hentries_diagblk", DiagBlks[i].nnz()); - ValuesType_hm hvalues_diagblk ("hvalues_diagblk", DiagBlks[i].nnz()); - + ValuesType_hm hvalues_diagblk("hvalues_diagblk", DiagBlks[i].nnz()); + Kokkos::deep_copy(hrow_map_diagblk, DiagBlks[i].graph.row_map); Kokkos::deep_copy(hentries_diagblk, DiagBlks[i].graph.entries); - Kokkos::deep_copy(hvalues_diagblk, DiagBlks[i].values); - - for(int j = 0; j < static_cast(DiagBlks[i].numRows()); j++) { + Kokkos::deep_copy(hvalues_diagblk, DiagBlks[i].values); + + for (int j = 0; j < static_cast(DiagBlks[i].numRows()); j++) { size_type k1 = hrow_map_diagblk(j); size_type k2 = hrow_map_diagblk(j + 1); - for(size_type k = k1; k < k2; k++) { - scalar_t col = static_cast(hentries_diagblk(k) + col_start); - scalar_t val = hvalues_diagblk(k); - if (Kokkos::abs(col- val) != 0) { - flag = false; - break; - } + for (size_type k = k1; k < k2; k++) { + scalar_t col = static_cast(hentries_diagblk(k) + col_start); + scalar_t val = hvalues_diagblk(k); + if (Kokkos::abs(col - val) != 0) { + flag = false; + break; } - if (flag == false) break; + } + if (flag == false) break; } if (flag == false) break; col_start += DiagBlks[i].numCols(); @@ -132,17 +134,22 @@ template void test_extract_diagonal_blocks() { for (int s = 1; s <= 8; s++) { - Test::run_test_extract_diagonal_blocks(0, s); - Test::run_test_extract_diagonal_blocks(3, s); - Test::run_test_extract_diagonal_blocks(12, s); - Test::run_test_extract_diagonal_blocks(123, s); + Test::run_test_extract_diagonal_blocks( + 0, s); + Test::run_test_extract_diagonal_blocks( + 3, s); + Test::run_test_extract_diagonal_blocks( + 12, s); + Test::run_test_extract_diagonal_blocks( + 123, s); } } -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##extract_diagonal_blocks##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_extract_diagonal_blocks(); \ +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F( \ + TestCategory, \ + sparse##_##extract_diagonal_blocks##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_extract_diagonal_blocks(); \ } #include diff --git a/sparse/unit_test/Test_Sparse_spiluk.hpp_ b/sparse/unit_test/Test_Sparse_spiluk.hpp_ new file mode 100644 index 0000000000..3115bc9649 --- /dev/null +++ b/sparse/unit_test/Test_Sparse_spiluk.hpp_ @@ -0,0 +1,517 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +#include +#include + +#include "KokkosSparse_Utils.hpp" +#include "KokkosSparse_CrsMatrix.hpp" +#include +#include "KokkosBlas1_nrm2.hpp" +#include "KokkosSparse_spmv.hpp" +#include "KokkosSparse_spiluk.hpp" + +#include + +using namespace KokkosSparse; +using namespace KokkosSparse::Experimental; +using namespace KokkosKernels; +using namespace KokkosKernels::Experimental; + +// #ifndef kokkos_complex_double +// #define kokkos_complex_double Kokkos::complex +// #define kokkos_complex_float Kokkos::complex +// #endif + +typedef Kokkos::complex kokkos_complex_double; +typedef Kokkos::complex kokkos_complex_float; + +namespace Test { + +template +void run_test_spiluk() { + typedef Kokkos::View RowMapType; + typedef Kokkos::View EntriesType; + typedef Kokkos::View ValuesType; + typedef Kokkos::ArithTraits AT; + + const size_type nrows = 9; + const size_type nnz = 21; + + RowMapType row_map("row_map", nrows + 1); + EntriesType entries("entries", nnz); + ValuesType values("values", nnz); + + auto hrow_map = Kokkos::create_mirror_view(row_map); + auto hentries = Kokkos::create_mirror_view(entries); + auto hvalues = Kokkos::create_mirror_view(values); + + scalar_t ZERO = scalar_t(0); + scalar_t ONE = scalar_t(1); + scalar_t MONE = scalar_t(-1); + + hrow_map(0) = 0; + hrow_map(1) = 3; + hrow_map(2) = 5; + hrow_map(3) = 6; + hrow_map(4) = 9; + hrow_map(5) = 11; + hrow_map(6) = 13; + hrow_map(7) = 15; + hrow_map(8) = 18; + hrow_map(9) = nnz; + + hentries(0) = 0; + hentries(1) = 2; + hentries(2) = 5; + hentries(3) = 1; + hentries(4) = 6; + hentries(5) = 2; + hentries(6) = 0; + hentries(7) = 3; + hentries(8) = 4; + hentries(9) = 0; + hentries(10) = 4; + hentries(11) = 1; + hentries(12) = 5; + hentries(13) = 2; + hentries(14) = 6; + hentries(15) = 3; + hentries(16) = 4; + hentries(17) = 7; + hentries(18) = 3; + hentries(19) = 4; + hentries(20) = 8; + + hvalues(0) = 10; + hvalues(1) = 0.3; + hvalues(2) = 0.6; + hvalues(3) = 11; + hvalues(4) = 0.7; + hvalues(5) = 12; + hvalues(6) = 5; + hvalues(7) = 13; + hvalues(8) = 1; + hvalues(9) = 4; + hvalues(10) = 14; + hvalues(11) = 3; + hvalues(12) = 15; + hvalues(13) = 7; + hvalues(14) = 16; + hvalues(15) = 6; + hvalues(16) = 5; + hvalues(17) = 17; + hvalues(18) = 2; + hvalues(19) = 2.5; + hvalues(20) = 18; + + Kokkos::deep_copy(row_map, hrow_map); + Kokkos::deep_copy(entries, hentries); + Kokkos::deep_copy(values, hvalues); + + typedef KokkosKernels::Experimental::KokkosKernelsHandle< + size_type, lno_t, scalar_t, typename device::execution_space, + typename device::memory_space, typename device::memory_space> + KernelHandle; + + KernelHandle kh; + + // SPILUKAlgorithm::SEQLVLSCHD_RP + { + kh.create_spiluk_handle(SPILUKAlgorithm::SEQLVLSCHD_RP, nrows, 4 * nrows, + 4 * nrows); + + auto spiluk_handle = kh.get_spiluk_handle(); + + // Allocate L and U as outputs + RowMapType L_row_map("L_row_map", nrows + 1); + EntriesType L_entries("L_entries", spiluk_handle->get_nnzL()); + ValuesType L_values("L_values", spiluk_handle->get_nnzL()); + RowMapType U_row_map("U_row_map", nrows + 1); + EntriesType U_entries("U_entries", spiluk_handle->get_nnzU()); + ValuesType U_values("U_values", spiluk_handle->get_nnzU()); + + typename KernelHandle::const_nnz_lno_t fill_lev = 2; + + spiluk_symbolic(&kh, fill_lev, row_map, entries, L_row_map, L_entries, + U_row_map, U_entries); + + Kokkos::fence(); + + Kokkos::resize(L_entries, spiluk_handle->get_nnzL()); + Kokkos::resize(L_values, spiluk_handle->get_nnzL()); + Kokkos::resize(U_entries, spiluk_handle->get_nnzU()); + Kokkos::resize(U_values, spiluk_handle->get_nnzU()); + + spiluk_handle->print_algorithm(); + spiluk_numeric(&kh, fill_lev, row_map, entries, values, L_row_map, + L_entries, L_values, U_row_map, U_entries, U_values); + + Kokkos::fence(); + + // Checking + typedef CrsMatrix crsMat_t; + crsMat_t A("A_Mtx", nrows, nrows, nnz, values, row_map, entries); + crsMat_t L("L_Mtx", nrows, nrows, spiluk_handle->get_nnzL(), L_values, + L_row_map, L_entries); + crsMat_t U("U_Mtx", nrows, nrows, spiluk_handle->get_nnzU(), U_values, + U_row_map, U_entries); + + // Create a reference view e set to all 1's + ValuesType e_one("e_one", nrows); + Kokkos::deep_copy(e_one, 1.0); + + // Create two views for spmv results + ValuesType bb("bb", nrows); + ValuesType bb_tmp("bb_tmp", nrows); + + // Compute norm2(L*U*e_one - A*e_one)/norm2(A*e_one) + KokkosSparse::spmv("N", ONE, A, e_one, ZERO, bb); + + typename AT::mag_type bb_nrm = KokkosBlas::nrm2(bb); + + KokkosSparse::spmv("N", ONE, U, e_one, ZERO, bb_tmp); + KokkosSparse::spmv("N", ONE, L, bb_tmp, MONE, bb); + + typename AT::mag_type diff_nrm = KokkosBlas::nrm2(bb); + + EXPECT_TRUE((diff_nrm / bb_nrm) < 1e-4); + + kh.destroy_spiluk_handle(); + } + + // SPILUKAlgorithm::SEQLVLSCHD_TP1 + { + kh.create_spiluk_handle(SPILUKAlgorithm::SEQLVLSCHD_TP1, nrows, 4 * nrows, + 4 * nrows); + + auto spiluk_handle = kh.get_spiluk_handle(); + + // Allocate L and U as outputs + RowMapType L_row_map("L_row_map", nrows + 1); + EntriesType L_entries("L_entries", spiluk_handle->get_nnzL()); + ValuesType L_values("L_values", spiluk_handle->get_nnzL()); + RowMapType U_row_map("U_row_map", nrows + 1); + EntriesType U_entries("U_entries", spiluk_handle->get_nnzU()); + ValuesType U_values("U_values", spiluk_handle->get_nnzU()); + + typename KernelHandle::const_nnz_lno_t fill_lev = 2; + + spiluk_symbolic(&kh, fill_lev, row_map, entries, L_row_map, L_entries, + U_row_map, U_entries); + + Kokkos::fence(); + + Kokkos::resize(L_entries, spiluk_handle->get_nnzL()); + Kokkos::resize(L_values, spiluk_handle->get_nnzL()); + Kokkos::resize(U_entries, spiluk_handle->get_nnzU()); + Kokkos::resize(U_values, spiluk_handle->get_nnzU()); + + spiluk_handle->print_algorithm(); + spiluk_numeric(&kh, fill_lev, row_map, entries, values, L_row_map, + L_entries, L_values, U_row_map, U_entries, U_values); + + Kokkos::fence(); + + // Checking + typedef CrsMatrix crsMat_t; + crsMat_t A("A_Mtx", nrows, nrows, nnz, values, row_map, entries); + crsMat_t L("L_Mtx", nrows, nrows, spiluk_handle->get_nnzL(), L_values, + L_row_map, L_entries); + crsMat_t U("U_Mtx", nrows, nrows, spiluk_handle->get_nnzU(), U_values, + U_row_map, U_entries); + + // Create a reference view e set to all 1's + ValuesType e_one("e_one", nrows); + Kokkos::deep_copy(e_one, 1.0); + + // Create two views for spmv results + ValuesType bb("bb", nrows); + ValuesType bb_tmp("bb_tmp", nrows); + + // Compute norm2(L*U*e_one - A*e_one)/norm2(A*e_one) + KokkosSparse::spmv("N", ONE, A, e_one, ZERO, bb); + + typename AT::mag_type bb_nrm = KokkosBlas::nrm2(bb); + + KokkosSparse::spmv("N", ONE, U, e_one, ZERO, bb_tmp); + KokkosSparse::spmv("N", ONE, L, bb_tmp, MONE, bb); + + typename AT::mag_type diff_nrm = KokkosBlas::nrm2(bb); + + EXPECT_TRUE((diff_nrm / bb_nrm) < 1e-4); + + kh.destroy_spiluk_handle(); + } +} + +template +void run_test_spiluk_streams(int test_algo, int nstreams) { + using RowMapType = Kokkos::View; + using EntriesType = Kokkos::View; + using ValuesType = Kokkos::View; + using RowMapType_hostmirror = typename RowMapType::HostMirror; + using EntriesType_hostmirror = typename EntriesType::HostMirror; + using ValuesType_hostmirror = typename ValuesType::HostMirror; + using execution_space = typename device::execution_space; + using memory_space = typename device::memory_space; + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< + size_type, lno_t, scalar_t, execution_space, memory_space, memory_space>; + using crsMat_t = CrsMatrix; + using AT = Kokkos::ArithTraits; + + // Workaround for OpenMP: skip tests if OMP_NUM_THREADS < nstreams because of not enough resource to partition + bool run_streams_test = true; +#ifdef KOKKOS_ENABLE_OPENMP + if (std::is_same::value) { + const char *env_omp_num_threads = std::getenv("OMP_NUM_THREADS"); + if (env_omp_num_threads != nullptr) { + int num_threads = std::atoi(env_omp_num_threads); + if (num_threads < nstreams) { + run_streams_test = false; + std::cout << " Skip stream test: omp_num_threads = " << num_threads << std::endl; + } + } + } +#endif + if (!run_streams_test) + return; + + const size_type nrows = 9; + const size_type nnz = 21; + + std::vector instances; + if (nstreams == 2) + instances = Kokkos::Experimental::partition_space(execution_space(), 1, 1); + else if (nstreams == 3) + instances = + Kokkos::Experimental::partition_space(execution_space(), 1, 1, 1); + else + instances = + Kokkos::Experimental::partition_space(execution_space(), 1, 1, 1, 1); + + std::vector kh_v(nstreams); + std::vector kh_ptr_v(nstreams); + std::vector A_row_map_v(nstreams); + std::vector A_entries_v(nstreams); + std::vector A_values_v(nstreams); + std::vector L_row_map_v(nstreams); + std::vector L_entries_v(nstreams); + std::vector L_values_v(nstreams); + std::vector U_row_map_v(nstreams); + std::vector U_entries_v(nstreams); + std::vector U_values_v(nstreams); + + RowMapType_hostmirror hrow_map("hrow_map", nrows + 1); + EntriesType_hostmirror hentries("hentries", nnz); + ValuesType_hostmirror hvalues("hvalues", nnz); + + scalar_t ZERO = scalar_t(0); + scalar_t ONE = scalar_t(1); + scalar_t MONE = scalar_t(-1); + + hrow_map(0) = 0; + hrow_map(1) = 3; + hrow_map(2) = 5; + hrow_map(3) = 6; + hrow_map(4) = 9; + hrow_map(5) = 11; + hrow_map(6) = 13; + hrow_map(7) = 15; + hrow_map(8) = 18; + hrow_map(9) = nnz; + + hentries(0) = 0; + hentries(1) = 2; + hentries(2) = 5; + hentries(3) = 1; + hentries(4) = 6; + hentries(5) = 2; + hentries(6) = 0; + hentries(7) = 3; + hentries(8) = 4; + hentries(9) = 0; + hentries(10) = 4; + hentries(11) = 1; + hentries(12) = 5; + hentries(13) = 2; + hentries(14) = 6; + hentries(15) = 3; + hentries(16) = 4; + hentries(17) = 7; + hentries(18) = 3; + hentries(19) = 4; + hentries(20) = 8; + + hvalues(0) = 10; + hvalues(1) = 0.3; + hvalues(2) = 0.6; + hvalues(3) = 11; + hvalues(4) = 0.7; + hvalues(5) = 12; + hvalues(6) = 5; + hvalues(7) = 13; + hvalues(8) = 1; + hvalues(9) = 4; + hvalues(10) = 14; + hvalues(11) = 3; + hvalues(12) = 15; + hvalues(13) = 7; + hvalues(14) = 16; + hvalues(15) = 6; + hvalues(16) = 5; + hvalues(17) = 17; + hvalues(18) = 2; + hvalues(19) = 2.5; + hvalues(20) = 18; + + typename KernelHandle::const_nnz_lno_t fill_lev = 2; + + for (int i = 0; i < nstreams; i++) { + // Allocate A as input + A_row_map_v[i] = RowMapType("A_row_map", nrows + 1); + A_entries_v[i] = EntriesType("A_entries", nnz); + A_values_v[i] = ValuesType("A_values", nnz); + + // Copy from host to device + Kokkos::deep_copy(A_row_map_v[i], hrow_map); + Kokkos::deep_copy(A_entries_v[i], hentries); + Kokkos::deep_copy(A_values_v[i], hvalues); + + // Create handle + kh_v[i] = KernelHandle(); + if (test_algo == 0) + kh_v[i].create_spiluk_handle(SPILUKAlgorithm::SEQLVLSCHD_RP, nrows, + 4 * nrows, 4 * nrows); + else if (test_algo == 1) + kh_v[i].create_spiluk_handle(SPILUKAlgorithm::SEQLVLSCHD_TP1, nrows, + 4 * nrows, 4 * nrows); + kh_ptr_v[i] = &kh_v[i]; + + auto spiluk_handle = kh_v[i].get_spiluk_handle(); + std::cout << " Stream " << i << ": "; + spiluk_handle->print_algorithm(); + + // Allocate L and U as outputs + L_row_map_v[i] = RowMapType("L_row_map", nrows + 1); + L_entries_v[i] = EntriesType("L_entries", spiluk_handle->get_nnzL()); + L_values_v[i] = ValuesType("L_values", spiluk_handle->get_nnzL()); + U_row_map_v[i] = RowMapType("U_row_map", nrows + 1); + U_entries_v[i] = EntriesType("U_entries", spiluk_handle->get_nnzU()); + U_values_v[i] = ValuesType("U_values", spiluk_handle->get_nnzU()); + + // Symbolic phase + spiluk_symbolic(kh_ptr_v[i], fill_lev, A_row_map_v[i], A_entries_v[i], + L_row_map_v[i], L_entries_v[i], U_row_map_v[i], + U_entries_v[i], nstreams); + + Kokkos::fence(); + + Kokkos::resize(L_entries_v[i], spiluk_handle->get_nnzL()); + Kokkos::resize(L_values_v[i], spiluk_handle->get_nnzL()); + Kokkos::resize(U_entries_v[i], spiluk_handle->get_nnzU()); + Kokkos::resize(U_values_v[i], spiluk_handle->get_nnzU()); + } // Done handle creation and spiluk_symbolic on all streams + + // Numeric phase + spiluk_numeric_streams(instances, kh_ptr_v, fill_lev, A_row_map_v, + A_entries_v, A_values_v, L_row_map_v, L_entries_v, + L_values_v, U_row_map_v, U_entries_v, U_values_v); + + for (int i = 0; i < nstreams; i++) instances[i].fence(); + + // Checking + for (int i = 0; i < nstreams; i++) { + auto spiluk_handle = kh_v[i].get_spiluk_handle(); + crsMat_t A("A_Mtx", nrows, nrows, nnz, A_values_v[i], A_row_map_v[i], + A_entries_v[i]); + crsMat_t L("L_Mtx", nrows, nrows, spiluk_handle->get_nnzL(), L_values_v[i], + L_row_map_v[i], L_entries_v[i]); + crsMat_t U("U_Mtx", nrows, nrows, spiluk_handle->get_nnzU(), U_values_v[i], + U_row_map_v[i], U_entries_v[i]); + + // Create a reference view e set to all 1's + ValuesType e_one("e_one", nrows); + Kokkos::deep_copy(e_one, 1.0); + + // Create two views for spmv results + ValuesType bb("bb", nrows); + ValuesType bb_tmp("bb_tmp", nrows); + + // Compute norm2(L*U*e_one - A*e_one)/norm2(A*e_one) + KokkosSparse::spmv("N", ONE, A, e_one, ZERO, bb); + + typename AT::mag_type bb_nrm = KokkosBlas::nrm2(bb); + + KokkosSparse::spmv("N", ONE, U, e_one, ZERO, bb_tmp); + KokkosSparse::spmv("N", ONE, L, bb_tmp, MONE, bb); + + typename AT::mag_type diff_nrm = KokkosBlas::nrm2(bb); + + EXPECT_TRUE((diff_nrm / bb_nrm) < 1e-4); + + kh_v[i].destroy_spiluk_handle(); + } +} + +} // namespace Test + +template +void test_spiluk() { + Test::run_test_spiluk(); +} + +template +void test_spiluk_streams() { + std::cout << "SPILUKAlgorithm::SEQLVLSCHD_RP: 2 streams" << std::endl; + Test::run_test_spiluk_streams(0, 2); + + std::cout << "SPILUKAlgorithm::SEQLVLSCHD_RP: 3 streams" << std::endl; + Test::run_test_spiluk_streams(0, 3); + + std::cout << "SPILUKAlgorithm::SEQLVLSCHD_RP: 4 streams" << std::endl; + Test::run_test_spiluk_streams(0, 4); + + std::cout << "SPILUKAlgorithm::SEQLVLSCHD_TP1: 2 streams" << std::endl; + Test::run_test_spiluk_streams(1, 2); + + std::cout << "SPILUKAlgorithm::SEQLVLSCHD_TP1: 3 streams" << std::endl; + Test::run_test_spiluk_streams(1, 3); + + std::cout << "SPILUKAlgorithm::SEQLVLSCHD_TP1: 4 streams" << std::endl; + Test::run_test_spiluk_streams(1, 4); +} + +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, \ + sparse##_##spiluk##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_spiluk(); \ + test_spiluk_streams(); \ + } + +#define NO_TEST_COMPLEX + +#include + +#undef KOKKOSKERNELS_EXECUTE_TEST +#undef NO_TEST_COMPLEX diff --git a/sparse/unit_test/Test_Sparse_sptrsv.hpp_ b/sparse/unit_test/Test_Sparse_sptrsv.hpp_ new file mode 100644 index 0000000000..2425fb4c27 --- /dev/null +++ b/sparse/unit_test/Test_Sparse_sptrsv.hpp_ @@ -0,0 +1,1359 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +#include +#include + +#include "KokkosKernels_IOUtils.hpp" +#include "KokkosSparse_Utils.hpp" +#include "KokkosSparse_spmv.hpp" +#include "KokkosSparse_CrsMatrix.hpp" + +#include "KokkosSparse_sptrsv.hpp" +#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) +#include "KokkosSparse_sptrsv_supernode.hpp" +#endif + +#include + +using namespace KokkosSparse; +using namespace KokkosSparse::Experimental; +using namespace KokkosKernels; +using namespace KokkosKernels::Impl; +using namespace KokkosKernels::Experimental; + +// #ifndef kokkos_complex_double +// #define kokkos_complex_double Kokkos::complex +// #endif +// #ifndef kokkos_complex_float +// #define kokkos_complex_float Kokkos::complex +// #endif + +typedef Kokkos::complex kokkos_complex_double; +typedef Kokkos::complex kokkos_complex_float; + +namespace Test { + +#if 0 +template +void run_test_sptrsv_mtx() { + + typedef typename KokkosSparse::CrsMatrix crsmat_t; + typedef typename crsmat_t::StaticCrsGraphType graph_t; + + //typedef Kokkos::View< size_type*, device > RowMapType; + //typedef Kokkos::View< lno_t*, device > EntriesType; + typedef Kokkos::View< scalar_t*, device > ValuesType; + + // Lower tri + std::cout << "LowerTriTest Begin" << std::endl; + { + +// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/L-offshore-amd.mtx"; +// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/L-Transport-amd.mtx"; +// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/L-Fault_639amd.mtx"; +// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/L-thermal2-amd.mtx"; + std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/L-dielFilterV2real-amd.mtx"; + std::cout << "Matrix file: " << mtx_filename << std::endl; + crsmat_t triMtx = KokkosKernels::Impl::read_kokkos_crst_matrix(mtx_filename.c_str()); //in_matrix + graph_t lgraph = triMtx.graph; // in_graph + + auto row_map = lgraph.row_map; + auto entries = lgraph.entries; + auto values = triMtx.values; + + const size_type nrows = lgraph.numRows(); +// const size_type nnz = triMtx.nnz(); + + scalar_t ZERO = scalar_t(0); + scalar_t ONE = scalar_t(1); + + typedef KokkosKernels::Experimental::KokkosKernelsHandle KernelHandle; + + std::cout << "UnitTest nrows = " << nrows << std::endl; + + KernelHandle kh; + bool is_lower_tri = true; + std::cout << "Create handle" << std::endl; + kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, is_lower_tri); + + std::cout << "Prepare linear system" << std::endl; + // Create known_lhs, generate rhs, then solve for lhs to compare to known_lhs + ValuesType known_lhs("known_lhs", nrows); + // Create known solution lhs set to all 1's + Kokkos::deep_copy(known_lhs, ONE); + + // Solution to find + ValuesType lhs("lhs", nrows); + + // A*known_lhs generates rhs: rhs is dense, use spmv + ValuesType rhs("rhs", nrows); + +// typedef CrsMatrix crsMat_t; +// crsMat_t triMtx("triMtx", nrows, nrows, nnz, values, row_map, entries); + + std::cout << "SPMV" << std::endl; + KokkosSparse::spmv( "N", ONE, triMtx, known_lhs, ZERO, rhs); + + std::cout << "TriSolve Symbolic" << std::endl; + Kokkos::Timer timer; + sptrsv_symbolic( &kh, row_map, entries ); + std::cout << "LTRI Symbolic Time: " << timer.seconds() << std::endl; + + std::cout << "TriSolve Solve" << std::endl; + kh.get_sptrsv_handle()->print_algorithm(); + timer.reset(); + sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); + std::cout << "LTRI Solve TEAMPOLICY! Time: " << timer.seconds() << std::endl; + + scalar_t sum = 0.0; + Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), KOKKOS_LAMBDA ( const lno_t i, scalar_t &tsum ) { + tsum += lhs(i); + }, sum); + if ( sum != lhs.extent(0) ) { + std::cout << "Lower Tri Solve FAILURE" << std::endl; + } + else { + std::cout << "Lower Tri Solve SUCCESS!" << std::endl; + //std::cout << "Num-levels = " << kh->get_sptrsv_handle()->get_num_levels() << std::endl; + } + EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); + + Kokkos::deep_copy(lhs, 0); + kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHD_RP); + kh.get_sptrsv_handle()->print_algorithm(); + timer.reset(); + sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); + std::cout << "LTRI Solve SEQLVLSCHD_RP Time: " << timer.seconds() << std::endl; + + sum = 0.0; + Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), KOKKOS_LAMBDA ( const lno_t i, scalar_t &tsum ) { + tsum += lhs(i); + }, sum); + if ( sum != lhs.extent(0) ) { + std::cout << "Lower Tri Solve FAILURE" << std::endl; + } + else { + std::cout << "Lower Tri Solve SUCCESS!" << std::endl; + //std::cout << "Num-levels = " << kh->get_sptrsv_handle()->get_num_levels() << std::endl; + } + EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); + + Kokkos::deep_copy(lhs, 0); + kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHED_TP2); + kh.get_sptrsv_handle()->print_algorithm(); + timer.reset(); + sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); + std::cout << "LTRI Solve SEQLVLSCHED_TP2 Time: " << timer.seconds() << std::endl; + + sum = 0.0; + Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), KOKKOS_LAMBDA ( const lno_t i, scalar_t &tsum ) { + tsum += lhs(i); + }, sum); + if ( sum != lhs.extent(0) ) { + std::cout << "Lower Tri Solve FAILURE" << std::endl; + } + else { + std::cout << "Lower Tri Solve SUCCESS!" << std::endl; + //std::cout << "Num-levels = " << kh->get_sptrsv_handle()->get_num_levels() << std::endl; + } + EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); + + + kh.destroy_sptrsv_handle(); + } + // Upper tri + std::cout << "UpperTriTest Begin" << std::endl; + { +// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/U-offshore-amd.mtx"; +// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/U-Transport-amd.mtx"; +// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/U-Fault_639amd.mtx"; +// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/U-thermal2-amd.mtx"; + std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/U-dielFilterV2real-amd.mtx"; + std::cout << "Matrix file: " << mtx_filename << std::endl; + crsmat_t triMtx = KokkosKernels::Impl::read_kokkos_crst_matrix(mtx_filename.c_str()); //in_matrix + graph_t lgraph = triMtx.graph; // in_graph + + auto row_map = lgraph.row_map; + auto entries = lgraph.entries; + auto values = triMtx.values; + + const size_type nrows = lgraph.numRows(); +// const size_type nnz = triMtx.nnz(); + + scalar_t ZERO = scalar_t(0); + scalar_t ONE = scalar_t(1); + + typedef KokkosKernels::Experimental::KokkosKernelsHandle KernelHandle; + + std::cout << "UnitTest nrows = " << nrows << std::endl; + + KernelHandle kh; + bool is_lower_tri = false; + std::cout << "Create handle" << std::endl; + kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, is_lower_tri); + + std::cout << "Prepare linear system" << std::endl; + // Create known_lhs, generate rhs, then solve for lhs to compare to known_lhs + ValuesType known_lhs("known_lhs", nrows); + // Create known solution lhs set to all 1's + Kokkos::deep_copy(known_lhs, ONE); + + // Solution to find + ValuesType lhs("lhs", nrows); + + // A*known_lhs generates rhs: rhs is dense, use spmv + ValuesType rhs("rhs", nrows); + +// typedef CrsMatrix crsMat_t; +// crsMat_t triMtx("triMtx", nrows, nrows, nnz, values, row_map, entries); + std::cout << "SPMV" << std::endl; + KokkosSparse::spmv( "N", ONE, triMtx, known_lhs, ZERO, rhs); + + std::cout << "TriSolve Symbolic" << std::endl; + Kokkos::Timer timer; + sptrsv_symbolic( &kh, row_map, entries ); + std::cout << "UTRI Symbolic Time: " << timer.seconds() << std::endl; + + std::cout << "TriSolve Solve" << std::endl; + kh.get_sptrsv_handle()->print_algorithm(); + timer.reset(); + sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); + std::cout << "UTRI Solve SEQLVLSCHD_TP1 Time: " << timer.seconds() << std::endl; + + scalar_t sum = 0.0; + Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), KOKKOS_LAMBDA ( const lno_t i, scalar_t &tsum ) { + tsum += lhs(i); + }, sum); + if ( sum != lhs.extent(0) ) { + std::cout << "Upper Tri Solve FAILURE" << std::endl; + } + else { + std::cout << "Upper Tri Solve SUCCESS!" << std::endl; + //std::cout << "Num-levels = " << kh->get_sptrsv_handle()->get_num_levels() << std::endl; + } + EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); + + Kokkos::deep_copy(lhs, 0); + kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHD_RP); + kh.get_sptrsv_handle()->print_algorithm(); + timer.reset(); + sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); + std::cout << "UTRI Solve SEQLVLSCHD_RP Time: " << timer.seconds() << std::endl; + + sum = 0.0; + Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), KOKKOS_LAMBDA ( const lno_t i, scalar_t &tsum ) { + tsum += lhs(i); + }, sum); + if ( sum != lhs.extent(0) ) { + std::cout << "Upper Tri Solve FAILURE" << std::endl; + } + else { + std::cout << "Upper Tri Solve SUCCESS!" << std::endl; + //std::cout << "Num-levels = " << kh->get_sptrsv_handle()->get_num_levels() << std::endl; + } + EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); + + Kokkos::deep_copy(lhs, 0); + kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHED_TP2); + kh.get_sptrsv_handle()->print_algorithm(); + timer.reset(); + sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); + std::cout << "UTRI Solve SEQLVLSCHED_TP2 Time: " << timer.seconds() << std::endl; + + sum = 0.0; + Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), KOKKOS_LAMBDA ( const lno_t i, scalar_t &tsum ) { + tsum += lhs(i); + }, sum); + if ( sum != lhs.extent(0) ) { + std::cout << "Upper Tri Solve FAILURE" << std::endl; + } + else { + std::cout << "Upper Tri Solve SUCCESS!" << std::endl; + //std::cout << "Num-levels = " << kh->get_sptrsv_handle()->get_num_levels() << std::endl; + } + EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); + + + kh.destroy_sptrsv_handle(); + } + +} +#endif + +namespace { +template +struct ReductionCheck { + using lno_t = OrdinalType; + using value_type = ValueType; + + ViewType lhs; + + ReductionCheck(const ViewType &lhs_) : lhs(lhs_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(lno_t i, value_type &tsum) const { tsum += lhs(i); } +}; +} // namespace + +template +void run_test_sptrsv() { + typedef Kokkos::View RowMapType; + typedef Kokkos::View EntriesType; + typedef Kokkos::View ValuesType; + + scalar_t ZERO = scalar_t(0); + scalar_t ONE = scalar_t(1); + + const size_type nrows = 5; + const size_type nnz = 10; + + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< + size_type, lno_t, scalar_t, typename device::execution_space, + typename device::memory_space, typename device::memory_space>; + +#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) + using host_crsmat_t = typename KernelHandle::SPTRSVHandleType::host_crsmat_t; + using host_graph_t = typename host_crsmat_t::StaticCrsGraphType; + + using row_map_view_t = typename host_graph_t::row_map_type::non_const_type; + using cols_view_t = typename host_graph_t::entries_type::non_const_type; + using values_view_t = typename host_crsmat_t::values_type::non_const_type; + + // L & U handle for supernodal SpTrsv + KernelHandle khL; + KernelHandle khU; + + // right-hand-side and solution + ValuesType B("rhs", nrows); + ValuesType X("sol", nrows); + + // host CRS for L & U + host_crsmat_t L, U, Ut; +#endif + + // Upper tri + { + RowMapType row_map("row_map", nrows + 1); + EntriesType entries("entries", nnz); + ValuesType values("values", nnz); + + auto hrow_map = Kokkos::create_mirror_view(row_map); + auto hentries = Kokkos::create_mirror_view(entries); + auto hvalues = Kokkos::create_mirror_view(values); + + hrow_map(0) = 0; + hrow_map(1) = 2; + hrow_map(2) = 4; + hrow_map(3) = 7; + hrow_map(4) = 9; + hrow_map(5) = 10; + + hentries(0) = 0; + hentries(1) = 2; + hentries(2) = 1; + hentries(3) = 4; + hentries(4) = 2; + hentries(5) = 3; + hentries(6) = 4; + hentries(7) = 3; + hentries(8) = 4; + hentries(9) = 4; + + for (size_type i = 0; i < nnz; ++i) { + hvalues(i) = ONE; + } + + Kokkos::deep_copy(row_map, hrow_map); + Kokkos::deep_copy(entries, hentries); + Kokkos::deep_copy(values, hvalues); + + // Create known_lhs, generate rhs, then solve for lhs to compare to + // known_lhs + ValuesType known_lhs("known_lhs", nrows); + // Create known solution lhs set to all 1's + Kokkos::deep_copy(known_lhs, ONE); + + // Solution to find + ValuesType lhs("lhs", nrows); + + // A*known_lhs generates rhs: rhs is dense, use spmv + ValuesType rhs("rhs", nrows); + + typedef CrsMatrix crsMat_t; + crsMat_t triMtx("triMtx", nrows, nrows, nnz, values, row_map, entries); + KokkosSparse::spmv("N", ONE, triMtx, known_lhs, ZERO, rhs); + + { + KernelHandle kh; + bool is_lower_tri = false; + kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, + is_lower_tri); + + sptrsv_symbolic(&kh, row_map, entries); + Kokkos::fence(); + + sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); + Kokkos::fence(); + + scalar_t sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, + lhs.extent(0)), + ReductionCheck(lhs), sum); + if (sum != lhs.extent(0)) { + std::cout << "Upper Tri Solve FAILURE" << std::endl; + kh.get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); + + Kokkos::deep_copy(lhs, ZERO); + kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHD_RP); + sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); + Kokkos::fence(); + + sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, + lhs.extent(0)), + ReductionCheck(lhs), sum); + if (sum != lhs.extent(0)) { + std::cout << "Upper Tri Solve FAILURE" << std::endl; + kh.get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); + + // FIXME Issues with various integral type combos - algorithm currently + // unavailable and commented out until fixed + /* + Kokkos::deep_copy(lhs, ZERO); + kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHED_TP2); + sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); + Kokkos::fence(); + + sum = 0.0; + Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), ReductionCheck(lhs), sum); if ( sum != lhs.extent(0) ) { std::cout << + "Upper Tri Solve FAILURE" << std::endl; + kh.get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); + */ + + kh.destroy_sptrsv_handle(); + } + + { + Kokkos::deep_copy(lhs, ZERO); + KernelHandle kh; + bool is_lower_tri = false; + kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN, nrows, + is_lower_tri); + auto chain_threshold = 1; + kh.get_sptrsv_handle()->reset_chain_threshold(chain_threshold); + + sptrsv_symbolic(&kh, row_map, entries); + Kokkos::fence(); + + sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); + Kokkos::fence(); + + scalar_t sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, + lhs.extent(0)), + ReductionCheck(lhs), sum); + if (sum != lhs.extent(0)) { + std::cout << "Upper Tri Solve FAILURE" << std::endl; + kh.get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); + + kh.destroy_sptrsv_handle(); + } + +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + if (std::is_same::value && + std::is_same::value && + std::is_same::value) { + Kokkos::deep_copy(lhs, ZERO); + KernelHandle kh; + bool is_lower_tri = false; + kh.create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, + is_lower_tri); + + sptrsv_symbolic(&kh, row_map, entries, values); + Kokkos::fence(); + + sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); + Kokkos::fence(); + + scalar_t sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, + lhs.extent(0)), + ReductionCheck(lhs), sum); + if (sum != lhs.extent(0)) { + std::cout << "Upper Tri Solve FAILURE" << std::endl; + kh.get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); + + kh.destroy_sptrsv_handle(); + } +#endif + +#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) + const scalar_t FIVE = scalar_t(5); + const size_type nnz_sp = 14; + { + // U in csr + row_map_view_t hUrowptr("hUrowptr", nrows + 1); + cols_view_t hUcolind("hUcolind", nnz_sp); + values_view_t hUvalues("hUvalues", nnz_sp); + + // rowptr + hUrowptr(0) = 0; + hUrowptr(1) = 4; + hUrowptr(2) = 8; + hUrowptr(3) = 11; + hUrowptr(4) = 13; + hUrowptr(5) = 14; + + // colind + // first row (first supernode) + hUcolind(0) = 0; + hUcolind(1) = 1; + hUcolind(2) = 2; + hUcolind(3) = 4; + // second row (first supernode) + hUcolind(4) = 0; + hUcolind(5) = 1; + hUcolind(6) = 2; + hUcolind(7) = 4; + // third row (second supernode) + hUcolind(8) = 2; + hUcolind(9) = 3; + hUcolind(10) = 4; + // fourth row (third supernode) + hUcolind(11) = 3; + hUcolind(12) = 4; + // fifth row (fourth supernode) + hUcolind(13) = 4; + + // values + // first row (first supernode) + hUvalues(0) = FIVE; + hUvalues(1) = ONE; + hUvalues(2) = ONE; + hUvalues(3) = ZERO; + // second row (first supernode) + hUvalues(4) = ZERO; + hUvalues(5) = FIVE; + hUvalues(6) = ZERO; + hUvalues(7) = ONE; + // third row (second supernode) + hUvalues(8) = FIVE; + hUvalues(9) = ONE; + hUvalues(10) = ONE; + // fourth row (third supernode) + hUvalues(11) = FIVE; + hUvalues(12) = ONE; + // fifth row (fourth supernode) + hUvalues(13) = FIVE; + + // save U for Supernodal Sptrsv + host_graph_t static_graph(hUcolind, hUrowptr); + U = host_crsmat_t("CrsMatrixU", nrows, hUvalues, static_graph); + + // create handle for Supernodal Sptrsv + bool is_lower_tri = false; + khU.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, + is_lower_tri); + + // X = U*ONES to generate B = A*ONES (on device) + { + RowMapType Urowptr("Urowptr", nrows + 1); + EntriesType Ucolind("Ucolind", nnz_sp); + ValuesType Uvalues("Uvalues", nnz_sp); + + Kokkos::deep_copy(Urowptr, hUrowptr); + Kokkos::deep_copy(Ucolind, hUcolind); + Kokkos::deep_copy(Uvalues, hUvalues); + + crsMat_t mtxU("mtxU", nrows, nrows, nnz_sp, Uvalues, Urowptr, Ucolind); + Kokkos::deep_copy(B, ONE); + KokkosSparse::spmv("N", ONE, mtxU, B, ZERO, X); + } + } + + { + // U in csc (for inverting off-diag) + row_map_view_t hUcolptr("hUcolptr", nrows + 1); + cols_view_t hUrowind("hUrowind", nnz_sp); + values_view_t hUvalues("hUvalues", nnz_sp); + + // colptr + hUcolptr(0) = 0; + hUcolptr(1) = 2; + hUcolptr(2) = 4; + hUcolptr(3) = 7; + hUcolptr(4) = 9; + hUcolptr(5) = 14; + + // colind + // first column (first supernode) + hUrowind(0) = 0; + hUrowind(1) = 1; + // second column (first supernode) + hUrowind(2) = 0; + hUrowind(3) = 1; + // third column (second supernode) + hUrowind(4) = 2; + hUrowind(5) = 0; + hUrowind(6) = 1; + // fourth column (third supernode) + hUrowind(7) = 3; + hUrowind(8) = 2; + // fifth column (fourth supernode) + hUrowind(9) = 4; + hUrowind(10) = 0; + hUrowind(11) = 1; + hUrowind(12) = 2; + hUrowind(13) = 3; + + // values + // first column (first supernode) + hUvalues(0) = FIVE; + hUvalues(1) = ZERO; + // second column (first supernode) + hUvalues(2) = ONE; + hUvalues(3) = FIVE; + // third column (second supernode) + hUvalues(4) = FIVE; + hUvalues(5) = ONE; + hUvalues(6) = ZERO; + // fourth column (third supernode) + hUvalues(7) = FIVE; + hUvalues(8) = ONE; + // fifth column (fourth supernode) + hUvalues(9) = FIVE; + hUvalues(10) = ZERO; + hUvalues(11) = ONE; + hUvalues(12) = ONE; + hUvalues(13) = ONE; + + // store Ut in crsmat + host_graph_t static_graph(hUrowind, hUcolptr); + Ut = host_crsmat_t("CrsMatrixUt", nrows, hUvalues, static_graph); + } +#endif + } + + // Lower tri + { + RowMapType row_map("row_map", nrows + 1); + EntriesType entries("entries", nnz); + ValuesType values("values", nnz); + + auto hrow_map = Kokkos::create_mirror_view(row_map); + auto hentries = Kokkos::create_mirror_view(entries); + auto hvalues = Kokkos::create_mirror_view(values); + + hrow_map(0) = 0; + hrow_map(1) = 1; + hrow_map(2) = 2; + hrow_map(3) = 4; + hrow_map(4) = 6; + hrow_map(5) = 10; + + hentries(0) = 0; + hentries(1) = 1; + hentries(2) = 0; + hentries(3) = 2; + hentries(4) = 2; + hentries(5) = 3; + hentries(6) = 1; + hentries(7) = 2; + hentries(8) = 3; + hentries(9) = 4; + + for (size_type i = 0; i < nnz; ++i) { + hvalues(i) = ONE; + } + + Kokkos::deep_copy(row_map, hrow_map); + Kokkos::deep_copy(entries, hentries); + Kokkos::deep_copy(values, hvalues); + + // Create known_lhs, generate rhs, then solve for lhs to compare to + // known_lhs + ValuesType known_lhs("known_lhs", nrows); + // Create known solution lhs set to all 1's + Kokkos::deep_copy(known_lhs, ONE); + + // Solution to find + ValuesType lhs("lhs", nrows); + + // A*known_lhs generates rhs: rhs is dense, use spmv + ValuesType rhs("rhs", nrows); + + typedef CrsMatrix crsMat_t; + crsMat_t triMtx("triMtx", nrows, nrows, nnz, values, row_map, entries); + KokkosSparse::spmv("N", ONE, triMtx, known_lhs, ZERO, rhs); + + { + KernelHandle kh; + bool is_lower_tri = true; + kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, + is_lower_tri); + + sptrsv_symbolic(&kh, row_map, entries); + Kokkos::fence(); + + sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); + Kokkos::fence(); + + scalar_t sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, + lhs.extent(0)), + ReductionCheck(lhs), sum); + if (sum != lhs.extent(0)) { + std::cout << "Lower Tri Solve FAILURE" << std::endl; + kh.get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); + + Kokkos::deep_copy(lhs, ZERO); + kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHD_RP); + sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); + Kokkos::fence(); + + sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, + lhs.extent(0)), + ReductionCheck(lhs), sum); + if (sum != lhs.extent(0)) { + std::cout << "Lower Tri Solve FAILURE" << std::endl; + kh.get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); + + // FIXME Issues with various integral type combos - algorithm currently + // unavailable and commented out until fixed + /* + Kokkos::deep_copy(lhs, ZERO); + kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHED_TP2); + sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); + Kokkos::fence(); + + sum = 0.0; + Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), ReductionCheck(lhs), sum); if ( sum != lhs.extent(0) ) { std::cout << + "Lower Tri Solve FAILURE" << std::endl; + kh.get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); + */ + + kh.destroy_sptrsv_handle(); + } + + { + Kokkos::deep_copy(lhs, ZERO); + KernelHandle kh; + bool is_lower_tri = true; + kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN, nrows, + is_lower_tri); + auto chain_threshold = 1; + kh.get_sptrsv_handle()->reset_chain_threshold(chain_threshold); + + sptrsv_symbolic(&kh, row_map, entries); + Kokkos::fence(); + + sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); + Kokkos::fence(); + + scalar_t sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, + lhs.extent(0)), + ReductionCheck(lhs), sum); + if (sum != lhs.extent(0)) { + std::cout << "Lower Tri Solve FAILURE" << std::endl; + kh.get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); + + kh.destroy_sptrsv_handle(); + } + +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + if (std::is_same::value && + std::is_same::value && + std::is_same::value) { + Kokkos::deep_copy(lhs, ZERO); + KernelHandle kh; + bool is_lower_tri = true; + kh.create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, + is_lower_tri); + + sptrsv_symbolic(&kh, row_map, entries, values); + Kokkos::fence(); + + sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); + Kokkos::fence(); + + scalar_t sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, + lhs.extent(0)), + ReductionCheck(lhs), sum); + if (sum != lhs.extent(0)) { + std::cout << "Lower Tri Solve FAILURE" << std::endl; + kh.get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); + + kh.destroy_sptrsv_handle(); + } +#endif + +#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) + { + // L in csc + const scalar_t TWO = scalar_t(2); + const scalar_t FIVE = scalar_t(5); + const size_type nnz_sp = 14; + + row_map_view_t hLcolptr("hUcolptr", nrows + 1); + cols_view_t hLrowind("hUrowind", nnz_sp); + values_view_t hLvalues("hUvalues", nnz_sp); + + // colptr + hLcolptr(0) = 0; + hLcolptr(1) = 4; + hLcolptr(2) = 8; + hLcolptr(3) = 11; + hLcolptr(4) = 13; + hLcolptr(5) = 14; + + // rowind + // first column (first supernode) + hLrowind(0) = 0; + hLrowind(1) = 1; + hLrowind(2) = 2; + hLrowind(3) = 4; + // second column (first supernode) + hLrowind(4) = 0; + hLrowind(5) = 1; + hLrowind(6) = 2; + hLrowind(7) = 4; + // third column (second supernode) + hLrowind(8) = 2; + hLrowind(9) = 3; + hLrowind(10) = 4; + // fourth column (third supernode) + hLrowind(11) = 3; + hLrowind(12) = 4; + // fifth column (fourth supernode) + hLrowind(13) = 4; + + // values + // first column (first supernode) + hLvalues(0) = FIVE; + hLvalues(1) = TWO; + hLvalues(2) = ONE; + hLvalues(3) = ZERO; + // second column (first supernode) + hLvalues(4) = ZERO; + hLvalues(5) = FIVE; + hLvalues(6) = ZERO; + hLvalues(7) = ONE; + // third column (second supernode) + hLvalues(8) = FIVE; + hLvalues(9) = ONE; + hLvalues(10) = ONE; + // fourth column (third supernode) + hLvalues(11) = FIVE; + hLvalues(12) = ONE; + // fifth column (fourth supernode) + hLvalues(13) = FIVE; + + // store Lt in crsmat + host_graph_t static_graph(hLrowind, hLcolptr); + L = host_crsmat_t("CrsMatrixL", nrows, hLvalues, static_graph); + + bool is_lower_tri = true; + khL.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, + is_lower_tri); + + // generate B = A*ONES = L*(U*ONES), where X = U*ONES (on device) + { + RowMapType Lcolptr("Lcolptr", nrows + 1); + EntriesType Lrowind("Lrowind", nnz_sp); + ValuesType Lvalues("Lvalues", nnz_sp); + + Kokkos::deep_copy(Lcolptr, hLcolptr); + Kokkos::deep_copy(Lrowind, hLrowind); + Kokkos::deep_copy(Lvalues, hLvalues); + + crsMat_t mtxL("mtxL", nrows, nrows, nnz_sp, Lvalues, Lcolptr, Lrowind); + KokkosSparse::spmv("T", ONE, mtxL, X, ZERO, B); + } + } + + { + // unit-test for supernode SpTrsv (default) + // > set up supernodes (block size = one) + size_type nsupers = 4; + Kokkos::View supercols("supercols", + 1 + nsupers); + supercols(0) = 0; + supercols(1) = 2; // two columns + supercols(2) = 3; // one column + supercols(3) = 4; // one column + supercols(4) = 5; // one column + int *etree = NULL; // we generate graph internally + + // invert diagonal blocks + bool invert_diag = true; + khL.set_sptrsv_invert_diagonal(invert_diag); + khU.set_sptrsv_invert_diagonal(invert_diag); + + // > symbolic (on host) + sptrsv_supernodal_symbolic(nsupers, supercols.data(), etree, L.graph, + &khL, U.graph, &khU); + // > numeric (on host) + sptrsv_compute(&khL, L); + sptrsv_compute(&khU, U); + Kokkos::fence(); + + // > solve + ValuesType b("b", nrows); + Kokkos::deep_copy(b, B); + Kokkos::deep_copy(X, ZERO); + sptrsv_solve(&khL, &khU, X, b); + Kokkos::fence(); + + // > check + scalar_t sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, X.extent(0)), + ReductionCheck(X), sum); + if (sum != lhs.extent(0)) { + std::cout << "Supernode Tri Solve FAILURE : " << sum << " vs." + << lhs.extent(0) << std::endl; + khL.get_sptrsv_handle()->print_algorithm(); + } else { + std::cout << "Supernode Tri Solve SUCCESS" << std::endl; + khL.get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE(sum == scalar_t(X.extent(0))); + + khL.destroy_sptrsv_handle(); + khU.destroy_sptrsv_handle(); + } + + { + // unit-test for supernode SpTrsv (running TRMM on device for compute) + // > set up supernodes + size_type nsupers = 4; + Kokkos::View supercols("supercols", + 1 + nsupers); + supercols(0) = 0; + supercols(1) = 2; // two columns + supercols(2) = 3; // one column + supercols(3) = 4; // one column + supercols(4) = 5; // one column + int *etree = NULL; // we generate tree internally + + // > create handles + KernelHandle khLd; + KernelHandle khUd; + khLd.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, true); + khUd.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, false); + + // > invert diagonal blocks + bool invert_diag = true; + khLd.set_sptrsv_invert_diagonal(invert_diag); + khUd.set_sptrsv_invert_diagonal(invert_diag); + + // > invert off-diagonal blocks + bool invert_offdiag = true; + khUd.set_sptrsv_column_major(true); + khLd.set_sptrsv_invert_offdiagonal(invert_offdiag); + khUd.set_sptrsv_invert_offdiagonal(invert_offdiag); + + // > forcing sptrsv compute to perform TRMM on device + khLd.set_sptrsv_diag_supernode_sizes(1, 1); + khUd.set_sptrsv_diag_supernode_sizes(1, 1); + + // > symbolic (on host) + sptrsv_supernodal_symbolic(nsupers, supercols.data(), etree, L.graph, + &khLd, Ut.graph, &khUd); + // > numeric (on host) + sptrsv_compute(&khLd, L); + sptrsv_compute(&khUd, Ut); + Kokkos::fence(); + + // > solve + ValuesType b("b", nrows); + Kokkos::deep_copy(b, B); + Kokkos::deep_copy(X, ZERO); + sptrsv_solve(&khLd, &khUd, X, b); + Kokkos::fence(); + + // > check + scalar_t sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, X.extent(0)), + ReductionCheck(X), sum); + if (sum != lhs.extent(0)) { + std::cout << "Supernode Tri Solve FAILURE : " << sum << " vs." + << lhs.extent(0) << std::endl; + khLd.get_sptrsv_handle()->print_algorithm(); + } else { + std::cout << "Supernode Tri Solve SUCCESS" << std::endl; + khLd.get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE(sum == scalar_t(X.extent(0))); + + khLd.destroy_sptrsv_handle(); + khUd.destroy_sptrsv_handle(); + } +#endif + } +} + +template +void run_test_sptrsv_streams(int test_algo, int nstreams) { + using RowMapType = Kokkos::View; + using EntriesType = Kokkos::View; + using ValuesType = Kokkos::View; + using RowMapType_hostmirror = typename RowMapType::HostMirror; + using EntriesType_hostmirror = typename EntriesType::HostMirror; + using ValuesType_hostmirror = typename ValuesType::HostMirror; + using execution_space = typename device::execution_space; + using memory_space = typename device::memory_space; + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< + size_type, lno_t, scalar_t, execution_space, memory_space, memory_space>; + using crsMat_t = CrsMatrix; + + // Workaround for OpenMP: skip tests if OMP_NUM_THREADS < nstreams because of not enough resource to partition + bool run_streams_test = true; +#ifdef KOKKOS_ENABLE_OPENMP + if (std::is_same::value) { + const char *env_omp_num_threads = std::getenv("OMP_NUM_THREADS"); + if (env_omp_num_threads != nullptr) { + int num_threads = std::atoi(env_omp_num_threads); + if (num_threads < nstreams) { + run_streams_test = false; + std::cout << " Skip stream test: omp_num_threads = " << num_threads << std::endl; + } + } + } +#endif + if (!run_streams_test) + return; + + scalar_t ZERO = scalar_t(0); + scalar_t ONE = scalar_t(1); + + const size_type nrows = 5; + const size_type nnz = 10; + + std::vector instances; + if (nstreams == 2) + instances = Kokkos::Experimental::partition_space(execution_space(), 1, 1); + else if (nstreams == 3) + instances = + Kokkos::Experimental::partition_space(execution_space(), 1, 1, 1); + else // (nstreams == 4) + instances = + Kokkos::Experimental::partition_space(execution_space(), 1, 1, 1, 1); + + std::vector kh_v(nstreams); + std::vector kh_ptr_v(nstreams); + std::vector row_map_v(nstreams); + std::vector entries_v(nstreams); + std::vector values_v(nstreams); + std::vector rhs_v(nstreams); + std::vector lhs_v(nstreams); + + RowMapType_hostmirror hrow_map("hrow_map", nrows + 1); + EntriesType_hostmirror hentries("hentries", nnz); + ValuesType_hostmirror hvalues("hvalues", nnz); + + // Upper tri + { + hrow_map(0) = 0; + hrow_map(1) = 2; + hrow_map(2) = 4; + hrow_map(3) = 7; + hrow_map(4) = 9; + hrow_map(5) = 10; + + hentries(0) = 0; + hentries(1) = 2; + hentries(2) = 1; + hentries(3) = 4; + hentries(4) = 2; + hentries(5) = 3; + hentries(6) = 4; + hentries(7) = 3; + hentries(8) = 4; + hentries(9) = 4; + + for (size_type i = 0; i < nnz; ++i) { + hvalues(i) = ONE; + } + + for (int i = 0; i < nstreams; i++) { + // Allocate U + row_map_v[i] = RowMapType("row_map", nrows + 1); + entries_v[i] = EntriesType("entries", nnz); + values_v[i] = ValuesType("values", nnz); + + // Copy from host to device + Kokkos::deep_copy(row_map_v[i], hrow_map); + Kokkos::deep_copy(entries_v[i], hentries); + Kokkos::deep_copy(values_v[i], hvalues); + + // Create known_lhs, generate rhs, then solve for lhs to compare to + // known_lhs + ValuesType known_lhs("known_lhs", nrows); + // Create known solution lhs set to all 1's + Kokkos::deep_copy(known_lhs, ONE); + + // Solution to find + lhs_v[i] = ValuesType("lhs", nrows); + + // A*known_lhs generates rhs: rhs is dense, use spmv + rhs_v[i] = ValuesType("rhs", nrows); + + crsMat_t triMtx("triMtx", nrows, nrows, nnz, values_v[i], row_map_v[i], + entries_v[i]); + + KokkosSparse::spmv("N", ONE, triMtx, known_lhs, ZERO, rhs_v[i]); + Kokkos::fence(); + + // Create handle + kh_v[i] = KernelHandle(); + bool is_lower_tri = false; + if (test_algo == 0) + kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_RP, nrows, + is_lower_tri); + else if (test_algo == 1) + kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, + is_lower_tri); + else + kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, + is_lower_tri); + + kh_ptr_v[i] = &kh_v[i]; + + // Symbolic phase + sptrsv_symbolic(kh_ptr_v[i], row_map_v[i], entries_v[i], values_v[i]); + Kokkos::fence(); + } // Done handle creation and sptrsv_symbolic on all streams + + // Solve phase + sptrsv_solve_streams(instances, kh_ptr_v, row_map_v, entries_v, values_v, + rhs_v, lhs_v); + + for (int i = 0; i < nstreams; i++) instances[i].fence(); + + // Checking + for (int i = 0; i < nstreams; i++) { + scalar_t sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy( + 0, lhs_v[i].extent(0)), + ReductionCheck(lhs_v[i]), sum); + if (sum != lhs_v[i].extent(0)) { + std::cout << "Upper Tri Solve FAILURE on stream " << i << std::endl; + kh_v[i].get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE(sum == scalar_t(lhs_v[i].extent(0))); + + kh_v[i].destroy_sptrsv_handle(); + } + } + + // Lower tri + { + hrow_map(0) = 0; + hrow_map(1) = 1; + hrow_map(2) = 2; + hrow_map(3) = 4; + hrow_map(4) = 6; + hrow_map(5) = 10; + + hentries(0) = 0; + hentries(1) = 1; + hentries(2) = 0; + hentries(3) = 2; + hentries(4) = 2; + hentries(5) = 3; + hentries(6) = 1; + hentries(7) = 2; + hentries(8) = 3; + hentries(9) = 4; + + for (size_type i = 0; i < nnz; ++i) { + hvalues(i) = ONE; + } + + for (int i = 0; i < nstreams; i++) { + // Allocate L + row_map_v[i] = RowMapType("row_map", nrows + 1); + entries_v[i] = EntriesType("entries", nnz); + values_v[i] = ValuesType("values", nnz); + + // Copy from host to device + Kokkos::deep_copy(row_map_v[i], hrow_map); + Kokkos::deep_copy(entries_v[i], hentries); + Kokkos::deep_copy(values_v[i], hvalues); + + // Create known_lhs, generate rhs, then solve for lhs to compare to + // known_lhs + ValuesType known_lhs("known_lhs", nrows); + // Create known solution lhs set to all 1's + Kokkos::deep_copy(known_lhs, ONE); + + // Solution to find + lhs_v[i] = ValuesType("lhs", nrows); + + // A*known_lhs generates rhs: rhs is dense, use spmv + rhs_v[i] = ValuesType("rhs", nrows); + + crsMat_t triMtx("triMtx", nrows, nrows, nnz, values_v[i], row_map_v[i], + entries_v[i]); + + KokkosSparse::spmv("N", ONE, triMtx, known_lhs, ZERO, rhs_v[i]); + Kokkos::fence(); + + // Create handle + kh_v[i] = KernelHandle(); + bool is_lower_tri = true; + if (test_algo == 0) + kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_RP, nrows, + is_lower_tri); + else if (test_algo == 1) + kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, + is_lower_tri); + else + kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, + is_lower_tri); + + kh_ptr_v[i] = &kh_v[i]; + + // Symbolic phase + sptrsv_symbolic(kh_ptr_v[i], row_map_v[i], entries_v[i], values_v[i]); + Kokkos::fence(); + } // Done handle creation and sptrsv_symbolic on all streams + + // Solve phase + sptrsv_solve_streams(instances, kh_ptr_v, row_map_v, entries_v, values_v, + rhs_v, lhs_v); + + for (int i = 0; i < nstreams; i++) instances[i].fence(); + + // Checking + for (int i = 0; i < nstreams; i++) { + scalar_t sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy( + 0, lhs_v[i].extent(0)), + ReductionCheck(lhs_v[i]), sum); + if (sum != lhs_v[i].extent(0)) { + std::cout << "Lower Tri Solve FAILURE on stream " << i << std::endl; + kh_v[i].get_sptrsv_handle()->print_algorithm(); + } + EXPECT_TRUE(sum == scalar_t(lhs_v[i].extent(0))); + + kh_v[i].destroy_sptrsv_handle(); + } + } +} + +} // namespace Test + +template +void test_sptrsv() { + Test::run_test_sptrsv(); + // Test::run_test_sptrsv_mtx(); +} + +template +void test_sptrsv_streams() { + std::cout << "SPTRSVAlgorithm::SEQLVLSCHD_RP: 2 streams" << std::endl; + Test::run_test_sptrsv_streams(0, 2); + + std::cout << "SPTRSVAlgorithm::SEQLVLSCHD_RP: 3 streams" << std::endl; + Test::run_test_sptrsv_streams(0, 3); + + std::cout << "SPTRSVAlgorithm::SEQLVLSCHD_RP: 4 streams" << std::endl; + Test::run_test_sptrsv_streams(0, 4); + + std::cout << "SPTRSVAlgorithm::SEQLVLSCHD_TP1: 2 streams" << std::endl; + Test::run_test_sptrsv_streams(1, 2); + + std::cout << "SPTRSVAlgorithm::SEQLVLSCHD_TP1: 3 streams" << std::endl; + Test::run_test_sptrsv_streams(1, 3); + + std::cout << "SPTRSVAlgorithm::SEQLVLSCHD_TP1: 4 streams" << std::endl; + Test::run_test_sptrsv_streams(1, 4); + +#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) + if (std::is_same::value && + std::is_same::value) { + std::cout << "SPTRSVAlgorithm::SPTRSV_CUSPARSE: 2 streams" << std::endl; + Test::run_test_sptrsv_streams(2, 2); + + std::cout << "SPTRSVAlgorithm::SPTRSV_CUSPARSE: 3 streams" << std::endl; + Test::run_test_sptrsv_streams(2, 3); + + std::cout << "SPTRSVAlgorithm::SPTRSV_CUSPARSE: 4 streams" << std::endl; + Test::run_test_sptrsv_streams(2, 4); + } +#endif +} + +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, \ + sparse##_##sptrsv##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_sptrsv(); \ + test_sptrsv_streams(); \ + } + +#include + +#undef KOKKOSKERNELS_EXECUTE_TEST From e5e424a4dcb344f06a463670858f6a64f28fca38 Mon Sep 17 00:00:00 2001 From: "Vinh Quang Dang (-EXP)" Date: Wed, 30 Aug 2023 03:38:36 -0600 Subject: [PATCH 09/12] Delete redundant files --- sparse/unit_test/Test_Sparse_spiluk.hpp_ | 517 -------- sparse/unit_test/Test_Sparse_sptrsv.hpp_ | 1359 ---------------------- 2 files changed, 1876 deletions(-) delete mode 100644 sparse/unit_test/Test_Sparse_spiluk.hpp_ delete mode 100644 sparse/unit_test/Test_Sparse_sptrsv.hpp_ diff --git a/sparse/unit_test/Test_Sparse_spiluk.hpp_ b/sparse/unit_test/Test_Sparse_spiluk.hpp_ deleted file mode 100644 index 3115bc9649..0000000000 --- a/sparse/unit_test/Test_Sparse_spiluk.hpp_ +++ /dev/null @@ -1,517 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#include - -#include -#include - -#include "KokkosSparse_Utils.hpp" -#include "KokkosSparse_CrsMatrix.hpp" -#include -#include "KokkosBlas1_nrm2.hpp" -#include "KokkosSparse_spmv.hpp" -#include "KokkosSparse_spiluk.hpp" - -#include - -using namespace KokkosSparse; -using namespace KokkosSparse::Experimental; -using namespace KokkosKernels; -using namespace KokkosKernels::Experimental; - -// #ifndef kokkos_complex_double -// #define kokkos_complex_double Kokkos::complex -// #define kokkos_complex_float Kokkos::complex -// #endif - -typedef Kokkos::complex kokkos_complex_double; -typedef Kokkos::complex kokkos_complex_float; - -namespace Test { - -template -void run_test_spiluk() { - typedef Kokkos::View RowMapType; - typedef Kokkos::View EntriesType; - typedef Kokkos::View ValuesType; - typedef Kokkos::ArithTraits AT; - - const size_type nrows = 9; - const size_type nnz = 21; - - RowMapType row_map("row_map", nrows + 1); - EntriesType entries("entries", nnz); - ValuesType values("values", nnz); - - auto hrow_map = Kokkos::create_mirror_view(row_map); - auto hentries = Kokkos::create_mirror_view(entries); - auto hvalues = Kokkos::create_mirror_view(values); - - scalar_t ZERO = scalar_t(0); - scalar_t ONE = scalar_t(1); - scalar_t MONE = scalar_t(-1); - - hrow_map(0) = 0; - hrow_map(1) = 3; - hrow_map(2) = 5; - hrow_map(3) = 6; - hrow_map(4) = 9; - hrow_map(5) = 11; - hrow_map(6) = 13; - hrow_map(7) = 15; - hrow_map(8) = 18; - hrow_map(9) = nnz; - - hentries(0) = 0; - hentries(1) = 2; - hentries(2) = 5; - hentries(3) = 1; - hentries(4) = 6; - hentries(5) = 2; - hentries(6) = 0; - hentries(7) = 3; - hentries(8) = 4; - hentries(9) = 0; - hentries(10) = 4; - hentries(11) = 1; - hentries(12) = 5; - hentries(13) = 2; - hentries(14) = 6; - hentries(15) = 3; - hentries(16) = 4; - hentries(17) = 7; - hentries(18) = 3; - hentries(19) = 4; - hentries(20) = 8; - - hvalues(0) = 10; - hvalues(1) = 0.3; - hvalues(2) = 0.6; - hvalues(3) = 11; - hvalues(4) = 0.7; - hvalues(5) = 12; - hvalues(6) = 5; - hvalues(7) = 13; - hvalues(8) = 1; - hvalues(9) = 4; - hvalues(10) = 14; - hvalues(11) = 3; - hvalues(12) = 15; - hvalues(13) = 7; - hvalues(14) = 16; - hvalues(15) = 6; - hvalues(16) = 5; - hvalues(17) = 17; - hvalues(18) = 2; - hvalues(19) = 2.5; - hvalues(20) = 18; - - Kokkos::deep_copy(row_map, hrow_map); - Kokkos::deep_copy(entries, hentries); - Kokkos::deep_copy(values, hvalues); - - typedef KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space> - KernelHandle; - - KernelHandle kh; - - // SPILUKAlgorithm::SEQLVLSCHD_RP - { - kh.create_spiluk_handle(SPILUKAlgorithm::SEQLVLSCHD_RP, nrows, 4 * nrows, - 4 * nrows); - - auto spiluk_handle = kh.get_spiluk_handle(); - - // Allocate L and U as outputs - RowMapType L_row_map("L_row_map", nrows + 1); - EntriesType L_entries("L_entries", spiluk_handle->get_nnzL()); - ValuesType L_values("L_values", spiluk_handle->get_nnzL()); - RowMapType U_row_map("U_row_map", nrows + 1); - EntriesType U_entries("U_entries", spiluk_handle->get_nnzU()); - ValuesType U_values("U_values", spiluk_handle->get_nnzU()); - - typename KernelHandle::const_nnz_lno_t fill_lev = 2; - - spiluk_symbolic(&kh, fill_lev, row_map, entries, L_row_map, L_entries, - U_row_map, U_entries); - - Kokkos::fence(); - - Kokkos::resize(L_entries, spiluk_handle->get_nnzL()); - Kokkos::resize(L_values, spiluk_handle->get_nnzL()); - Kokkos::resize(U_entries, spiluk_handle->get_nnzU()); - Kokkos::resize(U_values, spiluk_handle->get_nnzU()); - - spiluk_handle->print_algorithm(); - spiluk_numeric(&kh, fill_lev, row_map, entries, values, L_row_map, - L_entries, L_values, U_row_map, U_entries, U_values); - - Kokkos::fence(); - - // Checking - typedef CrsMatrix crsMat_t; - crsMat_t A("A_Mtx", nrows, nrows, nnz, values, row_map, entries); - crsMat_t L("L_Mtx", nrows, nrows, spiluk_handle->get_nnzL(), L_values, - L_row_map, L_entries); - crsMat_t U("U_Mtx", nrows, nrows, spiluk_handle->get_nnzU(), U_values, - U_row_map, U_entries); - - // Create a reference view e set to all 1's - ValuesType e_one("e_one", nrows); - Kokkos::deep_copy(e_one, 1.0); - - // Create two views for spmv results - ValuesType bb("bb", nrows); - ValuesType bb_tmp("bb_tmp", nrows); - - // Compute norm2(L*U*e_one - A*e_one)/norm2(A*e_one) - KokkosSparse::spmv("N", ONE, A, e_one, ZERO, bb); - - typename AT::mag_type bb_nrm = KokkosBlas::nrm2(bb); - - KokkosSparse::spmv("N", ONE, U, e_one, ZERO, bb_tmp); - KokkosSparse::spmv("N", ONE, L, bb_tmp, MONE, bb); - - typename AT::mag_type diff_nrm = KokkosBlas::nrm2(bb); - - EXPECT_TRUE((diff_nrm / bb_nrm) < 1e-4); - - kh.destroy_spiluk_handle(); - } - - // SPILUKAlgorithm::SEQLVLSCHD_TP1 - { - kh.create_spiluk_handle(SPILUKAlgorithm::SEQLVLSCHD_TP1, nrows, 4 * nrows, - 4 * nrows); - - auto spiluk_handle = kh.get_spiluk_handle(); - - // Allocate L and U as outputs - RowMapType L_row_map("L_row_map", nrows + 1); - EntriesType L_entries("L_entries", spiluk_handle->get_nnzL()); - ValuesType L_values("L_values", spiluk_handle->get_nnzL()); - RowMapType U_row_map("U_row_map", nrows + 1); - EntriesType U_entries("U_entries", spiluk_handle->get_nnzU()); - ValuesType U_values("U_values", spiluk_handle->get_nnzU()); - - typename KernelHandle::const_nnz_lno_t fill_lev = 2; - - spiluk_symbolic(&kh, fill_lev, row_map, entries, L_row_map, L_entries, - U_row_map, U_entries); - - Kokkos::fence(); - - Kokkos::resize(L_entries, spiluk_handle->get_nnzL()); - Kokkos::resize(L_values, spiluk_handle->get_nnzL()); - Kokkos::resize(U_entries, spiluk_handle->get_nnzU()); - Kokkos::resize(U_values, spiluk_handle->get_nnzU()); - - spiluk_handle->print_algorithm(); - spiluk_numeric(&kh, fill_lev, row_map, entries, values, L_row_map, - L_entries, L_values, U_row_map, U_entries, U_values); - - Kokkos::fence(); - - // Checking - typedef CrsMatrix crsMat_t; - crsMat_t A("A_Mtx", nrows, nrows, nnz, values, row_map, entries); - crsMat_t L("L_Mtx", nrows, nrows, spiluk_handle->get_nnzL(), L_values, - L_row_map, L_entries); - crsMat_t U("U_Mtx", nrows, nrows, spiluk_handle->get_nnzU(), U_values, - U_row_map, U_entries); - - // Create a reference view e set to all 1's - ValuesType e_one("e_one", nrows); - Kokkos::deep_copy(e_one, 1.0); - - // Create two views for spmv results - ValuesType bb("bb", nrows); - ValuesType bb_tmp("bb_tmp", nrows); - - // Compute norm2(L*U*e_one - A*e_one)/norm2(A*e_one) - KokkosSparse::spmv("N", ONE, A, e_one, ZERO, bb); - - typename AT::mag_type bb_nrm = KokkosBlas::nrm2(bb); - - KokkosSparse::spmv("N", ONE, U, e_one, ZERO, bb_tmp); - KokkosSparse::spmv("N", ONE, L, bb_tmp, MONE, bb); - - typename AT::mag_type diff_nrm = KokkosBlas::nrm2(bb); - - EXPECT_TRUE((diff_nrm / bb_nrm) < 1e-4); - - kh.destroy_spiluk_handle(); - } -} - -template -void run_test_spiluk_streams(int test_algo, int nstreams) { - using RowMapType = Kokkos::View; - using EntriesType = Kokkos::View; - using ValuesType = Kokkos::View; - using RowMapType_hostmirror = typename RowMapType::HostMirror; - using EntriesType_hostmirror = typename EntriesType::HostMirror; - using ValuesType_hostmirror = typename ValuesType::HostMirror; - using execution_space = typename device::execution_space; - using memory_space = typename device::memory_space; - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, execution_space, memory_space, memory_space>; - using crsMat_t = CrsMatrix; - using AT = Kokkos::ArithTraits; - - // Workaround for OpenMP: skip tests if OMP_NUM_THREADS < nstreams because of not enough resource to partition - bool run_streams_test = true; -#ifdef KOKKOS_ENABLE_OPENMP - if (std::is_same::value) { - const char *env_omp_num_threads = std::getenv("OMP_NUM_THREADS"); - if (env_omp_num_threads != nullptr) { - int num_threads = std::atoi(env_omp_num_threads); - if (num_threads < nstreams) { - run_streams_test = false; - std::cout << " Skip stream test: omp_num_threads = " << num_threads << std::endl; - } - } - } -#endif - if (!run_streams_test) - return; - - const size_type nrows = 9; - const size_type nnz = 21; - - std::vector instances; - if (nstreams == 2) - instances = Kokkos::Experimental::partition_space(execution_space(), 1, 1); - else if (nstreams == 3) - instances = - Kokkos::Experimental::partition_space(execution_space(), 1, 1, 1); - else - instances = - Kokkos::Experimental::partition_space(execution_space(), 1, 1, 1, 1); - - std::vector kh_v(nstreams); - std::vector kh_ptr_v(nstreams); - std::vector A_row_map_v(nstreams); - std::vector A_entries_v(nstreams); - std::vector A_values_v(nstreams); - std::vector L_row_map_v(nstreams); - std::vector L_entries_v(nstreams); - std::vector L_values_v(nstreams); - std::vector U_row_map_v(nstreams); - std::vector U_entries_v(nstreams); - std::vector U_values_v(nstreams); - - RowMapType_hostmirror hrow_map("hrow_map", nrows + 1); - EntriesType_hostmirror hentries("hentries", nnz); - ValuesType_hostmirror hvalues("hvalues", nnz); - - scalar_t ZERO = scalar_t(0); - scalar_t ONE = scalar_t(1); - scalar_t MONE = scalar_t(-1); - - hrow_map(0) = 0; - hrow_map(1) = 3; - hrow_map(2) = 5; - hrow_map(3) = 6; - hrow_map(4) = 9; - hrow_map(5) = 11; - hrow_map(6) = 13; - hrow_map(7) = 15; - hrow_map(8) = 18; - hrow_map(9) = nnz; - - hentries(0) = 0; - hentries(1) = 2; - hentries(2) = 5; - hentries(3) = 1; - hentries(4) = 6; - hentries(5) = 2; - hentries(6) = 0; - hentries(7) = 3; - hentries(8) = 4; - hentries(9) = 0; - hentries(10) = 4; - hentries(11) = 1; - hentries(12) = 5; - hentries(13) = 2; - hentries(14) = 6; - hentries(15) = 3; - hentries(16) = 4; - hentries(17) = 7; - hentries(18) = 3; - hentries(19) = 4; - hentries(20) = 8; - - hvalues(0) = 10; - hvalues(1) = 0.3; - hvalues(2) = 0.6; - hvalues(3) = 11; - hvalues(4) = 0.7; - hvalues(5) = 12; - hvalues(6) = 5; - hvalues(7) = 13; - hvalues(8) = 1; - hvalues(9) = 4; - hvalues(10) = 14; - hvalues(11) = 3; - hvalues(12) = 15; - hvalues(13) = 7; - hvalues(14) = 16; - hvalues(15) = 6; - hvalues(16) = 5; - hvalues(17) = 17; - hvalues(18) = 2; - hvalues(19) = 2.5; - hvalues(20) = 18; - - typename KernelHandle::const_nnz_lno_t fill_lev = 2; - - for (int i = 0; i < nstreams; i++) { - // Allocate A as input - A_row_map_v[i] = RowMapType("A_row_map", nrows + 1); - A_entries_v[i] = EntriesType("A_entries", nnz); - A_values_v[i] = ValuesType("A_values", nnz); - - // Copy from host to device - Kokkos::deep_copy(A_row_map_v[i], hrow_map); - Kokkos::deep_copy(A_entries_v[i], hentries); - Kokkos::deep_copy(A_values_v[i], hvalues); - - // Create handle - kh_v[i] = KernelHandle(); - if (test_algo == 0) - kh_v[i].create_spiluk_handle(SPILUKAlgorithm::SEQLVLSCHD_RP, nrows, - 4 * nrows, 4 * nrows); - else if (test_algo == 1) - kh_v[i].create_spiluk_handle(SPILUKAlgorithm::SEQLVLSCHD_TP1, nrows, - 4 * nrows, 4 * nrows); - kh_ptr_v[i] = &kh_v[i]; - - auto spiluk_handle = kh_v[i].get_spiluk_handle(); - std::cout << " Stream " << i << ": "; - spiluk_handle->print_algorithm(); - - // Allocate L and U as outputs - L_row_map_v[i] = RowMapType("L_row_map", nrows + 1); - L_entries_v[i] = EntriesType("L_entries", spiluk_handle->get_nnzL()); - L_values_v[i] = ValuesType("L_values", spiluk_handle->get_nnzL()); - U_row_map_v[i] = RowMapType("U_row_map", nrows + 1); - U_entries_v[i] = EntriesType("U_entries", spiluk_handle->get_nnzU()); - U_values_v[i] = ValuesType("U_values", spiluk_handle->get_nnzU()); - - // Symbolic phase - spiluk_symbolic(kh_ptr_v[i], fill_lev, A_row_map_v[i], A_entries_v[i], - L_row_map_v[i], L_entries_v[i], U_row_map_v[i], - U_entries_v[i], nstreams); - - Kokkos::fence(); - - Kokkos::resize(L_entries_v[i], spiluk_handle->get_nnzL()); - Kokkos::resize(L_values_v[i], spiluk_handle->get_nnzL()); - Kokkos::resize(U_entries_v[i], spiluk_handle->get_nnzU()); - Kokkos::resize(U_values_v[i], spiluk_handle->get_nnzU()); - } // Done handle creation and spiluk_symbolic on all streams - - // Numeric phase - spiluk_numeric_streams(instances, kh_ptr_v, fill_lev, A_row_map_v, - A_entries_v, A_values_v, L_row_map_v, L_entries_v, - L_values_v, U_row_map_v, U_entries_v, U_values_v); - - for (int i = 0; i < nstreams; i++) instances[i].fence(); - - // Checking - for (int i = 0; i < nstreams; i++) { - auto spiluk_handle = kh_v[i].get_spiluk_handle(); - crsMat_t A("A_Mtx", nrows, nrows, nnz, A_values_v[i], A_row_map_v[i], - A_entries_v[i]); - crsMat_t L("L_Mtx", nrows, nrows, spiluk_handle->get_nnzL(), L_values_v[i], - L_row_map_v[i], L_entries_v[i]); - crsMat_t U("U_Mtx", nrows, nrows, spiluk_handle->get_nnzU(), U_values_v[i], - U_row_map_v[i], U_entries_v[i]); - - // Create a reference view e set to all 1's - ValuesType e_one("e_one", nrows); - Kokkos::deep_copy(e_one, 1.0); - - // Create two views for spmv results - ValuesType bb("bb", nrows); - ValuesType bb_tmp("bb_tmp", nrows); - - // Compute norm2(L*U*e_one - A*e_one)/norm2(A*e_one) - KokkosSparse::spmv("N", ONE, A, e_one, ZERO, bb); - - typename AT::mag_type bb_nrm = KokkosBlas::nrm2(bb); - - KokkosSparse::spmv("N", ONE, U, e_one, ZERO, bb_tmp); - KokkosSparse::spmv("N", ONE, L, bb_tmp, MONE, bb); - - typename AT::mag_type diff_nrm = KokkosBlas::nrm2(bb); - - EXPECT_TRUE((diff_nrm / bb_nrm) < 1e-4); - - kh_v[i].destroy_spiluk_handle(); - } -} - -} // namespace Test - -template -void test_spiluk() { - Test::run_test_spiluk(); -} - -template -void test_spiluk_streams() { - std::cout << "SPILUKAlgorithm::SEQLVLSCHD_RP: 2 streams" << std::endl; - Test::run_test_spiluk_streams(0, 2); - - std::cout << "SPILUKAlgorithm::SEQLVLSCHD_RP: 3 streams" << std::endl; - Test::run_test_spiluk_streams(0, 3); - - std::cout << "SPILUKAlgorithm::SEQLVLSCHD_RP: 4 streams" << std::endl; - Test::run_test_spiluk_streams(0, 4); - - std::cout << "SPILUKAlgorithm::SEQLVLSCHD_TP1: 2 streams" << std::endl; - Test::run_test_spiluk_streams(1, 2); - - std::cout << "SPILUKAlgorithm::SEQLVLSCHD_TP1: 3 streams" << std::endl; - Test::run_test_spiluk_streams(1, 3); - - std::cout << "SPILUKAlgorithm::SEQLVLSCHD_TP1: 4 streams" << std::endl; - Test::run_test_spiluk_streams(1, 4); -} - -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##spiluk##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_spiluk(); \ - test_spiluk_streams(); \ - } - -#define NO_TEST_COMPLEX - -#include - -#undef KOKKOSKERNELS_EXECUTE_TEST -#undef NO_TEST_COMPLEX diff --git a/sparse/unit_test/Test_Sparse_sptrsv.hpp_ b/sparse/unit_test/Test_Sparse_sptrsv.hpp_ deleted file mode 100644 index 2425fb4c27..0000000000 --- a/sparse/unit_test/Test_Sparse_sptrsv.hpp_ +++ /dev/null @@ -1,1359 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#include - -#include -#include - -#include "KokkosKernels_IOUtils.hpp" -#include "KokkosSparse_Utils.hpp" -#include "KokkosSparse_spmv.hpp" -#include "KokkosSparse_CrsMatrix.hpp" - -#include "KokkosSparse_sptrsv.hpp" -#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) -#include "KokkosSparse_sptrsv_supernode.hpp" -#endif - -#include - -using namespace KokkosSparse; -using namespace KokkosSparse::Experimental; -using namespace KokkosKernels; -using namespace KokkosKernels::Impl; -using namespace KokkosKernels::Experimental; - -// #ifndef kokkos_complex_double -// #define kokkos_complex_double Kokkos::complex -// #endif -// #ifndef kokkos_complex_float -// #define kokkos_complex_float Kokkos::complex -// #endif - -typedef Kokkos::complex kokkos_complex_double; -typedef Kokkos::complex kokkos_complex_float; - -namespace Test { - -#if 0 -template -void run_test_sptrsv_mtx() { - - typedef typename KokkosSparse::CrsMatrix crsmat_t; - typedef typename crsmat_t::StaticCrsGraphType graph_t; - - //typedef Kokkos::View< size_type*, device > RowMapType; - //typedef Kokkos::View< lno_t*, device > EntriesType; - typedef Kokkos::View< scalar_t*, device > ValuesType; - - // Lower tri - std::cout << "LowerTriTest Begin" << std::endl; - { - -// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/L-offshore-amd.mtx"; -// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/L-Transport-amd.mtx"; -// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/L-Fault_639amd.mtx"; -// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/L-thermal2-amd.mtx"; - std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/L-dielFilterV2real-amd.mtx"; - std::cout << "Matrix file: " << mtx_filename << std::endl; - crsmat_t triMtx = KokkosKernels::Impl::read_kokkos_crst_matrix(mtx_filename.c_str()); //in_matrix - graph_t lgraph = triMtx.graph; // in_graph - - auto row_map = lgraph.row_map; - auto entries = lgraph.entries; - auto values = triMtx.values; - - const size_type nrows = lgraph.numRows(); -// const size_type nnz = triMtx.nnz(); - - scalar_t ZERO = scalar_t(0); - scalar_t ONE = scalar_t(1); - - typedef KokkosKernels::Experimental::KokkosKernelsHandle KernelHandle; - - std::cout << "UnitTest nrows = " << nrows << std::endl; - - KernelHandle kh; - bool is_lower_tri = true; - std::cout << "Create handle" << std::endl; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, is_lower_tri); - - std::cout << "Prepare linear system" << std::endl; - // Create known_lhs, generate rhs, then solve for lhs to compare to known_lhs - ValuesType known_lhs("known_lhs", nrows); - // Create known solution lhs set to all 1's - Kokkos::deep_copy(known_lhs, ONE); - - // Solution to find - ValuesType lhs("lhs", nrows); - - // A*known_lhs generates rhs: rhs is dense, use spmv - ValuesType rhs("rhs", nrows); - -// typedef CrsMatrix crsMat_t; -// crsMat_t triMtx("triMtx", nrows, nrows, nnz, values, row_map, entries); - - std::cout << "SPMV" << std::endl; - KokkosSparse::spmv( "N", ONE, triMtx, known_lhs, ZERO, rhs); - - std::cout << "TriSolve Symbolic" << std::endl; - Kokkos::Timer timer; - sptrsv_symbolic( &kh, row_map, entries ); - std::cout << "LTRI Symbolic Time: " << timer.seconds() << std::endl; - - std::cout << "TriSolve Solve" << std::endl; - kh.get_sptrsv_handle()->print_algorithm(); - timer.reset(); - sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); - std::cout << "LTRI Solve TEAMPOLICY! Time: " << timer.seconds() << std::endl; - - scalar_t sum = 0.0; - Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), KOKKOS_LAMBDA ( const lno_t i, scalar_t &tsum ) { - tsum += lhs(i); - }, sum); - if ( sum != lhs.extent(0) ) { - std::cout << "Lower Tri Solve FAILURE" << std::endl; - } - else { - std::cout << "Lower Tri Solve SUCCESS!" << std::endl; - //std::cout << "Num-levels = " << kh->get_sptrsv_handle()->get_num_levels() << std::endl; - } - EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); - - Kokkos::deep_copy(lhs, 0); - kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHD_RP); - kh.get_sptrsv_handle()->print_algorithm(); - timer.reset(); - sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); - std::cout << "LTRI Solve SEQLVLSCHD_RP Time: " << timer.seconds() << std::endl; - - sum = 0.0; - Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), KOKKOS_LAMBDA ( const lno_t i, scalar_t &tsum ) { - tsum += lhs(i); - }, sum); - if ( sum != lhs.extent(0) ) { - std::cout << "Lower Tri Solve FAILURE" << std::endl; - } - else { - std::cout << "Lower Tri Solve SUCCESS!" << std::endl; - //std::cout << "Num-levels = " << kh->get_sptrsv_handle()->get_num_levels() << std::endl; - } - EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); - - Kokkos::deep_copy(lhs, 0); - kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHED_TP2); - kh.get_sptrsv_handle()->print_algorithm(); - timer.reset(); - sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); - std::cout << "LTRI Solve SEQLVLSCHED_TP2 Time: " << timer.seconds() << std::endl; - - sum = 0.0; - Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), KOKKOS_LAMBDA ( const lno_t i, scalar_t &tsum ) { - tsum += lhs(i); - }, sum); - if ( sum != lhs.extent(0) ) { - std::cout << "Lower Tri Solve FAILURE" << std::endl; - } - else { - std::cout << "Lower Tri Solve SUCCESS!" << std::endl; - //std::cout << "Num-levels = " << kh->get_sptrsv_handle()->get_num_levels() << std::endl; - } - EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); - - - kh.destroy_sptrsv_handle(); - } - // Upper tri - std::cout << "UpperTriTest Begin" << std::endl; - { -// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/U-offshore-amd.mtx"; -// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/U-Transport-amd.mtx"; -// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/U-Fault_639amd.mtx"; -// std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/U-thermal2-amd.mtx"; - std::string mtx_filename = "/ascldap/users/ndellin/TestCodes-GitlabEx/KokkosEcoCodes/KokkosKernels-DevTests/Matrices/U-dielFilterV2real-amd.mtx"; - std::cout << "Matrix file: " << mtx_filename << std::endl; - crsmat_t triMtx = KokkosKernels::Impl::read_kokkos_crst_matrix(mtx_filename.c_str()); //in_matrix - graph_t lgraph = triMtx.graph; // in_graph - - auto row_map = lgraph.row_map; - auto entries = lgraph.entries; - auto values = triMtx.values; - - const size_type nrows = lgraph.numRows(); -// const size_type nnz = triMtx.nnz(); - - scalar_t ZERO = scalar_t(0); - scalar_t ONE = scalar_t(1); - - typedef KokkosKernels::Experimental::KokkosKernelsHandle KernelHandle; - - std::cout << "UnitTest nrows = " << nrows << std::endl; - - KernelHandle kh; - bool is_lower_tri = false; - std::cout << "Create handle" << std::endl; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, is_lower_tri); - - std::cout << "Prepare linear system" << std::endl; - // Create known_lhs, generate rhs, then solve for lhs to compare to known_lhs - ValuesType known_lhs("known_lhs", nrows); - // Create known solution lhs set to all 1's - Kokkos::deep_copy(known_lhs, ONE); - - // Solution to find - ValuesType lhs("lhs", nrows); - - // A*known_lhs generates rhs: rhs is dense, use spmv - ValuesType rhs("rhs", nrows); - -// typedef CrsMatrix crsMat_t; -// crsMat_t triMtx("triMtx", nrows, nrows, nnz, values, row_map, entries); - std::cout << "SPMV" << std::endl; - KokkosSparse::spmv( "N", ONE, triMtx, known_lhs, ZERO, rhs); - - std::cout << "TriSolve Symbolic" << std::endl; - Kokkos::Timer timer; - sptrsv_symbolic( &kh, row_map, entries ); - std::cout << "UTRI Symbolic Time: " << timer.seconds() << std::endl; - - std::cout << "TriSolve Solve" << std::endl; - kh.get_sptrsv_handle()->print_algorithm(); - timer.reset(); - sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); - std::cout << "UTRI Solve SEQLVLSCHD_TP1 Time: " << timer.seconds() << std::endl; - - scalar_t sum = 0.0; - Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), KOKKOS_LAMBDA ( const lno_t i, scalar_t &tsum ) { - tsum += lhs(i); - }, sum); - if ( sum != lhs.extent(0) ) { - std::cout << "Upper Tri Solve FAILURE" << std::endl; - } - else { - std::cout << "Upper Tri Solve SUCCESS!" << std::endl; - //std::cout << "Num-levels = " << kh->get_sptrsv_handle()->get_num_levels() << std::endl; - } - EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); - - Kokkos::deep_copy(lhs, 0); - kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHD_RP); - kh.get_sptrsv_handle()->print_algorithm(); - timer.reset(); - sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); - std::cout << "UTRI Solve SEQLVLSCHD_RP Time: " << timer.seconds() << std::endl; - - sum = 0.0; - Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), KOKKOS_LAMBDA ( const lno_t i, scalar_t &tsum ) { - tsum += lhs(i); - }, sum); - if ( sum != lhs.extent(0) ) { - std::cout << "Upper Tri Solve FAILURE" << std::endl; - } - else { - std::cout << "Upper Tri Solve SUCCESS!" << std::endl; - //std::cout << "Num-levels = " << kh->get_sptrsv_handle()->get_num_levels() << std::endl; - } - EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); - - Kokkos::deep_copy(lhs, 0); - kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHED_TP2); - kh.get_sptrsv_handle()->print_algorithm(); - timer.reset(); - sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); - std::cout << "UTRI Solve SEQLVLSCHED_TP2 Time: " << timer.seconds() << std::endl; - - sum = 0.0; - Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), KOKKOS_LAMBDA ( const lno_t i, scalar_t &tsum ) { - tsum += lhs(i); - }, sum); - if ( sum != lhs.extent(0) ) { - std::cout << "Upper Tri Solve FAILURE" << std::endl; - } - else { - std::cout << "Upper Tri Solve SUCCESS!" << std::endl; - //std::cout << "Num-levels = " << kh->get_sptrsv_handle()->get_num_levels() << std::endl; - } - EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); - - - kh.destroy_sptrsv_handle(); - } - -} -#endif - -namespace { -template -struct ReductionCheck { - using lno_t = OrdinalType; - using value_type = ValueType; - - ViewType lhs; - - ReductionCheck(const ViewType &lhs_) : lhs(lhs_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(lno_t i, value_type &tsum) const { tsum += lhs(i); } -}; -} // namespace - -template -void run_test_sptrsv() { - typedef Kokkos::View RowMapType; - typedef Kokkos::View EntriesType; - typedef Kokkos::View ValuesType; - - scalar_t ZERO = scalar_t(0); - scalar_t ONE = scalar_t(1); - - const size_type nrows = 5; - const size_type nnz = 10; - - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, typename device::execution_space, - typename device::memory_space, typename device::memory_space>; - -#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) - using host_crsmat_t = typename KernelHandle::SPTRSVHandleType::host_crsmat_t; - using host_graph_t = typename host_crsmat_t::StaticCrsGraphType; - - using row_map_view_t = typename host_graph_t::row_map_type::non_const_type; - using cols_view_t = typename host_graph_t::entries_type::non_const_type; - using values_view_t = typename host_crsmat_t::values_type::non_const_type; - - // L & U handle for supernodal SpTrsv - KernelHandle khL; - KernelHandle khU; - - // right-hand-side and solution - ValuesType B("rhs", nrows); - ValuesType X("sol", nrows); - - // host CRS for L & U - host_crsmat_t L, U, Ut; -#endif - - // Upper tri - { - RowMapType row_map("row_map", nrows + 1); - EntriesType entries("entries", nnz); - ValuesType values("values", nnz); - - auto hrow_map = Kokkos::create_mirror_view(row_map); - auto hentries = Kokkos::create_mirror_view(entries); - auto hvalues = Kokkos::create_mirror_view(values); - - hrow_map(0) = 0; - hrow_map(1) = 2; - hrow_map(2) = 4; - hrow_map(3) = 7; - hrow_map(4) = 9; - hrow_map(5) = 10; - - hentries(0) = 0; - hentries(1) = 2; - hentries(2) = 1; - hentries(3) = 4; - hentries(4) = 2; - hentries(5) = 3; - hentries(6) = 4; - hentries(7) = 3; - hentries(8) = 4; - hentries(9) = 4; - - for (size_type i = 0; i < nnz; ++i) { - hvalues(i) = ONE; - } - - Kokkos::deep_copy(row_map, hrow_map); - Kokkos::deep_copy(entries, hentries); - Kokkos::deep_copy(values, hvalues); - - // Create known_lhs, generate rhs, then solve for lhs to compare to - // known_lhs - ValuesType known_lhs("known_lhs", nrows); - // Create known solution lhs set to all 1's - Kokkos::deep_copy(known_lhs, ONE); - - // Solution to find - ValuesType lhs("lhs", nrows); - - // A*known_lhs generates rhs: rhs is dense, use spmv - ValuesType rhs("rhs", nrows); - - typedef CrsMatrix crsMat_t; - crsMat_t triMtx("triMtx", nrows, nrows, nnz, values, row_map, entries); - KokkosSparse::spmv("N", ONE, triMtx, known_lhs, ZERO, rhs); - - { - KernelHandle kh; - bool is_lower_tri = false; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, - is_lower_tri); - - sptrsv_symbolic(&kh, row_map, entries); - Kokkos::fence(); - - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - scalar_t sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, - lhs.extent(0)), - ReductionCheck(lhs), sum); - if (sum != lhs.extent(0)) { - std::cout << "Upper Tri Solve FAILURE" << std::endl; - kh.get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); - - Kokkos::deep_copy(lhs, ZERO); - kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHD_RP); - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, - lhs.extent(0)), - ReductionCheck(lhs), sum); - if (sum != lhs.extent(0)) { - std::cout << "Upper Tri Solve FAILURE" << std::endl; - kh.get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); - - // FIXME Issues with various integral type combos - algorithm currently - // unavailable and commented out until fixed - /* - Kokkos::deep_copy(lhs, ZERO); - kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHED_TP2); - sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); - Kokkos::fence(); - - sum = 0.0; - Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), ReductionCheck(lhs), sum); if ( sum != lhs.extent(0) ) { std::cout << - "Upper Tri Solve FAILURE" << std::endl; - kh.get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); - */ - - kh.destroy_sptrsv_handle(); - } - - { - Kokkos::deep_copy(lhs, ZERO); - KernelHandle kh; - bool is_lower_tri = false; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN, nrows, - is_lower_tri); - auto chain_threshold = 1; - kh.get_sptrsv_handle()->reset_chain_threshold(chain_threshold); - - sptrsv_symbolic(&kh, row_map, entries); - Kokkos::fence(); - - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - scalar_t sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, - lhs.extent(0)), - ReductionCheck(lhs), sum); - if (sum != lhs.extent(0)) { - std::cout << "Upper Tri Solve FAILURE" << std::endl; - kh.get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); - - kh.destroy_sptrsv_handle(); - } - -#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE - if (std::is_same::value && - std::is_same::value && - std::is_same::value) { - Kokkos::deep_copy(lhs, ZERO); - KernelHandle kh; - bool is_lower_tri = false; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, - is_lower_tri); - - sptrsv_symbolic(&kh, row_map, entries, values); - Kokkos::fence(); - - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - scalar_t sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, - lhs.extent(0)), - ReductionCheck(lhs), sum); - if (sum != lhs.extent(0)) { - std::cout << "Upper Tri Solve FAILURE" << std::endl; - kh.get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); - - kh.destroy_sptrsv_handle(); - } -#endif - -#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) - const scalar_t FIVE = scalar_t(5); - const size_type nnz_sp = 14; - { - // U in csr - row_map_view_t hUrowptr("hUrowptr", nrows + 1); - cols_view_t hUcolind("hUcolind", nnz_sp); - values_view_t hUvalues("hUvalues", nnz_sp); - - // rowptr - hUrowptr(0) = 0; - hUrowptr(1) = 4; - hUrowptr(2) = 8; - hUrowptr(3) = 11; - hUrowptr(4) = 13; - hUrowptr(5) = 14; - - // colind - // first row (first supernode) - hUcolind(0) = 0; - hUcolind(1) = 1; - hUcolind(2) = 2; - hUcolind(3) = 4; - // second row (first supernode) - hUcolind(4) = 0; - hUcolind(5) = 1; - hUcolind(6) = 2; - hUcolind(7) = 4; - // third row (second supernode) - hUcolind(8) = 2; - hUcolind(9) = 3; - hUcolind(10) = 4; - // fourth row (third supernode) - hUcolind(11) = 3; - hUcolind(12) = 4; - // fifth row (fourth supernode) - hUcolind(13) = 4; - - // values - // first row (first supernode) - hUvalues(0) = FIVE; - hUvalues(1) = ONE; - hUvalues(2) = ONE; - hUvalues(3) = ZERO; - // second row (first supernode) - hUvalues(4) = ZERO; - hUvalues(5) = FIVE; - hUvalues(6) = ZERO; - hUvalues(7) = ONE; - // third row (second supernode) - hUvalues(8) = FIVE; - hUvalues(9) = ONE; - hUvalues(10) = ONE; - // fourth row (third supernode) - hUvalues(11) = FIVE; - hUvalues(12) = ONE; - // fifth row (fourth supernode) - hUvalues(13) = FIVE; - - // save U for Supernodal Sptrsv - host_graph_t static_graph(hUcolind, hUrowptr); - U = host_crsmat_t("CrsMatrixU", nrows, hUvalues, static_graph); - - // create handle for Supernodal Sptrsv - bool is_lower_tri = false; - khU.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, - is_lower_tri); - - // X = U*ONES to generate B = A*ONES (on device) - { - RowMapType Urowptr("Urowptr", nrows + 1); - EntriesType Ucolind("Ucolind", nnz_sp); - ValuesType Uvalues("Uvalues", nnz_sp); - - Kokkos::deep_copy(Urowptr, hUrowptr); - Kokkos::deep_copy(Ucolind, hUcolind); - Kokkos::deep_copy(Uvalues, hUvalues); - - crsMat_t mtxU("mtxU", nrows, nrows, nnz_sp, Uvalues, Urowptr, Ucolind); - Kokkos::deep_copy(B, ONE); - KokkosSparse::spmv("N", ONE, mtxU, B, ZERO, X); - } - } - - { - // U in csc (for inverting off-diag) - row_map_view_t hUcolptr("hUcolptr", nrows + 1); - cols_view_t hUrowind("hUrowind", nnz_sp); - values_view_t hUvalues("hUvalues", nnz_sp); - - // colptr - hUcolptr(0) = 0; - hUcolptr(1) = 2; - hUcolptr(2) = 4; - hUcolptr(3) = 7; - hUcolptr(4) = 9; - hUcolptr(5) = 14; - - // colind - // first column (first supernode) - hUrowind(0) = 0; - hUrowind(1) = 1; - // second column (first supernode) - hUrowind(2) = 0; - hUrowind(3) = 1; - // third column (second supernode) - hUrowind(4) = 2; - hUrowind(5) = 0; - hUrowind(6) = 1; - // fourth column (third supernode) - hUrowind(7) = 3; - hUrowind(8) = 2; - // fifth column (fourth supernode) - hUrowind(9) = 4; - hUrowind(10) = 0; - hUrowind(11) = 1; - hUrowind(12) = 2; - hUrowind(13) = 3; - - // values - // first column (first supernode) - hUvalues(0) = FIVE; - hUvalues(1) = ZERO; - // second column (first supernode) - hUvalues(2) = ONE; - hUvalues(3) = FIVE; - // third column (second supernode) - hUvalues(4) = FIVE; - hUvalues(5) = ONE; - hUvalues(6) = ZERO; - // fourth column (third supernode) - hUvalues(7) = FIVE; - hUvalues(8) = ONE; - // fifth column (fourth supernode) - hUvalues(9) = FIVE; - hUvalues(10) = ZERO; - hUvalues(11) = ONE; - hUvalues(12) = ONE; - hUvalues(13) = ONE; - - // store Ut in crsmat - host_graph_t static_graph(hUrowind, hUcolptr); - Ut = host_crsmat_t("CrsMatrixUt", nrows, hUvalues, static_graph); - } -#endif - } - - // Lower tri - { - RowMapType row_map("row_map", nrows + 1); - EntriesType entries("entries", nnz); - ValuesType values("values", nnz); - - auto hrow_map = Kokkos::create_mirror_view(row_map); - auto hentries = Kokkos::create_mirror_view(entries); - auto hvalues = Kokkos::create_mirror_view(values); - - hrow_map(0) = 0; - hrow_map(1) = 1; - hrow_map(2) = 2; - hrow_map(3) = 4; - hrow_map(4) = 6; - hrow_map(5) = 10; - - hentries(0) = 0; - hentries(1) = 1; - hentries(2) = 0; - hentries(3) = 2; - hentries(4) = 2; - hentries(5) = 3; - hentries(6) = 1; - hentries(7) = 2; - hentries(8) = 3; - hentries(9) = 4; - - for (size_type i = 0; i < nnz; ++i) { - hvalues(i) = ONE; - } - - Kokkos::deep_copy(row_map, hrow_map); - Kokkos::deep_copy(entries, hentries); - Kokkos::deep_copy(values, hvalues); - - // Create known_lhs, generate rhs, then solve for lhs to compare to - // known_lhs - ValuesType known_lhs("known_lhs", nrows); - // Create known solution lhs set to all 1's - Kokkos::deep_copy(known_lhs, ONE); - - // Solution to find - ValuesType lhs("lhs", nrows); - - // A*known_lhs generates rhs: rhs is dense, use spmv - ValuesType rhs("rhs", nrows); - - typedef CrsMatrix crsMat_t; - crsMat_t triMtx("triMtx", nrows, nrows, nnz, values, row_map, entries); - KokkosSparse::spmv("N", ONE, triMtx, known_lhs, ZERO, rhs); - - { - KernelHandle kh; - bool is_lower_tri = true; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, - is_lower_tri); - - sptrsv_symbolic(&kh, row_map, entries); - Kokkos::fence(); - - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - scalar_t sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, - lhs.extent(0)), - ReductionCheck(lhs), sum); - if (sum != lhs.extent(0)) { - std::cout << "Lower Tri Solve FAILURE" << std::endl; - kh.get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); - - Kokkos::deep_copy(lhs, ZERO); - kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHD_RP); - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, - lhs.extent(0)), - ReductionCheck(lhs), sum); - if (sum != lhs.extent(0)) { - std::cout << "Lower Tri Solve FAILURE" << std::endl; - kh.get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); - - // FIXME Issues with various integral type combos - algorithm currently - // unavailable and commented out until fixed - /* - Kokkos::deep_copy(lhs, ZERO); - kh.get_sptrsv_handle()->set_algorithm(SPTRSVAlgorithm::SEQLVLSCHED_TP2); - sptrsv_solve( &kh, row_map, entries, values, rhs, lhs ); - Kokkos::fence(); - - sum = 0.0; - Kokkos::parallel_reduce( Kokkos::RangePolicy(0, lhs.extent(0)), ReductionCheck(lhs), sum); if ( sum != lhs.extent(0) ) { std::cout << - "Lower Tri Solve FAILURE" << std::endl; - kh.get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE( sum == scalar_t(lhs.extent(0)) ); - */ - - kh.destroy_sptrsv_handle(); - } - - { - Kokkos::deep_copy(lhs, ZERO); - KernelHandle kh; - bool is_lower_tri = true; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1CHAIN, nrows, - is_lower_tri); - auto chain_threshold = 1; - kh.get_sptrsv_handle()->reset_chain_threshold(chain_threshold); - - sptrsv_symbolic(&kh, row_map, entries); - Kokkos::fence(); - - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - scalar_t sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, - lhs.extent(0)), - ReductionCheck(lhs), sum); - if (sum != lhs.extent(0)) { - std::cout << "Lower Tri Solve FAILURE" << std::endl; - kh.get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); - - kh.destroy_sptrsv_handle(); - } - -#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE - if (std::is_same::value && - std::is_same::value && - std::is_same::value) { - Kokkos::deep_copy(lhs, ZERO); - KernelHandle kh; - bool is_lower_tri = true; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, - is_lower_tri); - - sptrsv_symbolic(&kh, row_map, entries, values); - Kokkos::fence(); - - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - scalar_t sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, - lhs.extent(0)), - ReductionCheck(lhs), sum); - if (sum != lhs.extent(0)) { - std::cout << "Lower Tri Solve FAILURE" << std::endl; - kh.get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE(sum == scalar_t(lhs.extent(0))); - - kh.destroy_sptrsv_handle(); - } -#endif - -#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) - { - // L in csc - const scalar_t TWO = scalar_t(2); - const scalar_t FIVE = scalar_t(5); - const size_type nnz_sp = 14; - - row_map_view_t hLcolptr("hUcolptr", nrows + 1); - cols_view_t hLrowind("hUrowind", nnz_sp); - values_view_t hLvalues("hUvalues", nnz_sp); - - // colptr - hLcolptr(0) = 0; - hLcolptr(1) = 4; - hLcolptr(2) = 8; - hLcolptr(3) = 11; - hLcolptr(4) = 13; - hLcolptr(5) = 14; - - // rowind - // first column (first supernode) - hLrowind(0) = 0; - hLrowind(1) = 1; - hLrowind(2) = 2; - hLrowind(3) = 4; - // second column (first supernode) - hLrowind(4) = 0; - hLrowind(5) = 1; - hLrowind(6) = 2; - hLrowind(7) = 4; - // third column (second supernode) - hLrowind(8) = 2; - hLrowind(9) = 3; - hLrowind(10) = 4; - // fourth column (third supernode) - hLrowind(11) = 3; - hLrowind(12) = 4; - // fifth column (fourth supernode) - hLrowind(13) = 4; - - // values - // first column (first supernode) - hLvalues(0) = FIVE; - hLvalues(1) = TWO; - hLvalues(2) = ONE; - hLvalues(3) = ZERO; - // second column (first supernode) - hLvalues(4) = ZERO; - hLvalues(5) = FIVE; - hLvalues(6) = ZERO; - hLvalues(7) = ONE; - // third column (second supernode) - hLvalues(8) = FIVE; - hLvalues(9) = ONE; - hLvalues(10) = ONE; - // fourth column (third supernode) - hLvalues(11) = FIVE; - hLvalues(12) = ONE; - // fifth column (fourth supernode) - hLvalues(13) = FIVE; - - // store Lt in crsmat - host_graph_t static_graph(hLrowind, hLcolptr); - L = host_crsmat_t("CrsMatrixL", nrows, hLvalues, static_graph); - - bool is_lower_tri = true; - khL.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, - is_lower_tri); - - // generate B = A*ONES = L*(U*ONES), where X = U*ONES (on device) - { - RowMapType Lcolptr("Lcolptr", nrows + 1); - EntriesType Lrowind("Lrowind", nnz_sp); - ValuesType Lvalues("Lvalues", nnz_sp); - - Kokkos::deep_copy(Lcolptr, hLcolptr); - Kokkos::deep_copy(Lrowind, hLrowind); - Kokkos::deep_copy(Lvalues, hLvalues); - - crsMat_t mtxL("mtxL", nrows, nrows, nnz_sp, Lvalues, Lcolptr, Lrowind); - KokkosSparse::spmv("T", ONE, mtxL, X, ZERO, B); - } - } - - { - // unit-test for supernode SpTrsv (default) - // > set up supernodes (block size = one) - size_type nsupers = 4; - Kokkos::View supercols("supercols", - 1 + nsupers); - supercols(0) = 0; - supercols(1) = 2; // two columns - supercols(2) = 3; // one column - supercols(3) = 4; // one column - supercols(4) = 5; // one column - int *etree = NULL; // we generate graph internally - - // invert diagonal blocks - bool invert_diag = true; - khL.set_sptrsv_invert_diagonal(invert_diag); - khU.set_sptrsv_invert_diagonal(invert_diag); - - // > symbolic (on host) - sptrsv_supernodal_symbolic(nsupers, supercols.data(), etree, L.graph, - &khL, U.graph, &khU); - // > numeric (on host) - sptrsv_compute(&khL, L); - sptrsv_compute(&khU, U); - Kokkos::fence(); - - // > solve - ValuesType b("b", nrows); - Kokkos::deep_copy(b, B); - Kokkos::deep_copy(X, ZERO); - sptrsv_solve(&khL, &khU, X, b); - Kokkos::fence(); - - // > check - scalar_t sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, X.extent(0)), - ReductionCheck(X), sum); - if (sum != lhs.extent(0)) { - std::cout << "Supernode Tri Solve FAILURE : " << sum << " vs." - << lhs.extent(0) << std::endl; - khL.get_sptrsv_handle()->print_algorithm(); - } else { - std::cout << "Supernode Tri Solve SUCCESS" << std::endl; - khL.get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE(sum == scalar_t(X.extent(0))); - - khL.destroy_sptrsv_handle(); - khU.destroy_sptrsv_handle(); - } - - { - // unit-test for supernode SpTrsv (running TRMM on device for compute) - // > set up supernodes - size_type nsupers = 4; - Kokkos::View supercols("supercols", - 1 + nsupers); - supercols(0) = 0; - supercols(1) = 2; // two columns - supercols(2) = 3; // one column - supercols(3) = 4; // one column - supercols(4) = 5; // one column - int *etree = NULL; // we generate tree internally - - // > create handles - KernelHandle khLd; - KernelHandle khUd; - khLd.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, true); - khUd.create_sptrsv_handle(SPTRSVAlgorithm::SUPERNODAL_DAG, nrows, false); - - // > invert diagonal blocks - bool invert_diag = true; - khLd.set_sptrsv_invert_diagonal(invert_diag); - khUd.set_sptrsv_invert_diagonal(invert_diag); - - // > invert off-diagonal blocks - bool invert_offdiag = true; - khUd.set_sptrsv_column_major(true); - khLd.set_sptrsv_invert_offdiagonal(invert_offdiag); - khUd.set_sptrsv_invert_offdiagonal(invert_offdiag); - - // > forcing sptrsv compute to perform TRMM on device - khLd.set_sptrsv_diag_supernode_sizes(1, 1); - khUd.set_sptrsv_diag_supernode_sizes(1, 1); - - // > symbolic (on host) - sptrsv_supernodal_symbolic(nsupers, supercols.data(), etree, L.graph, - &khLd, Ut.graph, &khUd); - // > numeric (on host) - sptrsv_compute(&khLd, L); - sptrsv_compute(&khUd, Ut); - Kokkos::fence(); - - // > solve - ValuesType b("b", nrows); - Kokkos::deep_copy(b, B); - Kokkos::deep_copy(X, ZERO); - sptrsv_solve(&khLd, &khUd, X, b); - Kokkos::fence(); - - // > check - scalar_t sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, X.extent(0)), - ReductionCheck(X), sum); - if (sum != lhs.extent(0)) { - std::cout << "Supernode Tri Solve FAILURE : " << sum << " vs." - << lhs.extent(0) << std::endl; - khLd.get_sptrsv_handle()->print_algorithm(); - } else { - std::cout << "Supernode Tri Solve SUCCESS" << std::endl; - khLd.get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE(sum == scalar_t(X.extent(0))); - - khLd.destroy_sptrsv_handle(); - khUd.destroy_sptrsv_handle(); - } -#endif - } -} - -template -void run_test_sptrsv_streams(int test_algo, int nstreams) { - using RowMapType = Kokkos::View; - using EntriesType = Kokkos::View; - using ValuesType = Kokkos::View; - using RowMapType_hostmirror = typename RowMapType::HostMirror; - using EntriesType_hostmirror = typename EntriesType::HostMirror; - using ValuesType_hostmirror = typename ValuesType::HostMirror; - using execution_space = typename device::execution_space; - using memory_space = typename device::memory_space; - using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< - size_type, lno_t, scalar_t, execution_space, memory_space, memory_space>; - using crsMat_t = CrsMatrix; - - // Workaround for OpenMP: skip tests if OMP_NUM_THREADS < nstreams because of not enough resource to partition - bool run_streams_test = true; -#ifdef KOKKOS_ENABLE_OPENMP - if (std::is_same::value) { - const char *env_omp_num_threads = std::getenv("OMP_NUM_THREADS"); - if (env_omp_num_threads != nullptr) { - int num_threads = std::atoi(env_omp_num_threads); - if (num_threads < nstreams) { - run_streams_test = false; - std::cout << " Skip stream test: omp_num_threads = " << num_threads << std::endl; - } - } - } -#endif - if (!run_streams_test) - return; - - scalar_t ZERO = scalar_t(0); - scalar_t ONE = scalar_t(1); - - const size_type nrows = 5; - const size_type nnz = 10; - - std::vector instances; - if (nstreams == 2) - instances = Kokkos::Experimental::partition_space(execution_space(), 1, 1); - else if (nstreams == 3) - instances = - Kokkos::Experimental::partition_space(execution_space(), 1, 1, 1); - else // (nstreams == 4) - instances = - Kokkos::Experimental::partition_space(execution_space(), 1, 1, 1, 1); - - std::vector kh_v(nstreams); - std::vector kh_ptr_v(nstreams); - std::vector row_map_v(nstreams); - std::vector entries_v(nstreams); - std::vector values_v(nstreams); - std::vector rhs_v(nstreams); - std::vector lhs_v(nstreams); - - RowMapType_hostmirror hrow_map("hrow_map", nrows + 1); - EntriesType_hostmirror hentries("hentries", nnz); - ValuesType_hostmirror hvalues("hvalues", nnz); - - // Upper tri - { - hrow_map(0) = 0; - hrow_map(1) = 2; - hrow_map(2) = 4; - hrow_map(3) = 7; - hrow_map(4) = 9; - hrow_map(5) = 10; - - hentries(0) = 0; - hentries(1) = 2; - hentries(2) = 1; - hentries(3) = 4; - hentries(4) = 2; - hentries(5) = 3; - hentries(6) = 4; - hentries(7) = 3; - hentries(8) = 4; - hentries(9) = 4; - - for (size_type i = 0; i < nnz; ++i) { - hvalues(i) = ONE; - } - - for (int i = 0; i < nstreams; i++) { - // Allocate U - row_map_v[i] = RowMapType("row_map", nrows + 1); - entries_v[i] = EntriesType("entries", nnz); - values_v[i] = ValuesType("values", nnz); - - // Copy from host to device - Kokkos::deep_copy(row_map_v[i], hrow_map); - Kokkos::deep_copy(entries_v[i], hentries); - Kokkos::deep_copy(values_v[i], hvalues); - - // Create known_lhs, generate rhs, then solve for lhs to compare to - // known_lhs - ValuesType known_lhs("known_lhs", nrows); - // Create known solution lhs set to all 1's - Kokkos::deep_copy(known_lhs, ONE); - - // Solution to find - lhs_v[i] = ValuesType("lhs", nrows); - - // A*known_lhs generates rhs: rhs is dense, use spmv - rhs_v[i] = ValuesType("rhs", nrows); - - crsMat_t triMtx("triMtx", nrows, nrows, nnz, values_v[i], row_map_v[i], - entries_v[i]); - - KokkosSparse::spmv("N", ONE, triMtx, known_lhs, ZERO, rhs_v[i]); - Kokkos::fence(); - - // Create handle - kh_v[i] = KernelHandle(); - bool is_lower_tri = false; - if (test_algo == 0) - kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_RP, nrows, - is_lower_tri); - else if (test_algo == 1) - kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, - is_lower_tri); - else - kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, - is_lower_tri); - - kh_ptr_v[i] = &kh_v[i]; - - // Symbolic phase - sptrsv_symbolic(kh_ptr_v[i], row_map_v[i], entries_v[i], values_v[i]); - Kokkos::fence(); - } // Done handle creation and sptrsv_symbolic on all streams - - // Solve phase - sptrsv_solve_streams(instances, kh_ptr_v, row_map_v, entries_v, values_v, - rhs_v, lhs_v); - - for (int i = 0; i < nstreams; i++) instances[i].fence(); - - // Checking - for (int i = 0; i < nstreams; i++) { - scalar_t sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy( - 0, lhs_v[i].extent(0)), - ReductionCheck(lhs_v[i]), sum); - if (sum != lhs_v[i].extent(0)) { - std::cout << "Upper Tri Solve FAILURE on stream " << i << std::endl; - kh_v[i].get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE(sum == scalar_t(lhs_v[i].extent(0))); - - kh_v[i].destroy_sptrsv_handle(); - } - } - - // Lower tri - { - hrow_map(0) = 0; - hrow_map(1) = 1; - hrow_map(2) = 2; - hrow_map(3) = 4; - hrow_map(4) = 6; - hrow_map(5) = 10; - - hentries(0) = 0; - hentries(1) = 1; - hentries(2) = 0; - hentries(3) = 2; - hentries(4) = 2; - hentries(5) = 3; - hentries(6) = 1; - hentries(7) = 2; - hentries(8) = 3; - hentries(9) = 4; - - for (size_type i = 0; i < nnz; ++i) { - hvalues(i) = ONE; - } - - for (int i = 0; i < nstreams; i++) { - // Allocate L - row_map_v[i] = RowMapType("row_map", nrows + 1); - entries_v[i] = EntriesType("entries", nnz); - values_v[i] = ValuesType("values", nnz); - - // Copy from host to device - Kokkos::deep_copy(row_map_v[i], hrow_map); - Kokkos::deep_copy(entries_v[i], hentries); - Kokkos::deep_copy(values_v[i], hvalues); - - // Create known_lhs, generate rhs, then solve for lhs to compare to - // known_lhs - ValuesType known_lhs("known_lhs", nrows); - // Create known solution lhs set to all 1's - Kokkos::deep_copy(known_lhs, ONE); - - // Solution to find - lhs_v[i] = ValuesType("lhs", nrows); - - // A*known_lhs generates rhs: rhs is dense, use spmv - rhs_v[i] = ValuesType("rhs", nrows); - - crsMat_t triMtx("triMtx", nrows, nrows, nnz, values_v[i], row_map_v[i], - entries_v[i]); - - KokkosSparse::spmv("N", ONE, triMtx, known_lhs, ZERO, rhs_v[i]); - Kokkos::fence(); - - // Create handle - kh_v[i] = KernelHandle(); - bool is_lower_tri = true; - if (test_algo == 0) - kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_RP, nrows, - is_lower_tri); - else if (test_algo == 1) - kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_TP1, nrows, - is_lower_tri); - else - kh_v[i].create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, - is_lower_tri); - - kh_ptr_v[i] = &kh_v[i]; - - // Symbolic phase - sptrsv_symbolic(kh_ptr_v[i], row_map_v[i], entries_v[i], values_v[i]); - Kokkos::fence(); - } // Done handle creation and sptrsv_symbolic on all streams - - // Solve phase - sptrsv_solve_streams(instances, kh_ptr_v, row_map_v, entries_v, values_v, - rhs_v, lhs_v); - - for (int i = 0; i < nstreams; i++) instances[i].fence(); - - // Checking - for (int i = 0; i < nstreams; i++) { - scalar_t sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::RangePolicy( - 0, lhs_v[i].extent(0)), - ReductionCheck(lhs_v[i]), sum); - if (sum != lhs_v[i].extent(0)) { - std::cout << "Lower Tri Solve FAILURE on stream " << i << std::endl; - kh_v[i].get_sptrsv_handle()->print_algorithm(); - } - EXPECT_TRUE(sum == scalar_t(lhs_v[i].extent(0))); - - kh_v[i].destroy_sptrsv_handle(); - } - } -} - -} // namespace Test - -template -void test_sptrsv() { - Test::run_test_sptrsv(); - // Test::run_test_sptrsv_mtx(); -} - -template -void test_sptrsv_streams() { - std::cout << "SPTRSVAlgorithm::SEQLVLSCHD_RP: 2 streams" << std::endl; - Test::run_test_sptrsv_streams(0, 2); - - std::cout << "SPTRSVAlgorithm::SEQLVLSCHD_RP: 3 streams" << std::endl; - Test::run_test_sptrsv_streams(0, 3); - - std::cout << "SPTRSVAlgorithm::SEQLVLSCHD_RP: 4 streams" << std::endl; - Test::run_test_sptrsv_streams(0, 4); - - std::cout << "SPTRSVAlgorithm::SEQLVLSCHD_TP1: 2 streams" << std::endl; - Test::run_test_sptrsv_streams(1, 2); - - std::cout << "SPTRSVAlgorithm::SEQLVLSCHD_TP1: 3 streams" << std::endl; - Test::run_test_sptrsv_streams(1, 3); - - std::cout << "SPTRSVAlgorithm::SEQLVLSCHD_TP1: 4 streams" << std::endl; - Test::run_test_sptrsv_streams(1, 4); - -#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) - if (std::is_same::value && - std::is_same::value) { - std::cout << "SPTRSVAlgorithm::SPTRSV_CUSPARSE: 2 streams" << std::endl; - Test::run_test_sptrsv_streams(2, 2); - - std::cout << "SPTRSVAlgorithm::SPTRSV_CUSPARSE: 3 streams" << std::endl; - Test::run_test_sptrsv_streams(2, 3); - - std::cout << "SPTRSVAlgorithm::SPTRSV_CUSPARSE: 4 streams" << std::endl; - Test::run_test_sptrsv_streams(2, 4); - } -#endif -} - -#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ - TEST_F(TestCategory, \ - sparse##_##sptrsv##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_sptrsv(); \ - test_sptrsv_streams(); \ - } - -#include - -#undef KOKKOSKERNELS_EXECUTE_TEST From 7dfeca33620fe6bdf3f0622d96b271b75421b915 Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Wed, 30 Aug 2023 08:16:17 -0700 Subject: [PATCH 10/12] Remove unused type alias AT --- sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp b/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp index f7b48c6945..69d8eabb0a 100644 --- a/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp +++ b/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp @@ -28,7 +28,6 @@ void run_test_extract_diagonal_blocks(int nrows, int nblocks) { using EntriesType_hm = typename EntriesType::HostMirror; using ValuesType_hm = typename ValuesType::HostMirror; using crsMat_t = CrsMatrix; - using AT = Kokkos::ArithTraits; crsMat_t A; std::vector DiagBlks(nblocks); From d632c0e5c967a3b7a4d5f517a0f9f97faca2c43b Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Tue, 5 Sep 2023 13:47:33 -0700 Subject: [PATCH 11/12] Address Evan's comments --- sparse/src/KokkosSparse_Utils.hpp | 174 +++++++++++------- .../Test_Sparse_extractCrsDiagonalBlocks.hpp | 2 - 2 files changed, 106 insertions(+), 70 deletions(-) diff --git a/sparse/src/KokkosSparse_Utils.hpp b/sparse/src/KokkosSparse_Utils.hpp index 65e7e4243d..7034b50ae2 100644 --- a/sparse/src/KokkosSparse_Utils.hpp +++ b/sparse/src/KokkosSparse_Utils.hpp @@ -2330,13 +2330,84 @@ void validateCrsMatrix(int m, int n, const Rowmap &rowmapIn, } } +/** + * @brief Count the non-zeros of a sub-block in a CRS matrix and find the first and last column indices at each row of the sub-block + * This is a host function used by the kk_extract_diagonal_blocks_crsmatrix_sequential() + */ +template +void kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential(const row_map_type &A_row_map,const entries_type &A_entries, const ordinal_type &blk_row_start, const ordinal_type &blk_col_start, const ordinal_type &blk_nrows, const ordinal_type &blk_ncols, size_type &blk_nnz, offset_view1d_type &first_indices, offset_view1d_type &last_indices) { + // Rowmap of i-th row-oriented sub-matrix + auto A_row_map_sub = Kokkos::subview(A_row_map, Kokkos::make_pair(blk_row_start, blk_row_start + blk_nrows + 1)); + + blk_nnz = 0; + + for (ordinal_type j = 0; j < blk_nrows; j++) { // loop through each row + size_type k1 = A_row_map_sub(j); + size_type k2 = A_row_map_sub(j + 1); + size_type k; + // Assume column indices are sorted in ascending order + // Find the position of the start column in the row + for (k = k1; k < k2; k++) { + ordinal_type col = A_entries(k); + if (col >= blk_col_start) { + break; + } + } + first_indices(j) = k; + // Find the position of the last column in the row + for (k = k2 - 1; k >= k1; k--) { + ordinal_type col = A_entries(k); + if (col < blk_col_start + blk_ncols) { + break; + } + } + last_indices(j) = k; + blk_nnz += (last_indices(j) - first_indices(j) + 1); + } +} + +/** + * @brief Extract a CRS sub-block from a CRS matrix + * This is a host function used by the kk_extract_diagonal_blocks_crsmatrix_sequential() + */ +template +void kk_extract_subblock_crsmatrix_sequential(const entries_type &A_entries, const values_type &A_values, const ordinal_type &blk_col_start, const ordinal_type &blk_nrows, const size_type &blk_nnz, const offset_view1d_type &first_indices, const offset_view1d_type &last_indices, out_row_map_type &blk_row_map, out_entries_type &blk_entries, out_values_type &blk_values) { + // - create out_row_map + // - copy A_entries to out_entries and update out_entries with local column indices + // - copy A_values to out_values + size_type first_ = 0; + for (ordinal_type j = 0; j < blk_nrows; j++) { // loop through each row + size_type nnz = last_indices(j) - first_indices(j) + 1; + blk_row_map(j) = first_; + for (size_type k = 0; k < nnz; k++) { + blk_entries(first_ + k) = A_entries(first_indices(j) + k) - blk_col_start; + blk_values(first_ + k) = A_values(first_indices(j) + k); + } + first_ += nnz; + } + blk_row_map(blk_nrows) = blk_nnz; // last element +} + /** * @brief Extract the diagonal blocks out of a crs matrix. * This is a blocking function that runs on the host. * - * @tparam crsMat_t The type of the CRS matrix - * @param A [in] The CrsMatrix. - * @param DiagBlk_v [out] The vector of extracted the CRS diagonal blocks. + * @tparam crsMat_t The type of the CRS matrix. + * @param A [in] The square CrsMatrix. It is expected that column indices are + * in ascending order + * @param DiagBlk_v [out] The vector of the extracted the CRS diagonal blocks + * (1 <= the number of diagonal blocks <= A_nrows) * * Usage Example: * kk_extract_diagonal_blocks_crsmatrix_sequential(A_in, diagBlk_in_b); @@ -2395,6 +2466,12 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( } } else { // A_nrows >= 1 + if ((n_blocks < 1) || (A_nrows < n_blocks)) { + std::ostringstream os; + os << "The number of diagonal blocks (" << n_blocks << ") should be >=1 and <= the number of rows of the matrix A (" << A_nrows << ")"; + throw std::runtime_error(os.str()); + } + ordinal_type rows_per_block = ((A_nrows % n_blocks) == 0) ? (A_nrows / n_blocks) : (A_nrows / n_blocks + 1); @@ -2406,82 +2483,43 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( std::vector entries_h_v(n_blocks); std::vector values_h_v(n_blocks); - ordinal_type row_start = 0; // first row index of i-th diagonal block - ordinal_type col_start = 0; // first col index of i-th diagonal block - ordinal_type nrows, ncols; // Nrows, Ncols of i-th diagonal block + ordinal_type blk_row_start = 0; // first row index of i-th diagonal block + ordinal_type blk_col_start = 0; // first col index of i-th diagonal block + ordinal_type blk_nrows, blk_ncols; // Nrows, Ncols of i-th diagonal block for (ordinal_type i = 0; i < n_blocks; i++) { - nrows = rows_per_block; - if ((row_start + rows_per_block) > A_nrows) { - nrows = A_nrows - row_start; - } - col_start = row_start; - ncols = nrows; - - // Rowmap of i-th row-oriented sub-matrix - auto A_row_map_sub = Kokkos::subview( - A_row_map_h, Kokkos::make_pair(row_start, row_start + nrows + 1)); - - // First round: count i-th non-zeros or size of entries_v[i] - size_type n_entries = 0; - offset_view1d_type first("first", nrows); // first position per row - offset_view1d_type last("last", nrows); // last position per row - - for (ordinal_type j = 0; j < nrows; j++) { // loop through each row - size_type k1 = A_row_map_sub(j); - size_type k2 = A_row_map_sub(j + 1); - size_type k; - // Assume column indices are sorted in ascending order - // Find the position of the start column in the row - for (k = k1; k < k2; k++) { - ordinal_type col = A_entries_h(k); - if (col >= col_start) { - break; - } - } - first(j) = k; - // Find the position of the last column in the row - for (k = k2 - 1; k >= k1; k--) { - ordinal_type col = A_entries_h(k); - if (col < col_start + ncols) { - break; - } - } - last(j) = k; - n_entries += (last(j) - first(j) + 1); + blk_nrows = rows_per_block; + if ((blk_row_start + rows_per_block) > A_nrows) { + blk_nrows = A_nrows - blk_row_start; } + blk_col_start = blk_row_start; + blk_ncols = blk_nrows; - // Second round: - // - create row_map_v[i] - // - copy A_entries to entries_v[i] and update entries_v[i] with local - // column indices - // - copy A_values to values_v[i] - row_map_v[i] = out_row_map_type("row_map_v", nrows + 1); - entries_v[i] = out_entries_type("entries_v", n_entries); - values_v[i] = out_values_type("values_v", n_entries); - row_map_h_v[i] = out_row_map_hostmirror_type("row_map_h_v", nrows + 1); - entries_h_v[i] = out_entries_hostmirror_type("entries_h_v", n_entries); - values_h_v[i] = out_values_hostmirror_type("values_h_v", n_entries); - size_type first_ = 0; - for (ordinal_type j = 0; j < nrows; j++) { // loop through each row - size_type nnz = last(j) - first(j) + 1; - row_map_h_v[i](j) = first_; - for (size_type k = 0; k < nnz; k++) { - entries_h_v[i](first_ + k) = A_entries_h(first(j) + k) - col_start; - values_h_v[i](first_ + k) = A_values_h(first(j) + k); - } - first_ += nnz; - } - row_map_h_v[i](nrows) = n_entries; // last element + // First round: count i-th non-zeros or size of entries_v[i] and find the first and last column indices at each row + size_type blk_nnz = 0; + offset_view1d_type first("first", blk_nrows); // first position per row + offset_view1d_type last("last", blk_nrows); // last position per row + + kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential(A_row_map_h, A_entries_h, blk_row_start, blk_col_start, blk_nrows, blk_ncols, blk_nnz, first, last); + + // Second round: extract + row_map_v[i] = out_row_map_type("row_map_v", blk_nrows + 1); + entries_v[i] = out_entries_type("entries_v", blk_nnz); + values_v[i] = out_values_type("values_v", blk_nnz); + row_map_h_v[i] = out_row_map_hostmirror_type("row_map_h_v", blk_nrows + 1); + entries_h_v[i] = out_entries_hostmirror_type("entries_h_v", blk_nnz); + values_h_v[i] = out_values_hostmirror_type("values_h_v", blk_nnz); + + kk_extract_subblock_crsmatrix_sequential(A_entries_h, A_values_h, blk_col_start, blk_nrows, blk_nnz, first, last, row_map_h_v[i], entries_h_v[i], values_h_v[i]); Kokkos::deep_copy(row_map_v[i], row_map_h_v[i]); Kokkos::deep_copy(entries_v[i], entries_h_v[i]); Kokkos::deep_copy(values_v[i], values_h_v[i]); - DiagBlk_v[i] = crsMat_t("CrsMatrix", nrows, ncols, n_entries, + DiagBlk_v[i] = crsMat_t("CrsMatrix", blk_nrows, blk_ncols, blk_nnz, values_v[i], row_map_v[i], entries_v[i]); - row_start += nrows; + blk_row_start += blk_nrows; } // for (ordinal_type i = 0; i < n_blocks; i++) } // A_nrows >= 1 } // n_blocks > 1 diff --git a/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp b/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp index 69d8eabb0a..327780dec3 100644 --- a/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp +++ b/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp @@ -135,8 +135,6 @@ void test_extract_diagonal_blocks() { for (int s = 1; s <= 8; s++) { Test::run_test_extract_diagonal_blocks( 0, s); - Test::run_test_extract_diagonal_blocks( - 3, s); Test::run_test_extract_diagonal_blocks( 12, s); Test::run_test_extract_diagonal_blocks( From ac523de1e9c90c2c20ef123c64a168a26e57c77b Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Tue, 5 Sep 2023 14:00:36 -0700 Subject: [PATCH 12/12] Clang format --- sparse/src/KokkosSparse_Utils.hpp | 82 ++++++++++++++++++------------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/sparse/src/KokkosSparse_Utils.hpp b/sparse/src/KokkosSparse_Utils.hpp index 7034b50ae2..33d9d6806a 100644 --- a/sparse/src/KokkosSparse_Utils.hpp +++ b/sparse/src/KokkosSparse_Utils.hpp @@ -2331,21 +2331,26 @@ void validateCrsMatrix(int m, int n, const Rowmap &rowmapIn, } /** - * @brief Count the non-zeros of a sub-block in a CRS matrix and find the first and last column indices at each row of the sub-block - * This is a host function used by the kk_extract_diagonal_blocks_crsmatrix_sequential() + * @brief Count the non-zeros of a sub-block in a CRS matrix and find the first + * and last column indices at each row of the sub-block. This is a host function + * used by the kk_extract_diagonal_blocks_crsmatrix_sequential() */ -template -void kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential(const row_map_type &A_row_map,const entries_type &A_entries, const ordinal_type &blk_row_start, const ordinal_type &blk_col_start, const ordinal_type &blk_nrows, const ordinal_type &blk_ncols, size_type &blk_nnz, offset_view1d_type &first_indices, offset_view1d_type &last_indices) { +template +void kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential( + const row_map_type &A_row_map, const entries_type &A_entries, + const ordinal_type &blk_row_start, const ordinal_type &blk_col_start, + const ordinal_type &blk_nrows, const ordinal_type &blk_ncols, + size_type &blk_nnz, offset_view1d_type &first_indices, + offset_view1d_type &last_indices) { // Rowmap of i-th row-oriented sub-matrix - auto A_row_map_sub = Kokkos::subview(A_row_map, Kokkos::make_pair(blk_row_start, blk_row_start + blk_nrows + 1)); + auto A_row_map_sub = Kokkos::subview( + A_row_map, + Kokkos::make_pair(blk_row_start, blk_row_start + blk_nrows + 1)); blk_nnz = 0; - for (ordinal_type j = 0; j < blk_nrows; j++) { // loop through each row + for (ordinal_type j = 0; j < blk_nrows; j++) { // loop through each row size_type k1 = A_row_map_sub(j); size_type k2 = A_row_map_sub(j + 1); size_type k; @@ -2372,19 +2377,22 @@ void kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential(const row_map_ /** * @brief Extract a CRS sub-block from a CRS matrix - * This is a host function used by the kk_extract_diagonal_blocks_crsmatrix_sequential() + * This is a host function used by the + * kk_extract_diagonal_blocks_crsmatrix_sequential() */ -template -void kk_extract_subblock_crsmatrix_sequential(const entries_type &A_entries, const values_type &A_values, const ordinal_type &blk_col_start, const ordinal_type &blk_nrows, const size_type &blk_nnz, const offset_view1d_type &first_indices, const offset_view1d_type &last_indices, out_row_map_type &blk_row_map, out_entries_type &blk_entries, out_values_type &blk_values) { +void kk_extract_subblock_crsmatrix_sequential( + const entries_type &A_entries, const values_type &A_values, + const ordinal_type &blk_col_start, const ordinal_type &blk_nrows, + const size_type &blk_nnz, const offset_view1d_type &first_indices, + const offset_view1d_type &last_indices, out_row_map_type &blk_row_map, + out_entries_type &blk_entries, out_values_type &blk_values) { // - create out_row_map - // - copy A_entries to out_entries and update out_entries with local column indices + // - copy A_entries to out_entries and update out_entries with local column + // indices // - copy A_values to out_values size_type first_ = 0; for (ordinal_type j = 0; j < blk_nrows; j++) { // loop through each row @@ -2468,7 +2476,9 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( // A_nrows >= 1 if ((n_blocks < 1) || (A_nrows < n_blocks)) { std::ostringstream os; - os << "The number of diagonal blocks (" << n_blocks << ") should be >=1 and <= the number of rows of the matrix A (" << A_nrows << ")"; + os << "The number of diagonal blocks (" << n_blocks + << ") should be >=1 and <= the number of rows of the matrix A (" + << A_nrows << ")"; throw std::runtime_error(os.str()); } @@ -2483,9 +2493,9 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( std::vector entries_h_v(n_blocks); std::vector values_h_v(n_blocks); - ordinal_type blk_row_start = 0; // first row index of i-th diagonal block - ordinal_type blk_col_start = 0; // first col index of i-th diagonal block - ordinal_type blk_nrows, blk_ncols; // Nrows, Ncols of i-th diagonal block + ordinal_type blk_row_start = 0; // first row index of i-th diagonal block + ordinal_type blk_col_start = 0; // first col index of i-th diagonal block + ordinal_type blk_nrows, blk_ncols; // Nrows, Ncols of i-th diagonal block for (ordinal_type i = 0; i < n_blocks; i++) { blk_nrows = rows_per_block; @@ -2495,22 +2505,28 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( blk_col_start = blk_row_start; blk_ncols = blk_nrows; - // First round: count i-th non-zeros or size of entries_v[i] and find the first and last column indices at each row + // First round: count i-th non-zeros or size of entries_v[i] and find + // the first and last column indices at each row size_type blk_nnz = 0; - offset_view1d_type first("first", blk_nrows); // first position per row - offset_view1d_type last("last", blk_nrows); // last position per row + offset_view1d_type first("first", blk_nrows); // first position per row + offset_view1d_type last("last", blk_nrows); // last position per row - kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential(A_row_map_h, A_entries_h, blk_row_start, blk_col_start, blk_nrows, blk_ncols, blk_nnz, first, last); + kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential( + A_row_map_h, A_entries_h, blk_row_start, blk_col_start, blk_nrows, + blk_ncols, blk_nnz, first, last); // Second round: extract - row_map_v[i] = out_row_map_type("row_map_v", blk_nrows + 1); - entries_v[i] = out_entries_type("entries_v", blk_nnz); - values_v[i] = out_values_type("values_v", blk_nnz); - row_map_h_v[i] = out_row_map_hostmirror_type("row_map_h_v", blk_nrows + 1); + row_map_v[i] = out_row_map_type("row_map_v", blk_nrows + 1); + entries_v[i] = out_entries_type("entries_v", blk_nnz); + values_v[i] = out_values_type("values_v", blk_nnz); + row_map_h_v[i] = + out_row_map_hostmirror_type("row_map_h_v", blk_nrows + 1); entries_h_v[i] = out_entries_hostmirror_type("entries_h_v", blk_nnz); values_h_v[i] = out_values_hostmirror_type("values_h_v", blk_nnz); - kk_extract_subblock_crsmatrix_sequential(A_entries_h, A_values_h, blk_col_start, blk_nrows, blk_nnz, first, last, row_map_h_v[i], entries_h_v[i], values_h_v[i]); + kk_extract_subblock_crsmatrix_sequential( + A_entries_h, A_values_h, blk_col_start, blk_nrows, blk_nnz, first, + last, row_map_h_v[i], entries_h_v[i], values_h_v[i]); Kokkos::deep_copy(row_map_v[i], row_map_h_v[i]); Kokkos::deep_copy(entries_v[i], entries_h_v[i]);