From 99f91e48ea73e90e74cd50db10437ccca4bd4b61 Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Wed, 13 Apr 2022 09:52:38 -0400 Subject: [PATCH 1/4] Value-initialize result of MaxLoc reduction to avoid maybe uninitialized warning --- src/batched/dense/impl/KokkosBatched_FindAmax_Internal.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/batched/dense/impl/KokkosBatched_FindAmax_Internal.hpp b/src/batched/dense/impl/KokkosBatched_FindAmax_Internal.hpp index f11210253e..32980219bf 100644 --- a/src/batched/dense/impl/KokkosBatched_FindAmax_Internal.hpp +++ b/src/batched/dense/impl/KokkosBatched_FindAmax_Internal.hpp @@ -43,7 +43,7 @@ struct TeamVectorFindAmaxInternal { if (m > 0) { using reducer_value_type = typename Kokkos::MaxLoc::value_type; - reducer_value_type value; + reducer_value_type value{}; Kokkos::MaxLoc reducer_value(value); Kokkos::parallel_reduce( Kokkos::TeamVectorRange(member, m), From 2221b2c184fd3d97c48dae23df9c86ff06593537 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Fri, 1 Apr 2022 17:09:42 -0600 Subject: [PATCH 2/4] sptrsv: improve symbolic level scheduling time Use level scheduling implementation like that from spiluk Co-author: Vinh Dang @vqd8a --- .../KokkosSparse_sptrsv_symbolic_impl.hpp | 157 ++++++------------ 1 file changed, 47 insertions(+), 110 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp b/src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp index 4d11112493..1d4be5be08 100644 --- a/src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp +++ b/src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp @@ -223,65 +223,32 @@ void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map, HostSignedEntriesType level_list = Kokkos::create_mirror_view(dlevel_list); Kokkos::deep_copy(level_list, dlevel_list); - HostSignedEntriesType previous_level_list( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "previous_level_list"), - nrows); - Kokkos::deep_copy(previous_level_list, signed_integral_t(-1)); - - const bool stored_diagonal = thandle.is_stored_diagonal(); - // diagonal_offsets is uninitialized - deep_copy unnecessary at the - // beginning, only needed at the end - auto diagonal_offsets = thandle.get_diagonal_offsets(); - auto hdiagonal_offsets = thandle.get_host_diagonal_offsets(); - - size_type level = 0; - auto starting_node = 0; - auto ending_node = nrows; - + signed_integral_t level = 0; size_type node_count = 0; - while (node_count < nrows) { - for (size_type row = starting_node; row < ending_node; ++row) { - if (level_list(row) == -1) { // unmarked - bool is_root = true; - signed_integral_t ptrstart = row_map(row); - signed_integral_t ptrend = row_map(row + 1); - - for (signed_integral_t offset = ptrstart; offset < ptrend; ++offset) { - size_type col = entries(offset); - if (previous_level_list(col) == -1 && col != row) { // unmarked - if (col < row) { - is_root = false; - break; - } - } else if (col == row) { - if (stored_diagonal) hdiagonal_offsets(row) = offset; - } else if (col > row) { - std::cout << "\nrow = " << row << " col = " << col - << " offset = " << offset << std::endl; - throw( - std::runtime_error("SYMB ERROR: Lower tri with colid > rowid " - "- SHOULD NOT HAPPEN!!!")); - } - } // end for offset , i.e. cols of this row - - if (is_root == true) { - level_list(row) = level; - nodes_per_level(level) += 1; - nodes_grouped_by_level(node_count) = row; - node_count += 1; - } - - } // end if - } // end for row - - // Kokkos::deep_copy(previous_level_list, level_list); - for (size_type i = 0; i < nrows; ++i) { - previous_level_list(i) = level_list(i); + typename DeviceEntriesType::HostMirror level_ptr("lp", nrows+1); // temp View used for index bookkeeping + level_ptr(0) = 0; + for (size_type i = 0; i < nrows; ++i) { + signed_integral_t l = 0; + size_type rowstart = row_map(i); + size_type rowend = row_map(i + 1); + for (size_type j = rowstart; j < rowend; j++) { + size_type col = entries(j); + l = std::max(l, level_list(col)); } - - level += 1; - } // end while + level_list(i) = l + 1; + nodes_per_level(l) += 1; // 0-based indexing + level_ptr(l + 1) += 1; + level = std::max(level, l + 1); + node_count++; + } + for (size_type i = 1; i <= level; ++i) { + level_ptr(i) += level_ptr(i - 1); + } + for (size_type i = 0; i < nrows; i++) { + nodes_grouped_by_level(level_ptr(level_list(i) - 1)) = i; + level_ptr(level_list(i) - 1) += 1; + } thandle.set_num_levels(level); @@ -320,7 +287,6 @@ void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map, Kokkos::deep_copy(dnodes_grouped_by_level, nodes_grouped_by_level); Kokkos::deep_copy(dnodes_per_level, nodes_per_level); Kokkos::deep_copy(dlevel_list, level_list); - if (stored_diagonal) Kokkos::deep_copy(diagonal_offsets, hdiagonal_offsets); // Extra check: #ifdef LVL_OUTPUT_INFO @@ -705,61 +671,33 @@ void upper_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map, HostSignedEntriesType level_list = Kokkos::create_mirror_view(dlevel_list); Kokkos::deep_copy(level_list, dlevel_list); - HostSignedEntriesType previous_level_list( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "previous_level_list"), - nrows); - Kokkos::deep_copy(previous_level_list, signed_integral_t(-1)); - - const bool stored_diagonal = thandle.is_stored_diagonal(); - // diagonal_offsets is uninitialized - deep_copy unnecessary at the - // beginning, only needed at the end - auto diagonal_offsets = thandle.get_diagonal_offsets(); - auto hdiagonal_offsets = thandle.get_host_diagonal_offsets(); - - size_type level = 0; - auto starting_node = nrows - 1; - auto ending_node = 0; - + signed_integral_t level = 0; size_type node_count = 0; - while (node_count < nrows) { - for (signed_integral_t row = starting_node; row >= ending_node; --row) { - if (level_list(row) == -1) { // unmarked - bool is_root = true; - signed_integral_t ptrstart = row_map(row); - signed_integral_t ptrend = row_map(row + 1); - - for (signed_integral_t offset = ptrend - 1; offset >= ptrstart; - --offset) { - signed_integral_t col = entries(offset); - - if (previous_level_list(col) == -1 && col != row) { // unmarked - if (col > row) { - is_root = false; - break; - } - } else if (col == row) { - if (stored_diagonal) hdiagonal_offsets(row) = offset; - } - } // end for offset , i.e. cols of this row - - if (is_root == true) { - level_list(row) = level; - nodes_per_level(level) += 1; - nodes_grouped_by_level(node_count) = row; - node_count += 1; - } - - } // end if - } // end for row - - // Kokkos::deep_copy(previous_level_list, level_list); - for (size_type i = 0; i < nrows; ++i) { - previous_level_list(i) = level_list(i); + typename DeviceEntriesType::HostMirror level_ptr("lp", nrows+1); // temp View used for index bookkeeping + level_ptr(0) = 0; + for (size_type ii = nrows; ii > 0 ; ii--) { + size_type i = ii-1; // Avoid >= 0 comparison in for-loop to prevent wraparound errors with unsigned types + signed_integral_t l = 0; + size_type rowstart = row_map(i)+1; // skip diag + size_type rowend = row_map(i + 1); + for (size_type j = rowstart; j < rowend; ++j) { + size_type col = entries(j); + l = std::max(l, level_list(col)); } - - level += 1; - } // end while + level_list(i) = l + 1; + nodes_per_level(l) += 1; // 0-based indexing + level_ptr(l + 1) += 1; + level = std::max(level, l + 1); + node_count++; + } + for (size_type i = 1; i <= level; ++i) { + level_ptr(i) += level_ptr(i - 1); + } + for (size_type i = 0; i < nrows; i++) { + nodes_grouped_by_level(level_ptr(level_list(i) - 1)) = i; + level_ptr(level_list(i) - 1) += 1; + } thandle.set_num_levels(level); @@ -798,7 +736,6 @@ void upper_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map, Kokkos::deep_copy(dnodes_grouped_by_level, nodes_grouped_by_level); Kokkos::deep_copy(dnodes_per_level, nodes_per_level); Kokkos::deep_copy(dlevel_list, level_list); - if (stored_diagonal) Kokkos::deep_copy(diagonal_offsets, hdiagonal_offsets); // Extra check: #ifdef LVL_OUTPUT_INFO From e0a391441a7a8c4e7a932892e222771fddd56f1f Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Fri, 1 Apr 2022 17:13:04 -0600 Subject: [PATCH 3/4] Apply clang-format-8 --- .../KokkosSparse_sptrsv_symbolic_impl.hpp | 39 ++++++++++--------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp b/src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp index 1d4be5be08..ba339d26a8 100644 --- a/src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp +++ b/src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp @@ -223,21 +223,22 @@ void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map, HostSignedEntriesType level_list = Kokkos::create_mirror_view(dlevel_list); Kokkos::deep_copy(level_list, dlevel_list); - signed_integral_t level = 0; - size_type node_count = 0; + signed_integral_t level = 0; + size_type node_count = 0; - typename DeviceEntriesType::HostMirror level_ptr("lp", nrows+1); // temp View used for index bookkeeping + typename DeviceEntriesType::HostMirror level_ptr( + "lp", nrows + 1); // temp View used for index bookkeeping level_ptr(0) = 0; for (size_type i = 0; i < nrows; ++i) { - signed_integral_t l = 0; - size_type rowstart = row_map(i); - size_type rowend = row_map(i + 1); + signed_integral_t l = 0; + size_type rowstart = row_map(i); + size_type rowend = row_map(i + 1); for (size_type j = rowstart; j < rowend; j++) { size_type col = entries(j); l = std::max(l, level_list(col)); } level_list(i) = l + 1; - nodes_per_level(l) += 1; // 0-based indexing + nodes_per_level(l) += 1; // 0-based indexing level_ptr(l + 1) += 1; level = std::max(level, l + 1); node_count++; @@ -288,7 +289,7 @@ void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map, Kokkos::deep_copy(dnodes_per_level, nodes_per_level); Kokkos::deep_copy(dlevel_list, level_list); - // Extra check: + // Extra check: #ifdef LVL_OUTPUT_INFO { std::cout << " End symb - extra checks" << std::endl; @@ -671,22 +672,24 @@ void upper_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map, HostSignedEntriesType level_list = Kokkos::create_mirror_view(dlevel_list); Kokkos::deep_copy(level_list, dlevel_list); - signed_integral_t level = 0; - size_type node_count = 0; + signed_integral_t level = 0; + size_type node_count = 0; - typename DeviceEntriesType::HostMirror level_ptr("lp", nrows+1); // temp View used for index bookkeeping + typename DeviceEntriesType::HostMirror level_ptr( + "lp", nrows + 1); // temp View used for index bookkeeping level_ptr(0) = 0; - for (size_type ii = nrows; ii > 0 ; ii--) { - size_type i = ii-1; // Avoid >= 0 comparison in for-loop to prevent wraparound errors with unsigned types - signed_integral_t l = 0; - size_type rowstart = row_map(i)+1; // skip diag - size_type rowend = row_map(i + 1); + for (size_type ii = nrows; ii > 0; ii--) { + size_type i = ii - 1; // Avoid >= 0 comparison in for-loop to prevent + // wraparound errors with unsigned types + signed_integral_t l = 0; + size_type rowstart = row_map(i) + 1; // skip diag + size_type rowend = row_map(i + 1); for (size_type j = rowstart; j < rowend; ++j) { size_type col = entries(j); l = std::max(l, level_list(col)); } level_list(i) = l + 1; - nodes_per_level(l) += 1; // 0-based indexing + nodes_per_level(l) += 1; // 0-based indexing level_ptr(l + 1) += 1; level = std::max(level, l + 1); node_count++; @@ -737,7 +740,7 @@ void upper_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map, Kokkos::deep_copy(dnodes_per_level, nodes_per_level); Kokkos::deep_copy(dlevel_list, level_list); - // Extra check: + // Extra check: #ifdef LVL_OUTPUT_INFO { std::cout << " End symb - extra checks" << std::endl; From 5a791be6cb22f5f4f388788fb90a610368880ccc Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Fri, 1 Apr 2022 19:27:47 -0600 Subject: [PATCH 4/4] Fix unsigned - signed comparison -Werror --- src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp b/src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp index ba339d26a8..3a6f988835 100644 --- a/src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp +++ b/src/sparse/impl/KokkosSparse_sptrsv_symbolic_impl.hpp @@ -243,7 +243,7 @@ void lower_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map, level = std::max(level, l + 1); node_count++; } - for (size_type i = 1; i <= level; ++i) { + for (signed_integral_t i = 1; i <= level; ++i) { level_ptr(i) += level_ptr(i - 1); } for (size_type i = 0; i < nrows; i++) { @@ -694,7 +694,7 @@ void upper_tri_symbolic(TriSolveHandle& thandle, const RowMapType drow_map, level = std::max(level, l + 1); node_count++; } - for (size_type i = 1; i <= level; ++i) { + for (signed_integral_t i = 1; i <= level; ++i) { level_ptr(i) += level_ptr(i - 1); } for (size_type i = 0; i < nrows; i++) {