Skip to content

Commit

Permalink
Merge pull request #5 from e10harvey/sptrsv-tpl
Browse files Browse the repository at this point in the history
src/sparse: Fix supernodal sptrsv build with LayoutRight=ON
  • Loading branch information
iyamazaki authored Jun 7, 2021
2 parents fbd9ca0 + b3636c0 commit 6b4e189
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 38 deletions.
17 changes: 8 additions & 9 deletions src/sparse/KokkosSparse_sptrsv_supernode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,7 @@ void merge_supernodal_graph(int *p_nsuper, input_size_type *nb,
/* ========================================================================================= */
template <typename output_graph_t, typename input_graph_t, typename input_size_type>
output_graph_t
generate_merged_supernodal_graph(bool lower,
generate_merged_supernodal_graph(bool lower,
int nsuper, const input_size_type *nb,
int nsuper2, input_size_type *nb2,
input_graph_t &graph, int *nnz) {
Expand Down Expand Up @@ -1146,7 +1146,7 @@ void sptrsv_supernodal_symbolic(
struct Tag_SupTrtriFunctor{};
struct Tag_SupTrtriTrmmFunctor{};

template <typename UploType, typename DiagType, typename integer_view_host_t,
template <typename UploType, typename DiagType, typename integer_view_host_t,
typename input_size_type, typename row_map_type, typename index_type, typename values_type>
struct TriSupernodalTrtriFunctor {

Expand All @@ -1157,7 +1157,7 @@ void sptrsv_supernodal_symbolic(
values_type hv;

KOKKOS_INLINE_FUNCTION
TriSupernodalTrtriFunctor(integer_view_host_t supernode_ids_, const input_size_type *nb_,
TriSupernodalTrtriFunctor(integer_view_host_t supernode_ids_, const input_size_type *nb_,
row_map_type& hr_, index_type& hc_, values_type& hv_) :
supernode_ids(supernode_ids_),
nb(nb_),
Expand Down Expand Up @@ -1232,7 +1232,7 @@ template <typename KernelHandle, typename input_size_type,
typename row_map_type, typename index_type, typename values_type,
typename integer_view_host_t>
void
invert_supernodal_columns_batched(KernelHandle *kernelHandle, bool unit_diag, const input_size_type *nb,
invert_supernodal_columns_batched(KernelHandle *kernelHandle, bool unit_diag, const input_size_type *nb,
row_map_type& hr, index_type& hc, values_type& hv, int num_batches, integer_view_host_t supernode_ids) {

using execution_space = typename values_type::execution_space;
Expand Down Expand Up @@ -1314,7 +1314,7 @@ invert_supernodal_columns_batched(KernelHandle *kernelHandle, bool unit_diag, co
template <typename KernelHandle, typename input_size_type,
typename row_map_type, typename index_type, typename values_type>
void
invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, int nsuper, const input_size_type *nb,
invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, int nsuper, const input_size_type *nb,
row_map_type& hr, index_type& hc, values_type& hv) {

using execution_space = typename values_type::execution_space;
Expand Down Expand Up @@ -1405,7 +1405,7 @@ invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, int nsuper
char uplo_char = (lower ? 'L' : 'U');
char diag_char = (unit_diag ? 'U' : 'N');

Kokkos::View<scalar_t**, Kokkos::LayoutLeft, memory_space, Kokkos::MemoryUnmanaged>
Kokkos::View<scalar_t**, default_layout, memory_space, Kokkos::MemoryUnmanaged>
viewL (&hv(nnzD), nsrow, nscol);
auto Ljj = Kokkos::subview (viewL, range_type (0, nscol), Kokkos::ALL ());

Expand All @@ -1426,7 +1426,7 @@ invert_supernodal_columns(KernelHandle *kernelHandle, bool unit_diag, int nsuper
timer.reset ();
#endif
if(run_trmm_on_device) {
Kokkos::View<scalar_t**, Kokkos::LayoutLeft, trmm_memory_space, Kokkos::MemoryUnmanaged>
Kokkos::View<scalar_t**, default_layout, trmm_memory_space, Kokkos::MemoryUnmanaged>
devL (trmm_dwork.data(), nsrow, nscol);
auto devLjj = Kokkos::subview (devL, range_type (0, nscol), Kokkos::ALL ());
auto devLij = Kokkos::subview (devL, range_type (nscol, nsrow), Kokkos::ALL ());
Expand Down Expand Up @@ -2127,7 +2127,7 @@ void split_crsmat(KernelHandle *kernelHandleL, host_crsmat_t superluL) {
graph_t diag_graph(columnD_view, rowmapD_view);
diag_blocks[lvl] = crsmat_t("DiagMatrix", nrows, valuesD_view, diag_graph);
}
//std::cout << " > split nnz(" << lvl << ") = " << nnzL+nnzD << std::endl;
//std::cout << " > split nnz(" << lvl << ") = " << nnzL+nnzD << std::endl;
time2 += timer.seconds ();

// update the number of supernodes processed
Expand Down Expand Up @@ -2218,4 +2218,3 @@ void split_crsmat(KernelHandle *kernelHandleL, host_crsmat_t superluL) {

#endif // KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV
#endif // KOKKOSSPARSE_SPTRSV_SUPERNODE_HPP_

58 changes: 29 additions & 29 deletions src/sparse/impl/KokkosSparse_sptrsv_solve_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ struct LowerTriLvlSchedTP2SolverFunctor

#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV)
// -----------------------------------------------------------
// Helper functors for Lower-triangular solve with SpMV
// Helper functors for Lower-triangular solve with SpMV
template <class TriSolveHandle, class LHSType, class NGBLType>
struct SparseTriSupernodalSpMVFunctor
{
Expand Down Expand Up @@ -662,7 +662,7 @@ struct SparseTriSupernodalSpMVFunctor
// copy X to work
for (int j = team_rank; j < nscol; j += team_size) {
work (w1 + j) = X (j1 + j);
}
}
} else if (flag == -1) {
// copy work to X
for (int j = team_rank; j < nscol; j += team_size) {
Expand Down Expand Up @@ -777,7 +777,7 @@ struct LowerTriSupernodalFunctor

// create a view for the s-th supernodal column
scalar_t *dataL = const_cast<scalar_t*> (values.data ());
Kokkos::View<scalar_t**, Kokkos::LayoutLeft, memory_space, Kokkos::MemoryUnmanaged> viewL (&dataL[i1], nsrow, nscol);
Kokkos::View<scalar_t**, default_layout, memory_space, Kokkos::MemoryUnmanaged> viewL (&dataL[i1], nsrow, nscol);

// extract part of the solution, corresponding to the diagonal block
auto Xj = Kokkos::subview (X, range_type(j1, j2));
Expand Down Expand Up @@ -817,7 +817,7 @@ struct LowerTriSupernodalFunctor
KokkosBatched::Algo::Gemv::Unblocked>
::invoke(team, one, Ljj, Y, zero, Xj);
} else {
Kokkos::View<scalar_t**, Kokkos::LayoutLeft, memory_space, Kokkos::MemoryUnmanaged> Xjj (Xj.data (), nscol, 1);
Kokkos::View<scalar_t**, default_layout, memory_space, Kokkos::MemoryUnmanaged> Xjj (Xj.data (), nscol, 1);
if (unit_diagonal) {
KokkosBatched::TeamTrsm<member_type,
KokkosBatched::Side::Left,
Expand Down Expand Up @@ -849,7 +849,7 @@ struct LowerTriSupernodalFunctor
}

/* scatter vectors back into X */
int i2 = i1 + nscol; // offset into rowind
int i2 = i1 + nscol; // offset into rowind
int nsrow2 = nsrow - nscol; // "total" number of rows in all the off-diagonal supernodes
Kokkos::View<scalar_t*, memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged | Kokkos::Atomic> > Xatomic(X.data(), X.extent(0));
for (int ii = team_rank; ii < nsrow2; ii += team_size) {
Expand Down Expand Up @@ -877,7 +877,7 @@ struct UpperTriSupernodalFunctor
using integer_view_t = Kokkos::View<int*, memory_space>;
using work_view_t = typename Kokkos::View<scalar_t*, Kokkos::Device<execution_space, memory_space>>;

using SupernodeView = typename Kokkos::View<scalar_t**, Kokkos::LayoutLeft,
using SupernodeView = typename Kokkos::View<scalar_t**, default_layout,
memory_space, Kokkos::MemoryUnmanaged>;

using range_type = Kokkos::pair<int, int>;
Expand Down Expand Up @@ -964,7 +964,7 @@ struct UpperTriSupernodalFunctor
// "total" number of rows in all the off-diagonal supernodes
int nsrow2 = nsrow - nscol;
/* gather vector into Z */
int i2 = i1 + nscol; // offset into rowind
int i2 = i1 + nscol; // offset into rowind
auto Z = Kokkos::subview(work, range_type(workoffset+nscol, workoffset+nsrow)); // needed with gemv for update&scatter
for (int ii = team_rank; ii < nsrow2 ; ii += team_size) {
int i = rowind (i2 + ii);
Expand Down Expand Up @@ -999,7 +999,7 @@ struct UpperTriSupernodalFunctor
KokkosBatched::Algo::Gemv::Unblocked>
::invoke(team, one, Ujj, Y, zero, Xj);
} else {
Kokkos::View<scalar_t**, Kokkos::LayoutLeft, memory_space, Kokkos::MemoryUnmanaged> Xjj (Xj.data (), nscol, 1);
Kokkos::View<scalar_t**, default_layout, memory_space, Kokkos::MemoryUnmanaged> Xjj (Xj.data (), nscol, 1);
KokkosBatched::TeamTrsm<member_type,
KokkosBatched::Side::Left,
KokkosBatched::Uplo::Lower,
Expand Down Expand Up @@ -1108,7 +1108,7 @@ struct UpperTriTranSupernodalFunctor

// create a view of the s-th supernodal column of U
scalar_t *dataU = const_cast<scalar_t*> (values.data ());
Kokkos::View<scalar_t**, Kokkos::LayoutLeft, memory_space, Kokkos::MemoryUnmanaged> viewU (&dataU[i1], nsrow, nscol);
Kokkos::View<scalar_t**, default_layout, memory_space, Kokkos::MemoryUnmanaged> viewU (&dataU[i1], nsrow, nscol);

// extract part of solution, corresponding to the diagonal block U(s, s)
auto Xj = Kokkos::subview (X, range_type(j1, j2));
Expand Down Expand Up @@ -1146,7 +1146,7 @@ struct UpperTriTranSupernodalFunctor
KokkosBatched::Algo::Gemv::Unblocked>
::invoke(team, one, Ujj, Y, zero, Xj);
} else {
Kokkos::View<scalar_t**, Kokkos::LayoutLeft, memory_space, Kokkos::MemoryUnmanaged> Xjj (Xj.data (), nscol, 1);
Kokkos::View<scalar_t**, default_layout, memory_space, Kokkos::MemoryUnmanaged> Xjj (Xj.data (), nscol, 1);
KokkosBatched::TeamTrsm<member_type,
KokkosBatched::Side::Left,
KokkosBatched::Uplo::Upper,
Expand All @@ -1172,7 +1172,7 @@ struct UpperTriTranSupernodalFunctor
}

/* scatter vector into Z */
int i2 = i1 + nscol; // offset into rowind
int i2 = i1 + nscol; // offset into rowind
Kokkos::View<scalar_t*, memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged | Kokkos::Atomic> > Xatomic(X.data(), X.extent(0));
for (int ii = team_rank; ii < nsrow2 ; ii += team_size) {
int i = rowind (i2 + ii);
Expand Down Expand Up @@ -2727,7 +2727,7 @@ cudaProfilerStart();
#endif

using team_policy_type = Kokkos::TeamPolicy<execution_space>;
using supernode_view_type = Kokkos::View<scalar_t**, Kokkos::LayoutLeft, memory_space, Kokkos::MemoryUnmanaged>;
using supernode_view_type = Kokkos::View<scalar_t**, default_layout, memory_space, Kokkos::MemoryUnmanaged>;
if (diag_kernel_type_host (lvl) == 3) {
// using device-level kernels (functor is called to scatter the results)
scalar_t *dataL = const_cast<scalar_t*> (values.data ());
Expand Down Expand Up @@ -2782,7 +2782,7 @@ cudaProfilerStart();
zero, Xj);
} else {
char unit_diag = (unit_diagonal ? 'U' : 'N');
Kokkos::View<scalar_t**, Kokkos::LayoutLeft, memory_space, Kokkos::MemoryUnmanaged> Xjj (Xj.data (), nscol, 1);
Kokkos::View<scalar_t**, default_layout, memory_space, Kokkos::MemoryUnmanaged> Xjj (Xj.data (), nscol, 1);
KokkosBlas::
trsm("L", "L", "N", &unit_diag,
one, Ljj, Xjj);
Expand Down Expand Up @@ -2810,7 +2810,7 @@ cudaProfilerStart();
}

// launching sparse-triangular solve functor
LowerTriSupernodalFunctor<TriSolveHandle, RowMapType, EntriesType, ValuesType, LHSType, NGBLType>
LowerTriSupernodalFunctor<TriSolveHandle, RowMapType, EntriesType, ValuesType, LHSType, NGBLType>
sptrsv_functor (unit_diagonal, invert_diagonal, invert_offdiagonal,
supercols, row_map, entries, values, lvl, kernel_type, diag_kernel_type, lhs,
work, work_offset, nodes_grouped_by_level, node_count);
Expand Down Expand Up @@ -2848,12 +2848,12 @@ cudaProfilerStart();
lhs,
one, work);
// copy from work to lhs corresponding to diagonal blocks
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
sptrsv_init_functor (-1, node_count, nodes_grouped_by_level, supercols, supercols, lhs, work);
Kokkos::parallel_for ("parfor_lsolve_supernode", team_policy_type(lvl_nodes, Kokkos::AUTO), sptrsv_init_functor);
} else {
// copy lhs corresponding to diagonal blocks to work and zero out in lhs
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
sptrsv_init_functor (1, node_count, nodes_grouped_by_level, supercols, supercols, lhs, work);
Kokkos::parallel_for ("parfor_lsolve_supernode", team_policy_type(lvl_nodes, Kokkos::AUTO), sptrsv_init_functor);
}
Expand All @@ -2865,7 +2865,7 @@ cudaProfilerStart();
one, lhs);

// reinitialize workspace
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
sptrsv_finalize_functor (0, node_count, nodes_grouped_by_level, supercols, supercols, lhs, work);
Kokkos::parallel_for ("parfor_lsolve_supernode", team_policy_type(lvl_nodes, Kokkos::AUTO), sptrsv_finalize_functor);

Expand Down Expand Up @@ -3067,9 +3067,9 @@ cudaProfilerStart();

// workspace
int workoffset = work_offset_host (s);

// create a view for the s-th supernodal block column
Kokkos::View<scalar_t**, Kokkos::LayoutLeft, memory_space, Kokkos::MemoryUnmanaged> viewU (&dataU[i1], nsrow, nscol);
Kokkos::View<scalar_t**, default_layout, memory_space, Kokkos::MemoryUnmanaged> viewU (&dataU[i1], nsrow, nscol);

if (invert_offdiagonal) {
auto Uij = Kokkos::subview (viewU, range_type (0, nsrow), Kokkos::ALL ());
Expand All @@ -3093,7 +3093,7 @@ cudaProfilerStart();
Y,
zero, Xj);
} else {
Kokkos::View<scalar_t**, Kokkos::LayoutLeft, memory_space, Kokkos::MemoryUnmanaged> Xjj (Xj.data (), nscol, 1);
Kokkos::View<scalar_t**, default_layout, memory_space, Kokkos::MemoryUnmanaged> Xjj (Xj.data (), nscol, 1);
KokkosBlas::
trsm("L", "U", "N", "N",
one, Ujj, Xjj);
Expand All @@ -3120,15 +3120,15 @@ cudaProfilerStart();
}

// launching sparse-triangular solve functor
UpperTriTranSupernodalFunctor<TriSolveHandle, RowMapType, EntriesType, ValuesType, LHSType, NGBLType>
UpperTriTranSupernodalFunctor<TriSolveHandle, RowMapType, EntriesType, ValuesType, LHSType, NGBLType>
sptrsv_functor (invert_diagonal, invert_offdiagonal, supercols, row_map, entries, values,lvl, kernel_type, diag_kernel_type, lhs,
work, work_offset, nodes_grouped_by_level, node_count);

using policy_type = Kokkos::TeamPolicy<execution_space>;
Kokkos::parallel_for ("parfor_usolve_tran_supernode", policy_type (lvl_nodes , Kokkos::AUTO), sptrsv_functor);
} else { // U stored in CSR
// launching sparse-triangular solve functor
UpperTriSupernodalFunctor<TriSolveHandle, RowMapType, EntriesType, ValuesType, LHSType, NGBLType>
UpperTriSupernodalFunctor<TriSolveHandle, RowMapType, EntriesType, ValuesType, LHSType, NGBLType>
sptrsv_functor (invert_diagonal, supercols, row_map, entries, values,lvl, kernel_type, diag_kernel_type, lhs,
work, work_offset, nodes_grouped_by_level, node_count);

Expand Down Expand Up @@ -3157,9 +3157,9 @@ cudaProfilerStart();

// workspace
int workoffset = work_offset_host (s);

// create a view for the s-th supernodal block column
Kokkos::View<scalar_t**, Kokkos::LayoutLeft, memory_space, Kokkos::MemoryUnmanaged> viewU (&dataU[i1], nsrow, nscol);
Kokkos::View<scalar_t**, default_layout, memory_space, Kokkos::MemoryUnmanaged> viewU (&dataU[i1], nsrow, nscol);

// extract part of the solution, corresponding to the diagonal block
auto Xj = Kokkos::subview (lhs, range_type(j1, j2));
Expand All @@ -3185,7 +3185,7 @@ cudaProfilerStart();
Xj,
zero, Y);
} else {
Kokkos::View<scalar_t**, Kokkos::LayoutLeft, memory_space, Kokkos::MemoryUnmanaged> Xjj (Xj.data (), nscol, 1);
Kokkos::View<scalar_t**, default_layout, memory_space, Kokkos::MemoryUnmanaged> Xjj (Xj.data (), nscol, 1);
KokkosBlas::
trsm("L", "L", "T", "N",
one, Ujj, Xjj);
Expand Down Expand Up @@ -3233,12 +3233,12 @@ cudaProfilerStart();
lhs,
one, work);
// copy from work to lhs corresponding to diagonal blocks
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
sptrsv_init_functor (-1, node_count, nodes_grouped_by_level, supercols, supercols, lhs, work);
Kokkos::parallel_for ("parfor_lsolve_supernode", team_policy_type(lvl_nodes, Kokkos::AUTO), sptrsv_init_functor);
} else {
// zero out lhs corresponding to diagonal blocks in lhs, and copy to work
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
sptrsv_init_functor (1, node_count, nodes_grouped_by_level, supercols, supercols, lhs, work);
Kokkos::parallel_for ("parfor_lsolve_supernode", team_policy_type(lvl_nodes, Kokkos::AUTO), sptrsv_init_functor);
}
Expand All @@ -3251,7 +3251,7 @@ cudaProfilerStart();
} else {
if (!invert_offdiagonal) {
// zero out lhs corresponding to diagonal blocks in lhs, and copy to work
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
sptrsv_init_functor (1, node_count, nodes_grouped_by_level, supercols, supercols, lhs, work);
Kokkos::parallel_for ("parfor_lsolve_supernode", team_policy_type(lvl_nodes, Kokkos::AUTO), sptrsv_init_functor);

Expand All @@ -3273,7 +3273,7 @@ cudaProfilerStart();
}
}
// reinitialize workspace
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
SparseTriSupernodalSpMVFunctor<TriSolveHandle, LHSType, NGBLType>
sptrsv_finalize_functor (0, node_count, nodes_grouped_by_level, supercols, supercols, lhs, work);
Kokkos::parallel_for ("parfor_lsolve_supernode", team_policy_type(lvl_nodes, Kokkos::AUTO), sptrsv_finalize_functor);

Expand Down

0 comments on commit 6b4e189

Please sign in to comment.