Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Supernodal SpTRSV: run TRMM on device for setup #983

Merged
merged 10 commits into from
May 27, 2021
31 changes: 21 additions & 10 deletions perf_test/sparse/KokkosSparse_sptrsv_superlu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,8 @@ void free_superlu (SuperMatrix &L, SuperMatrix &U,
/* ========================================================================================= */
template<typename scalar_type>
int test_sptrsv_perf (std::vector<int> tests, bool verbose, std::string &filename, bool symm_mode, bool metis, bool merge,
bool invert_diag, bool invert_offdiag, bool u_in_csr, int panel_size, int relax_size, int block_size,
int loop) {
bool invert_diag, bool invert_offdiag, bool u_in_csr, bool trmm_on_device,
int panel_size, int relax_size, int block_size, int loop) {

using ordinal_type = int;
using size_type = int;
Expand Down Expand Up @@ -423,6 +423,11 @@ int test_sptrsv_perf (std::vector<int> tests, bool verbose, std::string &filenam
khL.set_sptrsv_perm (perm_r);
khU.set_sptrsv_perm (perm_c);

// specify whether to run trmm on device
std::cout << " TRMM on device : " << trmm_on_device << std::endl;
khL.set_sptrsv_trmm_on_device (trmm_on_device);
khU.set_sptrsv_trmm_on_device (trmm_on_device);

// block size to switch to device call
if (block_size >= 0) {
std::cout << " Block Size : " << block_size << std::endl;
Expand Down Expand Up @@ -660,6 +665,8 @@ int main(int argc, char **argv) {
bool invert_offdiag = false;
// store U in CSR, or CSC
bool u_in_csr = true;
// specify whether to run KokkosKernels::trmm on device
bool trmm_on_device = false;
// block size to switch to device call (default is 100)
int block_size = -1;
// parameters for SuperLU (only affects factorization)
Expand Down Expand Up @@ -734,6 +741,10 @@ int main(int argc, char **argv) {
u_in_csr = false;
continue;
}
if((strcmp(argv[i],"--trmm-on-device")==0)) {
trmm_on_device = true;
continue;
}
if((strcmp(argv[i],"--panel-size")==0)) {
panel_size = atoi(argv[++i]);
continue;
Expand Down Expand Up @@ -767,35 +778,35 @@ int main(int argc, char **argv) {
#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE)
scalarTypeString = "(scalar_t = Kokkos::complex<double>)";
total_errors = test_sptrsv_perf<Kokkos::complex<double>> (tests, verbose, filename, symm_mode, metis, merge,
invert_diag, invert_offdiag, u_in_csr, panel_size,
relax_size, block_size, loop);
invert_diag, invert_offdiag, u_in_csr, trmm_on_device,
panel_size, relax_size, block_size, loop);
#else
std::cout << std::endl << " KOKKOSKERNELS_INST_COMPLEX_DOUBLE is not enabled ** " << std::endl << std::endl;
#endif
} else if (char_scalar == "c") {
#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT)
scalarTypeString = "(scalar_t = Kokkos::complex<float>)";
total_errors = test_sptrsv_perf<Kokkos::complex<float>> (tests, verbose, filename, symm_mode, metis, merge,
invert_diag, invert_offdiag, u_in_csr, panel_size,
relax_size, block_size, loop);
invert_diag, invert_offdiag, u_in_csr, trmm_on_device,
panel_size, relax_size, block_size, loop);
#else
std::cout << std::endl << " KOKKOSKERNELS_INST_COMPLEX_FLOAT is not enabled ** " << std::endl << std::endl;
#endif
} else if (char_scalar == "d") {
#if defined(KOKKOSKERNELS_INST_DOUBLE)
scalarTypeString = "(scalar_t = double)";
total_errors = test_sptrsv_perf<double> (tests, verbose, filename, symm_mode, metis, merge,
invert_diag, invert_offdiag, u_in_csr, panel_size,
relax_size, block_size, loop);
invert_diag, invert_offdiag, u_in_csr, trmm_on_device,
panel_size, relax_size, block_size, loop);
#else
std::cout << std::endl << " KOKKOSKERNELS_INST_DOUBLE is not enabled ** " << std::endl << std::endl;
#endif
} else if (char_scalar == "f") {
#if defined(KOKKOSKERNELS_INST_FLOAT)
scalarTypeString = "(scalar_t = float)";
total_errors = test_sptrsv_perf<float> (tests, verbose, filename, symm_mode, metis, merge,
invert_diag, invert_offdiag, u_in_csr, panel_size,
relax_size, block_size, loop);
invert_diag, invert_offdiag, u_in_csr, trmm_on_device,
panel_size, relax_size, block_size, loop);
#else
std::cout << std::endl << " KOKKOSKERNELS_INST_FLOAT is not enabled ** " << std::endl << std::endl;
#endif
Expand Down
3 changes: 3 additions & 0 deletions perf_test/sparse/KokkosSparse_sptrsv_supernode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,9 @@ int test_sptrsv_perf (std::vector<int> tests, bool verbose, std::string& lower_f
khL.set_sptrsv_verbose (verbose);
khU.set_sptrsv_verbose (verbose);

// specify if U is stored in CSR or CSC
khU.set_sptrsv_column_major (!u_in_csr);

// specify whether to merge supernodes (optional, default merge is false)
khL.set_sptrsv_merge_supernodes (merge);
khU.set_sptrsv_merge_supernodes (merge);
Expand Down
4 changes: 4 additions & 0 deletions src/common/KokkosKernels_Handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,10 @@ class KokkosKernelsHandle
// Returns whatever is_column_major() reports on the underlying sptrsvHandle.
// NOTE(review): sptrsvHandle is dereferenced without a null check here --
// presumably the SPTRSV handle must already have been created; confirm
// against callers.
bool is_sptrsv_column_major () {
return this->sptrsvHandle->is_column_major ();
}

// Forwards the trmm_on_device flag to the underlying sptrsvHandle by calling
// its set_trmm_on_device(); this wrapper keeps no state of its own.
// NOTE(review): sptrsvHandle is dereferenced without a null check here --
// presumably the SPTRSV handle must already have been created; confirm
// against callers.
void set_sptrsv_trmm_on_device (bool trmm_on_device) {
this->sptrsvHandle->set_trmm_on_device (trmm_on_device);
}
#endif
void destroy_sptrsv_handle(){
if (is_owner_of_the_sptrsv_handle && this->sptrsvHandle != nullptr)
Expand Down
4 changes: 2 additions & 2 deletions src/sparse/KokkosSparse_sptrsv_cholmod.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ namespace Experimental {

/* ========================================================================================= */
template <typename cholmod_int_type, typename graph_t, typename KernelHandle>
graph_t read_cholmod_graphL(KernelHandle kernelHandle, cholmod_factor *L, cholmod_common *cm) {
graph_t read_cholmod_graphL(KernelHandle *kernelHandle, cholmod_factor *L, cholmod_common *cm) {

/* ---------------------------------------------------------------------- */
/* get inputs */
Expand Down Expand Up @@ -169,7 +169,7 @@ namespace Experimental {

/* ========================================================================================= */
template <typename cholmod_int_type, typename crsmat_t, typename graph_t, typename KernelHandle>
crsmat_t read_cholmod_factor(KernelHandle kernelHandle, cholmod_factor *L, cholmod_common *cm, graph_t &static_graph) {
crsmat_t read_cholmod_factor(KernelHandle *kernelHandle, cholmod_factor *L, cholmod_common *cm, graph_t &static_graph) {

using values_view_t = typename crsmat_t::values_type::non_const_type;
using scalar_t = typename values_view_t::value_type;
Expand Down
12 changes: 12 additions & 0 deletions src/sparse/KokkosSparse_sptrsv_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ class SPTRSVHandle {
int *etree;

// type of kernels used at each level
bool trmm_on_device;
int sup_size_unblocked;
int sup_size_blocked;
integer_view_host_t diag_kernel_type_host;
Expand Down Expand Up @@ -422,6 +423,7 @@ class SPTRSVHandle {
, invert_diagonal (true)
, invert_offdiagonal (false)
, etree (nullptr)
, trmm_on_device (true)
, sup_size_unblocked (100)
, sup_size_blocked (200)
, perm_avail (false)
Expand Down Expand Up @@ -549,6 +551,16 @@ class SPTRSVHandle {
return this->work_offset_host;
}

// Specify whether to run KokkosKernels::trmm on device or not.
// Only records `flag` in the trmm_on_device member; no other work is done here.
void set_trmm_on_device (bool flag) {
this->trmm_on_device = flag;
}

// Returns the current value of the trmm_on_device member.
bool get_trmm_on_device () {
return trmm_on_device;
}


// supernode size tolerance to pick right kernel type
int get_supernode_size_unblocked() {
return this->sup_size_unblocked;
Expand Down
6 changes: 3 additions & 3 deletions src/sparse/KokkosSparse_sptrsv_superlu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ graph_t read_superlu_graphL(KernelHandle *kernelHandle, SuperMatrix *L) {
/* ========================================================================================= */
// read SuperLU U factor into CSR
template <typename graph_t, typename KernelHandle>
graph_t read_superlu_graphU(KernelHandle kernelHandle, SuperMatrix *L, SuperMatrix *U) {
graph_t read_superlu_graphU(KernelHandle *kernelHandle, SuperMatrix *L, SuperMatrix *U) {

using row_map_view_t = typename graph_t::row_map_type::non_const_type;
using cols_view_t = typename graph_t::entries_type::non_const_type;
Expand Down Expand Up @@ -326,7 +326,7 @@ void sptrsv_symbolic(

/* ========================================================================================= */
template <typename crsmat_t, typename graph_t, typename KernelHandle>
crsmat_t read_superlu_valuesL(KernelHandle kernelHandle, SuperMatrix *L, graph_t &static_graph) {
crsmat_t read_superlu_valuesL(KernelHandle *kernelHandle, SuperMatrix *L, graph_t &static_graph) {

using values_view_t = typename crsmat_t::values_type::non_const_type;
using scalar_t = typename values_view_t::value_type;
Expand Down Expand Up @@ -357,7 +357,7 @@ template <typename crsmat_t,
typename graph_t,
typename KernelHandle>
crsmat_t
read_superlu_valuesU(KernelHandle kernelHandle,
read_superlu_valuesU(KernelHandle *kernelHandle,
SuperMatrix *L, SuperMatrix *U, graph_t &static_graph) {

using values_view_t = typename crsmat_t::values_type::non_const_type;
Expand Down
Loading