Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Point multicolor GS: faster handling of long/bulk rows #993

Merged
merged 10 commits into from
May 28, 2021
1 change: 1 addition & 0 deletions perf_test/sparse/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ KOKKOSKERNELS_ADD_EXECUTABLE(
KOKKOSKERNELS_ADD_EXECUTABLE(
sparse_gs
SOURCES KokkosSparse_gs.cpp
TESTONLYLIBS kokkoskernelsperf_gtest
)

KOKKOSKERNELS_ADD_EXECUTABLE(
Expand Down
452 changes: 309 additions & 143 deletions perf_test/sparse/KokkosSparse_gs.cpp

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions src/common/KokkosKernels_Handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -636,9 +636,6 @@ class KokkosKernelsHandle
}
void destroy_gs_handle(){
if (is_owner_of_the_gs_handle && this->gsHandle != NULL){
if (this->gsHandle->is_owner_of_coloring()){
this->destroy_graph_coloring_handle();
}
delete this->gsHandle;
this->gsHandle = NULL;
}
Expand Down
63 changes: 47 additions & 16 deletions src/sparse/KokkosSparse_gauss_seidel_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,6 @@ namespace KokkosSparse{
//getters
// Returns the GSAlgorithm value currently stored in this handle.
GSAlgorithm get_algorithm_type() const
{
  return this->algorithm_type;
}

// Ownership query for the coloring: this implementation always answers false.
// Declared virtual so a derived handle can provide its own answer.
virtual bool is_owner_of_coloring() const
{
  return false;
}

// Returns (by value) the color_xadj member stored in this handle.
nnz_lno_persistent_work_host_view_t get_color_xadj() const { return this->color_xadj; }
Expand Down Expand Up @@ -245,11 +243,18 @@ namespace KokkosSparse{
scalar_persistent_work_view_t permuted_inverse_diagonal;
nnz_lno_t block_size; //this is for block sgs

nnz_lno_t max_nnz_input_row;

nnz_lno_t num_values_in_l1, num_values_in_l2, num_big_rows;
size_t level_1_mem, level_2_mem;
bool owner_of_coloring;

//Option set by user: rows with at least this many nonzeros are handled by a separate kernel
nnz_lno_t long_row_threshold;
//Number of long rows per color set. They are all grouped at the end of each color set.
nnz_lno_persistent_work_host_view_t long_rows_per_color;
//Maximum row length in each color set.
nnz_lno_persistent_work_host_view_t max_row_length_per_color;
//Temporary space for matvec over long rows - size is only max num long rows in a color.
scalar_persistent_work_view_t long_row_x;

public:

/**
Expand All @@ -260,17 +265,13 @@ namespace KokkosSparse{
permuted_xadj(), permuted_adj(), permuted_adj_vals(), old_to_new_map(),
permuted_y_vector(), permuted_x_vector(),
permuted_inverse_diagonal(), block_size(1),
max_nnz_input_row(-1),
num_values_in_l1(-1), num_values_in_l2(-1),num_big_rows(0), level_1_mem(0), level_2_mem(0),
owner_of_coloring(false)
long_row_threshold(0)
{
if (gs == GS_DEFAULT)
this->choose_default_algorithm();
}

// Reports the current value of the owner_of_coloring flag.
bool is_owner_of_coloring() const override
{
  return this->owner_of_coloring;
}
// Updates the owner_of_coloring flag; calling with no argument sets it to true.
void set_owner_of_coloring(bool owner = true)
{
  this->owner_of_coloring = owner;
}

// Stores `bs` as the block size kept by this handle.
void set_block_size(nnz_lno_t bs)
{
  this->block_size = bs;
}
// Returns the block size currently stored in this handle.
nnz_lno_t get_block_size() const
{
  return this->block_size;
}

Expand Down Expand Up @@ -363,14 +364,44 @@ namespace KokkosSparse{
return this->num_big_rows;
}

nnz_lno_t get_max_nnz() const {
if(max_nnz_input_row == static_cast<nnz_lno_t>(-1))
throw std::runtime_error("Requested max nnz per input row, but this has not been set in the PointGS handle.");
return this->max_nnz_input_row;
// Returns the long-row threshold: rows with at least this many nonzeros
// are handled by a separate kernel.
nnz_lno_t get_long_row_threshold() const { return long_row_threshold; }

void set_max_nnz(nnz_lno_t num_result_nnz_) {
this->max_nnz_input_row = num_result_nnz_;
// Sets the long-row threshold to `lrt`: rows with at least this many
// nonzeros are handled by a separate kernel.
void set_long_row_threshold(nnz_lno_t lrt) { long_row_threshold = lrt; }

// Returns (by value) the stored long_rows_per_color member, i.e. the
// number of long rows in each color set.
nnz_lno_persistent_work_host_view_t get_long_rows_per_color() const { return long_rows_per_color; }

// Replaces the stored long_rows_per_color member (number of long rows in
// each color set) with the argument.
void set_long_rows_per_color(const nnz_lno_persistent_work_host_view_t& long_rows_per_color_) { long_rows_per_color = long_rows_per_color_; }

// Returns (by value) the stored max_row_length_per_color member, i.e. the
// maximum row length in each color set.
nnz_lno_persistent_work_host_view_t get_max_row_length_per_color() const { return max_row_length_per_color; }

// Replaces the stored max_row_length_per_color member (maximum row length
// in each color set) with the argument.
void set_max_row_length_per_color(const nnz_lno_persistent_work_host_view_t& max_row_length_per_color_) { max_row_length_per_color = max_row_length_per_color_; }

// Returns (by value) the stored long_row_x member, the temporary space
// used for the matvec over long rows.
scalar_persistent_work_view_t get_long_row_x() const { return long_row_x; }

// Replaces the stored long_row_x member (temporary space used for the
// matvec over long rows) with the argument.
void set_long_row_x(const scalar_persistent_work_view_t& long_row_x_) { long_row_x = long_row_x_; }

void allocate_x_y_vectors(nnz_lno_t num_rows, nnz_lno_t num_cols, nnz_lno_t num_vecs){
Expand Down
Loading