Skip to content

Commit

Permalink
fix ParILU diagonal entries
Browse files: browse the repository at this point in the history
  • Loading branch information
upsj committed Dec 11, 2019
1 parent 287f600 commit f783634
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 28 deletions.
33 changes: 27 additions & 6 deletions common/factorization/par_ilu_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,16 @@ __global__ __launch_bounds__(default_block_size) void count_nnz_per_l_u_row(
if (row < num_rows) {
IndexType l_row_nnz{};
IndexType u_row_nnz{};
bool has_diagonal{};
for (auto idx = row_ptrs[row]; idx < row_ptrs[row + 1]; ++idx) {
auto col = col_idxs[idx];
l_row_nnz += (col <= row);
u_row_nnz += (row <= col);
has_diagonal |= col == row;
}
l_nnz_row[row] = l_row_nnz;
u_nnz_row[row] = u_row_nnz;
// if we didn't find it, add the diagonal entry
l_nnz_row[row] = l_row_nnz + !has_diagonal;
u_nnz_row[row] = u_row_nnz + !has_diagonal;
}
}

Expand All @@ -68,21 +71,39 @@ __global__ __launch_bounds__(default_block_size) void initialize_l_u(
const auto row = blockDim.x * blockIdx.x + threadIdx.x;
if (row < num_rows) {
auto l_idx = l_row_ptrs[row];
auto u_idx = u_row_ptrs[row];
auto u_idx = u_row_ptrs[row] + 1; // we treat the diagonal separately
bool has_diagonal{};
ValueType diag_val{};
for (size_type i = row_ptrs[row]; i < row_ptrs[row + 1]; ++i) {
const auto col = col_idxs[i];
const auto val = values[i];
if (col <= row) {
// save diagonal entry for later
if (col == row) {
has_diagonal = true;
diag_val = val;
}
if (col < row) {
l_col_idxs[l_idx] = col;
l_values[l_idx] = (col == row ? one<ValueType>() : val);
l_values[l_idx] = val;
++l_idx;
}
if (row <= col) {
if (row < col) {
u_col_idxs[u_idx] = col;
u_values[u_idx] = val;
++u_idx;
}
}
// if there was no diagonal entry, set it to one
if (!has_diagonal) {
diag_val = one<ValueType>();
}
// store diagonal entries
auto l_diag_idx = l_row_ptrs[row + 1] - 1;
auto u_diag_idx = u_row_ptrs[row];
l_col_idxs[l_diag_idx] = row;
u_col_idxs[u_diag_idx] = row;
l_values[l_diag_idx] = one<ValueType>();
u_values[u_diag_idx] = diag_val;
}
}

Expand Down
33 changes: 22 additions & 11 deletions omp/factorization/par_ilu_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ void initialize_row_ptrs_l_u(
for (size_type row = 0; row < system_matrix->get_size()[0]; ++row) {
size_type l_nnz{};
size_type u_nnz{};
bool has_diagonal{};
for (size_type el = row_ptrs[row]; el < row_ptrs[row + 1]; ++el) {
size_type col = col_idxs[el];
if (col <= row) {
Expand All @@ -73,9 +74,10 @@ void initialize_row_ptrs_l_u(
if (col >= row) {
++u_nnz;
}
has_diagonal |= col == row;
}
l_row_ptrs[row + 1] = l_nnz;
u_row_ptrs[row + 1] = u_nnz;
l_row_ptrs[row + 1] = l_nnz + !has_diagonal;
u_row_ptrs[row + 1] = u_nnz + !has_diagonal;
}

// Now, compute the prefix-sum, to get proper row_ptrs for L and U
Expand Down Expand Up @@ -115,7 +117,10 @@ void initialize_l_u(std::shared_ptr<const OmpExecutor> exec,
#pragma omp parallel for
for (size_type row = 0; row < system_matrix->get_size()[0]; ++row) {
size_type current_index_l = row_ptrs_l[row];
size_type current_index_u = row_ptrs_u[row];
size_type current_index_u =
row_ptrs_u[row] + 1; // we treat the diagonal separately
bool has_diagonal{};
ValueType diag_val{};
for (size_type el = row_ptrs[row]; el < row_ptrs[row + 1]; ++el) {
const auto col = col_idxs[el];
const auto val = vals[el];
Expand All @@ -124,20 +129,26 @@ void initialize_l_u(std::shared_ptr<const OmpExecutor> exec,
vals_l[current_index_l] = val;
++current_index_l;
} else if (col == row) {
// Update both L and U
col_idxs_l[current_index_l] = col;
vals_l[current_index_l] = one<ValueType>();
++current_index_l;

col_idxs_u[current_index_u] = col;
vals_u[current_index_u] = val;
++current_index_u;
// save value for later
has_diagonal = true;
diag_val = val;
} else { // col > row
col_idxs_u[current_index_u] = col;
vals_u[current_index_u] = val;
++current_index_u;
}
}
// if there was no diagonal entry, set it to one
if (!has_diagonal) {
diag_val = one<ValueType>();
}
// store diagonal entries
size_type l_diag_idx = row_ptrs_l[row + 1] - 1;
size_type u_diag_idx = row_ptrs_u[row];
col_idxs_l[l_diag_idx] = row;
col_idxs_u[u_diag_idx] = row;
vals_l[l_diag_idx] = one<ValueType>();
vals_u[u_diag_idx] = diag_val;
}
}

Expand Down
33 changes: 22 additions & 11 deletions reference/factorization/par_ilu_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ void initialize_row_ptrs_l_u(
l_row_ptrs[0] = 0;
u_row_ptrs[0] = 0;
for (size_type row = 0; row < system_matrix->get_size()[0]; ++row) {
bool has_diagonal{};
for (size_type el = row_ptrs[row]; el < row_ptrs[row + 1]; ++el) {
size_type col = col_idxs[el];
if (col <= row) {
Expand All @@ -71,9 +72,10 @@ void initialize_row_ptrs_l_u(
if (col >= row) {
++u_nnz;
}
has_diagonal |= col == row;
}
l_row_ptrs[row + 1] = l_nnz;
u_row_ptrs[row + 1] = u_nnz;
l_row_ptrs[row + 1] = l_nnz + !has_diagonal;
u_row_ptrs[row + 1] = u_nnz + !has_diagonal;
}
}

Expand Down Expand Up @@ -101,7 +103,10 @@ void initialize_l_u(std::shared_ptr<const ReferenceExecutor> exec,

for (size_type row = 0; row < system_matrix->get_size()[0]; ++row) {
size_type current_index_l = row_ptrs_l[row];
size_type current_index_u = row_ptrs_u[row];
size_type current_index_u =
row_ptrs_u[row] + 1; // we treat the diagonal separately
bool has_diagonal{};
ValueType diag_val{};
for (size_type el = row_ptrs[row]; el < row_ptrs[row + 1]; ++el) {
const auto col = col_idxs[el];
const auto val = vals[el];
Expand All @@ -110,20 +115,26 @@ void initialize_l_u(std::shared_ptr<const ReferenceExecutor> exec,
vals_l[current_index_l] = val;
++current_index_l;
} else if (col == row) {
// Update both L and U
col_idxs_l[current_index_l] = col;
vals_l[current_index_l] = one<ValueType>();
++current_index_l;

col_idxs_u[current_index_u] = col;
vals_u[current_index_u] = val;
++current_index_u;
// save diagonal value
has_diagonal = true;
diag_val = val;
} else { // col > row
col_idxs_u[current_index_u] = col;
vals_u[current_index_u] = val;
++current_index_u;
}
}
// if there was no diagonal entry, set it to one
if (!has_diagonal) {
diag_val = one<ValueType>();
}
// store diagonal values separately
auto l_diag_idx = row_ptrs_l[row + 1] - 1;
auto u_diag_idx = row_ptrs_u[row];
col_idxs_l[l_diag_idx] = row;
col_idxs_u[u_diag_idx] = row;
vals_l[l_diag_idx] = one<ValueType>();
vals_u[u_diag_idx] = diag_val;
}
}

Expand Down
35 changes: 35 additions & 0 deletions reference/test/factorization/par_ilu_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,27 @@ class ParIlu : public ::testing::Test {
{0., 0., 0., 0., 5., -15.},
{0., 0., 0., 0., 0., 6.}},
exec)),
mtx_big_nodiag(gko::initialize<Csr>({{1., 1., 1., 0., 1., 3.},
{1., 2., 2., 0., 2., 0.},
{0., 2., 0., 3., 3., 5.},
{1., 0., 3., 4., 4., 4.},
{1., 2., 0., 4., 1., 6.},
{0., 2., 3., 4., 5., 8.}},
exec)),
big_nodiag_l_expected(gko::initialize<Dense>({{1., 0., 0., 0., 0., 0.},
{1., 1., 0., 0., 0., 0.},
{0., 2., 1., 0., 0., 0.},
{2., 0., 0., 1., 0., 0.},
{1., 1., 0., 1., 1., 0.},
{0., 2., 1., 0.25, -0.5, 1.}},
exec)),
big_nodiag_u_expected(gko::initialize<Dense>({{1., 1., 1., 0., 1., 3.},
{0., 1., 1., 0., 1., 0.},
{0., 0., 1., 3., 1., -2.},
{0., 0., 0., 4., 2., 0.},
{0., 0., 0., 0., -3., 3.},
{0., 0., 0., 0., 0., 11.5}},
exec)),
// clang-format on
ilu_factory_skip(
gko::factorization::ParIlu<>::build().with_skip_sorting(true).on(
Expand All @@ -153,6 +174,9 @@ class ParIlu : public ::testing::Test {
std::shared_ptr<const Dense> mtx_big;
std::shared_ptr<const Dense> big_l_expected;
std::shared_ptr<const Dense> big_u_expected;
std::shared_ptr<const Csr> mtx_big_nodiag;
std::shared_ptr<const Dense> big_nodiag_l_expected;
std::shared_ptr<const Dense> big_nodiag_u_expected;
std::unique_ptr<gko::factorization::ParIlu<>::Factory> ilu_factory_skip;
std::unique_ptr<gko::factorization::ParIlu<>::Factory> ilu_factory_sort;
};
Expand Down Expand Up @@ -385,6 +409,17 @@ TEST_F(ParIlu, GenerateForCsrSmall)
}


// Generates the ParILU factors for the `mtx_big_nodiag` CSR fixture (whose
// input data has a zero at diagonal position (2, 2)) using the factory that
// skips sorting, and compares both factors against the precomputed dense
// reference matrices with a tolerance of 1e-14.
TEST_F(ParIlu, GenerateForCsrBigWithDiagonalZeros)
{
    auto factorization = ilu_factory_skip->generate(mtx_big_nodiag);
    auto upper = factorization->get_u_factor();
    auto lower = factorization->get_l_factor();

    // L is checked first, then U, matching the fixture's naming order.
    GKO_ASSERT_MTX_NEAR(lower, big_nodiag_l_expected, 1e-14);
    GKO_ASSERT_MTX_NEAR(upper, big_nodiag_u_expected, 1e-14);
}


TEST_F(ParIlu, GenerateForDenseSmallWithMultipleIterations)
{
auto multiple_iter_factory = gko::factorization::ParIlu<>::build()
Expand Down

0 comments on commit f783634

Please sign in to comment.