Skip to content

Commit

Permalink
MDF: update implementation to lower algorithmic complexity
Browse files Browse the repository at this point in the history
This mostly changes the way the discarded fill is computed at each
factorization step: only the rows that were impacted by the last
factorized row are selected for an update.
  • Loading branch information
lucbv committed Oct 4, 2022
1 parent 12a6d18 commit a8f01df
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 48 deletions.
122 changes: 81 additions & 41 deletions sparse/impl/KokkosSparse_mdf_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,21 +153,21 @@ struct MDF_discarded_fill_norm {
KAS::abs(At.values(alphaIdx) * A.values(betaIdx)) *
KAS::abs(At.values(alphaIdx) * A.values(betaIdx));
if (verbosity > 1) {
printf("Adding value A[%d,%d]=%f to discard norm of row %d\n",
int(At.graph.entries(alphaIdx)),
int(A.graph.entries(betaIdx)),
KAS::abs(At.values(alphaIdx) * A.values(betaIdx)) *
KAS::abs(At.values(alphaIdx) * A.values(betaIdx)),
int(rowIdx));
KOKKOS_IMPL_DO_NOT_USE_PRINTF("Adding value A[%d,%d]=%f to discard norm of row %d\n",
int(At.graph.entries(alphaIdx)),
int(A.graph.entries(betaIdx)),
KAS::abs(At.values(alphaIdx) * A.values(betaIdx)) *
KAS::abs(At.values(alphaIdx) * A.values(betaIdx)),
int(rowIdx));
}
}
}
}
} else if (fillRowIdx == rowIdx) {
diag_val = At.values(alphaIdx);
if (verbosity > 1) {
printf("Row %d diagonal value dected, values(%d)=%f\n", int(rowIdx),
int(alphaIdx), At.values(alphaIdx));
KOKKOS_IMPL_DO_NOT_USE_PRINTF("Row %d diagonal value dected, values(%d)=%f\n", int(rowIdx),
int(alphaIdx), At.values(alphaIdx));
}
}
}
Expand All @@ -179,7 +179,7 @@ struct MDF_discarded_fill_norm {
if (verbosity > 0) {
const ordinal_type degree = ordinal_type(A.graph.row_map(rowIdx + 1) -
A.graph.row_map(rowIdx) - 1);
printf(
KOKKOS_IMPL_DO_NOT_USE_PRINTF(
"Row %d has discarded fill of %f, deficiency of %d and degree %d\n",
rowIdx, KAS::sqrt(discard_norm), deficiency(rowIdx), degree);
}
Expand Down Expand Up @@ -267,7 +267,7 @@ struct MDF_selective_discarded_fill_norm {
KAS::abs(At.values(alphaIdx) * A.values(betaIdx)) *
KAS::abs(At.values(alphaIdx) * A.values(betaIdx));
if (verbosity > 1) {
printf("Adding value A[%d,%d]=%f to discard norm of row %d\n",
KOKKOS_IMPL_DO_NOT_USE_PRINTF("Adding value A[%d,%d]=%f to discard norm of row %d\n",
int(At.graph.entries(alphaIdx)),
int(A.graph.entries(betaIdx)),
KAS::abs(At.values(alphaIdx) * A.values(betaIdx)) *
Expand All @@ -280,7 +280,7 @@ struct MDF_selective_discarded_fill_norm {
} else if (fillRowIdx == rowIdx) {
diag_val = At.values(alphaIdx);
if (verbosity > 1) {
printf("Row %d diagonal value dected, values(%d)=%f\n", int(rowIdx),
KOKKOS_IMPL_DO_NOT_USE_PRINTF("Row %d diagonal value dected, values(%d)=%f\n", int(rowIdx),
int(alphaIdx), At.values(alphaIdx));
}
}
Expand All @@ -293,7 +293,7 @@ struct MDF_selective_discarded_fill_norm {
if (verbosity > 0) {
const ordinal_type degree = ordinal_type(A.graph.row_map(rowIdx + 1) -
A.graph.row_map(rowIdx) - 1);
printf(
KOKKOS_IMPL_DO_NOT_USE_PRINTF(
"Row %d has discarded fill of %f, deficiency of %d and degree %d\n",
rowIdx, KAS::sqrt(discard_norm), deficiency(rowIdx), degree);
}
Expand Down Expand Up @@ -434,6 +434,8 @@ struct MDF_factorize_row {
values_type valuesU;

col_ind_type permutation, permutation_inv;
values_type discarded_fill;
col_ind_type factored;
ordinal_type selected_row_idx, factorization_step;

int verbosity;
Expand All @@ -443,8 +445,9 @@ struct MDF_factorize_row {
values_type valuesL_, row_map_type row_mapU_,
col_ind_type entriesU_, values_type valuesU_,
col_ind_type permutation_, col_ind_type permutation_inv_,
ordinal_type selected_row_idx_,
ordinal_type factorization_step_, int verbosity_)
values_type discarded_fill_, col_ind_type factored_,
ordinal_type selected_row_idx_, ordinal_type factorization_step_,
int verbosity_)
: A(A_),
At(At_),
row_mapL(row_mapL_),
Expand All @@ -455,13 +458,16 @@ struct MDF_factorize_row {
valuesU(valuesU_),
permutation(permutation_),
permutation_inv(permutation_inv_),
discarded_fill(discarded_fill_),
factored(factored_),
selected_row_idx(selected_row_idx_),
factorization_step(factorization_step_),
verbosity(verbosity_){};

KOKKOS_INLINE_FUNCTION
void operator()(const ordinal_type /* idx */) const {
const ordinal_type selected_row = permutation(selected_row_idx);
discarded_fill(selected_row) = Kokkos::ArithTraits<value_type>::max();

// Swap entries in permutation vectors
permutation(selected_row_idx) = permutation(factorization_step);
Expand All @@ -470,11 +476,11 @@ struct MDF_factorize_row {
permutation_inv(permutation(selected_row_idx)) = selected_row_idx;

if (verbosity > 0) {
printf("Permutation vector: { ");
KOKKOS_IMPL_DO_NOT_USE_PRINTF("Permutation vector: { ");
for (ordinal_type rowIdx = 0; rowIdx < A.numRows(); ++rowIdx) {
printf("%d ", permutation(rowIdx));
KOKKOS_IMPL_DO_NOT_USE_PRINTF("%d ", permutation(rowIdx));
}
printf("}\n");
KOKKOS_IMPL_DO_NOT_USE_PRINTF("}\n");
}

// Insert the upper part of the selected row in U
Expand All @@ -495,25 +501,25 @@ struct MDF_factorize_row {
row_mapU(factorization_step + 1) = U_entryIdx;

if (verbosity > 0) {
printf("Diagonal values of row %d is %f\n", selected_row, diag);
KOKKOS_IMPL_DO_NOT_USE_PRINTF("Diagonal values of row %d is %f\n", selected_row, diag);
}

if (verbosity > 2) {
printf("U, row_map={ ");
KOKKOS_IMPL_DO_NOT_USE_PRINTF("U, row_map={ ");
for (ordinal_type rowIdx = 0; rowIdx < factorization_step + 1; ++rowIdx) {
printf("%d ", int(row_mapU(rowIdx)));
KOKKOS_IMPL_DO_NOT_USE_PRINTF("%d ", int(row_mapU(rowIdx)));
}
printf("}, entries={ ");
KOKKOS_IMPL_DO_NOT_USE_PRINTF("}, entries={ ");
for (size_type entryIdx = row_mapU(0);
entryIdx < row_mapU(factorization_step + 1); ++entryIdx) {
printf("%d ", int(entriesU(entryIdx)));
KOKKOS_IMPL_DO_NOT_USE_PRINTF("%d ", int(entriesU(entryIdx)));
}
printf("}, values={ ");
KOKKOS_IMPL_DO_NOT_USE_PRINTF("}, values={ ");
for (size_type entryIdx = row_mapU(0);
entryIdx < row_mapU(factorization_step + 1); ++entryIdx) {
printf("%f ", valuesU(entryIdx));
KOKKOS_IMPL_DO_NOT_USE_PRINTF("%f ", valuesU(entryIdx));
}
printf("}\n");
KOKKOS_IMPL_DO_NOT_USE_PRINTF("}\n");
}

// Insert the lower part of the selected column of A
Expand All @@ -534,20 +540,20 @@ struct MDF_factorize_row {
row_mapL(factorization_step + 1) = L_entryIdx;

if (verbosity > 2) {
printf("L(%d), [row_map(%d), row_map(%d)[ = [%d, %d[, entries={ ",
KOKKOS_IMPL_DO_NOT_USE_PRINTF("L(%d), [row_map(%d), row_map(%d)[ = [%d, %d[, entries={ ",
int(factorization_step), int(factorization_step),
int(factorization_step + 1), int(row_mapL(factorization_step)),
int(row_mapL(factorization_step + 1)));
for (size_type entryIdx = row_mapL(factorization_step);
entryIdx < row_mapL(factorization_step + 1); ++entryIdx) {
printf("%d ", int(entriesL(entryIdx)));
KOKKOS_IMPL_DO_NOT_USE_PRINTF("%d ", int(entriesL(entryIdx)));
}
printf("}, values={ ");
KOKKOS_IMPL_DO_NOT_USE_PRINTF("}, values={ ");
for (size_type entryIdx = row_mapL(factorization_step);
entryIdx < row_mapL(factorization_step + 1); ++entryIdx) {
printf("%f ", valuesL(entryIdx));
KOKKOS_IMPL_DO_NOT_USE_PRINTF("%f ", valuesL(entryIdx));
}
printf("}\n");
KOKKOS_IMPL_DO_NOT_USE_PRINTF("}\n");
}

// If this was the last row no need to update A and At!
Expand Down Expand Up @@ -603,7 +609,7 @@ struct MDF_factorize_row {
At.values(alphaIdx) * A.values(betaIdx) / diag_val;

if (verbosity > 1) {
printf("A[%d, %d] -= %f\n", int(fillRowIdx), int(fillColIdx),
KOKKOS_IMPL_DO_NOT_USE_PRINTF("A[%d, %d] -= %f\n", int(fillRowIdx), int(fillColIdx),
At.values(alphaIdx) * A.values(betaIdx) / diag_val);
}
}
Expand All @@ -621,17 +627,19 @@ struct MDF_factorize_row {
}
}

factored(selected_row) = 1;

if (verbosity > 0) {
printf("New values in A: { ");
KOKKOS_IMPL_DO_NOT_USE_PRINTF("New values in A: { ");
for (size_type entryIdx = 0; entryIdx < A.nnz(); ++entryIdx) {
printf("%f ", A.values(entryIdx));
KOKKOS_IMPL_DO_NOT_USE_PRINTF("%f ", A.values(entryIdx));
}
printf("}\n");
printf("New values in At: { ");
KOKKOS_IMPL_DO_NOT_USE_PRINTF("}\n");
KOKKOS_IMPL_DO_NOT_USE_PRINTF("New values in At: { ");
for (size_type entryIdx = 0; entryIdx < At.nnz(); ++entryIdx) {
printf("%f ", At.values(entryIdx));
KOKKOS_IMPL_DO_NOT_USE_PRINTF("%f ", At.values(entryIdx));
}
printf("}\n");
KOKKOS_IMPL_DO_NOT_USE_PRINTF("}\n");
}
} // operator()

Expand All @@ -644,17 +652,49 @@ struct MDF_compute_list_length {
using ordinal_type = typename crs_matrix_type::ordinal_type;
using size_type = typename crs_matrix_type::size_type;

ordinal_type rowIdx;
ordinal_type selected_row_idx;
crs_matrix_type A;
crs_matrix_type At;
col_ind_type permutation;
col_ind_type factored;
col_ind_type update_list_length;
col_ind_type update_list;

MDF_compute_list_length(const ordinal_type rowIdx_, const crs_matrix_type& A_,
col_ind_type& update_list_) : rowIdx(rowIdx_), A(A_),
update_list(update_list_) {}
const crs_matrix_type& At_, const col_ind_type& permutation_,
const col_ind_type factored_, col_ind_type &update_list_length_,
col_ind_type& update_list_)
: selected_row_idx(rowIdx_), A(A_), At(At_), permutation(permutation_), factored(factored_),
update_list_length(update_list_length_), update_list(update_list_) {}

// Builds the list of rows whose discarded fill must be recomputed after the
// row permutation(selected_row_idx) is selected.
//
// The list is the union, without duplicates, of the not-yet-factored
// off-diagonal indices found in row `selected_row` of A and in row
// `selected_row` of At (presumably the transpose of A, i.e. the column of A;
// confirm against the caller). A row counts as factored when
// factored(row) == 1.
//
// Output: the indices are written to update_list(0 .. count-1) and the count
// is stored in update_list_length(0). The functor is written to run as a
// single work item; the index argument is unused.
// NOTE(review): assumes update_list has room for every collected index;
// confirm the allocation size at the call site.
KOKKOS_INLINE_FUNCTION
void operator()(const size_type /*idx*/) const {
  const ordinal_type selected_row = permutation(selected_row_idx);

  // First pass: unfactored off-diagonal entries of the selected row of A.
  size_type updateIdx = 0;
  for (size_type entryIdx = A.graph.row_map(selected_row);
       entryIdx < A.graph.row_map(selected_row + 1); ++entryIdx) {
    const ordinal_type colIdx = A.graph.entries(entryIdx);
    if ((colIdx != selected_row) && (factored(colIdx) != 1)) {
      update_list(updateIdx) = colIdx;
      ++updateIdx;
    }
  }

  // Second pass: unfactored off-diagonal entries of the selected row of At,
  // skipping any index already collected during the first pass.
  const size_type update_rows = updateIdx;
  for (size_type entryIdx = At.graph.row_map(selected_row);
       entryIdx < At.graph.row_map(selected_row + 1); ++entryIdx) {
    // entryIdx indexes At's entries here, so the factored check must look up
    // At's entry as well (indexing A.graph.entries with it reads an
    // unrelated entry of A).
    const ordinal_type rowIdx = At.graph.entries(entryIdx);
    if ((rowIdx != selected_row) && (factored(rowIdx) != 1)) {
      bool already_updated = false;
      for (size_type checkIdx = 0; checkIdx < update_rows; ++checkIdx) {
        if (rowIdx == update_list(checkIdx)) {
          already_updated = true;
          break;
        }
      }
      if (!already_updated) {
        update_list(updateIdx) = rowIdx;
        ++updateIdx;
      }
    }
  }

  update_list_length(0) = updateIdx;
}
};

Expand Down
17 changes: 10 additions & 7 deletions sparse/src/KokkosSparse_mdf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ void mdf_numeric(crs_matrix_type& A, MDF_handle& handle) {
KokkosSparse::sort_crs_matrix<crs_matrix_type>(At);
values_type discarded_fill("discarded fill", A.numRows());
col_ind_type deficiency("deficiency", A.numRows());
col_ind_type update_list("update list", 0);
col_ind_type update_list_length("update list length", 1);
col_ind_type update_list("update list", A.numRows());
col_ind_type factored("factored rows", A.numRows());
Kokkos::deep_copy(discarded_fill, Kokkos::ArithTraits<value_type>::max());
Kokkos::deep_copy(deficiency, Kokkos::ArithTraits<ordinal_type>::max());

Expand All @@ -130,27 +132,28 @@ void mdf_numeric(crs_matrix_type& A, MDF_handle& handle) {
static_cast<int>(factorization_step));
}

range_policy_type stepPolicy(0, update_list.extent(0));
range_policy_type updatePolicy(0, update_list_length(0));
KokkosSparse::Impl::MDF_selective_discarded_fill_norm<crs_matrix_type> MDF_update_df_norm(
Atmp, At, factorization_step, handle.permutation, update_list,
discarded_fill, deficiency, verbosity_level);
Kokkos::parallel_for("MDF: updating fill norms", stepPolicy, MDF_update_df_norm);
Kokkos::parallel_for("MDF: updating fill norms", updatePolicy, MDF_update_df_norm);

range_policy_type stepPolicy(factorization_step, Atmp.numRows());
ordinal_type selected_row_idx = 0;
KokkosSparse::Impl::MDF_select_row<crs_matrix_type> MDF_row_selector(
factorization_step, discarded_fill, deficiency, Atmp.graph.row_map,
handle.permutation);
Kokkos::parallel_reduce("MDF: select pivot", stepPolicy, MDF_row_selector, selected_row_idx);

KokkosSparse::Impl::MDF_compute_list_length<crs_matrix_type> compute_list_length(
selected_row_idx, Atmp, update_list);
Kokkos::parallel_for(range_policy_type(0, 1), compute_list_length);
selected_row_idx, Atmp, At, handle.permutation, factored, update_list_length, update_list);
Kokkos::parallel_for("MDF: compute update list", range_policy_type(0, 1), compute_list_length);

KokkosSparse::Impl::MDF_factorize_row<crs_matrix_type> factorize_row(
Atmp, At, handle.row_mapL, handle.entriesL, handle.valuesL,
handle.row_mapU, handle.entriesU, handle.valuesU, handle.permutation,
handle.permutation_inv, selected_row_idx, factorization_step,
verbosity_level);
handle.permutation_inv, discarded_fill, factored, selected_row_idx,
factorization_step, verbosity_level);
Kokkos::parallel_for("MDF: factorize row", range_policy_type(0, 1), factorize_row);

if (verbosity_level > 0) {
Expand Down

0 comments on commit a8f01df

Please sign in to comment.