Skip to content

Commit

Permalink
pass spgemm results as out parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
upsj committed Nov 18, 2019
1 parent 367481f commit 5bb5531
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 70 deletions.
22 changes: 20 additions & 2 deletions core/matrix/csr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,17 @@ void Csr<ValueType, IndexType>::apply_impl(const LinOp *b, LinOp *x) const
using Dense = Dense<ValueType>;
using TCsr = Csr<ValueType, IndexType>;
if (auto b_csr = dynamic_cast<const TCsr *>(b)) {
auto exec = this->get_executor();
Array<IndexType> x_rows(exec);
Array<IndexType> x_cols(exec);
Array<ValueType> x_vals(exec);
auto x_csr = as<TCsr>(x);
this->get_executor()->run(csr::make_spgemm(this, b_csr, x_csr));
this->get_executor()->run(
csr::make_spgemm(this, b_csr, x_csr, x_rows, x_cols, x_vals));
auto new_x = TCsr::create(exec, x->get_size(), std::move(x_vals),
std::move(x_cols), std::move(x_rows),
x_csr->get_strategy());
new_x->move_to(x_csr);
} else {
this->get_executor()->run(
csr::make_spmv(this, as<Dense>(b), as<Dense>(x)));
Expand All @@ -103,9 +112,18 @@ void Csr<ValueType, IndexType>::apply_impl(const LinOp *alpha, const LinOp *b,
using Dense = Dense<ValueType>;
using TCsr = Csr<ValueType, IndexType>;
if (auto b_csr = dynamic_cast<const TCsr *>(b)) {
auto exec = this->get_executor();
Array<IndexType> x_rows(exec);
Array<IndexType> x_cols(exec);
Array<ValueType> x_vals(exec);
auto x_csr = as<TCsr>(x);
this->get_executor()->run(csr::make_advanced_spgemm(
as<Dense>(alpha), this, b_csr, as<Dense>(beta), x_csr));
as<Dense>(alpha), this, b_csr, as<Dense>(beta), x_csr, x_rows,
x_cols, x_vals));
auto new_x = TCsr::create(exec, x->get_size(), std::move(x_vals),
std::move(x_cols), std::move(x_rows),
x_csr->get_strategy());
new_x->move_to(x_csr);
} else {
this->get_executor()->run(
csr::make_advanced_spmv(as<Dense>(alpha), this, as<Dense>(b),
Expand Down
17 changes: 11 additions & 6 deletions core/matrix/csr_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,24 @@ namespace kernels {
const matrix::Dense<ValueType> *beta, \
matrix::Dense<ValueType> *c)

// NOTE(review): this diff view shows two renditions of the same macro back to
// back. This first one is the form removed by the commit: the kernel took a
// mutable CSR matrix `c` as its only result parameter.
#define GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType) \
void spgemm(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<ValueType, IndexType> *a, \
const matrix::Csr<ValueType, IndexType> *b, \
matrix::Csr<ValueType, IndexType> *c)
// Form added by the commit: declares the `spgemm` kernel (CSR `a` times CSR
// `b`) for one (ValueType, IndexType) pair. `c` is now passed read-only, and
// the result is handed back through the three caller-provided arrays
// `c_row_ptrs`, `c_col_idxs` and `c_vals`, which are taken by non-const
// reference so the kernel can resize and fill them.
#define GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType) \
void spgemm(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<ValueType, IndexType> *a, \
const matrix::Csr<ValueType, IndexType> *b, \
const matrix::Csr<ValueType, IndexType> *c, \
Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs, \
Array<ValueType> &c_vals)

// Declares the `advanced_spgemm` kernel for one (ValueType, IndexType) pair.
// Besides the CSR operands `a`, `b` and `c`, it takes two Dense matrices
// `alpha` and `beta`; the implementations visible in this commit read them as
// scalars via `at(0, 0)`.
// NOTE(review): in this diff view the line `matrix::Csr<...> *c)` is the
// removed last parameter of the previous signature. The new signature instead
// continues with a const `c` followed by the three out arrays `c_row_ptrs`,
// `c_col_idxs` and `c_vals`, passed by non-const reference.
#define GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType) \
void advanced_spgemm(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Dense<ValueType> *alpha, \
const matrix::Csr<ValueType, IndexType> *a, \
const matrix::Csr<ValueType, IndexType> *b, \
const matrix::Dense<ValueType> *beta, \
matrix::Csr<ValueType, IndexType> *c)
const matrix::Csr<ValueType, IndexType> *c, \
Array<IndexType> &c_row_ptrs, \
Array<IndexType> &c_col_idxs, \
Array<ValueType> &c_vals)

#define GKO_DECLARE_CSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \
void convert_to_dense(std::shared_ptr<const DefaultExecutor> exec, \
Expand Down
8 changes: 6 additions & 2 deletions cuda/matrix/csr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,9 @@ template <typename ValueType, typename IndexType>
// CUDA `spgemm` kernel entry point. It has no body here: the declaration is
// terminated by GKO_NOT_IMPLEMENTED (presumably a "not implemented" stub --
// confirm against the macro definition).
// NOTE(review): this diff view interleaves the removed signature tail
// (`matrix::Csr<...> *c) GKO_NOT_IMPLEMENTED;`) with the added one (const `c`
// plus the out arrays `c_row_ptrs`, `c_col_idxs`, `c_vals`).
void spgemm(std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
matrix::Csr<ValueType, IndexType> *c) GKO_NOT_IMPLEMENTED;
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs,
Array<ValueType> &c_vals) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);

Expand All @@ -370,7 +372,9 @@ void advanced_spgemm(std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Dense<ValueType> *beta,
matrix::Csr<ValueType, IndexType> *c) GKO_NOT_IMPLEMENTED;
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs,
Array<ValueType> &c_vals) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);
Expand Down
8 changes: 6 additions & 2 deletions hip/matrix/csr_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,9 @@ template <typename ValueType, typename IndexType>
// HIP `spgemm` kernel entry point. It has no body here: the declaration is
// terminated by GKO_NOT_IMPLEMENTED (presumably a "not implemented" stub --
// confirm against the macro definition).
// NOTE(review): this diff view interleaves the removed signature tail
// (`matrix::Csr<...> *c) GKO_NOT_IMPLEMENTED;`) with the added one (const `c`
// plus the out arrays `c_row_ptrs`, `c_col_idxs`, `c_vals`).
void spgemm(std::shared_ptr<const HipExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
matrix::Csr<ValueType, IndexType> *c) GKO_NOT_IMPLEMENTED;
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs,
Array<ValueType> &c_vals) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);

Expand All @@ -399,7 +401,9 @@ void advanced_spgemm(std::shared_ptr<const HipExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Dense<ValueType> *beta,
matrix::Csr<ValueType, IndexType> *c) GKO_NOT_IMPLEMENTED;
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs,
Array<ValueType> &c_vals) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);
Expand Down
52 changes: 23 additions & 29 deletions omp/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,37 +208,38 @@ template <typename ValueType, typename IndexType>
void spgemm(std::shared_ptr<const OmpExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
matrix::Csr<ValueType, IndexType> *c)
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array, Array<ValueType> &c_vals_array)
{
auto c_size = dim<2>{a->get_size()[0], b->get_size()[1]};
auto c_rows = c_size[0];
auto rows = a->get_size()[0];

// first sweep: count nnz for each row
Array<IndexType> c_row_ptrs_array(exec, c_rows + 1);
c_row_ptrs_array.resize_and_reset(rows + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();

std::unordered_set<IndexType> local_col_idxs;
#pragma omp parallel for schedule(dynamic, 256) firstprivate(local_col_idxs)
for (size_type a_row = 0; a_row < c_rows; ++a_row) {
for (size_type a_row = 0; a_row < rows; ++a_row) {
local_col_idxs.clear();
spgemm_insert_row2(local_col_idxs, a, b, a_row);
c_row_ptrs[a_row + 1] = local_col_idxs.size();
}

// build row pointers: exclusive scan (thus the + 1)
c_row_ptrs[0] = 0;
std::partial_sum(c_row_ptrs + 1, c_row_ptrs + c_rows + 1, c_row_ptrs + 1);
std::partial_sum(c_row_ptrs + 1, c_row_ptrs + rows + 1, c_row_ptrs + 1);

// second sweep: accumulate non-zeros
auto new_nnz = c_row_ptrs[c_rows];
Array<IndexType> c_col_idxs_array(exec, new_nnz);
Array<ValueType> c_vals_array(exec, new_nnz);
auto new_nnz = c_row_ptrs[rows];
c_col_idxs_array.resize_and_reset(new_nnz);
c_vals_array.resize_and_reset(new_nnz);
auto c_col_idxs = c_col_idxs_array.get_data();
auto c_vals = c_vals_array.get_data();

std::unordered_map<IndexType, ValueType> local_row_nzs;
#pragma omp parallel for schedule(dynamic, 256) firstprivate(local_row_nzs)
for (size_type a_row = 0; a_row < c_rows; ++a_row) {
for (size_type a_row = 0; a_row < rows; ++a_row) {
local_row_nzs.clear();
spgemm_accumulate_row2(local_row_nzs, a, b, one<ValueType>(), a_row);
// store result
Expand All @@ -249,11 +250,6 @@ void spgemm(std::shared_ptr<const OmpExecutor> exec,
++c_nz;
}
}

auto new_c = matrix::Csr<ValueType, IndexType>::create(
exec, c_size, std::move(c_vals_array), std::move(c_col_idxs_array),
std::move(c_row_ptrs_array), c->get_strategy());
new_c->move_to(c);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);
Expand All @@ -265,20 +261,22 @@ void advanced_spgemm(std::shared_ptr<const OmpExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Dense<ValueType> *beta,
matrix::Csr<ValueType, IndexType> *c)
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array,
Array<ValueType> &c_vals_array)
{
auto c_size = dim<2>{a->get_size()[0], b->get_size()[1]};
auto c_rows = c_size[0];
auto rows = a->get_size()[0];
auto valpha = alpha->at(0, 0);
auto vbeta = beta->at(0, 0);

// first sweep: count nnz for each row
Array<IndexType> c_row_ptrs_array(exec, c_rows + 1);
c_row_ptrs_array.resize_and_reset(rows + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();

std::unordered_set<IndexType> local_col_idxs;
#pragma omp parallel for schedule(dynamic, 256) firstprivate(local_col_idxs)
for (size_type a_row = 0; a_row < c_rows; ++a_row) {
for (size_type a_row = 0; a_row < rows; ++a_row) {
local_col_idxs.clear();
if (vbeta != zero(vbeta)) {
spgemm_insert_row(local_col_idxs, c, a_row);
Expand All @@ -291,17 +289,18 @@ void advanced_spgemm(std::shared_ptr<const OmpExecutor> exec,

// build row pointers: exclusive scan (thus the + 1)
c_row_ptrs[0] = 0;
std::partial_sum(c_row_ptrs + 1, c_row_ptrs + c_rows + 1, c_row_ptrs + 1);
std::partial_sum(c_row_ptrs + 1, c_row_ptrs + rows + 1, c_row_ptrs + 1);

// second sweep: accumulate non-zeros
Array<IndexType> c_col_idxs_array(exec, c_row_ptrs[c_rows]);
Array<ValueType> c_vals_array(exec, c_row_ptrs[c_rows]);
auto new_nnz = c_row_ptrs[rows];
c_col_idxs_array.resize_and_reset(new_nnz);
c_vals_array.resize_and_reset(new_nnz);
auto c_col_idxs = c_col_idxs_array.get_data();
auto c_vals = c_vals_array.get_data();

std::unordered_map<IndexType, ValueType> local_row_nzs;
#pragma omp parallel for schedule(dynamic, 256) firstprivate(local_row_nzs)
for (size_type a_row = 0; a_row < c_rows; ++a_row) {
for (size_type a_row = 0; a_row < rows; ++a_row) {
local_row_nzs.clear();
if (vbeta != zero(vbeta)) {
spgemm_accumulate_row(local_row_nzs, c, vbeta, a_row);
Expand All @@ -317,11 +316,6 @@ void advanced_spgemm(std::shared_ptr<const OmpExecutor> exec,
++c_nz;
}
}

auto new_c = matrix::Csr<ValueType, IndexType>::create(
exec, c_size, std::move(c_vals_array), std::move(c_col_idxs_array),
std::move(c_row_ptrs_array), c->get_strategy());
new_c->move_to(c);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down
52 changes: 23 additions & 29 deletions reference/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,35 +206,36 @@ template <typename ValueType, typename IndexType>
void spgemm(std::shared_ptr<const ReferenceExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
matrix::Csr<ValueType, IndexType> *c)
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array, Array<ValueType> &c_vals_array)
{
auto c_size = dim<2>{a->get_size()[0], b->get_size()[1]};
auto c_rows = c_size[0];
auto rows = a->get_size()[0];

// first sweep: count nnz for each row
Array<IndexType> c_row_ptrs_array(exec, c_rows + 1);
c_row_ptrs_array.resize_and_reset(rows + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();

std::unordered_set<IndexType> local_col_idxs;
for (size_type a_row = 0; a_row < c_rows; ++a_row) {
for (size_type a_row = 0; a_row < rows; ++a_row) {
local_col_idxs.clear();
spgemm_insert_row2(local_col_idxs, a, b, a_row);
c_row_ptrs[a_row + 1] = local_col_idxs.size();
}

// build row pointers: exclusive scan (thus the + 1)
c_row_ptrs[0] = 0;
std::partial_sum(c_row_ptrs + 1, c_row_ptrs + c_rows + 1, c_row_ptrs + 1);
std::partial_sum(c_row_ptrs + 1, c_row_ptrs + rows + 1, c_row_ptrs + 1);

// second sweep: accumulate non-zeros
auto new_nnz = c_row_ptrs[c_rows];
Array<IndexType> c_col_idxs_array(exec, new_nnz);
Array<ValueType> c_vals_array(exec, new_nnz);
auto new_nnz = c_row_ptrs[rows];
c_col_idxs_array.resize_and_reset(new_nnz);
c_vals_array.resize_and_reset(new_nnz);
auto c_col_idxs = c_col_idxs_array.get_data();
auto c_vals = c_vals_array.get_data();

std::unordered_map<IndexType, ValueType> local_row_nzs;
for (size_type a_row = 0; a_row < c_rows; ++a_row) {
for (size_type a_row = 0; a_row < rows; ++a_row) {
local_row_nzs.clear();
spgemm_accumulate_row2(local_row_nzs, a, b, one<ValueType>(), a_row);
// store result
Expand All @@ -245,11 +246,6 @@ void spgemm(std::shared_ptr<const ReferenceExecutor> exec,
++c_nz;
}
}

auto new_c = matrix::Csr<ValueType, IndexType>::create(
exec, c_size, std::move(c_vals_array), std::move(c_col_idxs_array),
std::move(c_row_ptrs_array), c->get_strategy());
new_c->move_to(c);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);
Expand All @@ -261,19 +257,21 @@ void advanced_spgemm(std::shared_ptr<const ReferenceExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Dense<ValueType> *beta,
matrix::Csr<ValueType, IndexType> *c)
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array,
Array<ValueType> &c_vals_array)
{
auto c_size = dim<2>{a->get_size()[0], b->get_size()[1]};
auto c_rows = c_size[0];
auto rows = a->get_size()[0];
auto valpha = alpha->at(0, 0);
auto vbeta = beta->at(0, 0);

// first sweep: count nnz for each row
Array<IndexType> c_row_ptrs_array(exec, c_rows + 1);
c_row_ptrs_array.resize_and_reset(rows + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();

std::unordered_set<IndexType> local_col_idxs;
for (size_type a_row = 0; a_row < c_rows; ++a_row) {
for (size_type a_row = 0; a_row < rows; ++a_row) {
local_col_idxs.clear();
if (vbeta != zero(vbeta)) {
spgemm_insert_row(local_col_idxs, c, a_row);
Expand All @@ -286,16 +284,17 @@ void advanced_spgemm(std::shared_ptr<const ReferenceExecutor> exec,

// build row pointers: exclusive scan (thus the + 1)
c_row_ptrs[0] = 0;
std::partial_sum(c_row_ptrs + 1, c_row_ptrs + c_rows + 1, c_row_ptrs + 1);
std::partial_sum(c_row_ptrs + 1, c_row_ptrs + rows + 1, c_row_ptrs + 1);

// second sweep: accumulate non-zeros
Array<IndexType> c_col_idxs_array(exec, c_row_ptrs[c_rows]);
Array<ValueType> c_vals_array(exec, c_row_ptrs[c_rows]);
auto new_nnz = c_row_ptrs[rows];
c_col_idxs_array.resize_and_reset(new_nnz);
c_vals_array.resize_and_reset(new_nnz);
auto c_col_idxs = c_col_idxs_array.get_data();
auto c_vals = c_vals_array.get_data();

std::unordered_map<IndexType, ValueType> local_row_nzs;
for (size_type a_row = 0; a_row < c_rows; ++a_row) {
for (size_type a_row = 0; a_row < rows; ++a_row) {
local_row_nzs.clear();
if (vbeta != zero(vbeta)) {
spgemm_accumulate_row(local_row_nzs, c, vbeta, a_row);
Expand All @@ -311,11 +310,6 @@ void advanced_spgemm(std::shared_ptr<const ReferenceExecutor> exec,
++c_nz;
}
}

auto new_c = matrix::Csr<ValueType, IndexType>::create(
exec, c_size, std::move(c_vals_array), std::move(c_col_idxs_array),
std::move(c_row_ptrs_array), c->get_strategy());
new_c->move_to(c);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down

0 comments on commit 5bb5531

Please sign in to comment.