Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add basic SpGeMM kernel #386

Merged
merged 6 commits into from
Nov 21, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,17 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL);

template <typename ValueType, typename IndexType>
GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);

template <typename ValueType, typename IndexType>
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);

template <typename ValueType, typename IndexType>
GKO_DECLARE_CSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
Expand Down
41 changes: 38 additions & 3 deletions core/matrix/csr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ namespace csr {

GKO_REGISTER_OPERATION(spmv, csr::spmv);
GKO_REGISTER_OPERATION(advanced_spmv, csr::advanced_spmv);
GKO_REGISTER_OPERATION(spgemm, csr::spgemm);
GKO_REGISTER_OPERATION(advanced_spgemm, csr::advanced_spgemm);
GKO_REGISTER_OPERATION(convert_to_coo, csr::convert_to_coo);
GKO_REGISTER_OPERATION(convert_to_dense, csr::convert_to_dense);
GKO_REGISTER_OPERATION(convert_to_sellp, csr::convert_to_sellp);
Expand Down Expand Up @@ -83,7 +85,23 @@ template <typename ValueType, typename IndexType>
void Csr<ValueType, IndexType>::apply_impl(const LinOp *b, LinOp *x) const
{
using Dense = Dense<ValueType>;
this->get_executor()->run(csr::make_spmv(this, as<Dense>(b), as<Dense>(x)));
using TCsr = Csr<ValueType, IndexType>;
if (auto b_csr = dynamic_cast<const TCsr *>(b)) {
upsj marked this conversation as resolved.
Show resolved Hide resolved
auto exec = this->get_executor();
Array<IndexType> x_rows(exec);
Array<IndexType> x_cols(exec);
Array<ValueType> x_vals(exec);
auto x_csr = as<TCsr>(x);
this->get_executor()->run(
csr::make_spgemm(this, b_csr, x_csr, x_rows, x_cols, x_vals));
auto new_x = TCsr::create(exec, x->get_size(), std::move(x_vals),
upsj marked this conversation as resolved.
Show resolved Hide resolved
std::move(x_cols), std::move(x_rows),
x_csr->get_strategy());
new_x->move_to(x_csr);
} else {
this->get_executor()->run(
csr::make_spmv(this, as<Dense>(b), as<Dense>(x)));
}
}


Expand All @@ -92,8 +110,25 @@ void Csr<ValueType, IndexType>::apply_impl(const LinOp *alpha, const LinOp *b,
const LinOp *beta, LinOp *x) const
{
using Dense = Dense<ValueType>;
this->get_executor()->run(csr::make_advanced_spmv(
as<Dense>(alpha), this, as<Dense>(b), as<Dense>(beta), as<Dense>(x)));
using TCsr = Csr<ValueType, IndexType>;
if (auto b_csr = dynamic_cast<const TCsr *>(b)) {
upsj marked this conversation as resolved.
Show resolved Hide resolved
auto exec = this->get_executor();
Array<IndexType> x_rows(exec);
Array<IndexType> x_cols(exec);
Array<ValueType> x_vals(exec);
auto x_csr = as<TCsr>(x);
yhmtsai marked this conversation as resolved.
Show resolved Hide resolved
this->get_executor()->run(csr::make_advanced_spgemm(
as<Dense>(alpha), this, b_csr, as<Dense>(beta), x_csr, x_rows,
x_cols, x_vals));
auto new_x = TCsr::create(exec, x->get_size(), std::move(x_vals),
upsj marked this conversation as resolved.
Show resolved Hide resolved
std::move(x_cols), std::move(x_rows),
x_csr->get_strategy());
new_x->move_to(x_csr);
} else {
this->get_executor()->run(
csr::make_advanced_spmv(as<Dense>(alpha), this, as<Dense>(b),
as<Dense>(beta), as<Dense>(x)));
}
}


Expand Down
23 changes: 23 additions & 0 deletions core/matrix/csr_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,25 @@ namespace kernels {
const matrix::Dense<ValueType> *beta, \
matrix::Dense<ValueType> *c)

#define GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType) \
void spgemm(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<ValueType, IndexType> *a, \
const matrix::Csr<ValueType, IndexType> *b, \
const matrix::Csr<ValueType, IndexType> *c, \
Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs, \
Array<ValueType> &c_vals)

#define GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType) \
void advanced_spgemm(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Dense<ValueType> *alpha, \
const matrix::Csr<ValueType, IndexType> *a, \
const matrix::Csr<ValueType, IndexType> *b, \
const matrix::Dense<ValueType> *beta, \
const matrix::Csr<ValueType, IndexType> *c, \
Array<IndexType> &c_row_ptrs, \
Array<IndexType> &c_col_idxs, \
Array<ValueType> &c_vals)

#define GKO_DECLARE_CSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \
void convert_to_dense(std::shared_ptr<const DefaultExecutor> exec, \
matrix::Dense<ValueType> *result, \
Expand Down Expand Up @@ -155,6 +174,10 @@ namespace kernels {
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_CONVERT_TO_COO_KERNEL(ValueType, IndexType); \
Expand Down
25 changes: 25 additions & 0 deletions cuda/matrix/csr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,31 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL);


template <typename ValueType, typename IndexType>
void spgemm(std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs,
Array<ValueType> &c_vals) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);


template <typename ValueType, typename IndexType>
void advanced_spgemm(std::shared_ptr<const CudaExecutor> exec,
const matrix::Dense<ValueType> *alpha,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Dense<ValueType> *beta,
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs,
Array<ValueType> &c_vals) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);


template <typename IndexType>
void convert_row_ptrs_to_idxs(std::shared_ptr<const CudaExecutor> exec,
const IndexType *ptrs, size_type num_rows,
Expand Down
25 changes: 25 additions & 0 deletions hip/matrix/csr_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,31 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL);


template <typename ValueType, typename IndexType>
void spgemm(std::shared_ptr<const HipExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs,
Array<ValueType> &c_vals) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);


template <typename ValueType, typename IndexType>
void advanced_spgemm(std::shared_ptr<const HipExecutor> exec,
const matrix::Dense<ValueType> *alpha,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Dense<ValueType> *beta,
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs,
Array<ValueType> &c_vals) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);


template <typename IndexType>
void convert_row_ptrs_to_idxs(std::shared_ptr<const HipExecutor> exec,
const IndexType *ptrs, size_type num_rows,
Expand Down
195 changes: 195 additions & 0 deletions omp/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <algorithm>
#include <iostream>
#include <numeric>
#include <unordered_map>
#include <unordered_set>
#include <utility>


Expand Down Expand Up @@ -127,6 +129,199 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL);


template <typename ValueType, typename IndexType>
void spgemm_insert_row(std::unordered_set<IndexType> &cols,
const matrix::Csr<ValueType, IndexType> *c,
size_type row)
{
auto row_ptrs = c->get_const_row_ptrs();
auto col_idxs = c->get_const_col_idxs();
cols.insert(col_idxs + row_ptrs[row], col_idxs + row_ptrs[row + 1]);
}


template <typename ValueType, typename IndexType>
void spgemm_insert_row2(std::unordered_set<IndexType> &cols,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
size_type row)
{
auto a_row_ptrs = a->get_const_row_ptrs();
auto a_col_idxs = a->get_const_col_idxs();
auto b_row_ptrs = b->get_const_row_ptrs();
auto b_col_idxs = b->get_const_col_idxs();
for (size_type a_nz = a_row_ptrs[row];
a_nz < size_type(a_row_ptrs[row + 1]); ++a_nz) {
auto a_col = a_col_idxs[a_nz];
auto b_row = a_col;
upsj marked this conversation as resolved.
Show resolved Hide resolved
cols.insert(b_col_idxs + b_row_ptrs[b_row],
b_col_idxs + b_row_ptrs[b_row + 1]);
}
}


template <typename ValueType, typename IndexType>
void spgemm_accumulate_row(std::unordered_map<IndexType, ValueType> &cols,
const matrix::Csr<ValueType, IndexType> *c,
ValueType scale, size_type row)
{
auto row_ptrs = c->get_const_row_ptrs();
auto col_idxs = c->get_const_col_idxs();
auto vals = c->get_const_values();
for (size_type c_nz = row_ptrs[row]; c_nz < size_type(row_ptrs[row + 1]);
++c_nz) {
auto c_col = col_idxs[c_nz];
auto c_val = vals[c_nz];
cols[c_col] += scale * c_val;
}
}


template <typename ValueType, typename IndexType>
void spgemm_accumulate_row2(std::unordered_map<IndexType, ValueType> &cols,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
ValueType scale, size_type row)
{
auto a_row_ptrs = a->get_const_row_ptrs();
auto a_col_idxs = a->get_const_col_idxs();
auto a_vals = a->get_const_values();
auto b_row_ptrs = b->get_const_row_ptrs();
auto b_col_idxs = b->get_const_col_idxs();
auto b_vals = b->get_const_values();
for (size_type a_nz = a_row_ptrs[row];
a_nz < size_type(a_row_ptrs[row + 1]); ++a_nz) {
auto a_col = a_col_idxs[a_nz];
auto a_val = a_vals[a_nz];
auto b_row = a_col;
for (size_type b_nz = b_row_ptrs[b_row];
b_nz < size_type(b_row_ptrs[b_row + 1]); ++b_nz) {
auto b_col = b_col_idxs[b_nz];
auto b_val = b_vals[b_nz];
cols[b_col] += scale * a_val * b_val;
}
}
}


template <typename ValueType, typename IndexType>
void spgemm(std::shared_ptr<const OmpExecutor> exec,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array, Array<ValueType> &c_vals_array)
{
auto num_rows = a->get_size()[0];

// first sweep: count nnz for each row
c_row_ptrs_array.resize_and_reset(num_rows + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();

std::unordered_set<IndexType> local_col_idxs;
#pragma omp parallel for schedule(dynamic, 256) firstprivate(local_col_idxs)
upsj marked this conversation as resolved.
Show resolved Hide resolved
for (size_type a_row = 0; a_row < num_rows; ++a_row) {
local_col_idxs.clear();
spgemm_insert_row2(local_col_idxs, a, b, a_row);
c_row_ptrs[a_row + 1] = local_col_idxs.size();
}

// build row pointers: exclusive scan (thus the + 1)
c_row_ptrs[0] = 0;
std::partial_sum(c_row_ptrs + 1, c_row_ptrs + num_rows + 1, c_row_ptrs + 1);

// second sweep: accumulate non-zeros
auto new_nnz = c_row_ptrs[num_rows];
c_col_idxs_array.resize_and_reset(new_nnz);
c_vals_array.resize_and_reset(new_nnz);
auto c_col_idxs = c_col_idxs_array.get_data();
auto c_vals = c_vals_array.get_data();

std::unordered_map<IndexType, ValueType> local_row_nzs;
#pragma omp parallel for schedule(dynamic, 256) firstprivate(local_row_nzs)
for (size_type a_row = 0; a_row < num_rows; ++a_row) {
local_row_nzs.clear();
spgemm_accumulate_row2(local_row_nzs, a, b, one<ValueType>(), a_row);
// store result
auto c_nz = c_row_ptrs[a_row];
for (auto pair : local_row_nzs) {
c_col_idxs[c_nz] = pair.first;
c_vals[c_nz] = pair.second;
++c_nz;
}
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);


template <typename ValueType, typename IndexType>
void advanced_spgemm(std::shared_ptr<const OmpExecutor> exec,
const matrix::Dense<ValueType> *alpha,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Csr<ValueType, IndexType> *b,
const matrix::Dense<ValueType> *beta,
const matrix::Csr<ValueType, IndexType> *c,
Array<IndexType> &c_row_ptrs_array,
Array<IndexType> &c_col_idxs_array,
Array<ValueType> &c_vals_array)
{
auto num_rows = a->get_size()[0];
auto valpha = alpha->at(0, 0);
auto vbeta = beta->at(0, 0);

// first sweep: count nnz for each row
c_row_ptrs_array.resize_and_reset(num_rows + 1);
auto c_row_ptrs = c_row_ptrs_array.get_data();

std::unordered_set<IndexType> local_col_idxs;
#pragma omp parallel for schedule(dynamic, 256) firstprivate(local_col_idxs)
for (size_type a_row = 0; a_row < num_rows; ++a_row) {
local_col_idxs.clear();
if (vbeta != zero(vbeta)) {
spgemm_insert_row(local_col_idxs, c, a_row);
}
if (valpha != zero(valpha)) {
spgemm_insert_row2(local_col_idxs, a, b, a_row);
}
c_row_ptrs[a_row + 1] = local_col_idxs.size();
}

// build row pointers: exclusive scan (thus the + 1)
c_row_ptrs[0] = 0;
std::partial_sum(c_row_ptrs + 1, c_row_ptrs + num_rows + 1, c_row_ptrs + 1);

// second sweep: accumulate non-zeros
auto new_nnz = c_row_ptrs[num_rows];
c_col_idxs_array.resize_and_reset(new_nnz);
c_vals_array.resize_and_reset(new_nnz);
auto c_col_idxs = c_col_idxs_array.get_data();
auto c_vals = c_vals_array.get_data();

std::unordered_map<IndexType, ValueType> local_row_nzs;
#pragma omp parallel for schedule(dynamic, 256) firstprivate(local_row_nzs)
for (size_type a_row = 0; a_row < num_rows; ++a_row) {
local_row_nzs.clear();
if (vbeta != zero(vbeta)) {
spgemm_accumulate_row(local_row_nzs, c, vbeta, a_row);
}
if (valpha != zero(valpha)) {
spgemm_accumulate_row2(local_row_nzs, a, b, valpha, a_row);
}
// store result
auto c_nz = c_row_ptrs[a_row];
for (auto pair : local_row_nzs) {
c_col_idxs[c_nz] = pair.first;
c_vals[c_nz] = pair.second;
++c_nz;
}
}
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);


template <typename IndexType>
void convert_row_ptrs_to_idxs(std::shared_ptr<const OmpExecutor> exec,
const IndexType *ptrs, size_type num_rows,
Expand Down
Loading