Skip to content

Commit

Permalink
Leverage std library in BsrMatrix constructor
Browse files Browse the repository at this point in the history
  • Loading branch information
cwpearson committed Mar 22, 2023
1 parent 0481552 commit f996bdb
Showing 1 changed file with 111 additions and 87 deletions.
198 changes: 111 additions & 87 deletions sparse/src/KokkosSparse_BsrMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -451,12 +451,12 @@ class BsrMatrix {
}
}

/// \brief Constructor that copies raw arrays of host data in
/// coordinate format.
/// \brief Construct BsrMatrix from host data in COO format.
///
/// On input, each entry of the sparse matrix is stored in val[k],
/// with row index rows[k] and column index cols[k]. We assume that
/// the entries are sorted in increasing order by row index.
/// The COO matrix must already have a block structure.
/// Each entry k of the input sparse matrix has a value stored in val[k],
/// row index in rows[k] and column index incols[k].
/// The COO data must be sorted by increasing row index
///
/// This constructor is mainly useful for benchmarking or for
/// reading the sparse matrix's data from a file.
Expand All @@ -465,19 +465,19 @@ class BsrMatrix {
/// \param nrows [in] The number of rows.
/// \param ncols [in] The number of columns.
/// \param annz [in] The number of entries.
/// \param val [in] The entries.
/// \param rows [in] The row indices. rows[k] is the row index of
/// \param vals [in] The entries.
/// \param rows [in] The row indices. rows[k] is the row index of
/// val[k].
/// \param cols [in] The column indices. cols[k] is the column
/// \param cols [in] The column indices. cols[k] is the column
/// index of val[k].
/// \param blockdim [in] The block dimensions.
/// \param blockdim [in] The block size of the constructed BsrMatrix.
/// \param pad [in] If true, pad the sparse matrix's storage with
/// zeros in order to improve cache alignment and / or
/// vectorization.
///
/// The \c pad argument is currently not used.
BsrMatrix(const std::string& label, OrdinalType nrows, OrdinalType ncols,
size_type annz, ScalarType* val, OrdinalType* rows,
size_type annz, ScalarType* vals, OrdinalType* rows,
OrdinalType* cols, OrdinalType blockdim, bool pad = false) {
(void)label;
(void)pad;
Expand All @@ -496,93 +496,117 @@ class BsrMatrix {
assert((nrows % blockDim_ == 0) &&
"BsrMatrix: input CrsMatrix rows is not a multiple of block size");
}
if (annz % (blockDim_ * blockDim_)) {
throw std::runtime_error(
"BsrMatrix:: annz should be a multiple of the number of entries in a "
"block");
}

numCols_ = ncols / blockDim_;
ordinal_type tmp_num_rows = nrows / blockDim_;

//
// Wrap the raw pointers in unmanaged host Views
// Note that the inputs are in coordinate format.
// So unman_rows and unman_cols have the same type.
//
typename values_type::HostMirror unman_val(val, annz);
typename index_type::HostMirror unman_rows(rows, annz);
typename index_type::HostMirror unman_cols(cols, annz);

typename row_map_type::non_const_type tmp_row_map(
Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmap"),
tmp_num_rows + 1);
auto row_map_host = Kokkos::create_mirror_view(tmp_row_map);
Kokkos::deep_copy(row_map_host, 0);

if (annz > 0) {
ordinal_type iblock = 0;
std::set<ordinal_type> set_blocks;
for (size_type ii = 0; ii <= annz; ++ii) {
if ((ii == annz) || ((unman_rows(ii) / blockDim_) > iblock)) {
// Flush the stored entries
row_map_host(iblock + 1) = set_blocks.size();
if (ii == annz) break;
set_blocks.clear();
iblock = unman_rows(ii) / blockDim_;
}
ordinal_type tmp_jblock = unman_cols(ii) / blockDim_;
set_blocks.insert(tmp_jblock);
using Coord = std::pair<OrdinalType, OrdinalType>; // row, col
using CoordComp = std::function<bool(
const Coord&, const Coord&)>; // type that can order Coords
using Entry = std::pair<Coord, ScalarType>; // (row, col), val
using Blocks = std::map<Coord, std::vector<Entry>,
CoordComp>; // map a block to its non-zeros, sorted
// by row, then col

numCols_ = ncols / blockDim_;
ordinal_type numRows = nrows / blockDim_;
size_type numBlocks = annz / (blockDim_ * blockDim_);

// device data
typename row_map_type::non_const_type row_map_device(
Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_map_device"),
numRows + 1);
index_type entries_device("entries_device", numBlocks);
Kokkos::resize(values, annz);

// mirror views on host
auto row_map_host = Kokkos::create_mirror_view(row_map_device);
auto entries_host = Kokkos::create_mirror_view(entries_device);
auto values_host = Kokkos::create_mirror_view(values);

auto coord_by_row_col = [](const Coord& a, const Coord& b) {
const auto& arow = std::get<0>(a);
const auto& brow = std::get<0>(b);
const auto& acol = std::get<1>(a);
const auto& bcol = std::get<1>(b);
if (arow < brow) {
return true;
} else if (arow > brow) {
return false;
} else {
return acol < bcol;
}
};

auto entry_by_row_col = [coord_by_row_col](const Entry& a, const Entry& b) {
return coord_by_row_col(std::get<0>(a), std::get<0>(b));
};

// organize all blocks and their entries
Blocks blocks(coord_by_row_col);
for (size_type i = 0; i < annz; ++i) {
const ordinal_type row = rows[i];
const ordinal_type col = cols[i];
const ScalarType val = vals[i];
const Coord block = Coord(row / blockDim_, col / blockDim_);
const Entry entry(Coord(row, col), val);

// add entry to the correct block
auto it = blocks.find(block);
if (it == blocks.end()) {
std::vector<Entry> entries = {entry};
entries.reserve(blockDim_ * blockDim_);
blocks[block] = std::move(entries); // new block with entry
} else {
it->second.push_back(entry); // add entry to block
}
}

for (size_type ii = 0; ii < annz; ++ii)
row_map_host(ii + 1) += row_map_host(ii);

Kokkos::deep_copy(tmp_row_map, row_map_host);

// Create temporary Views for row_map and entries
// because the StaticCrsGraph ctor requires View inputs
index_type tmp_entries("tmp_entries", row_map_host(tmp_num_rows));
auto tmp_entries_host = Kokkos::create_mirror_view(tmp_entries);

Kokkos::resize(values, row_map_host(tmp_num_rows) * blockDim_ * blockDim_);
auto values_host = Kokkos::create_mirror_view(values);
Kokkos::deep_copy(values_host, 0);

if (annz > 0) {
//--- Fill tmp_entries
ordinal_type cur_block = 0;
std::set<ordinal_type> set_blocks;
for (size_type ii = 0; ii <= annz; ++ii) {
if ((ii == annz) || ((unman_rows(ii) / blockDim_) > cur_block)) {
// Flush the stored entries
ordinal_type ipos = row_map_host(cur_block);
for (auto jblock : set_blocks) tmp_entries_host(ipos++) = jblock;
if (ii == annz) break;
set_blocks.clear();
cur_block = unman_rows(ii) / blockDim_;
}
ordinal_type tmp_jblock = unman_cols(ii) / blockDim_;
set_blocks.insert(tmp_jblock);
// write block data out to BSR format
ordinal_type row = 0; // current row we're in
size_t bi = 0; // how many blocks so far
for (auto& kv : blocks) { // iterating through blocks in row/col order
const Coord& block = kv.first; // block's position
auto& entries = kv.second; // non-zeros in the block

if (OrdinalType(entries.size()) != blockDim_ * blockDim_) {
std::stringstream ss;
ss << "BsrMatrix: block " << block.first << "," << block.second
<< " had only " << entries.size() << " non-zeros, expected "
<< blockDim_ * blockDim_;
KokkosKernels::Impl::throw_runtime_exception(ss.str());
}
//--- Fill numerical values
for (size_type ii = 0; ii < annz; ++ii) {
const auto ilocal = unman_rows(ii) % blockDim_;
const auto jblock = unman_cols(ii) / blockDim_;
const auto jlocal = unman_cols(ii) % blockDim_;
for (auto jj = row_map_host(jblock); jj < row_map_host(jblock + 1);
++jj) {
if (tmp_entries_host(jj) == jblock) {
const auto shift =
jj * blockDim_ * blockDim_ + ilocal * blockDim_ + jlocal;
values_host(shift) = unman_val(ii);
break;
}
}

// update row-map if block is in a new row
for (; row < block.first; ++row) {
row_map_host(row + 1) = bi; // `row` ends at bi
}

// record column of block
entries_host(bi) = block.second; // block's column

// add contiguous entries of block sorted by row/col
std::sort(entries.begin(), entries.end(), entry_by_row_col);
for (size_type ei = 0; ei < size_type(entries.size()); ++ei) {
values_host(bi * blockDim_ * blockDim_ + ei) = std::get<1>(entries[ei]);
}

// next block
++bi;
}
// complete row map if last blocks are empty
for (; row < numRows; ++row) {
row_map_host(row + 1) = bi;
}

Kokkos::deep_copy(tmp_entries, tmp_entries_host);
// move graph data to the requested device
Kokkos::deep_copy(row_map_device, row_map_host);
Kokkos::deep_copy(entries_device, entries_host);
Kokkos::deep_copy(values, values_host);

// Initialize graph using the temp entries and row_map Views
graph = staticcrsgraph_type(tmp_entries, tmp_row_map);
graph = staticcrsgraph_type(entries_device, row_map_device);
}

/// \brief Constructor that accepts a row map, column indices, and
Expand Down

0 comments on commit f996bdb

Please sign in to comment.