From f996bdb2162ddcdc59e2c8ce5ee0f2b1f9e65f96 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Tue, 14 Mar 2023 11:17:01 -0600 Subject: [PATCH] Leverage std library in BsrMatrix constructor --- sparse/src/KokkosSparse_BsrMatrix.hpp | 198 +++++++++++++++----------- 1 file changed, 111 insertions(+), 87 deletions(-) diff --git a/sparse/src/KokkosSparse_BsrMatrix.hpp b/sparse/src/KokkosSparse_BsrMatrix.hpp index ea4e50a8fe..7fe77ab0ec 100644 --- a/sparse/src/KokkosSparse_BsrMatrix.hpp +++ b/sparse/src/KokkosSparse_BsrMatrix.hpp @@ -451,12 +451,12 @@ class BsrMatrix { } } - /// \brief Constructor that copies raw arrays of host data in - /// coordinate format. + /// \brief Construct BsrMatrix from host data in COO format. /// - /// On input, each entry of the sparse matrix is stored in val[k], - /// with row index rows[k] and column index cols[k]. We assume that - /// the entries are sorted in increasing order by row index. + /// The COO matrix must already have a block structure. + /// Each entry k of the input sparse matrix has a value stored in val[k], + /// row index in rows[k] and column index incols[k]. + /// The COO data must be sorted by increasing row index /// /// This constructor is mainly useful for benchmarking or for /// reading the sparse matrix's data from a file. @@ -465,19 +465,19 @@ class BsrMatrix { /// \param nrows [in] The number of rows. /// \param ncols [in] The number of columns. /// \param annz [in] The number of entries. - /// \param val [in] The entries. - /// \param rows [in] The row indices. rows[k] is the row index of + /// \param vals [in] The entries. + /// \param rows [in] The row indices. rows[k] is the row index of /// val[k]. - /// \param cols [in] The column indices. cols[k] is the column + /// \param cols [in] The column indices. cols[k] is the column /// index of val[k]. - /// \param blockdim [in] The block dimensions. + /// \param blockdim [in] The block size of the constructed BsrMatrix. /// \param pad [in] If true, pad the sparse matrix's storage with /// zeros in order to improve cache alignment and / or /// vectorization. /// /// The \c pad argument is currently not used. BsrMatrix(const std::string& label, OrdinalType nrows, OrdinalType ncols, - size_type annz, ScalarType* val, OrdinalType* rows, + size_type annz, ScalarType* vals, OrdinalType* rows, OrdinalType* cols, OrdinalType blockdim, bool pad = false) { (void)label; (void)pad; @@ -496,93 +496,117 @@ class BsrMatrix { assert((nrows % blockDim_ == 0) && "BsrMatrix: input CrsMatrix rows is not a multiple of block size"); } + if (annz % (blockDim_ * blockDim_)) { + throw std::runtime_error( + "BsrMatrix:: annz should be a multiple of the number of entries in a " + "block"); + } - numCols_ = ncols / blockDim_; - ordinal_type tmp_num_rows = nrows / blockDim_; - - // - // Wrap the raw pointers in unmanaged host Views - // Note that the inputs are in coordinate format. - // So unman_rows and unman_cols have the same type. - // - typename values_type::HostMirror unman_val(val, annz); - typename index_type::HostMirror unman_rows(rows, annz); - typename index_type::HostMirror unman_cols(cols, annz); - - typename row_map_type::non_const_type tmp_row_map( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmap"), - tmp_num_rows + 1); - auto row_map_host = Kokkos::create_mirror_view(tmp_row_map); - Kokkos::deep_copy(row_map_host, 0); - - if (annz > 0) { - ordinal_type iblock = 0; - std::set set_blocks; - for (size_type ii = 0; ii <= annz; ++ii) { - if ((ii == annz) || ((unman_rows(ii) / blockDim_) > iblock)) { - // Flush the stored entries - row_map_host(iblock + 1) = set_blocks.size(); - if (ii == annz) break; - set_blocks.clear(); - iblock = unman_rows(ii) / blockDim_; - } - ordinal_type tmp_jblock = unman_cols(ii) / blockDim_; - set_blocks.insert(tmp_jblock); + using Coord = std::pair; // row, col + using CoordComp = std::function; // type that can order Coords + using Entry = std::pair; // (row, col), val + using Blocks = std::map, + CoordComp>; // map a block to its non-zeros, sorted + // by row, then col + + numCols_ = ncols / blockDim_; + ordinal_type numRows = nrows / blockDim_; + size_type numBlocks = annz / (blockDim_ * blockDim_); + + // device data + typename row_map_type::non_const_type row_map_device( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_map_device"), + numRows + 1); + index_type entries_device("entries_device", numBlocks); + Kokkos::resize(values, annz); + + // mirror views on host + auto row_map_host = Kokkos::create_mirror_view(row_map_device); + auto entries_host = Kokkos::create_mirror_view(entries_device); + auto values_host = Kokkos::create_mirror_view(values); + + auto coord_by_row_col = [](const Coord& a, const Coord& b) { + const auto& arow = std::get<0>(a); + const auto& brow = std::get<0>(b); + const auto& acol = std::get<1>(a); + const auto& bcol = std::get<1>(b); + if (arow < brow) { + return true; + } else if (arow > brow) { + return false; + } else { + return acol < bcol; + } + }; + + auto entry_by_row_col = [coord_by_row_col](const Entry& a, const Entry& b) { + return coord_by_row_col(std::get<0>(a), std::get<0>(b)); + }; + + // organize all blocks and their entries + Blocks blocks(coord_by_row_col); + for (size_type i = 0; i < annz; ++i) { + const ordinal_type row = rows[i]; + const ordinal_type col = cols[i]; + const ScalarType val = vals[i]; + const Coord block = Coord(row / blockDim_, col / blockDim_); + const Entry entry(Coord(row, col), val); + + // add entry to the correct block + auto it = blocks.find(block); + if (it == blocks.end()) { + std::vector entries = {entry}; + entries.reserve(blockDim_ * blockDim_); + blocks[block] = std::move(entries); // new block with entry + } else { + it->second.push_back(entry); // add entry to block } } - for (size_type ii = 0; ii < annz; ++ii) - row_map_host(ii + 1) += row_map_host(ii); - - Kokkos::deep_copy(tmp_row_map, row_map_host); - - // Create temporary Views for row_map and entries - // because the StaticCrsGraph ctor requires View inputs - index_type tmp_entries("tmp_entries", row_map_host(tmp_num_rows)); - auto tmp_entries_host = Kokkos::create_mirror_view(tmp_entries); - - Kokkos::resize(values, row_map_host(tmp_num_rows) * blockDim_ * blockDim_); - auto values_host = Kokkos::create_mirror_view(values); - Kokkos::deep_copy(values_host, 0); - - if (annz > 0) { - //--- Fill tmp_entries - ordinal_type cur_block = 0; - std::set set_blocks; - for (size_type ii = 0; ii <= annz; ++ii) { - if ((ii == annz) || ((unman_rows(ii) / blockDim_) > cur_block)) { - // Flush the stored entries - ordinal_type ipos = row_map_host(cur_block); - for (auto jblock : set_blocks) tmp_entries_host(ipos++) = jblock; - if (ii == annz) break; - set_blocks.clear(); - cur_block = unman_rows(ii) / blockDim_; - } - ordinal_type tmp_jblock = unman_cols(ii) / blockDim_; - set_blocks.insert(tmp_jblock); + // write block data out to BSR format + ordinal_type row = 0; // current row we're in + size_t bi = 0; // how many blocks so far + for (auto& kv : blocks) { // iterating through blocks in row/col order + const Coord& block = kv.first; // block's position + auto& entries = kv.second; // non-zeros in the block + + if (OrdinalType(entries.size()) != blockDim_ * blockDim_) { + std::stringstream ss; + ss << "BsrMatrix: block " << block.first << "," << block.second + << " had only " << entries.size() << " non-zeros, expected " + << blockDim_ * blockDim_; + KokkosKernels::Impl::throw_runtime_exception(ss.str()); } - //--- Fill numerical values - for (size_type ii = 0; ii < annz; ++ii) { - const auto ilocal = unman_rows(ii) % blockDim_; - const auto jblock = unman_cols(ii) / blockDim_; - const auto jlocal = unman_cols(ii) % blockDim_; - for (auto jj = row_map_host(jblock); jj < row_map_host(jblock + 1); - ++jj) { - if (tmp_entries_host(jj) == jblock) { - const auto shift = - jj * blockDim_ * blockDim_ + ilocal * blockDim_ + jlocal; - values_host(shift) = unman_val(ii); - break; - } - } + + // update row-map if block is in a new row + for (; row < block.first; ++row) { + row_map_host(row + 1) = bi; // `row` ends at bi + } + + // record column of block + entries_host(bi) = block.second; // block's column + + // add contiguous entries of block sorted by row/col + std::sort(entries.begin(), entries.end(), entry_by_row_col); + for (size_type ei = 0; ei < size_type(entries.size()); ++ei) { + values_host(bi * blockDim_ * blockDim_ + ei) = std::get<1>(entries[ei]); } + + // next block + ++bi; + } + // complete row map if last blocks are empty + for (; row < numRows; ++row) { + row_map_host(row + 1) = bi; } - Kokkos::deep_copy(tmp_entries, tmp_entries_host); + // move graph data to the requested device + Kokkos::deep_copy(row_map_device, row_map_host); + Kokkos::deep_copy(entries_device, entries_host); Kokkos::deep_copy(values, values_host); - // Initialize graph using the temp entries and row_map Views - graph = staticcrsgraph_type(tmp_entries, tmp_row_map); + graph = staticcrsgraph_type(entries_device, row_map_device); } /// \brief Constructor that accepts a row map, column indices, and