Skip to content

Commit

Permalink
WIP Implementation of default block SpGEMM algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
Mikołaj Zuzek committed Nov 24, 2021
1 parent 34ac8cf commit 993e92c
Show file tree
Hide file tree
Showing 6 changed files with 272 additions and 387 deletions.
113 changes: 94 additions & 19 deletions src/common/KokkosKernels_BlockHashmapAccumulator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,19 @@
// ************************************************************************
//@HEADER
*/
#ifndef _KOKKOSKERNELS_HASHMAPACCUMULATOR_HPP
#define _KOKKOSKERNELS_HASHMAPACCUMULATOR_HPP
#ifndef _KOKKOSKERNELS_BLOCKHASHMAPACCUMULATOR_HPP
#define _KOKKOSKERNELS_BLOCKHASHMAPACCUMULATOR_HPP
#include <Kokkos_Atomic.hpp>
#include <atomic>
#include "KokkosBatched_Gemm_Serial_Internal.hpp"

//#define HASHMAPACCUMULATOR_ASSERT_ENABLED

namespace KokkosKernels {

namespace Experimental {

#if 0 // defined in HashmapAccumulator header - include if needed or drop
/**
* @brief types of hash operations supported by HashmapAccumulator.
*
Expand All @@ -64,11 +66,12 @@ struct HashOpType {
struct modulo {};
struct pow2Modulo {};
};
#endif

template <typename size_type, typename key_type, typename value_type,
typename hash_type>
/**
* \brief HashmapAccumulator class
* \brief BlockHashmapAccumulator class
* The use of this is described in the paper:
* "Performance-portable sparse matrix-matrix multiplication for many-core
* architectures" ( https://ieeexplore.ieee.org/abstract/document/7965111/ ) in
Expand All @@ -88,14 +91,14 @@ template <typename size_type, typename key_type, typename value_type,
* \var __insert_success: Value to return upon insertion success.
* \var __insert_full: Value to return upon insertion failure.
*/
struct HashmapAccumulator {
struct BlockHashmapAccumulator {
// begin public members
// issue-508, TODO: It's best for used_size to be an internal member of this
// class but the current use-cases rely on used_size to be a parameter to the
// below insertion routines. One way to remove used_size as a parameter to the
// insertion routines is to instantiate multiple HashmapAccumulator objects
// insertion routines is to instantiate multiple BlockHashmapAccumulator objects
// (one hashmap for each team of threads) instead of using a single
// HashmapAccumulator object for multiple teams of threads; this entails
// BlockHashmapAccumulator object for multiple teams of threads; this entails
// major refactoring throughout the kokkos-kernels code base.
// Making used_size a pointer and private member of this
// class still exposes access to this member outside of the class and is
Expand All @@ -104,8 +107,8 @@ struct HashmapAccumulator {

// issue-508, TODO: The hash_begins, hash_nexts, keys, values,
// __insert_success, and __insert_full members should all be private as well.
// They should be managed solely by this HashmapAccumulator class: initialized
// in the constructor(s) and only managed by HashmapAccumulator insertion
// They should be managed solely by this BlockHashmapAccumulator class: initialized
// in the constructor(s) and only managed by BlockHashmapAccumulator insertion
// routines. Making these members private requires major refactoring
// throughout the kokkos-kernels code base. If allocations for these members
// must really live outside this class, we need new members that break
Expand All @@ -116,16 +119,22 @@ struct HashmapAccumulator {
size_type *hash_nexts;
key_type *keys;
value_type *values;
// Dimension of one square dense block, and the number of scalars stored per
// block (the parameterized constructor sets block_size to
// block_dim_ * block_dim_).
// The default member initializers keep the default constructor well-formed:
// its initializer list does not mention these members, and a const scalar
// member cannot be left default-initialized. Constructors that name these
// members in their initializer list override the values given here.
const size_type block_dim  = 0;
const size_type block_size = 0;

// Serial, unblocked GEMM kernel from KokkosBatched, used for the dense
// block products; callers in this struct pass row-major strides.
using GEMM =
    KokkosBatched::SerialGemmInternal<KokkosBatched::Algo::Gemm::Unblocked>;

/**
* \brief default constructor HashmapAccumulator
* \brief default constructor BlockHashmapAccumulator
* Sets used_size to 0, __insert_success to 0, __insert_full to 1, and
* __hashOpRHS to 0.
*
* Assumption: hash_begins_ are all initialized to -1.
*/
KOKKOS_INLINE_FUNCTION
HashmapAccumulator()
BlockHashmapAccumulator()
: hash_begins(),
hash_nexts(),
keys(),
Expand All @@ -134,7 +143,7 @@ struct HashmapAccumulator {
__hashOpRHS(0) {}

/**
* \brief parameterized constructor HashmapAccumulator
* \brief parameterized constructor BlockHashmapAccumulator
* Sets used_size to 0, __insert_success to 0, and __insert_full to 1.
*
* \param max_value_size_: The length of the two arrays (keys and hash_nexts)
Expand All @@ -149,11 +158,14 @@ struct HashmapAccumulator {
* Assumption: hash_begins_ are all initialized to -1.
*/
KOKKOS_INLINE_FUNCTION
HashmapAccumulator(const size_type max_value_size_, const size_type hashOpRHS,
BlockHashmapAccumulator(const size_type max_value_size_, const size_type hashOpRHS,
size_type block_dim_,
size_type *hash_begins_, size_type *hash_nexts_,
key_type *keys_, value_type *values_)
: hash_begins(hash_begins_),
hash_nexts(hash_nexts_),
block_dim(block_dim_),
block_size(block_dim_ * block_dim_),
keys(keys_),
values(values_),
__max_value_size(max_value_size_),
Expand All @@ -164,6 +176,7 @@ struct HashmapAccumulator {
}
}

#if 0 // not used in block SPGEMM
// function to be called from device.
// Accumulation is OR operation.
// Insertion is sequential, no race condition for the insertion.
Expand Down Expand Up @@ -340,13 +353,53 @@ struct HashmapAccumulator {
return __insert_success;
}

// function to be called from device.
#endif

// Block multiply-accumulate: target = beta * target + valA * valB, where
// every operand is a block_dim x block_dim dense block.
// Blocks are addressed as row-major with no padding: the row stride passed
// to the GEMM kernel is block_dim and the column stride is 1.
// clear == false (default): beta is 1, i.e. target += valA * valB.
// clear == true:            beta is 0, i.e. target  = valA * valB.
KOKKOS_INLINE_FUNCTION
void update_accumulator(
    value_type *target,
    const value_type *valA, const value_type *valB,
    const bool clear = false) const
{
  constexpr auto alpha = static_cast<value_type>(1);
  // Scale factor applied to the existing contents of target.
  const value_type beta =
      clear ? static_cast<value_type>(0) : static_cast<value_type>(1);

  GEMM::invoke(block_dim, block_dim, block_dim,  // m, n, k
               alpha,
               valA, block_dim, 1,
               valB, block_dim, 1,
               beta,
               target, block_dim, 1);
}

// Block product with overwrite: c_val = a_val * b_val.
// Forwards to update_accumulator with clear enabled, so the previous
// contents of c_val do not contribute to the result.
KOKKOS_INLINE_FUNCTION
void block_set_mul(value_type *c_val,
                   const value_type *a_val, const value_type *b_val) const
{
  constexpr bool overwrite_target = true;
  update_accumulator(c_val, a_val, b_val, overwrite_target);
}

// Block product with accumulation: c_val += a_val * b_val.
// Forwards to update_accumulator with clear disabled, so the product is
// added onto the existing contents of c_val.
KOKKOS_INLINE_FUNCTION
void block_add_mul(value_type *c_val,
                   const value_type *a_val, const value_type *b_val) const
{
  constexpr bool overwrite_target = false;
  update_accumulator(c_val, a_val, b_val, overwrite_target);
}

// Performs C[hash] += A * B (for existing entry)
// or C[hash] = A * B (for new entry)
// Insertion is sequential, no race condition for the insertion.
// This is the merge-add used in the numeric phase of KKMEM.
KOKKOS_INLINE_FUNCTION
int sequential_insert_into_hash_mergeAdd_TrackHashes(
key_type key, value_type value, size_type *used_size_,
size_type *used_hash_size, size_type *used_hashes) {
int sequential_block_muladd(
key_type key, const value_type *valueA, const value_type *valueB,
size_type *used_size_, size_type *used_hash_size, size_type *used_hashes) {
size_type hash, i, my_index;

if (key == -1) return __insert_success;
Expand All @@ -356,7 +409,7 @@ struct HashmapAccumulator {
hash = __compute_hash(key, __hashOpRHS);
for (i = hash_begins[hash]; i != -1; i = hash_nexts[i]) {
if (keys[i] == key) {
values[i] = values[i] + value;
block_add_mul(values + i * block_size, valueA, valueB);
return __insert_success;
}
}
Expand All @@ -370,10 +423,31 @@ struct HashmapAccumulator {

hash_begins[hash] = my_index;
keys[my_index] = key;
values[my_index] = value;
block_set_mul(values + my_index * block_size, valueA, valueB);
return __insert_success;
}

/**
 * \brief Copies the stored entries out of the hashmap and optionally resets
 * the hashmap so it can be reused.
 *
 * For every i in [0, used_size), keys[i] is written to out_keys[i] and the
 * block_size scalars of entry i are written to out_values starting at
 * i * block_size.
 *
 * \param used_size: Number of entries currently stored in keys / values.
 * \param used_hashes: Not read; kept so existing callers compile unchanged.
 * \param out_keys: Destination for used_size keys.
 * \param out_values: Destination for used_size * block_size scalars.
 * \param clear: When true, each exported entry is removed: its key and
 *               hash_nexts link are set to -1 and the bucket it hashes to is
 *               reset to -1 in hash_begins.
 *
 * NOTE(review): the bucket to reset is recomputed from the stored key rather
 * than read from used_hashes[i]. The insertion routine tracks a separate
 * used_hash_size counter, so used_hashes may contain fewer than used_size
 * valid entries; indexing it with i in [0, used_size) could read stale
 * entries and leave real buckets uncleared. Confirm against the caller.
 */
KOKKOS_INLINE_FUNCTION
void sequential_export_values(const size_type used_size,
                              size_type *used_hashes,
                              key_type *out_keys, value_type *out_values,
                              const bool clear = true)
{
  (void)used_hashes;  // see NOTE(review) above
  for (size_type i = 0; i < used_size; ++i) {
    const key_type key = keys[i];
    out_keys[i]        = key;

    // Element-wise copy: valid for any assignable value_type and needs no
    // <cstring> dependency in device code.
    const size_type offset = i * block_size;
    for (size_type j = 0; j < block_size; ++j) {
      out_values[offset + j] = values[offset + j];
    }

    if (clear) {
      // Same hash computation the insertion path uses to pick the bucket.
      hash_begins[__compute_hash(key, __hashOpRHS)] = -1;
      hash_nexts[i]                                 = -1;
      keys[i]                                       = -1;
    }
  }
}

#if 0
// no values. simply adds to the keys.
// used in the compression to count the sets.
// also used in the symbolic of spgemm if no compression is applied.
Expand Down Expand Up @@ -780,6 +854,7 @@ struct HashmapAccumulator {
return __insert_success;
}
}
#endif
// end public members
private:
size_type __max_value_size;
Expand Down Expand Up @@ -813,7 +888,7 @@ struct HashmapAccumulator {
return hash;
}
// private
}; // struct HashmapAccumulator
}; // struct BlockHashmapAccumulator

} // namespace Experimental
} // namespace KokkosKernels
Expand Down
Loading

0 comments on commit 993e92c

Please sign in to comment.