Skip to content

Commit

Permalink
Add faster merge events. (#2108)
Browse files Browse the repository at this point in the history
Use an adaptive algorithm to merge events: Switches between Tree Merge and Linear Scan at size 20.
Also add a microbenchmark to test/derive these numbers.
  • Loading branch information
thorstenhater authored May 9, 2023
1 parent dd75b03 commit 8e07459
Show file tree
Hide file tree
Showing 10 changed files with 388 additions and 174 deletions.
1 change: 1 addition & 0 deletions arbor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ set(arbor_sources
tree.cpp
util/dylib.cpp
util/hostname.cpp
util/tourney_tree.cpp
util/unwind.cpp
version.cpp
)
Expand Down
197 changes: 63 additions & 134 deletions arbor/merge_events.cpp
Original file line number Diff line number Diff line change
@@ -1,157 +1,86 @@
#include <set>
#include <vector>
#include <numeric>
#include <queue>

#include <arbor/assert.hpp>
#include <arbor/common_types.hpp>
#include <arbor/math.hpp>

#include "io/trace.hpp"
#include "merge_events.hpp"
#include "profile/profiler_macro.hpp"
#include "util/tourney_tree.hpp"

namespace arb {

namespace impl {

// A postsynaptic spike event that has delivery time set to
// terminal_time, used as a sentinel in `tourney_tree`.
//
// The tree stores this value in the leaf of every exhausted input lane and in
// every padding leaf, and `tourney_tree::empty()` reports true once the root
// event carries `terminal_time`.
// NOTE(review): the positional initializer sets the middle field to
// terminal_time; the other two fields are zero. Presumably the layout is
// {target, time, weight} -- confirm against the definition of spike_event.

static constexpr spike_event terminal_pse{0, terminal_time, 0};


// The tournament tree data structure is used to merge k sorted lists of events.
// See online for high-level information about tournament trees.
//
// This implementation maintains a heap-like data structure, with entries of type:
// std::pair<unsigned, spike_event>
// where the unsigned ∈ [0, k-1] is the id of the list from which the event was
// drawn. The id is stored so that the operation of removing the most recent event
// knows which leaf node needs to be updated (i.e. the leaf node of the list from
// which the most recent event was drawn).
//
// unsigned is used for storing the index, because if drawing events from more
// event generators than can be counted using an unsigned a complete redesign
// will be needed.

// Build a tournament tree over the event lanes in `input`.
//
// The tree keeps a reference to `input` (it does not copy the spans), so the
// vector must outlive the tree. At least one lane is required.
tourney_tree::tourney_tree(std::vector<event_span>& input):
    input_(input),
    n_lanes_(input_.size())
{
    // An empty set of lanes is not supported.
    arb_assert(n_lanes_>=1u);

    // The leaf count is the lane count rounded up by math::next_pow2.
    leaves_ = math::next_pow2(n_lanes_);

    // The rounded-up leaf count must still be representable as unsigned.
    arb_assert(leaves_>=n_lanes_);
    nodes_ = 2*leaves_-1;

    // One heap slot per tree node (internal nodes followed by leaves).
    heap_.resize(nodes_);

    // Fill the leaves: a lane with pending events contributes its first event;
    // exhausted lanes and padding leaves hold the terminal sentinel.
    for (unsigned lane = 0; lane<leaves_; ++lane) {
        const bool no_event = lane>=n_lanes_ || input[lane].empty();
        heap_[leaf(lane)] = no_event?
            key_val(lane, terminal_pse):
            key_val(lane, input[lane].front());
    }

    // Propagate winners from the leaves up to the root.
    setup(0);
}

// NOTE(review): this span appears to be a diff view with the +/- markers
// stripped: the statements of the tourney_tree stream operator (which walks
// tt.heap_ and prints each node as "{id,event}") are interleaved with the
// body of linear_merge_events. The code is left untouched; only comments are
// added.
std::ostream& operator<<(std::ostream& out, const tourney_tree& tt) {
unsigned nxt = 1;
for (unsigned i = 0; i<tt.nodes_; ++i) {
// Emit a line break whenever i reaches nxt-1, then double nxt.
if (i==nxt-1) {
nxt*=2;
out << "\n";
// k-way linear merge:
// Pick stream with the minimum element, pop that and push into output.
// Repeat.
//
// sources: the event spans to merge; each span is advanced (its `left` member
//          is incremented) as its events are taken, so the spans are consumed.
// out:     receives the merged events, appended after any existing content.
void linear_merge_events(std::vector<event_span>& sources, pse_vector& out) {
// Consume all events.
for (;;) {
// Now find the minimum
// Start from a sentinel with time terminal_time; an event is only selected if
// it compares less than the current candidate.
auto mevt = spike_event{0, terminal_time, 0};;
// -1 marks "no non-empty source found in this pass".
// NOTE(review): midx is deduced as int while idx below is unsigned long long,
// so the assignment `midx = idx` narrows.
auto midx = -1;
for (auto idx = 0ull; idx < sources.size(); ++idx) {
auto& source = sources[idx];
if (!source.empty()) {
auto& evt = source.front();
// Strict comparison: on ties the source with the lowest index is kept.
// NOTE(review): an event that does not compare less than the sentinel is
// never selected, and the loop then terminates with it still pending.
if (evt < mevt) {
mevt = evt;
midx = idx;
}
}
}
out << "{" << tt.heap_[i].first << "," << tt.heap_[i].second << "}\n";
// No candidate found: every source is exhausted, so the merge is complete.
if (midx == -1) break;
// Take event: bump chosen stream and stuff event into output.
sources[midx].left++;
out.emplace_back(mevt);
}
return out;
}

bool tourney_tree::empty() const {
return event(0).time == terminal_time;
}

// Return a copy of the event currently stored at the root of the tree.
spike_event tourney_tree::head() const {
    const spike_event& winner = event(0);
    return winner;
}

// NOTE(review): this span appears to be a diff view with the +/- markers
// stripped: the statements of tourney_tree::pop are interleaved with the body
// of pqueue_merge_events. The code is left untouched; only comments are added.
//
// Remove the smallest (most recent) event from the tree, then update the
// tree so that head() returns the next event.
void tourney_tree::pop() {
// The root records which lane supplied the current head event.
unsigned lane = id(0);
unsigned i = leaf(lane);

// draw the next event from the input lane
auto& in = input_[lane];

if (!in.empty()) {
++in.left;
// priority-queue based merge.
//
// sources: the event spans to merge; each span is advanced (its `left` member
//          is incremented) whenever one of its events is pushed onto the heap.
// out:     receives the merged events, appended after any existing content.
void pqueue_merge_events(std::vector<event_span>& sources, pse_vector& out) {
// Min heap tracking the minimum element from each span
// Entries pair an event with the index of the span it came from; std::greater
// on the pair orders by event first, then by index.
// NOTE(review): the span index is stored as int although the loops below
// index with std::size_t.
using kv_type = std::pair<spike_event, int>;
std::priority_queue<kv_type, std::vector<kv_type>, std::greater<>> heap;

// Add the first element from each sorted vector to the min heap
for (std::size_t ix = 0; ix < sources.size(); ++ix) {
auto& source = sources[ix];
if (!source.empty()) {
heap.emplace(source.front(), ix);
// The event now lives in the heap, so step the span past it.
source.left++;
}
}

// Refresh the leaf: the lane's new front event, or the sentinel if exhausted.
event(i) = in.empty()? terminal_pse: in.front();

// re-heapify the tree with a single walk from leaf to root
// The loop stops when parent(i) yields 0, so the root is handled separately.
while ((i=parent(i))) {
merge_up(i);
// Merge by continually popping the minimum element from the min heap
while (!heap.empty()) {
auto [value, ix] = heap.top();
heap.pop();
out.emplace_back(value);

// If the sorted vector from which the minimum element was taken still
// has elements, add the next smallest element to the heap
auto& source = sources[ix];
if (!source.empty()) {
heap.emplace(source.front(), ix);
source.left++;
}
}
merge_up(0); // handle the root
}

// Recursively initialise the subtree rooted at node i: both child subtrees are
// set up first, then node i takes the winner of its two children. Leaf nodes
// are left as they are.
void tourney_tree::setup(unsigned i) {
    if (!is_leaf(i)) {
        setup(left(i));
        setup(right(i));
        merge_up(i);
    }
}

// Store in node i whichever of its two children holds the smaller event; when
// the left event is not strictly smaller, the right child is taken.
// Must not be called on a leaf node: the result is undefined there.
void tourney_tree::merge_up(unsigned i) {
    const unsigned lhs = left(i);
    const unsigned rhs = right(i);
    if (event(lhs)<event(rhs)) {
        heap_[i] = heap_[lhs];
    }
    else {
        heap_[i] = heap_[rhs];
    }
}

// Index arithmetic for the tree, which is laid out in heap_ with the standard
// heap indexing scheme: the root is node 0 and the children of node i are
// nodes 2i+1 and 2i+2. The leaves occupy the last leaves_ slots.

// Index of the parent of node i.
unsigned tourney_tree::parent(unsigned i) const {
    return (i-1)/2;
}
// Index of the left child of node i.
unsigned tourney_tree::left(unsigned i) const {
    return 2*i + 1;
}
// Index of the right child of node i.
unsigned tourney_tree::right(unsigned i) const {
    return 2*i + 2;
}
// Index of the heap node that holds leaf number i.
unsigned tourney_tree::leaf(unsigned i) const {
    return leaves_-1 + i;
}
// True if node i lies in the leaf section of the heap.
bool tourney_tree::is_leaf(unsigned i) const {
    return !(i<leaves_-1);
}
const unsigned& tourney_tree::id(unsigned i) const {
return heap_[i].first;
}
spike_event& tourney_tree::event(unsigned i) {
return heap_[i].second;
}
const spike_event& tourney_tree::event(unsigned i) const {
return heap_[i].second;
}

} // namespace impl

void tree_merge_events(std::vector<event_span>& sources, pse_vector& out) {
impl::tourney_tree tree(sources);
while (!tree.empty()) {
out.push_back(tree.head());
tree.pop();
void merge_events(std::vector<event_span>& sources, pse_vector &out) {
// Count events, bail if none; else allocate enough space to store them.
auto n_evts = std::accumulate(sources.begin(), sources.end(),
0,
[] (auto acc, const auto& rng) { return acc + rng.size(); });
out.reserve(out.size() + n_evts);
auto n_queues = sources.size();
if (n_queues < 20) { // NOTE: MAGIC NUMBER, found by ubench/merge
linear_merge_events(sources, out);
}
else {
pqueue_merge_events(sources, out);
}
}

} // namespace arb

41 changes: 4 additions & 37 deletions arbor/merge_events.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,42 +16,9 @@ namespace arb {

using event_span = util::range<const spike_event*>;

void tree_merge_events(std::vector<event_span>& sources, pse_vector& out);

namespace impl {
// The tournament tree is used internally by the merge_events method, and
// it is not intended for use elsewhere. It is exposed here for unit testing
// of its functionality.
//
// Usage pattern (as in tree_merge_events): construct over a vector of event
// spans, then repeatedly read head() and call pop() until empty() is true.
class ARB_ARBOR_API tourney_tree {
// A tree node: the id of the lane an event was drawn from, and the event.
using key_val = std::pair<unsigned, spike_event>;

public:
// Build the tree over `input`. The vector is held by reference and its spans
// are advanced by pop(), so it must outlive the tree.
tourney_tree(std::vector<event_span>& input);
// True when the root event has time equal to terminal_time.
bool empty() const;
// Copy of the event at the root of the tree.
spike_event head() const;
// Advance the lane that supplied the root event and restore the tree.
void pop();
// Prints every node of the tree as "{id,event}".
friend std::ostream& operator<<(std::ostream&, const tourney_tree&);

private:
// Recursively initialise the internal nodes of the subtree rooted at i.
void setup(unsigned i);
// Set node i to the child holding the smaller event.
void merge_up(unsigned i);
// NOTE(review): no definition of update_lane is visible alongside the other
// member definitions -- possibly a stale declaration; confirm.
void update_lane(unsigned lane);
// Heap-style index helpers: parent/children of node i, the node index of
// leaf i, and whether node i is a leaf.
unsigned parent(unsigned i) const;
unsigned left(unsigned i) const;
unsigned right(unsigned i) const;
unsigned leaf(unsigned i) const;
bool is_leaf(unsigned i) const;
// Lane id and event stored in node i.
const unsigned& id(unsigned i) const;
spike_event& event(unsigned i);
const spike_event& event(unsigned i) const;
// NOTE(review): no definition of next_power_2 is visible; the constructor
// calls math::next_pow2 instead -- possibly a stale declaration; confirm.
unsigned next_power_2(unsigned x) const;

// Tree nodes in heap order; sized to nodes_ by the constructor.
std::vector<key_val> heap_;
// The lanes being merged (not owned).
std::vector<event_span>& input_;
// Number of leaf nodes: n_lanes_ rounded up by math::next_pow2.
unsigned leaves_;
// Total number of tree nodes: 2*leaves_-1.
unsigned nodes_;
// Number of input lanes: input_.size() at construction.
unsigned n_lanes_;
};
}
// Merge the event spans in `sources`, appending the merged events to `out`.
// Each pass scans the front of every non-empty span and takes the smallest
// event. The spans in `sources` are advanced as their events are taken.
void linear_merge_events(std::vector<event_span>& sources, pse_vector& out);
// Merge the event spans in `sources`, appending the merged events to `out`.
// A min-heap holds one pending event per span. The spans in `sources` are
// advanced as their events are taken.
void pqueue_merge_events(std::vector<event_span>& sources, pse_vector& out);

// Merge the event spans in `sources`, appending the merged events to `out`.
// Reserves space in `out` for all events, then dispatches to
// linear_merge_events or pqueue_merge_events depending on the number of spans.
void merge_events(std::vector<event_span>& sources, pse_vector& out);

} // namespace arb
2 changes: 1 addition & 1 deletion arbor/simulation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ ARB_ARBOR_API void merge_cell_events(
PL();

PE(communication:enqueue:tree);
tree_merge_events(spanbuf, new_events);
merge_events(spanbuf, new_events);
PL();

old_events = old_split.second;
Expand Down
Loading

0 comments on commit 8e07459

Please sign in to comment.