Skip to content

Commit

Permalink
Generate, test and print instruction graph
Browse files Browse the repository at this point in the history
  • Loading branch information
fknorr committed Feb 12, 2024
1 parent f4a5726 commit 594d5d5
Show file tree
Hide file tree
Showing 49 changed files with 7,116 additions and 233 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ set(SOURCES
src/distributed_graph_generator.cc
src/graph_serializer.cc
src/grid.cc
src/instruction_graph_generator.cc
src/print_graph.cc
src/recorders.cc
src/runtime.cc
Expand Down
31 changes: 31 additions & 0 deletions debug/gdb/celerity/gdb/prettyprinters.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,30 @@ def __init__(self, prefix: str, val: gdb.Value):
def to_string(self) -> str:
return self.prefix + str(self.value)


class AllocationIdPrinter:
    """Pretty-printer for values matched by the `allocation_id` printer registration.

    The value's `m_bits` field is read as one integer and split into two parts:
    everything above bit 56 (`mid`) and the low 56 bits (`raid`). An all-zero
    bit pattern is displayed as `null`.
    """

    def __init__(self, val: gdb.Value):
        raw = int(val['m_bits'])
        self.is_null = raw == 0
        # NOTE(review): presumably `mid` is the memory id and `raid` the raw allocation id,
        # matching the 'M' / 'A' prefixes used for those types elsewhere - confirm.
        self.mid = raw >> 56
        self.raid = raw & 0x00ff_ffff_ffff_ffff

    def to_string(self) -> str:
        """Return `null` for the zero id, otherwise `M<mid>.A<raid>`."""
        if self.is_null:
            return 'null'
        return 'M{}.A{}'.format(self.mid, self.raid)


class AllocationWithOffsetPrinter:
    """Pretty-printer for values matched by the `allocation_with_offset` printer registration.

    Shows the value's `id` field, followed by the byte offset when that offset is positive.
    """

    def __init__(self, val: gdb.Value):
        # `id` is kept as-is and only stringified on output.
        self.id = val['id']
        self.offset_bytes = int(val['offset_bytes'])

    def to_string(self) -> str:
        """Return `<id>` alone, or `<id> + <n> bytes` when the offset is greater than zero."""
        if self.offset_bytes <= 0:
            return str(self.id)
        return '{} + {} bytes'.format(self.id, self.offset_bytes)


class TransferIdPrinter:
def __init__(self, val: gdb.Value):
self.consumer_tid = val['consumer_tid']
Expand Down Expand Up @@ -153,6 +177,13 @@ def build_pretty_printer():
add_strong_type_alias_printer(pp, 'reduction_id', 'R')
add_strong_type_alias_printer(pp, 'host_object_id', 'H')
add_strong_type_alias_printer(pp, 'hydration_id', 'HY')
add_strong_type_alias_printer(pp, 'memory_id', 'M')
add_strong_type_alias_printer(pp, 'device_id', 'D')
add_strong_type_alias_printer(pp, 'raw_allocation_id', 'A')
add_strong_type_alias_printer(pp, 'instruction_id', 'I')
add_strong_type_alias_printer(pp, 'message_id', 'MSG')
pp.add_printer('allocation_id', '^celerity::detail::allocation_id$', AllocationIdPrinter)
pp.add_printer('allocation_with_offset', '^celerity::detail::allocation_with_offset$', AllocationWithOffsetPrinter)
pp.add_printer('id', '^celerity::id<.*>$', CoordinatePrinter)
pp.add_printer('range', '^celerity::range<.*>$', CoordinatePrinter)
pp.add_printer('subrange', '^celerity::subrange<.*>$', SubrangePrinter)
Expand Down
13 changes: 1 addition & 12 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,4 @@ if(CELERITY_FEATURE_SCALAR_REDUCTIONS)
add_example(reduction)
endif()

find_package(PkgConfig QUIET)
set(HDF5_FOUND OFF)
if(PkgConfig_FOUND)
pkg_search_module(HDF5 IMPORTED_TARGET hdf5-openmpi hdf5-1.12.0 hdf5)
if(HDF5_FOUND)
add_example(distr_io)
target_link_libraries(distr_io PUBLIC PkgConfig::HDF5)
endif()
endif()
if(CELERITY_EXAMPLES_REQUIRE_HDF5 AND NOT HDF5_FOUND)
message(FATAL_ERROR "HDF5 not found, but required by CELERITY_EXAMPLES_REQUIRE_HDF5")
endif()
# NOCOMMIT do not remove HDF5 example
4 changes: 2 additions & 2 deletions include/accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ class accessor<DataT, Dims, Mode, target::device> : public detail::accessor_base
accessor(const ctor_internal_tag /* tag */, const buffer<DataT, Dims>& buff, handler& cgh, const Functor& rmfn) {
using range_mapper = detail::range_mapper<Dims, std::decay_t<Functor>>; // decay function type to function pointer
const auto hid = detail::add_requirement(cgh, detail::get_buffer_id(buff), std::make_unique<range_mapper>(rmfn, Mode, buff.get_range()));
detail::extend_lifetime(cgh, std::move(detail::get_lifetime_extending_state(buff)));
detail::extend_lifetime(cgh, detail::get_lifetime_extending_state(buff));
m_device_ptr = detail::embed_hydration_id<DataT*>(hid);
}

Expand Down Expand Up @@ -551,7 +551,7 @@ class accessor<DataT, Dims, Mode, target::host_task> : public detail::accessor_b
accessor(ctor_internal_tag /* tag */, const buffer<DataT, Dims>& buff, handler& cgh, const Functor& rmfn) : m_virtual_buffer_range(buff.get_range()) {
using range_mapper = detail::range_mapper<Dims, std::decay_t<Functor>>; // decay function type to function pointer
const auto hid = detail::add_requirement(cgh, detail::get_buffer_id(buff), std::make_unique<range_mapper>(rmfn, Mode, buff.get_range()));
detail::extend_lifetime(cgh, std::move(detail::get_lifetime_extending_state(buff)));
detail::extend_lifetime(cgh, detail::get_lifetime_extending_state(buff));
m_host_ptr = detail::embed_hydration_id<DataT*>(hid);
}

Expand Down
68 changes: 50 additions & 18 deletions include/command.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,16 @@ namespace detail {
// ----------------------------------------------------------------------------------------------------------------

// TODO: Consider adding a mechanism (during debug builds?) to assert that dependencies can only exist between commands on the same node
class abstract_command : public intrusive_graph_node<abstract_command> {
class abstract_command : public intrusive_graph_node<abstract_command>,
public matchbox::acceptor<class epoch_command, class horizon_command, class execution_command, class push_command,
class await_push_command, class reduction_command, class fence_command> {
friend class command_graph;

protected:
abstract_command(command_id cid) : m_cid(cid) {}

public:
virtual ~abstract_command() = 0;
virtual command_type get_type() const = 0;

command_id get_cid() const { return m_cid; }

Expand All @@ -47,12 +49,13 @@ namespace detail {
command_id m_cid;
bool m_flushed = false;
};
inline abstract_command::~abstract_command() {}

class push_command final : public abstract_command {
class push_command final : public matchbox::implement_acceptor<abstract_command, push_command> {
friend class command_graph;
push_command(const command_id cid, const node_id target, const transfer_id& trid, const subrange<3>& push_range)
: abstract_command(cid), m_target(target), m_trid(trid), m_push_range(push_range) {}
: acceptor_base(cid), m_target(target), m_trid(trid), m_push_range(push_range) {}

command_type get_type() const override { return command_type::push; }

public:
node_id get_target() const { return m_target; }
Expand All @@ -65,10 +68,11 @@ namespace detail {
subrange<3> m_push_range;
};

class await_push_command final : public abstract_command {
class await_push_command final : public matchbox::implement_acceptor<abstract_command, await_push_command> {
friend class command_graph;
await_push_command(const command_id cid, const transfer_id& trid, region<3> region)
: abstract_command(cid), m_trid(trid), m_region(std::move(region)) {}
await_push_command(const command_id cid, const transfer_id& trid, region<3> region) : acceptor_base(cid), m_trid(trid), m_region(std::move(region)) {}

command_type get_type() const override { return command_type::await_push; }

public:
const transfer_id& get_transfer_id() const { return m_trid; }
Expand All @@ -79,9 +83,11 @@ namespace detail {
region<3> m_region;
};

class reduction_command final : public abstract_command {
class reduction_command final : public matchbox::implement_acceptor<abstract_command, reduction_command> {
friend class command_graph;
reduction_command(command_id cid, const reduction_info& info) : abstract_command(cid), m_info(info) {}
reduction_command(command_id cid, const reduction_info& info) : acceptor_base(cid), m_info(info) {}

command_type get_type() const override { return command_type::reduction; }

public:
const reduction_info& get_reduction_info() const { return m_info; }
Expand All @@ -101,29 +107,45 @@ namespace detail {
task_id m_tid;
};

class epoch_command final : public task_command {
class epoch_command final : public matchbox::implement_acceptor<task_command, epoch_command> {
friend class command_graph;
epoch_command(const command_id& cid, const task_id& tid, epoch_action action) : task_command(cid, tid), m_action(action) {}
epoch_command(const command_id cid, const task_id tid, const epoch_action action, std::vector<reduction_id> completed_reductions)
: acceptor_base(cid, tid), m_action(action), m_completed_reductions(std::move(completed_reductions)) {}

command_type get_type() const override { return command_type::epoch; }

public:
epoch_action get_epoch_action() const { return m_action; }
const std::vector<reduction_id>& get_completed_reductions() const { return m_completed_reductions; }

private:
epoch_action m_action;
std::vector<reduction_id> m_completed_reductions;
};

class horizon_command final : public task_command {
class horizon_command final : public matchbox::implement_acceptor<task_command, horizon_command> {
friend class command_graph;
using task_command::task_command;
horizon_command(const command_id cid, const task_id tid, std::vector<reduction_id> completed_reductions)
: acceptor_base(cid, tid), m_completed_reductions(std::move(completed_reductions)) {}

command_type get_type() const override { return command_type::horizon; }

public:
const std::vector<reduction_id>& get_completed_reductions() const { return m_completed_reductions; }

private:
std::vector<reduction_id> m_completed_reductions;
};

class execution_command final : public task_command {
class execution_command final : public matchbox::implement_acceptor<task_command, execution_command> {
friend class command_graph;

protected:
execution_command(command_id cid, task_id tid, subrange<3> execution_range) : task_command(cid, tid), m_execution_range(execution_range) {}
execution_command(command_id cid, task_id tid, subrange<3> execution_range) : acceptor_base(cid, tid), m_execution_range(execution_range) {}

public:
command_type get_type() const override { return command_type::execution; }

const subrange<3>& get_execution_range() const { return m_execution_range; }

void set_is_reduction_initializer(bool is_initializer) { m_initialize_reductions = is_initializer; }
Expand All @@ -135,9 +157,19 @@ namespace detail {
bool m_initialize_reductions = false;
};

class fence_command final : public task_command {
class fence_command final : public matchbox::implement_acceptor<task_command, fence_command> {
friend class command_graph;
using task_command::task_command;
using acceptor_base::acceptor_base;

command_type get_type() const override { return command_type::fence; }
};

/// Hasher for `unordered_set`s / `unordered_map`s keyed by `command *`: hashes the command id obtained through `get_cid()` rather than the
/// pointer value, so the result stays deterministic even as allocation addresses change between application runs.
struct command_hash_by_id {
    /// `Pointer` is any pointer-like type whose `operator->` exposes `get_cid()`.
    template <typename Pointer>
    constexpr size_t operator()(const Pointer cmd) const {
        return std::hash<command_id>{}(cmd->get_cid());
    }
};

// ----------------------------------------------------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions include/command_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,15 @@ namespace detail {

void remove_dependency(abstract_command* depender, abstract_command* dependee) { depender->remove_dependency(dependee); }

const std::unordered_set<abstract_command*>& get_execution_front() const { return m_execution_front; }
const std::unordered_set<abstract_command*, command_hash_by_id>& get_execution_front() const { return m_execution_front; }

private:
command_id m_next_cmd_id = 0;
// TODO: Consider storing commands in a contiguous memory data structure instead
std::unordered_map<command_id, std::unique_ptr<abstract_command>> m_commands;
std::unordered_map<task_id, std::vector<task_command*>> m_by_task;

std::unordered_set<abstract_command*> m_execution_front;
std::unordered_set<abstract_command*, command_hash_by_id> m_execution_front;
};

} // namespace detail
Expand Down
33 changes: 33 additions & 0 deletions include/dense_map.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#pragma once

#include <cassert>
#include <cstdlib>
#include <vector>

namespace celerity::detail {

/// Like a simple std::unordered_map, but implemented by indexing into a vector with the integral key type.
///
/// Unlike `std::unordered_map`, `operator[]` never inserts: the map must be sized up front (constructor or `resize()`), and every key must
/// convert to an index smaller than `size()`. This precondition is checked with `assert`, so it is only enforced when assertions are enabled.
// TODO I'm taking bikeshedding suggestions for the name. dense_map? integral_map? vector_map?
template <typename KeyId, typename Value>
class dense_map : private std::vector<Value> {
  private:
    using vector = std::vector<Value>;

  public:
    /// Creates an empty map; call `resize()` before accessing any key.
    dense_map() = default;

    /// Creates a map holding `size` value-initialized entries for the keys `0 .. size - 1`.
    explicit dense_map(const size_t size) : vector(size) {}

    using vector::begin, vector::end, vector::cbegin, vector::cend, vector::empty, vector::size, vector::resize;

    /// Returns the value stored for `key`, which must convert to an index below `size()`.
    Value& operator[](const KeyId key) { return vector::operator[](checked_index(key)); }

    /// Returns the value stored for `key`, which must convert to an index below `size()`.
    const Value& operator[](const KeyId key) const { return vector::operator[](checked_index(key)); }

  private:
    /// Converts `key` to a vector index and asserts that it is in bounds.
    /// The bounds check is done on the converted index so that it uses the same (explicit) conversion as the element access itself,
    /// which avoids mixed-sign comparisons and also works for key types that are only explicitly convertible to `size_t`.
    size_t checked_index(const KeyId key) const {
        const auto index = static_cast<size_t>(key);
        assert(index < size());
        return index;
    }
};

} // namespace celerity::detail
12 changes: 10 additions & 2 deletions include/distributed_graph_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class distributed_graph_generator {

void destroy_host_object(host_object_id hoid);

std::unordered_set<abstract_command*> build_task(const task& tsk);
std::unordered_set<abstract_command*, command_hash_by_id> build_task(const task& tsk);

command_graph& get_command_graph() { return m_cdag; }

Expand Down Expand Up @@ -160,7 +160,11 @@ class distributed_graph_generator {
command_id m_current_horizon = no_command;

// Batch of commands currently being generated. Returned (and thereby emptied) by build_task().
std::unordered_set<abstract_command*> m_current_cmd_batch;
std::unordered_set<abstract_command*, command_hash_by_id> m_current_cmd_batch;

// List of reductions that have either completed globally or whose result has been discarded. This list will be appended to the next horizon to eventually
// inform the instruction executor that it can safely garbage-collect runtime info on the reduction operation.
std::vector<reduction_id> m_completed_reductions;

// For proper handling of anti-dependencies we also have to store for each command which buffer regions it reads.
// We do this because we cannot reconstruct the requirements from a command within the graph alone (e.g. for compute commands).
Expand All @@ -175,6 +179,10 @@ class distributed_graph_generator {
detail::command_recorder* m_recorder = nullptr;
};

/// Topologically sort a command-set as returned from distributed_graph_generator::build_task() such that sequential execution satisfies all dependencies.
/// TODO refactor distributed_graph_generator to intrinsically generate commands in dependency-order.
std::vector<abstract_command*> topsort(std::unordered_set<abstract_command*, command_hash_by_id> unmarked);

} // namespace celerity::detail

namespace std {
Expand Down
8 changes: 7 additions & 1 deletion include/fence.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ class host_object_fence_promise final : public detail::fence_promise {

void fulfill() override { m_promise.set_value(std::as_const(detail::get_host_object_instance(m_host_object))); }

allocation_id get_user_allocation_id() override { utils::panic("host_object_fence_promise::get_user_allocation_id"); }

private:
experimental::host_object<T> m_host_object;
std::promise<T> m_promise;
Expand All @@ -84,7 +86,8 @@ class host_object_fence_promise final : public detail::fence_promise {
template <typename DataT, int Dims>
class buffer_fence_promise final : public detail::fence_promise {
public:
explicit buffer_fence_promise(const buffer<DataT, Dims>& buf, const subrange<Dims>& sr) : m_buffer(buf), m_subrange(sr) {}
explicit buffer_fence_promise(const buffer<DataT, Dims>& buf, const subrange<Dims>& sr)
: m_buffer(buf), m_subrange(sr), m_aid(null_allocation_id /* [IDAG placeholder] */) {}

std::future<buffer_snapshot<DataT, Dims>> get_future() { return m_promise.get_future(); }

Expand All @@ -98,9 +101,12 @@ class buffer_fence_promise final : public detail::fence_promise {
m_promise.set_value(buffer_snapshot<DataT, Dims>(m_subrange, std::move(data)));
}

allocation_id get_user_allocation_id() override { return m_aid; }

private:
buffer<DataT, Dims> m_buffer;
subrange<Dims> m_subrange;
allocation_id m_aid;
std::promise<buffer_snapshot<DataT, Dims>> m_promise;
};

Expand Down
2 changes: 1 addition & 1 deletion include/graph_serializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ namespace detail {
/**
* Serializes a set of commands. Assumes task commands all belong to the same task.
*/
void flush(const std::unordered_set<abstract_command*>& cmds);
void flush(const std::unordered_set<abstract_command*, command_hash_by_id>& cmds);

private:
flush_callback m_flush_cb;
Expand Down
3 changes: 3 additions & 0 deletions include/grid.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,9 @@ box<Dims> bounding_box(const region<Dims>& region) {
return bounding_box(region.get_boxes().begin(), region.get_boxes().end());
}

template <int Dims>
void merge_adjacent_boxes(box_vector<Dims>& boxes);

template <int Dims>
region<Dims> region_union(const region<Dims>& lhs, const region<Dims>& rhs);

Expand Down
6 changes: 3 additions & 3 deletions include/handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,12 @@ namespace experimental {
class collective_group {
public:
/// Creates a new collective group with a globally unique id. This must only be called from the main thread.
collective_group() noexcept : m_cgid(next_cgid++) {}
collective_group() noexcept : m_cgid(s_next_cgid++) {}

private:
friend class collective_tag_factory;
detail::collective_group_id m_cgid;
inline static size_t next_cgid = 1;
inline static detail::collective_group_id s_next_cgid = detail::root_collective_group_id + 1;
};

/**
Expand Down Expand Up @@ -286,7 +286,7 @@ namespace detail {

const auto rid = detail::runtime::get_instance().get_reduction_manager().create_reduction<DataT, Dims>(bid, op, identity);
add_reduction(cgh, reduction_info{rid, bid, include_current_buffer_value});
extend_lifetime(cgh, std::move(get_lifetime_extending_state(vars)));
extend_lifetime(cgh, get_lifetime_extending_state(vars));

return detail::reduction_descriptor<DataT, Dims, BinaryOperation, WithExplicitIdentity>{bid, op, identity, include_current_buffer_value};
#endif
Expand Down
Loading

0 comments on commit 594d5d5

Please sign in to comment.