Skip to content

Commit

Permalink
[BASE] celerity#204 Re-implement box and region types
Browse files Browse the repository at this point in the history
  • Loading branch information
fknorr committed Aug 23, 2023
1 parent 55a82d9 commit ee39542
Show file tree
Hide file tree
Showing 97 changed files with 2,453 additions and 21,492 deletions.
8 changes: 2 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,9 @@ set(SOURCES
src/executor.cc
src/distributed_graph_generator.cc
src/graph_serializer.cc
src/grid.cc
src/print_graph.cc
src/print_utils.cc
src/recorders.cc
src/c/corders.cc
src/runtime.cc
src/scheduler.cc
src/task.cc
Expand Down Expand Up @@ -346,10 +346,6 @@ install(
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/
DESTINATION include/celerity
)
install(
DIRECTORY ${PROJECT_SOURCE_DIR}/vendor/allscale/
DESTINATION include/celerity/vendor/allscale
)
install(
FILES ${PROJECT_SOURCE_DIR}/vendor/ctpl_stl.h
DESTINATION include/celerity/vendor
Expand Down
227 changes: 141 additions & 86 deletions ci/perf/gpuc2_bench.csv

Large diffs are not rendered by default.

229 changes: 142 additions & 87 deletions ci/perf/gpuc2_bench.md

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions include/accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,8 @@ class accessor<DataT, Dims, Mode, target::device> : public detail::accessor_base
// We currently don't support boundary checking for accessors created using accessor_testspy::make_device_accessor,
// which does not set m_oob_indices.
if(m_oob_indices != nullptr) {
const id<Dims> all_true = detail::id_cast<Dims>(id<3>(true, true, true));
const bool is_within_bounds_lo = (index >= m_accessed_virtual_subrange.offset) == all_true;
const bool is_within_bounds_hi = (index < (m_accessed_virtual_subrange.offset + m_accessed_virtual_subrange.range)) == all_true;
const bool is_within_bounds_lo = all_true(index >= m_accessed_virtual_subrange.offset);
const bool is_within_bounds_hi = all_true(index < (m_accessed_virtual_subrange.offset + m_accessed_virtual_subrange.range));
if((!is_within_bounds_lo || !is_within_bounds_hi)) {
for(int d = 0; d < Dims; ++d) {
sycl::atomic_ref<size_t, sycl::memory_order::relaxed, sycl::memory_scope::device>{m_oob_indices[0][d]}.fetch_min(index[d]);
Expand Down
1 change: 0 additions & 1 deletion include/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <memory>

#include <CL/sycl.hpp>
#include <allscale/utils/functional_utils.h>

#include "buffer_manager.h"
#include "lifetime_extending_state.h"
Expand Down
4 changes: 2 additions & 2 deletions include/buffer_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,8 @@ namespace detail {
resize_info result;
if(!is_inside_old_range) {
result.resize_required = true;
result.new_offset = min_id(request_offset, buffer.offset);
result.new_range = range_cast<3>(id_cast<3>(max_range(old_abs_range, new_abs_range)) - result.new_offset);
result.new_offset = id_min(request_offset, buffer.offset);
result.new_range = range_cast<3>(id_cast<3>(range_max(old_abs_range, new_abs_range)) - result.new_offset);
}
return result;
}
Expand Down
4 changes: 2 additions & 2 deletions include/buffer_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ namespace detail {

inline void assert_copy_is_in_range(
const range<3>& source_range, const range<3>& target_range, const id<3>& source_offset, const id<3>& target_offset, const range<3>& copy_range) {
assert(max_range(source_range, range_cast<3>(source_offset + copy_range)) == source_range);
assert(max_range(target_range, range_cast<3>(target_offset + copy_range)) == target_range);
assert(range_max(source_range, range_cast<3>(source_offset + copy_range)) == source_range);
assert(range_max(target_range, range_cast<3>(target_offset + copy_range)) == target_range);
}

template <typename DataT, int Dims>
Expand Down
16 changes: 8 additions & 8 deletions include/buffer_transfer_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,23 +63,23 @@ namespace detail {
struct incoming_transfer_handle : transfer_handle {
incoming_transfer_handle(const size_t num_nodes) : m_num_nodes(num_nodes) {}

void set_expected_region(GridRegion<3> region) { m_expected_region = std::move(region); }
void set_expected_region(region<3> region) { m_expected_region = std::move(region); }

void add_transfer(std::unique_ptr<transfer_in>&& t) {
assert(!complete);
assert(t->frame->rid == 0 || m_is_reduction || m_transfers.empty()); // Either all or none
m_is_reduction = t->frame->rid != 0;
const auto box = subrange_to_grid_box(t->frame->sr);
assert(GridRegion<3>::intersect(m_received_region, box).empty() || m_is_reduction);
assert(!m_expected_region.has_value() || GridRegion<3>::difference(box, *m_expected_region).empty());
m_received_region = GridRegion<3>::merge(m_received_region, box);
const auto box = detail::box(t->frame->sr);
assert(region_intersection(m_received_region, box).empty() || m_is_reduction);
assert(!m_expected_region.has_value() || region_difference(box, *m_expected_region).empty());
m_received_region = region_union(m_received_region, box);
m_transfers.push_back(std::move(t));
}

bool received_full_region() const {
if(!m_expected_region.has_value()) return false;
if(m_is_reduction) {
assert(m_expected_region->area() == 1);
assert(m_expected_region->get_area() == 1);
// For reductions we're waiting to receive one message per peer
return m_transfers.size() == m_num_nodes - 1;
}
Expand All @@ -99,8 +99,8 @@ namespace detail {
size_t m_num_nodes; // Number of nodes in the system, required for reductions
bool m_is_reduction = false;
std::vector<std::unique_ptr<transfer_in>> m_transfers;
std::optional<GridRegion<3>> m_expected_region; // This will only be set once the await push job has started
GridRegion<3> m_received_region;
std::optional<region<3>> m_expected_region; // This will only be set once the await push job has started
region<3> m_received_region;
};

struct transfer_out {
Expand Down
8 changes: 4 additions & 4 deletions include/command.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,22 +70,22 @@ namespace detail {

class await_push_command final : public abstract_command {
friend class command_graph;
await_push_command(command_id cid, buffer_id bid, reduction_id rid, transfer_id trid, GridRegion<3> region)
await_push_command(command_id cid, buffer_id bid, reduction_id rid, transfer_id trid, region<3> region)
: abstract_command(cid), m_bid(bid), m_rid(rid), m_trid(trid), m_region(std::move(region)) {}

public:
buffer_id get_bid() const { return m_bid; }
reduction_id get_reduction_id() const { return m_rid; }
transfer_id get_transfer_id() const { return m_trid; }
GridRegion<3> get_region() const { return m_region; }
region<3> get_region() const { return m_region; }

private:
buffer_id m_bid;
// Having the reduction ID here isn't strictly required for matching against incoming pushes,
// but it allows us to sanity check that they match as well as include the ID during graph printing.
reduction_id m_rid;
transfer_id m_trid;
GridRegion<3> m_region;
region<3> m_region;
};

class reduction_command final : public abstract_command {
Expand Down Expand Up @@ -184,7 +184,7 @@ namespace detail {
buffer_id bid;
reduction_id rid;
transfer_id trid;
GridRegion<3> region;
region<3> region;
};

struct reduction_data {
Expand Down
4 changes: 2 additions & 2 deletions include/distributed_graph_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class distributed_graph_generator {
void generate_distributed_commands(const task& tsk);

void generate_anti_dependencies(
task_id tid, buffer_id bid, const region_map<write_command_state>& last_writers_map, const GridRegion<3>& write_req, abstract_command* write_cmd);
task_id tid, buffer_id bid, const region_map<write_command_state>& last_writers_map, const region<3>& write_req, abstract_command* write_cmd);

void process_task_side_effect_requirements(const task& tsk);

Expand All @@ -117,7 +117,7 @@ class distributed_graph_generator {
void prune_commands_before(const command_id epoch);

private:
using buffer_read_map = std::unordered_map<buffer_id, GridRegion<3>>;
using buffer_read_map = std::unordered_map<buffer_id, region<3>>;
using side_effect_map = std::unordered_map<host_object_id, command_id>;

size_t m_num_nodes;
Expand Down
2 changes: 1 addition & 1 deletion include/fence.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class buffer_fence_promise final : public detail::fence_promise {
void fulfill() override {
const auto access_info =
runtime::get_instance().get_buffer_manager().access_host_buffer<DataT, Dims>(get_buffer_id(m_buffer), access_mode::read, m_subrange);
assert((id_cast<Dims>(access_info.backing_buffer_offset) <= m_subrange.offset) == id_cast<Dims>(id<3>(true, true, true)));
assert(all_true(id_cast<Dims>(access_info.backing_buffer_offset) <= m_subrange.offset));
auto data = std::make_unique<DataT[]>(m_subrange.range.size());
memcpy_strided_host(access_info.ptr, data.get(), sizeof(DataT), range_cast<Dims>(access_info.backing_buffer_range),
m_subrange.offset - id_cast<Dims>(access_info.backing_buffer_offset), m_subrange.range, {}, m_subrange.range);
Expand Down
Loading

0 comments on commit ee39542

Please sign in to comment.