Skip to content

Commit

Permalink
Refactor unique_payload_ptr; move frame/payload_ptr types into separa…
Browse files Browse the repository at this point in the history
…te headers
  • Loading branch information
fknorr committed Jul 18, 2022
1 parent 738a789 commit 3e72250
Show file tree
Hide file tree
Showing 12 changed files with 158 additions and 148 deletions.
1 change: 1 addition & 0 deletions include/buffer_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "buffer_storage.h"
#include "device_queue.h"
#include "mpi_support.h"
#include "payload.h"
#include "ranges.h"
#include "region_map.h"
#include "types.h"
Expand Down
6 changes: 3 additions & 3 deletions include/buffer_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#include <CL/sycl.hpp>

#include "mpi_support.h"
#include "payload.h"
#include "ranges.h"
#include "workaround.h"

Expand Down Expand Up @@ -260,7 +260,7 @@ namespace detail {
// TODO: Optimize for contiguous copies - we could do a single SYCL H->D copy directly.
else if(source.get_type() == buffer_type::HOST_BUFFER) {
auto& host_source = dynamic_cast<const host_buffer_storage<DataT, Dims>&>(source);
unique_payload_ptr tmp{unique_payload_ptr::allocate_uninitialized<DataT>, copy_range.size()};
auto tmp = make_uninitialized_payload<DataT>(copy_range.size());
host_source.get_data(subrange{source_offset, copy_range}, static_cast<DataT*>(tmp.get_pointer()));
set_data(subrange{target_offset, copy_range}, static_cast<const DataT*>(tmp.get_pointer()));
}
Expand All @@ -278,7 +278,7 @@ namespace detail {
// TODO: Optimize for contiguous copies - we could do a single SYCL D->H copy directly.
if(source.get_type() == buffer_type::DEVICE_BUFFER) {
// This looks more convoluted than using a vector<DataT>, but that would break if DataT == bool
unique_payload_ptr tmp{unique_payload_ptr::allocate_uninitialized<DataT>, copy_range.size()};
auto tmp = make_uninitialized_payload<DataT>(copy_range.size());
source.get_data(subrange{source_offset, copy_range}, static_cast<DataT*>(tmp.get_pointer()));
set_data(subrange{target_offset, copy_range}, static_cast<const DataT*>(tmp.get_pointer()));
}
Expand Down
2 changes: 1 addition & 1 deletion include/buffer_transfer_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#include "buffer_storage.h"
#include "command.h"
#include "mpi_support.h"
#include "frame.h"
#include "types.h"

namespace celerity {
Expand Down
100 changes: 100 additions & 0 deletions include/frame.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#pragma once

#include <cassert>

#include "payload.h"

namespace celerity::detail {

struct from_payload_count_tag {
} inline constexpr from_payload_count;

struct from_size_bytes_tag {
} inline constexpr from_size_bytes;

// unique_frame_ptr manually `operator new`s the underlying frame memory, placement-new-constructs the frame and casts it to a frame pointer.
// I'm convinced that I'm actually, technically allowed to use the resulting frame pointer in a delete-expression and therefore keep `std::default_delete` as
// the deleter type for `unique_frame_ptr::impl`: Following the standard, delete-expression requires its operand to originate from a new-expression,
// and placement-new is defined to be a new-expression. The following implicit call to operator delete is also legal, since memory was obtained from
// `operator new`. Despite the beauty of this standards loophole, @BlackMark29A and @PeterTh couldn't be convinced to let me merge it :( -- @fknorr
template <typename Frame>
struct unique_frame_delete {
void operator()(Frame* frame) const {
if(frame) {
frame->~Frame();
operator delete(frame);
}
}
};

/**
* Owning smart pointer for variable-sized structures with a 0-sized array of type Frame::payload_type as the last member.
*/
template <typename Frame>
class unique_frame_ptr : private std::unique_ptr<Frame, unique_frame_delete<Frame>> {
private:
using impl = std::unique_ptr<Frame, unique_frame_delete<Frame>>;

friend class unique_payload_ptr;

public:
using payload_type = typename Frame::payload_type;

unique_frame_ptr() = default;

unique_frame_ptr(from_payload_count_tag, size_t payload_count) : unique_frame_ptr(from_size_bytes, sizeof(Frame) + sizeof(payload_type) * payload_count) {}

unique_frame_ptr(from_size_bytes_tag, size_t size_bytes) : impl(make_frame(size_bytes)), size_bytes(size_bytes) {}

unique_frame_ptr(unique_frame_ptr&& other) noexcept : impl(static_cast<impl&&>(other)), size_bytes(other.size_bytes) { other.size_bytes = 0; }

unique_frame_ptr& operator=(unique_frame_ptr&& other) noexcept {
if(this == &other) return *this; // gracefully handle self-assignment
static_cast<impl&>(*this) = static_cast<impl&&>(other); // delegate to base class unique_ptr<Frame>::operator=() to delete previously held frame
size_bytes = other.size_bytes;
other.size_bytes = 0;
return *this;
}

Frame* get_pointer() { return impl::get(); }
const Frame* get_pointer() const { return impl::get(); }
size_t get_size_bytes() const { return size_bytes; }
size_t get_payload_count() const { return (size_bytes - sizeof(Frame)) / sizeof(payload_type); }

using impl::operator bool;
using impl::operator*;
using impl::operator->;

unique_payload_ptr into_payload_ptr() && {
unique_payload_ptr::deleter_type deleter{delete_frame_from_payload}; // allocate deleter (aka std::function) first so `result` construction is noexcept
const auto frame = this->release();
const auto payload = reinterpret_cast<typename Frame::payload_type*>(frame + 1); // payload is located at +sizeof(Frame) bytes (+1 Frame object)
return unique_payload_ptr{payload, std::move(deleter)};
}

private:
size_t size_bytes = 0;

static Frame* make_frame(const size_t size_bytes) {
assert(size_bytes >= sizeof(Frame));
assert((size_bytes - sizeof(Frame)) % sizeof(payload_type) == 0);
const auto mem = operator new(size_bytes);
try {
new(mem) Frame;
} catch(...) {
operator delete(mem);
throw;
}
return static_cast<Frame*>(mem);
}


private:
static void delete_frame_from_payload(void* const type_erased_payload) {
const auto payload = static_cast<typename Frame::payload_type*>(type_erased_payload);
const auto frame = reinterpret_cast<Frame*>(payload) - 1; // frame header is located at -sizeof(Frame) bytes (-1 Frame object)
delete frame;
}
};

} // namespace celerity::detail
1 change: 1 addition & 0 deletions include/graph_serializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <vector>

#include "command.h"
#include "frame.h"
#include "types.h"

namespace celerity {
Expand Down
145 changes: 5 additions & 140 deletions include/mpi_support.h
Original file line number Diff line number Diff line change
@@ -1,144 +1,9 @@
#pragma once

#include <cassert>
#include <functional>
#include <memory>
#include <utility>
namespace celerity::detail::mpi_support {

namespace celerity::detail {
constexpr int TAG_CMD = 0;
constexpr int TAG_DATA_TRANSFER = 1;
constexpr int TAG_TELEMETRY = 2;

namespace mpi_support {

constexpr int TAG_CMD = 0;
constexpr int TAG_DATA_TRANSFER = 1;
constexpr int TAG_TELEMETRY = 2;

} // namespace mpi_support

struct from_payload_count_tag {
} inline constexpr from_payload_count;

struct from_size_bytes_tag {
} inline constexpr from_size_bytes;

// unique_frame_ptr manually `operator new`s the underlying frame memory, placement-new-constructs the frame and casts it to a frame pointer.
// I'm convinced that I'm actually, technically allowed to use the resulting frame pointer in a delete-expression and therefore keep `std::default_delete` as
// the deleter type for `unique_frame_ptr::impl`: Following the standard, delete-expression requires its operand to originate from a new-expression,
// and placement-new is defined to be a new-expression. The following implicit call to operator delete is also legal, since memory was obtained from
// `operator new`. Despite the beauty of this standards loophole, @BlackMark29A and @PeterTh couldn't be convinced to let me merge it :( -- @fknorr
template <typename Frame>
struct unique_frame_delete {
void operator()(Frame* frame) const {
if(frame) {
frame->~Frame();
operator delete(frame);
}
}
};

/**
* Owning smart pointer for variable-sized structures with a 0-sized array of type Frame::payload_type as the last member.
*/
template <typename Frame>
class unique_frame_ptr : private std::unique_ptr<Frame, unique_frame_delete<Frame>> {
private:
using impl = std::unique_ptr<Frame, unique_frame_delete<Frame>>;

friend class unique_payload_ptr;

public:
using payload_type = typename Frame::payload_type;

unique_frame_ptr() = default;

unique_frame_ptr(from_payload_count_tag, size_t payload_count) : unique_frame_ptr(from_size_bytes, sizeof(Frame) + sizeof(payload_type) * payload_count) {}

unique_frame_ptr(from_size_bytes_tag, size_t size_bytes) : impl(make_frame(size_bytes)), size_bytes(size_bytes) {}

unique_frame_ptr(unique_frame_ptr&& other) noexcept : impl(static_cast<impl&&>(other)), size_bytes(other.size_bytes) { other.size_bytes = 0; }

unique_frame_ptr& operator=(unique_frame_ptr&& other) noexcept {
if(this == &other) return *this; // gracefully handle self-assignment
static_cast<impl&>(*this) = static_cast<impl&&>(other); // delegate to base class unique_ptr<Frame>::operator=() to delete previously held frame
size_bytes = other.size_bytes;
other.size_bytes = 0;
return *this;
}

Frame* get_pointer() { return impl::get(); }
const Frame* get_pointer() const { return impl::get(); }
size_t get_size_bytes() const { return size_bytes; }
size_t get_payload_count() const { return (size_bytes - sizeof(Frame)) / sizeof(payload_type); }

using impl::operator bool;
using impl::operator*;
using impl::operator->;

private:
size_t size_bytes = 0;

static Frame* make_frame(const size_t size_bytes) {
assert(size_bytes >= sizeof(Frame));
assert((size_bytes - sizeof(Frame)) % sizeof(payload_type) == 0);
const auto mem = operator new(size_bytes);
try {
new(mem) Frame;
} catch(...) {
operator delete(mem);
throw;
}
return static_cast<Frame*>(mem);
}
};

class unique_payload_ptr : private std::unique_ptr<void, std::function<void(void*)>> {
private:
using impl = std::unique_ptr<void, std::function<void(void*)>>;

public:
template <typename T>
struct allocate_uninitialized_tag {};

template <typename T>
inline static constexpr allocate_uninitialized_tag<T> allocate_uninitialized;

unique_payload_ptr() noexcept = default;

template <typename T>
explicit unique_payload_ptr(allocate_uninitialized_tag<T>, size_t count) : impl(allocate_uninitialized_payload<T>(count)) {}

template <typename Frame>
explicit unique_payload_ptr(unique_frame_ptr<Frame> frame) : impl(unique_frame_to_payload(std::move(frame))) {}

void* get_pointer() { return impl::get(); }
const void* get_pointer() const { return impl::get(); }

using impl::operator bool;

private:
template <typename Frame>
static void delete_frame_from_payload(void* const type_erased_payload) {
const auto payload = static_cast<typename Frame::payload_type*>(type_erased_payload);
const auto frame = reinterpret_cast<Frame*>(payload) - 1; // frame header is located at -sizeof(Frame) bytes (-1 Frame object)
delete frame;
}

template <typename Frame>
static impl unique_frame_to_payload(unique_frame_ptr<Frame> unique_frame) {
deleter_type deleter{delete_frame_from_payload<Frame>}; // allocate deleter (aka std::function) first so `impl` construction is noexcept
const auto frame = unique_frame.release();
const auto payload = reinterpret_cast<typename Frame::payload_type*>(frame + 1); // payload is located at +sizeof(Frame) bytes (+1 Frame object)
return impl{payload, std::move(deleter)};
}

static void delete_uninitialized_payload(void* const p) { operator delete(p); }

template <typename T>
static impl allocate_uninitialized_payload(size_t count) {
deleter_type deleter{delete_uninitialized_payload}; // allocate deleter (aka std::function) first so `impl` construction is noexcept
const auto payload = operator new(count * sizeof(T));
return impl{payload, std::move(deleter)};
}
};

} // namespace celerity::detail
} // namespace celerity::detail::mpi_support
42 changes: 42 additions & 0 deletions include/payload.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#pragma once

#include <functional>
#include <memory>
#include <utility>

namespace celerity::detail {

/*
* Owning smart pointer for arbitrary structures with a type-erased deleter.
*/
class unique_payload_ptr : private std::unique_ptr<void, std::function<void(void*)>> {
private:
using impl = std::unique_ptr<void, std::function<void(void*)>>;

public:
using typename impl::deleter_type;

template <typename T>
struct allocate_uninitialized_tag {};

template <typename T>
inline static constexpr allocate_uninitialized_tag<T> allocate_uninitialized;

unique_payload_ptr() noexcept = default;
unique_payload_ptr(void* const ptr, deleter_type&& deleter) : impl{ptr, std::move(deleter)} {}

void* get_pointer() { return impl::get(); }
const void* get_pointer() const { return impl::get(); }

using impl::operator bool;
};

template <typename T>
unique_payload_ptr make_uninitialized_payload(const size_t count) {
// allocate deleter (aka std::function) first so construction unique_payload_ptr is noexcept
unique_payload_ptr::deleter_type deleter{[](void* const p) { operator delete(p); }};
const auto payload = operator new(count * sizeof(T));
return unique_payload_ptr{payload, std::move(deleter)};
}

} // namespace celerity::detail
2 changes: 1 addition & 1 deletion include/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
#include "command.h"
#include "config.h"
#include "device_queue.h"
#include "frame.h"
#include "host_queue.h"
#include "mpi_support.h"
#include "types.h"

namespace celerity {
Expand Down
2 changes: 1 addition & 1 deletion src/buffer_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ namespace detail {
intersection.scanByBoxes([&](const GridBox<3>& box) {
auto sr = grid_box_to_subrange(box);
// TODO can this temp buffer be avoided?
unique_payload_ptr tmp{unique_payload_ptr::allocate_uninitialized<std::byte>, sr.range.size() * element_size};
auto tmp = make_uninitialized_payload<std::byte>(sr.range.size() * element_size);
linearize_subrange(t.linearized.get_pointer(), tmp.get_pointer(), element_size, t.sr.range, {sr.offset - t.sr.offset, sr.range});
target_buffer.storage->set_data({target_buffer.get_local_offset(sr.offset), sr.range}, tmp.get_pointer());
updated_region = GridRegion<3>::merge(updated_region, box);
Expand Down
2 changes: 1 addition & 1 deletion src/buffer_transfer_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ namespace detail {

void buffer_transfer_manager::commit_transfer(transfer_in& transfer) {
const auto& frame = *transfer.frame;
unique_payload_ptr payload{std::move(transfer.frame)};
auto payload = std::move(transfer.frame).into_payload_ptr();

if(frame.rid) {
auto& rm = runtime::get_instance().get_reduction_manager();
Expand Down
1 change: 1 addition & 0 deletions src/executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <queue>

#include "distr_queue.h"
#include "frame.h"
#include "log.h"
#include "mpi_support.h"
#include "named_threads.h"
Expand Down
2 changes: 1 addition & 1 deletion src/worker_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ namespace detail {
for(auto rid : tsk->get_reductions()) {
auto reduction = reduction_mngr.get_reduction(rid);
const auto element_size = buffer_mngr.get_buffer_info(reduction.output_buffer_id).element_size;
unique_payload_ptr operand{unique_payload_ptr::allocate_uninitialized<std::byte>, element_size};
auto operand = make_uninitialized_payload<std::byte>(element_size);
buffer_mngr.get_buffer_data(reduction.output_buffer_id, {{}, {1, 1, 1}}, operand.get_pointer());
reduction_mngr.push_overlapping_reduction_data(rid, local_nid, std::move(operand));
}
Expand Down

0 comments on commit 3e72250

Please sign in to comment.