From 3e722501a21c4634205de480abad61305a108c77 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Mon, 18 Jul 2022 17:28:34 +0200 Subject: [PATCH] Refactor unique_payload_ptr; move frame/payload_ptr types into separate headers --- include/buffer_manager.h | 1 + include/buffer_storage.h | 6 +- include/buffer_transfer_manager.h | 2 +- include/frame.h | 100 +++++++++++++++++++++ include/graph_serializer.h | 1 + include/mpi_support.h | 145 ++---------------------------- include/payload.h | 42 +++++++++ include/runtime.h | 2 +- src/buffer_manager.cc | 2 +- src/buffer_transfer_manager.cc | 2 +- src/executor.cc | 1 + src/worker_job.cc | 2 +- 12 files changed, 158 insertions(+), 148 deletions(-) create mode 100644 include/frame.h create mode 100644 include/payload.h diff --git a/include/buffer_manager.h b/include/buffer_manager.h index 4429ac7b4..8724309e1 100644 --- a/include/buffer_manager.h +++ b/include/buffer_manager.h @@ -15,6 +15,7 @@ #include "buffer_storage.h" #include "device_queue.h" #include "mpi_support.h" +#include "payload.h" #include "ranges.h" #include "region_map.h" #include "types.h" diff --git a/include/buffer_storage.h b/include/buffer_storage.h index 999055b15..eb52c8053 100644 --- a/include/buffer_storage.h +++ b/include/buffer_storage.h @@ -6,7 +6,7 @@ #include -#include "mpi_support.h" +#include "payload.h" #include "ranges.h" #include "workaround.h" @@ -260,7 +260,7 @@ namespace detail { // TODO: Optimize for contiguous copies - we could do a single SYCL H->D copy directly. else if(source.get_type() == buffer_type::HOST_BUFFER) { auto& host_source = dynamic_cast&>(source); - unique_payload_ptr tmp{unique_payload_ptr::allocate_uninitialized, copy_range.size()}; + auto tmp = make_uninitialized_payload(copy_range.size()); host_source.get_data(subrange{source_offset, copy_range}, static_cast(tmp.get_pointer())); set_data(subrange{target_offset, copy_range}, static_cast(tmp.get_pointer())); } @@ -278,7 +278,7 @@ namespace detail { // TODO: Optimize for contiguous copies - we could do a single SYCL D->H copy directly. if(source.get_type() == buffer_type::DEVICE_BUFFER) { // This looks more convoluted than using a vector, but that would break if DataT == bool - unique_payload_ptr tmp{unique_payload_ptr::allocate_uninitialized, copy_range.size()}; + auto tmp = make_uninitialized_payload(copy_range.size()); source.get_data(subrange{source_offset, copy_range}, static_cast(tmp.get_pointer())); set_data(subrange{target_offset, copy_range}, static_cast(tmp.get_pointer())); } diff --git a/include/buffer_transfer_manager.h b/include/buffer_transfer_manager.h index a5a2e5c7d..3043bb5ad 100644 --- a/include/buffer_transfer_manager.h +++ b/include/buffer_transfer_manager.h @@ -12,7 +12,7 @@ #include "buffer_storage.h" #include "command.h" -#include "mpi_support.h" +#include "frame.h" #include "types.h" namespace celerity { diff --git a/include/frame.h b/include/frame.h new file mode 100644 index 000000000..a82e95c6b --- /dev/null +++ b/include/frame.h @@ -0,0 +1,100 @@ +#pragma once + +#include + +#include "payload.h" + +namespace celerity::detail { + +struct from_payload_count_tag { +} inline constexpr from_payload_count; + +struct from_size_bytes_tag { +} inline constexpr from_size_bytes; + +// unique_frame_ptr manually `operator new`s the underlying frame memory, placement-new-constructs the frame and casts it to a frame pointer. +// I'm convinced that I'm actually, technically allowed to use the resulting frame pointer in a delete-expression and therefore keep `std::default_delete` as +// the deleter type for `unique_frame_ptr::impl`: Following the standard, delete-expression requires its operand to originate from a new-expression, +// and placement-new is defined to be a new-expression. The following implicit call to operator delete is also legal, since memory was obtained from +// `operator new`. Despite the beauty of this standards loophole, @BlackMark29A and @PeterTh couldn't be convinced to let me merge it :( -- @fknorr +template +struct unique_frame_delete { + void operator()(Frame* frame) const { + if(frame) { + frame->~Frame(); + operator delete(frame); + } + } +}; + +/** + * Owning smart pointer for variable-sized structures with a 0-sized array of type Frame::payload_type as the last member. + */ +template +class unique_frame_ptr : private std::unique_ptr> { + private: + using impl = std::unique_ptr>; + + friend class unique_payload_ptr; + + public: + using payload_type = typename Frame::payload_type; + + unique_frame_ptr() = default; + + unique_frame_ptr(from_payload_count_tag, size_t payload_count) : unique_frame_ptr(from_size_bytes, sizeof(Frame) + sizeof(payload_type) * payload_count) {} + + unique_frame_ptr(from_size_bytes_tag, size_t size_bytes) : impl(make_frame(size_bytes)), size_bytes(size_bytes) {} + + unique_frame_ptr(unique_frame_ptr&& other) noexcept : impl(static_cast(other)), size_bytes(other.size_bytes) { other.size_bytes = 0; } + + unique_frame_ptr& operator=(unique_frame_ptr&& other) noexcept { + if(this == &other) return *this; // gracefully handle self-assignment + static_cast(*this) = static_cast(other); // delegate to base class unique_ptr::operator=() to delete previously held frame + size_bytes = other.size_bytes; + other.size_bytes = 0; + return *this; + } + + Frame* get_pointer() { return impl::get(); } + const Frame* get_pointer() const { return impl::get(); } + size_t get_size_bytes() const { return size_bytes; } + size_t get_payload_count() const { return (size_bytes - sizeof(Frame)) / sizeof(payload_type); } + + using impl::operator bool; + using impl::operator*; + using impl::operator->; + + unique_payload_ptr into_payload_ptr() && { + unique_payload_ptr::deleter_type deleter{delete_frame_from_payload}; // allocate deleter (aka std::function) first so `result` construction is noexcept + const auto frame = this->release(); + const auto payload = reinterpret_cast(frame + 1); // payload is located at +sizeof(Frame) bytes (+1 Frame object) + return unique_payload_ptr{payload, std::move(deleter)}; + } + + private: + size_t size_bytes = 0; + + static Frame* make_frame(const size_t size_bytes) { + assert(size_bytes >= sizeof(Frame)); + assert((size_bytes - sizeof(Frame)) % sizeof(payload_type) == 0); + const auto mem = operator new(size_bytes); + try { + new(mem) Frame; + } catch(...) { + operator delete(mem); + throw; + } + return static_cast(mem); + } + + + private: + static void delete_frame_from_payload(void* const type_erased_payload) { + const auto payload = static_cast(type_erased_payload); + const auto frame = reinterpret_cast(payload) - 1; // frame header is located at -sizeof(Frame) bytes (-1 Frame object) + delete frame; + } +}; + +} // namespace celerity::detail diff --git a/include/graph_serializer.h b/include/graph_serializer.h index 82da5e9f0..278b5139c 100644 --- a/include/graph_serializer.h +++ b/include/graph_serializer.h @@ -4,6 +4,7 @@ #include #include "command.h" +#include "frame.h" #include "types.h" namespace celerity { diff --git a/include/mpi_support.h b/include/mpi_support.h index a1d900581..0867630d5 100644 --- a/include/mpi_support.h +++ b/include/mpi_support.h @@ -1,144 +1,9 @@ #pragma once -#include -#include -#include -#include +namespace celerity::detail::mpi_support { -namespace celerity::detail { +constexpr int TAG_CMD = 0; +constexpr int TAG_DATA_TRANSFER = 1; +constexpr int TAG_TELEMETRY = 2; -namespace mpi_support { - - constexpr int TAG_CMD = 0; - constexpr int TAG_DATA_TRANSFER = 1; - constexpr int TAG_TELEMETRY = 2; - -} // namespace mpi_support - -struct from_payload_count_tag { -} inline constexpr from_payload_count; - -struct from_size_bytes_tag { -} inline constexpr from_size_bytes; - -// unique_frame_ptr manually `operator new`s the underlying frame memory, placement-new-constructs the frame and casts it to a frame pointer. -// I'm convinced that I'm actually, technically allowed to use the resulting frame pointer in a delete-expression and therefore keep `std::default_delete` as -// the deleter type for `unique_frame_ptr::impl`: Following the standard, delete-expression requires its operand to originate from a new-expression, -// and placement-new is defined to be a new-expression. The following implicit call to operator delete is also legal, since memory was obtained from -// `operator new`. Despite the beauty of this standards loophole, @BlackMark29A and @PeterTh couldn't be convinced to let me merge it :( -- @fknorr -template -struct unique_frame_delete { - void operator()(Frame* frame) const { - if(frame) { - frame->~Frame(); - operator delete(frame); - } - } -}; - -/** - * Owning smart pointer for variable-sized structures with a 0-sized array of type Frame::payload_type as the last member. - */ -template -class unique_frame_ptr : private std::unique_ptr> { - private: - using impl = std::unique_ptr>; - - friend class unique_payload_ptr; - - public: - using payload_type = typename Frame::payload_type; - - unique_frame_ptr() = default; - - unique_frame_ptr(from_payload_count_tag, size_t payload_count) : unique_frame_ptr(from_size_bytes, sizeof(Frame) + sizeof(payload_type) * payload_count) {} - - unique_frame_ptr(from_size_bytes_tag, size_t size_bytes) : impl(make_frame(size_bytes)), size_bytes(size_bytes) {} - - unique_frame_ptr(unique_frame_ptr&& other) noexcept : impl(static_cast(other)), size_bytes(other.size_bytes) { other.size_bytes = 0; } - - unique_frame_ptr& operator=(unique_frame_ptr&& other) noexcept { - if(this == &other) return *this; // gracefully handle self-assignment - static_cast(*this) = static_cast(other); // delegate to base class unique_ptr::operator=() to delete previously held frame - size_bytes = other.size_bytes; - other.size_bytes = 0; - return *this; - } - - Frame* get_pointer() { return impl::get(); } - const Frame* get_pointer() const { return impl::get(); } - size_t get_size_bytes() const { return size_bytes; } - size_t get_payload_count() const { return (size_bytes - sizeof(Frame)) / sizeof(payload_type); } - - using impl::operator bool; - using impl::operator*; - using impl::operator->; - - private: - size_t size_bytes = 0; - - static Frame* make_frame(const size_t size_bytes) { - assert(size_bytes >= sizeof(Frame)); - assert((size_bytes - sizeof(Frame)) % sizeof(payload_type) == 0); - const auto mem = operator new(size_bytes); - try { - new(mem) Frame; - } catch(...) { - operator delete(mem); - throw; - } - return static_cast(mem); - } -}; - -class unique_payload_ptr : private std::unique_ptr> { - private: - using impl = std::unique_ptr>; - - public: - template - struct allocate_uninitialized_tag {}; - - template - inline static constexpr allocate_uninitialized_tag allocate_uninitialized; - - unique_payload_ptr() noexcept = default; - - template - explicit unique_payload_ptr(allocate_uninitialized_tag, size_t count) : impl(allocate_uninitialized_payload(count)) {} - - template - explicit unique_payload_ptr(unique_frame_ptr frame) : impl(unique_frame_to_payload(std::move(frame))) {} - - void* get_pointer() { return impl::get(); } - const void* get_pointer() const { return impl::get(); } - - using impl::operator bool; - - private: - template - static void delete_frame_from_payload(void* const type_erased_payload) { - const auto payload = static_cast(type_erased_payload); - const auto frame = reinterpret_cast(payload) - 1; // frame header is located at -sizeof(Frame) bytes (-1 Frame object) - delete frame; - } - - template - static impl unique_frame_to_payload(unique_frame_ptr unique_frame) { - deleter_type deleter{delete_frame_from_payload}; // allocate deleter (aka std::function) first so `impl` construction is noexcept - const auto frame = unique_frame.release(); - const auto payload = reinterpret_cast(frame + 1); // payload is located at +sizeof(Frame) bytes (+1 Frame object) - return impl{payload, std::move(deleter)}; - } - - static void delete_uninitialized_payload(void* const p) { operator delete(p); } - - template - static impl allocate_uninitialized_payload(size_t count) { - deleter_type deleter{delete_uninitialized_payload}; // allocate deleter (aka std::function) first so `impl` construction is noexcept - const auto payload = operator new(count * sizeof(T)); - return impl{payload, std::move(deleter)}; - } -}; - -} // namespace celerity::detail +} // namespace celerity::detail::mpi_support diff --git a/include/payload.h b/include/payload.h new file mode 100644 index 000000000..640ba3b4c --- /dev/null +++ b/include/payload.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include + +namespace celerity::detail { + +/* + * Owning smart pointer for arbitrary structures with a type-erased deleter. + */ +class unique_payload_ptr : private std::unique_ptr> { + private: + using impl = std::unique_ptr>; + + public: + using typename impl::deleter_type; + + template + struct allocate_uninitialized_tag {}; + + template + inline static constexpr allocate_uninitialized_tag allocate_uninitialized; + + unique_payload_ptr() noexcept = default; + unique_payload_ptr(void* const ptr, deleter_type&& deleter) : impl{ptr, std::move(deleter)} {} + + void* get_pointer() { return impl::get(); } + const void* get_pointer() const { return impl::get(); } + + using impl::operator bool; +}; + +template +unique_payload_ptr make_uninitialized_payload(const size_t count) { + // allocate deleter (aka std::function) first so construction unique_payload_ptr is noexcept + unique_payload_ptr::deleter_type deleter{[](void* const p) { operator delete(p); }}; + const auto payload = operator new(count * sizeof(T)); + return unique_payload_ptr{payload, std::move(deleter)}; +} + +} // namespace celerity::detail diff --git a/include/runtime.h b/include/runtime.h index 95e00ecc9..55af876be 100644 --- a/include/runtime.h +++ b/include/runtime.h @@ -9,8 +9,8 @@ #include "command.h" #include "config.h" #include "device_queue.h" +#include "frame.h" #include "host_queue.h" -#include "mpi_support.h" #include "types.h" namespace celerity { diff --git a/src/buffer_manager.cc b/src/buffer_manager.cc index a56800a9a..a00824ea5 100644 --- a/src/buffer_manager.cc +++ b/src/buffer_manager.cc @@ -170,7 +170,7 @@ namespace detail { intersection.scanByBoxes([&](const GridBox<3>& box) { auto sr = grid_box_to_subrange(box); // TODO can this temp buffer be avoided? - unique_payload_ptr tmp{unique_payload_ptr::allocate_uninitialized, sr.range.size() * element_size}; + auto tmp = make_uninitialized_payload(sr.range.size() * element_size); linearize_subrange(t.linearized.get_pointer(), tmp.get_pointer(), element_size, t.sr.range, {sr.offset - t.sr.offset, sr.range}); target_buffer.storage->set_data({target_buffer.get_local_offset(sr.offset), sr.range}, tmp.get_pointer()); updated_region = GridRegion<3>::merge(updated_region, box); diff --git a/src/buffer_transfer_manager.cc b/src/buffer_transfer_manager.cc index 1785b2fbf..d2ba5b155 100644 --- a/src/buffer_transfer_manager.cc +++ b/src/buffer_transfer_manager.cc @@ -145,7 +145,7 @@ namespace detail { void buffer_transfer_manager::commit_transfer(transfer_in& transfer) { const auto& frame = *transfer.frame; - unique_payload_ptr payload{std::move(transfer.frame)}; + auto payload = std::move(transfer.frame).into_payload_ptr(); if(frame.rid) { auto& rm = runtime::get_instance().get_reduction_manager(); diff --git a/src/executor.cc b/src/executor.cc index f37904427..d5e8ad04e 100644 --- a/src/executor.cc +++ b/src/executor.cc @@ -3,6 +3,7 @@ #include #include "distr_queue.h" +#include "frame.h" #include "log.h" #include "mpi_support.h" #include "named_threads.h" diff --git a/src/worker_job.cc b/src/worker_job.cc index 067749d6e..5f132b1d9 100644 --- a/src/worker_job.cc +++ b/src/worker_job.cc @@ -212,7 +212,7 @@ namespace detail { for(auto rid : tsk->get_reductions()) { auto reduction = reduction_mngr.get_reduction(rid); const auto element_size = buffer_mngr.get_buffer_info(reduction.output_buffer_id).element_size; - unique_payload_ptr operand{unique_payload_ptr::allocate_uninitialized, element_size}; + auto operand = make_uninitialized_payload(element_size); buffer_mngr.get_buffer_data(reduction.output_buffer_id, {{}, {1, 1, 1}}, operand.get_pointer()); reduction_mngr.push_overlapping_reduction_data(rid, local_nid, std::move(operand)); }